/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1202 - (show annotations)
Sun Nov 4 16:13:29 2012 UTC (6 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 163878 byte(s)
Lose compiler warnings.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136 #include "pcre_internal.h"
137
138 /* The pcre_printint() function, which prints the internal form of a compiled
139 regex, is held in a separate file so that (a) it can be compiled in either
140 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
141 when that is compiled in debug mode. */
142
143 #ifdef SUPPORT_PCRE8
144 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
145 #endif
146 #ifdef SUPPORT_PCRE16
147 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
148 #endif
149 #ifdef SUPPORT_PCRE32
150 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152
153 /* We need access to some of the data tables that PCRE uses. So as not to have
154 to keep two copies, we include the source files here, changing the names of the
155 external symbols to prevent clashes. */
156
157 #define PCRE_INCLUDED
158
159 #include "pcre_tables.c"
160 #include "pcre_ucd.c"
161
162 /* The definition of the macro PRINTABLE, which determines whether to print an
163 output character as-is or as a hex value when showing compiled patterns, is
164 the same as in the printint.src file. We use it here in cases when the locale
165 has not been explicitly changed, so as to get consistent output from systems
166 that differ in their output from isprint() even in the "C" locale. */
167
/* PRINTABLE(c) is a fixed-range test that does not consult the locale: values
64..254 when EBCDIC is defined, values 32..126 otherwise. PRINTOK(c) uses
isprint() when locale_set is non-zero and PRINTABLE(c) when it is zero. */

#ifndef EBCDIC
#define PRINTABLE(c) (32 <= (c) && (c) < 127)
#else
#define PRINTABLE(c) (64 <= (c) && (c) < 255)
#endif

#define PRINTOK(c) ((locale_set)? isprint(c) : PRINTABLE(c))
175
176 /* Posix support is disabled in 16 or 32 bit only mode. */
177 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
178 #define NOPOSIX
179 #endif
180
181 /* It is possible to compile this test program without including support for
182 testing the POSIX interface, though this is not available via the standard
183 Makefile. */
184
185 #if !defined NOPOSIX
186 #include "pcreposix.h"
187 #endif
188
189 /* It is also possible, originally for the benefit of a version that was
190 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
191 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
192 automatically cut out the UTF support if PCRE is built without it. */
193
194 #ifndef SUPPORT_UTF
195 #ifndef NOUTF
196 #define NOUTF
197 #endif
198 #endif
199
200 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
201 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
202 only from one place and is handled differently). I couldn't dream up any way of
203 using a single macro to do this in a generic way, because of the many different
204 argument requirements. We know that at least one of SUPPORT_PCRE8,
205 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
206 individual mode; then use these in the definitions of generic macros.
207
208 **** Special note about the PCHARSxxx macros: the address of the string to be
209 printed is always given as two arguments: a base address followed by an offset.
210 The base address is cast to the correct data size for 8, 16, or 32 bit data; the
211 offset is in units of this size. If the string were given as base+offset in one
212 argument, the casting might be incorrectly applied. */
213
214 #ifdef SUPPORT_PCRE8
215
216 #define PCHARS8(lv, p, offset, len, f) \
217 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
218
219 #define PCHARSV8(p, offset, len, f) \
220 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
221
222 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
223 p = read_capture_name8(p, cn8, re)
224
225 #define STRLEN8(p) ((int)strlen((char *)p))
226
227 #define SET_PCRE_CALLOUT8(callout) \
228 pcre_callout = callout
229
230 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
231 pcre_assign_jit_stack(extra, callback, userdata)
232
233 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
234 re = pcre_compile((char *)pat, options, error, erroffset, tables)
235
236 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
237 namesptr, cbuffer, size) \
238 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
239 (char *)namesptr, cbuffer, size)
240
241 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
242 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
243
244 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
245 offsets, size_offsets, workspace, size_workspace) \
246 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
247 offsets, size_offsets, workspace, size_workspace)
248
249 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
250 offsets, size_offsets) \
251 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
252 offsets, size_offsets)
253
254 #define PCRE_FREE_STUDY8(extra) \
255 pcre_free_study(extra)
256
257 #define PCRE_FREE_SUBSTRING8(substring) \
258 pcre_free_substring(substring)
259
260 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
261 pcre_free_substring_list(listptr)
262
263 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
264 getnamesptr, subsptr) \
265 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
266 (char *)getnamesptr, subsptr)
267
/* Call pcre_get_stringnumber() for the name 'ptr' in the compiled pattern 're'
and store the result in 'n'. The second parameter used to be spelled 'rc' while
the expansion referred to 're', so the argument was ignored and the macro only
compiled where a variable named 're' happened to be in scope. The parameter is
now the pattern itself; callers that pass 're' are unaffected. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)
270
271 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
272 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
273
274 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
275 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
276
277 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
278 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
279
280 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
281 pcre_printint(re, outfile, debug_lengths)
282
283 #define PCRE_STUDY8(extra, re, options, error) \
284 extra = pcre_study(re, options, error)
285
286 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
287 pcre_jit_stack_alloc(startsize, maxsize)
288
289 #define PCRE_JIT_STACK_FREE8(stack) \
290 pcre_jit_stack_free(stack)
291
292 #define pcre8_maketables pcre_maketables
293
294 #endif /* SUPPORT_PCRE8 */
295
296 /* -----------------------------------------------------------*/
297
298 #ifdef SUPPORT_PCRE16
299
300 #define PCHARS16(lv, p, offset, len, f) \
301 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302
303 #define PCHARSV16(p, offset, len, f) \
304 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305
306 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
307 p = read_capture_name16(p, cn16, re)
308
309 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310
311 #define SET_PCRE_CALLOUT16(callout) \
312 pcre16_callout = (int (*)(pcre16_callout_block *))callout
313
314 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315 pcre16_assign_jit_stack((pcre16_extra *)extra, \
316 (pcre16_jit_callback)callback, userdata)
317
318 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320 tables)
321
/* 16-bit form of the copy-named-substring call; the result goes into 'rc'.
'size' is taken to be the size of 'cbuffer' in bytes (the 8-bit macro passes it
through unchanged), so it is converted to a count of 16-bit units here. The
argument is parenthesised so that an expression such as "a + b" is divided as
a whole rather than only its last term. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, \
    (size)/(int)sizeof(PCRE_UCHAR16))
326
/* 16-bit form of the copy-substring call; the result goes into 'rc'. 'size' is
taken to be the size of 'cbuffer' in bytes (the 8-bit macro passes it through
unchanged), so it is converted to a count of 16-bit units here. The argument is
parenthesised so that a compound expression is divided as a whole. */

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/(int)sizeof(PCRE_UCHAR16))
330
331 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332 offsets, size_offsets, workspace, size_workspace) \
333 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335 workspace, size_workspace)
336
337 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338 offsets, size_offsets) \
339 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340 len, start_offset, options, offsets, size_offsets)
341
342 #define PCRE_FREE_STUDY16(extra) \
343 pcre16_free_study((pcre16_extra *)extra)
344
345 #define PCRE_FREE_SUBSTRING16(substring) \
346 pcre16_free_substring((PCRE_SPTR16)substring)
347
348 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350
351 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352 getnamesptr, subsptr) \
353 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355
/* Call pcre16_get_stringnumber() for the name 'ptr' in the compiled pattern
're' and store the result in 'n'. The second parameter used to be spelled 'rc'
while the expansion referred to 're', so the argument was ignored. The pattern
is also cast to (pcre16 *), as the other 16-bit macros in this group do. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
358
359 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361 (PCRE_SPTR16 *)(void*)subsptr)
362
363 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365 (PCRE_SPTR16 **)(void*)listptr)
366
367 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369 tables)
370
371 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372 pcre16_printint(re, outfile, debug_lengths)
373
374 #define PCRE_STUDY16(extra, re, options, error) \
375 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376
377 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379
380 #define PCRE_JIT_STACK_FREE16(stack) \
381 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382
383 #endif /* SUPPORT_PCRE16 */
384
385 /* -----------------------------------------------------------*/
386
387 #ifdef SUPPORT_PCRE32
388
389 #define PCHARS32(lv, p, offset, len, f) \
390 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
391
392 #define PCHARSV32(p, offset, len, f) \
393 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
394
395 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396 p = read_capture_name32(p, cn32, re)
397
398 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
399
400 #define SET_PCRE_CALLOUT32(callout) \
401 pcre32_callout = (int (*)(pcre32_callout_block *))callout
402
403 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
404 pcre32_assign_jit_stack((pcre32_extra *)extra, \
405 (pcre32_jit_callback)callback, userdata)
406
407 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
408 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
409 tables)
410
/* 32-bit form of the copy-named-substring call; the result goes into 'rc'.
'size' is taken to be the size of 'cbuffer' in bytes (the 8-bit macro passes it
through unchanged). It was previously halved, as in the 16-bit macro, which
told the library the buffer held twice as many 32-bit units as it really does.
It is now divided by the size of a 32-bit code unit, and parenthesised so that
a compound expression is divided as a whole. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, \
    (size)/(int)sizeof(PCRE_UCHAR32))
415
/* 32-bit form of the copy-substring call; the result goes into 'rc'. 'size' is
taken to be the size of 'cbuffer' in bytes (the 8-bit macro passes it through
unchanged). It was previously halved, as in the 16-bit macro, which overstated
the buffer capacity by a factor of two. It is now divided by the size of a
32-bit code unit, and parenthesised so that a compound expression is divided
as a whole. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/(int)sizeof(PCRE_UCHAR32))
419
420 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
421 offsets, size_offsets, workspace, size_workspace) \
422 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
423 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
424 workspace, size_workspace)
425
426 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427 offsets, size_offsets) \
428 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
429 len, start_offset, options, offsets, size_offsets)
430
431 #define PCRE_FREE_STUDY32(extra) \
432 pcre32_free_study((pcre32_extra *)extra)
433
434 #define PCRE_FREE_SUBSTRING32(substring) \
435 pcre32_free_substring((PCRE_SPTR32)substring)
436
437 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
438 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
439
440 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
441 getnamesptr, subsptr) \
442 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
443 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
444
/* Call pcre32_get_stringnumber() for the name 'ptr' in the compiled pattern
're' and store the result in 'n'. The second parameter used to be spelled 'rc'
while the expansion referred to 're', so the argument was ignored. The pattern
is also cast to (pcre32 *), as the other 32-bit macros in this group do. */

#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
447
448 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
449 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
450 (PCRE_SPTR32 *)(void*)subsptr)
451
452 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
453 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
454 (PCRE_SPTR32 **)(void*)listptr)
455
456 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
457 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
458 tables)
459
460 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
461 pcre32_printint(re, outfile, debug_lengths)
462
463 #define PCRE_STUDY32(extra, re, options, error) \
464 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
465
466 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
467 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
468
469 #define PCRE_JIT_STACK_FREE32(stack) \
470 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
471
472 #endif /* SUPPORT_PCRE32 */
473
474
475 /* ----- More than one mode is supported; a runtime test is needed, except for
476 pcre_config(), and the JIT stack functions, when it doesn't matter which
477 available version is called. ----- */
478
/* Values that pcre_mode is compared against when choosing between the 8-, 16-
and 32-bit macros. The numbers are spelled out because CHAR_SIZE, where it is
defined, is computed as (1 << pcre_mode), giving 1, 2 and 4 respectively. */

enum {
  PCRE8_MODE = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
484
485 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486 defined (SUPPORT_PCRE32)) >= 2
487
488 #define CHAR_SIZE (1 << pcre_mode)
489
490 /* There doesn't seem to be an easy way of writing these macros that can cope
491 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492 cases separately. */
493
494 /* ----- All three modes supported ----- */
495
496 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
497
498 #define PCHARS(lv, p, offset, len, f) \
499 if (pcre_mode == PCRE32_MODE) \
500 PCHARS32(lv, p, offset, len, f); \
501 else if (pcre_mode == PCRE16_MODE) \
502 PCHARS16(lv, p, offset, len, f); \
503 else \
504 PCHARS8(lv, p, offset, len, f)
505
506 #define PCHARSV(p, offset, len, f) \
507 if (pcre_mode == PCRE32_MODE) \
508 PCHARSV32(p, offset, len, f); \
509 else if (pcre_mode == PCRE16_MODE) \
510 PCHARSV16(p, offset, len, f); \
511 else \
512 PCHARSV8(p, offset, len, f)
513
514 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
515 if (pcre_mode == PCRE32_MODE) \
516 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
517 else if (pcre_mode == PCRE16_MODE) \
518 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
519 else \
520 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
521
522 #define SET_PCRE_CALLOUT(callout) \
523 if (pcre_mode == PCRE32_MODE) \
524 SET_PCRE_CALLOUT32(callout); \
525 else if (pcre_mode == PCRE16_MODE) \
526 SET_PCRE_CALLOUT16(callout); \
527 else \
528 SET_PCRE_CALLOUT8(callout)
529
/* Expression macro: selects STRLEN32, STRLEN16 or STRLEN8 according to
pcre_mode. Any mode other than 32-bit or 16-bit uses the 8-bit form. */

#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? \
     STRLEN32(p) \
     : pcre_mode == PCRE16_MODE ? \
        STRLEN16(p) \
        : STRLEN8(p))
531
532 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
533 if (pcre_mode == PCRE32_MODE) \
534 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
535 else if (pcre_mode == PCRE16_MODE) \
536 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
537 else \
538 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
539
540 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
541 if (pcre_mode == PCRE32_MODE) \
542 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
543 else if (pcre_mode == PCRE16_MODE) \
544 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
545 else \
546 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
547
548 #define PCRE_CONFIG pcre_config
549
550 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
551 namesptr, cbuffer, size) \
552 if (pcre_mode == PCRE32_MODE) \
553 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size); \
555 else if (pcre_mode == PCRE16_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else \
559 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size)
561
562 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
563 if (pcre_mode == PCRE32_MODE) \
564 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
565 else if (pcre_mode == PCRE16_MODE) \
566 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
567 else \
568 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
569
570 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
571 offsets, size_offsets, workspace, size_workspace) \
572 if (pcre_mode == PCRE32_MODE) \
573 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace); \
575 else if (pcre_mode == PCRE16_MODE) \
576 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else \
579 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace)
581
582 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets) \
584 if (pcre_mode == PCRE32_MODE) \
585 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets); \
587 else if (pcre_mode == PCRE16_MODE) \
588 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else \
591 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets)
593
594 #define PCRE_FREE_STUDY(extra) \
595 if (pcre_mode == PCRE32_MODE) \
596 PCRE_FREE_STUDY32(extra); \
597 else if (pcre_mode == PCRE16_MODE) \
598 PCRE_FREE_STUDY16(extra); \
599 else \
600 PCRE_FREE_STUDY8(extra)
601
602 #define PCRE_FREE_SUBSTRING(substring) \
603 if (pcre_mode == PCRE32_MODE) \
604 PCRE_FREE_SUBSTRING32(substring); \
605 else if (pcre_mode == PCRE16_MODE) \
606 PCRE_FREE_SUBSTRING16(substring); \
607 else \
608 PCRE_FREE_SUBSTRING8(substring)
609
610 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
611 if (pcre_mode == PCRE32_MODE) \
612 PCRE_FREE_SUBSTRING_LIST32(listptr); \
613 else if (pcre_mode == PCRE16_MODE) \
614 PCRE_FREE_SUBSTRING_LIST16(listptr); \
615 else \
616 PCRE_FREE_SUBSTRING_LIST8(listptr)
617
618 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
619 getnamesptr, subsptr) \
620 if (pcre_mode == PCRE32_MODE) \
621 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr); \
623 else if (pcre_mode == PCRE16_MODE) \
624 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else \
627 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr)
629
630 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
631 if (pcre_mode == PCRE32_MODE) \
632 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
633 else if (pcre_mode == PCRE16_MODE) \
634 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
635 else \
636 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
637
638 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
639 if (pcre_mode == PCRE32_MODE) \
640 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
641 else if (pcre_mode == PCRE16_MODE) \
642 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
643 else \
644 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
645
646 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
647 if (pcre_mode == PCRE32_MODE) \
648 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
649 else if (pcre_mode == PCRE16_MODE) \
650 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
651 else \
652 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
653
654 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
655 (pcre_mode == PCRE32_MODE ? \
656 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
657 : pcre_mode == PCRE16_MODE ? \
658 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
659 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
660
661 #define PCRE_JIT_STACK_FREE(stack) \
662 if (pcre_mode == PCRE32_MODE) \
663 PCRE_JIT_STACK_FREE32(stack); \
664 else if (pcre_mode == PCRE16_MODE) \
665 PCRE_JIT_STACK_FREE16(stack); \
666 else \
667 PCRE_JIT_STACK_FREE8(stack)
668
/* Expression macro: calls pcre32_maketables(), pcre16_maketables() or
pcre_maketables() according to pcre_mode. Any mode other than 32-bit or 16-bit
uses the 8-bit function. */

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? \
     pcre32_maketables() \
     : pcre_mode == PCRE16_MODE ? \
        pcre16_maketables() \
        : pcre_maketables())
671
672 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
673 if (pcre_mode == PCRE32_MODE) \
674 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
675 else if (pcre_mode == PCRE16_MODE) \
676 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
677 else \
678 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
679
680 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
681 if (pcre_mode == PCRE32_MODE) \
682 PCRE_PRINTINT32(re, outfile, debug_lengths); \
683 else if (pcre_mode == PCRE16_MODE) \
684 PCRE_PRINTINT16(re, outfile, debug_lengths); \
685 else \
686 PCRE_PRINTINT8(re, outfile, debug_lengths)
687
688 #define PCRE_STUDY(extra, re, options, error) \
689 if (pcre_mode == PCRE32_MODE) \
690 PCRE_STUDY32(extra, re, options, error); \
691 else if (pcre_mode == PCRE16_MODE) \
692 PCRE_STUDY16(extra, re, options, error); \
693 else \
694 PCRE_STUDY8(extra, re, options, error)
695
696
697 /* ----- Two out of three modes are supported ----- */
698
699 #else
700
701 /* We can use some macro trickery to make a single set of definitions work in
702 the three different cases. */
703
704 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
705
706 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707 #define BITONE 32
708 #define BITTWO 16
709
710 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
711
712 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713 #define BITONE 32
714 #define BITTWO 8
715
716 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
717
718 #else
719 #define BITONE 16
720 #define BITTWO 8
721 #endif
722
723 #define glue(a,b) a##b
724 #define G(a,b) glue(a,b)
725
726
727 /* ----- Common macros for two-mode cases ----- */
728
729 #define PCHARS(lv, p, offset, len, f) \
730 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731 G(PCHARS,BITONE)(lv, p, offset, len, f); \
732 else \
733 G(PCHARS,BITTWO)(lv, p, offset, len, f)
734
735 #define PCHARSV(p, offset, len, f) \
736 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737 G(PCHARSV,BITONE)(p, offset, len, f); \
738 else \
739 G(PCHARSV,BITTWO)(p, offset, len, f)
740
741 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744 else \
745 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
746
747 #define SET_PCRE_CALLOUT(callout) \
748 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749 G(SET_PCRE_CALLOUT,BITONE)(callout); \
750 else \
751 G(SET_PCRE_CALLOUT,BITTWO)(callout)
752
753 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
755
756 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759 else \
760 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
761
762 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765 else \
766 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
767
768 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
769
770 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771 namesptr, cbuffer, size) \
772 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774 namesptr, cbuffer, size); \
775 else \
776 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777 namesptr, cbuffer, size)
778
779 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782 else \
783 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
784
785 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786 offsets, size_offsets, workspace, size_workspace) \
787 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789 offsets, size_offsets, workspace, size_workspace); \
790 else \
791 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792 offsets, size_offsets, workspace, size_workspace)
793
794 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
795 offsets, size_offsets) \
796 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
797 G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
798 offsets, size_offsets); \
799 else \
800 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
801 offsets, size_offsets)
802
/* Two-mode dispatcher: runs the BITONE variant of PCRE_FREE_STUDY when
pcre_mode is the BITONE mode, otherwise the BITTWO variant. The do/while(0)
wrapper makes the expansion exactly one statement. */

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_FREE_STUDY,BITONE)(extra); \
    } else { \
      G(PCRE_FREE_STUDY,BITTWO)(extra); \
    } \
  } while (0)
808
/* Two-mode dispatcher: runs the BITONE variant of PCRE_FREE_SUBSTRING when
pcre_mode is the BITONE mode, otherwise the BITTWO variant. The do/while(0)
wrapper makes the expansion exactly one statement. */

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
    } else { \
      G(PCRE_FREE_SUBSTRING,BITTWO)(substring); \
    } \
  } while (0)
814
/* Two-mode dispatcher: runs the BITONE variant of PCRE_FREE_SUBSTRING_LIST
when pcre_mode is the BITONE mode, otherwise the BITTWO variant. The
do/while(0) wrapper makes the expansion exactly one statement. */

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
    } else { \
      G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr); \
    } \
  } while (0)
820
/* Two-mode dispatcher: runs the BITONE variant of PCRE_GET_NAMED_SUBSTRING
when pcre_mode is the BITONE mode, otherwise the BITTWO variant. The
do/while(0) wrapper makes the expansion exactly one statement. */

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
  getnamesptr, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else { \
      G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  } while (0)
829
/* Two-mode dispatcher: runs the BITONE variant of PCRE_GET_STRINGNUMBER when
pcre_mode is the BITONE mode, otherwise the BITTWO variant. The do/while(0)
wrapper makes the expansion exactly one statement. */

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
    } else { \
      G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr); \
    } \
  } while (0)
835
/* Two-mode dispatcher: runs the BITONE variant of PCRE_GET_SUBSTRING when
pcre_mode is the BITONE mode, otherwise the BITTWO variant. The do/while(0)
wrapper makes the expansion exactly one statement. */

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
    } else { \
      G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  } while (0)
841
/* Two-mode dispatcher: runs the BITONE variant of PCRE_GET_SUBSTRING_LIST when
pcre_mode is the BITONE mode, otherwise the BITTWO variant. The do/while(0)
wrapper makes the expansion exactly one statement. */

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
    } else { \
      G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr); \
    } \
  } while (0)
847
/* Two-mode dispatcher that yields a value: the result of the BITONE variant of
PCRE_JIT_STACK_ALLOC when pcre_mode is the BITONE mode, otherwise that of the
BITTWO variant. The whole conditional expression is parenthesized so that a
cast or operator next to the macro call applies to the result rather than to
the mode test. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
852
/* Two-mode dispatcher: runs the BITONE variant of PCRE_JIT_STACK_FREE when
pcre_mode is the BITONE mode, otherwise the BITTWO variant. The do/while(0)
wrapper makes the expansion exactly one statement. */

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
    } else { \
      G(PCRE_JIT_STACK_FREE,BITTWO)(stack); \
    } \
  } while (0)
858
/* Two-mode dispatcher that yields a value: calls the BITONE library's
_maketables function when pcre_mode is the BITONE mode, otherwise the BITTWO
library's. The whole conditional expression is parenthesized so that a cast or
operator next to the macro applies to the result rather than to the mode
test. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
862
/* Two-mode dispatcher: runs the BITONE variant of
PCRE_PATTERN_TO_HOST_BYTE_ORDER when pcre_mode is the BITONE mode, otherwise
the BITTWO variant. The do/while(0) wrapper makes the expansion exactly one
statement. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
    } else { \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables); \
    } \
  } while (0)
868
/* Two-mode dispatcher: runs the BITONE variant of PCRE_PRINTINT when pcre_mode
is the BITONE mode, otherwise the BITTWO variant. The do/while(0) wrapper makes
the expansion exactly one statement. */

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
    } else { \
      G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths); \
    } \
  } while (0)
874
/* Two-mode dispatcher: runs the BITONE variant of PCRE_STUDY when pcre_mode is
the BITONE mode, otherwise the BITTWO variant. The do/while(0) wrapper makes
the expansion exactly one statement. */

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) { \
      G(PCRE_STUDY,BITONE)(extra, re, options, error); \
    } else { \
      G(PCRE_STUDY,BITTWO)(extra, re, options, error); \
    } \
  } while (0)
880
881 #endif /* Two out of three modes */
882
883 /* ----- End of cases where more than one mode is supported ----- */
884
885
886 /* ----- Only 8-bit mode is supported ----- */
887
888 #elif defined SUPPORT_PCRE8
889 #define CHAR_SIZE 1
890 #define PCHARS PCHARS8
891 #define PCHARSV PCHARSV8
892 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
893 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
894 #define STRLEN STRLEN8
895 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
896 #define PCRE_COMPILE PCRE_COMPILE8
897 #define PCRE_CONFIG pcre_config
898 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
899 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
900 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
901 #define PCRE_EXEC PCRE_EXEC8
902 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
903 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
904 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
905 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
906 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
907 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
908 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
909 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
910 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
911 #define PCRE_MAKETABLES pcre_maketables()
912 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
913 #define PCRE_PRINTINT PCRE_PRINTINT8
914 #define PCRE_STUDY PCRE_STUDY8
915
916 /* ----- Only 16-bit mode is supported ----- */
917
918 #elif defined SUPPORT_PCRE16
919 #define CHAR_SIZE 2
920 #define PCHARS PCHARS16
921 #define PCHARSV PCHARSV16
922 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
923 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
924 #define STRLEN STRLEN16
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
926 #define PCRE_COMPILE PCRE_COMPILE16
927 #define PCRE_CONFIG pcre16_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
931 #define PCRE_EXEC PCRE_EXEC16
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
941 #define PCRE_MAKETABLES pcre16_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
943 #define PCRE_PRINTINT PCRE_PRINTINT16
944 #define PCRE_STUDY PCRE_STUDY16
945
946 /* ----- Only 32-bit mode is supported ----- */
947
948 #elif defined SUPPORT_PCRE32
949 #define CHAR_SIZE 4
950 #define PCHARS PCHARS32
951 #define PCHARSV PCHARSV32
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
954 #define STRLEN STRLEN32
955 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
956 #define PCRE_COMPILE PCRE_COMPILE32
957 #define PCRE_CONFIG pcre32_config
958 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
959 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
960 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
961 #define PCRE_EXEC PCRE_EXEC32
962 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
963 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
964 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
965 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
966 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
967 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
968 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
969 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
970 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
971 #define PCRE_MAKETABLES pcre32_maketables()
972 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
973 #define PCRE_PRINTINT PCRE_PRINTINT32
974 #define PCRE_STUDY PCRE_STUDY32
975
976 #endif
977
978 /* ----- End of mode-specific function call macros ----- */
979
980
981 /* Other parameters */
982
983 #ifndef CLOCKS_PER_SEC
984 #ifdef CLK_TCK
985 #define CLOCKS_PER_SEC CLK_TCK
986 #else
987 #define CLOCKS_PER_SEC 100
988 #endif
989 #endif
990
991 #if !defined NODFA
992 #define DFA_WS_DIMENSION 1000
993 #endif
994
995 /* This is the default loop count for timing. */
996
997 #define LOOPREPEAT 500000
998
999 /* Static variables */
1000
1001 static FILE *outfile;
1002 static int log_store = 0;
1003 static int callout_count;
1004 static int callout_extra;
1005 static int callout_fail_count;
1006 static int callout_fail_id;
1007 static int debug_lengths;
1008 static int first_callout;
1009 static int jit_was_used;
1010 static int locale_set = 0;
1011 static int show_malloc;
1012 static int use_utf;
1013 static size_t gotten_store;
1014 static size_t first_gotten_store = 0;
1015 static const unsigned char *last_callout_mark = NULL;
1016
1017 /* The buffers grow automatically if very long input lines are encountered. */
1018
1019 static int buffer_size = 50000;
1020 static pcre_uint8 *buffer = NULL;
1021 static pcre_uint8 *pbuffer = NULL;
1022
1023 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1024
1025 #ifdef COMPILE_PCRE16
1026 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1027 #endif
1028
1029 #ifdef COMPILE_PCRE32
1030 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1031 #endif
1032
1033 /* We need buffers for building 16/32-bit strings, and the tables of operator
1034 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035 pattern for saving/reloading testing. Luckily, the data for these tables is
1036 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038 LINK_SIZE is also used later in this program. */
1039
1040 #ifdef SUPPORT_PCRE16
1041 #undef IMM2_SIZE
1042 #define IMM2_SIZE 1
1043
1044 #if LINK_SIZE == 2
1045 #undef LINK_SIZE
1046 #define LINK_SIZE 1
1047 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1048 #undef LINK_SIZE
1049 #define LINK_SIZE 2
1050 #else
1051 #error LINK_SIZE must be either 2, 3, or 4
1052 #endif
1053
1054 static int buffer16_size = 0;
1055 static pcre_uint16 *buffer16 = NULL;
1056 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1057 #endif /* SUPPORT_PCRE16 */
1058
1059 #ifdef SUPPORT_PCRE32
1060 #undef IMM2_SIZE
1061 #define IMM2_SIZE 1
1062 #undef LINK_SIZE
1063 #define LINK_SIZE 1
1064
1065 static int buffer32_size = 0;
1066 static pcre_uint32 *buffer32 = NULL;
1067 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1068 #endif /* SUPPORT_PCRE32 */
1069
1070 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1071 support, it can be changed by an option. If there is no 8-bit support, there
1072 must be 16- or 32-bit support, so default to 16-bit if available, else 32-bit. */
1073
1074 #if defined SUPPORT_PCRE8
1075 static int pcre_mode = PCRE8_MODE;
1076 #elif defined SUPPORT_PCRE16
1077 static int pcre_mode = PCRE16_MODE;
1078 #elif defined SUPPORT_PCRE32
1079 static int pcre_mode = PCRE32_MODE;
1080 #endif
1081
1082 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1083
1084 static int jit_study_bits[] =
1085 {
1086 PCRE_STUDY_JIT_COMPILE,
1087 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1088 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1089 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1090 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1091 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1092 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1093 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1094 };
1095
1096 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1097 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1098
1099 /* Textual explanations for runtime error codes */
1100
1101 static const char *errtexts[] = {
1102 NULL, /* 0 is no error */
1103 NULL, /* NOMATCH is handled specially */
1104 "NULL argument passed",
1105 "bad option value",
1106 "magic number missing",
1107 "unknown opcode - pattern overwritten?",
1108 "no more memory",
1109 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1110 "match limit exceeded",
1111 "callout error code",
1112 NULL, /* BADUTF8/16 is handled specially */
1113 NULL, /* BADUTF8/16 offset is handled specially */
1114 NULL, /* PARTIAL is handled specially */
1115 "not used - internal error",
1116 "internal error - pattern overwritten?",
1117 "bad count value",
1118 "item unsupported for DFA matching",
1119 "backreference condition or recursion test not supported for DFA matching",
1120 "match limit not supported for DFA matching",
1121 "workspace size exceeded in DFA matching",
1122 "too much recursion for DFA matching",
1123 "recursion limit exceeded",
1124 "not used - internal error",
1125 "invalid combination of newline options",
1126 "bad offset value",
1127 NULL, /* SHORTUTF8/16 is handled specially */
1128 "nested recursion at the same subject position",
1129 "JIT stack limit reached",
1130 "pattern compiled in wrong mode: 8-bit/16-bit error",
1131 "pattern compiled with other endianness",
1132 "invalid data in workspace for DFA restart",
1133 "bad JIT option",
1134 "bad length"
1135 };
1136
1137
1138 /*************************************************
1139 * Alternate character tables *
1140 *************************************************/
1141
1142 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1143 using the default tables of the library. However, the T option can be used to
1144 select alternate sets of tables, for different kinds of testing. Note also that
1145 the L (locale) option also adjusts the tables. */
1146
1147 /* This is the set of tables distributed as default with PCRE. It recognizes
1148 only ASCII characters. */
1149
1150 static const pcre_uint8 tables0[] = {
1151
1152 /* This table is a lower casing table. */
1153
1154 0, 1, 2, 3, 4, 5, 6, 7,
1155 8, 9, 10, 11, 12, 13, 14, 15,
1156 16, 17, 18, 19, 20, 21, 22, 23,
1157 24, 25, 26, 27, 28, 29, 30, 31,
1158 32, 33, 34, 35, 36, 37, 38, 39,
1159 40, 41, 42, 43, 44, 45, 46, 47,
1160 48, 49, 50, 51, 52, 53, 54, 55,
1161 56, 57, 58, 59, 60, 61, 62, 63,
1162 64, 97, 98, 99,100,101,102,103,
1163 104,105,106,107,108,109,110,111,
1164 112,113,114,115,116,117,118,119,
1165 120,121,122, 91, 92, 93, 94, 95,
1166 96, 97, 98, 99,100,101,102,103,
1167 104,105,106,107,108,109,110,111,
1168 112,113,114,115,116,117,118,119,
1169 120,121,122,123,124,125,126,127,
1170 128,129,130,131,132,133,134,135,
1171 136,137,138,139,140,141,142,143,
1172 144,145,146,147,148,149,150,151,
1173 152,153,154,155,156,157,158,159,
1174 160,161,162,163,164,165,166,167,
1175 168,169,170,171,172,173,174,175,
1176 176,177,178,179,180,181,182,183,
1177 184,185,186,187,188,189,190,191,
1178 192,193,194,195,196,197,198,199,
1179 200,201,202,203,204,205,206,207,
1180 208,209,210,211,212,213,214,215,
1181 216,217,218,219,220,221,222,223,
1182 224,225,226,227,228,229,230,231,
1183 232,233,234,235,236,237,238,239,
1184 240,241,242,243,244,245,246,247,
1185 248,249,250,251,252,253,254,255,
1186
1187 /* This table is a case flipping table. */
1188
1189 0, 1, 2, 3, 4, 5, 6, 7,
1190 8, 9, 10, 11, 12, 13, 14, 15,
1191 16, 17, 18, 19, 20, 21, 22, 23,
1192 24, 25, 26, 27, 28, 29, 30, 31,
1193 32, 33, 34, 35, 36, 37, 38, 39,
1194 40, 41, 42, 43, 44, 45, 46, 47,
1195 48, 49, 50, 51, 52, 53, 54, 55,
1196 56, 57, 58, 59, 60, 61, 62, 63,
1197 64, 97, 98, 99,100,101,102,103,
1198 104,105,106,107,108,109,110,111,
1199 112,113,114,115,116,117,118,119,
1200 120,121,122, 91, 92, 93, 94, 95,
1201 96, 65, 66, 67, 68, 69, 70, 71,
1202 72, 73, 74, 75, 76, 77, 78, 79,
1203 80, 81, 82, 83, 84, 85, 86, 87,
1204 88, 89, 90,123,124,125,126,127,
1205 128,129,130,131,132,133,134,135,
1206 136,137,138,139,140,141,142,143,
1207 144,145,146,147,148,149,150,151,
1208 152,153,154,155,156,157,158,159,
1209 160,161,162,163,164,165,166,167,
1210 168,169,170,171,172,173,174,175,
1211 176,177,178,179,180,181,182,183,
1212 184,185,186,187,188,189,190,191,
1213 192,193,194,195,196,197,198,199,
1214 200,201,202,203,204,205,206,207,
1215 208,209,210,211,212,213,214,215,
1216 216,217,218,219,220,221,222,223,
1217 224,225,226,227,228,229,230,231,
1218 232,233,234,235,236,237,238,239,
1219 240,241,242,243,244,245,246,247,
1220 248,249,250,251,252,253,254,255,
1221
1222 /* This table contains bit maps for various character classes. Each map is 32
1223 bytes long and the bits run from the least significant end of each byte. The
1224 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1225 graph, print, punct, and cntrl. Other classes are built from combinations. */
1226
1227 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1228 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1229 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1230 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1231
1232 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1233 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1234 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1236
1237 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1238 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1239 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1240 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1241
1242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1243 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1244 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1246
1247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1248 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1249 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1251
1252 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1253 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1255 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1256
1257 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1258 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261
1262 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1263 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1264 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266
1267 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1268 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271
1272 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1274 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276
1277 /* This table identifies various classes of character by individual bits:
1278 0x01 white space character
1279 0x02 letter
1280 0x04 decimal digit
1281 0x08 hexadecimal digit
1282 0x10 alphanumeric or '_'
1283 0x80 regular expression metacharacter or binary zero
1284 */
1285
1286 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1287 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1288 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1289 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1290 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1291 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1292 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1293 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1294 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1295 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1296 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1297 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1298 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1299 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1300 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1301 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1311 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1316 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1318
1319 /* This is a set of tables that came originally from a Windows user. It seems to
1320 be at least an approximation of ISO 8859. In particular, there are characters
1321 greater than 128 that are marked as spaces, letters, etc. */
1322
1323 static const pcre_uint8 tables1[] = {
1324 0,1,2,3,4,5,6,7,
1325 8,9,10,11,12,13,14,15,
1326 16,17,18,19,20,21,22,23,
1327 24,25,26,27,28,29,30,31,
1328 32,33,34,35,36,37,38,39,
1329 40,41,42,43,44,45,46,47,
1330 48,49,50,51,52,53,54,55,
1331 56,57,58,59,60,61,62,63,
1332 64,97,98,99,100,101,102,103,
1333 104,105,106,107,108,109,110,111,
1334 112,113,114,115,116,117,118,119,
1335 120,121,122,91,92,93,94,95,
1336 96,97,98,99,100,101,102,103,
1337 104,105,106,107,108,109,110,111,
1338 112,113,114,115,116,117,118,119,
1339 120,121,122,123,124,125,126,127,
1340 128,129,130,131,132,133,134,135,
1341 136,137,138,139,140,141,142,143,
1342 144,145,146,147,148,149,150,151,
1343 152,153,154,155,156,157,158,159,
1344 160,161,162,163,164,165,166,167,
1345 168,169,170,171,172,173,174,175,
1346 176,177,178,179,180,181,182,183,
1347 184,185,186,187,188,189,190,191,
1348 224,225,226,227,228,229,230,231,
1349 232,233,234,235,236,237,238,239,
1350 240,241,242,243,244,245,246,215,
1351 248,249,250,251,252,253,254,223,
1352 224,225,226,227,228,229,230,231,
1353 232,233,234,235,236,237,238,239,
1354 240,241,242,243,244,245,246,247,
1355 248,249,250,251,252,253,254,255,
1356 0,1,2,3,4,5,6,7,
1357 8,9,10,11,12,13,14,15,
1358 16,17,18,19,20,21,22,23,
1359 24,25,26,27,28,29,30,31,
1360 32,33,34,35,36,37,38,39,
1361 40,41,42,43,44,45,46,47,
1362 48,49,50,51,52,53,54,55,
1363 56,57,58,59,60,61,62,63,
1364 64,97,98,99,100,101,102,103,
1365 104,105,106,107,108,109,110,111,
1366 112,113,114,115,116,117,118,119,
1367 120,121,122,91,92,93,94,95,
1368 96,65,66,67,68,69,70,71,
1369 72,73,74,75,76,77,78,79,
1370 80,81,82,83,84,85,86,87,
1371 88,89,90,123,124,125,126,127,
1372 128,129,130,131,132,133,134,135,
1373 136,137,138,139,140,141,142,143,
1374 144,145,146,147,148,149,150,151,
1375 152,153,154,155,156,157,158,159,
1376 160,161,162,163,164,165,166,167,
1377 168,169,170,171,172,173,174,175,
1378 176,177,178,179,180,181,182,183,
1379 184,185,186,187,188,189,190,191,
1380 224,225,226,227,228,229,230,231,
1381 232,233,234,235,236,237,238,239,
1382 240,241,242,243,244,245,246,215,
1383 248,249,250,251,252,253,254,223,
1384 192,193,194,195,196,197,198,199,
1385 200,201,202,203,204,205,206,207,
1386 208,209,210,211,212,213,214,247,
1387 216,217,218,219,220,221,222,255,
1388 0,62,0,0,1,0,0,0,
1389 0,0,0,0,0,0,0,0,
1390 32,0,0,0,1,0,0,0,
1391 0,0,0,0,0,0,0,0,
1392 0,0,0,0,0,0,255,3,
1393 126,0,0,0,126,0,0,0,
1394 0,0,0,0,0,0,0,0,
1395 0,0,0,0,0,0,0,0,
1396 0,0,0,0,0,0,255,3,
1397 0,0,0,0,0,0,0,0,
1398 0,0,0,0,0,0,12,2,
1399 0,0,0,0,0,0,0,0,
1400 0,0,0,0,0,0,0,0,
1401 254,255,255,7,0,0,0,0,
1402 0,0,0,0,0,0,0,0,
1403 255,255,127,127,0,0,0,0,
1404 0,0,0,0,0,0,0,0,
1405 0,0,0,0,254,255,255,7,
1406 0,0,0,0,0,4,32,4,
1407 0,0,0,128,255,255,127,255,
1408 0,0,0,0,0,0,255,3,
1409 254,255,255,135,254,255,255,7,
1410 0,0,0,0,0,4,44,6,
1411 255,255,127,255,255,255,127,255,
1412 0,0,0,0,254,255,255,255,
1413 255,255,255,255,255,255,255,127,
1414 0,0,0,0,254,255,255,255,
1415 255,255,255,255,255,255,255,255,
1416 0,2,0,0,255,255,255,255,
1417 255,255,255,255,255,255,255,127,
1418 0,0,0,0,255,255,255,255,
1419 255,255,255,255,255,255,255,255,
1420 0,0,0,0,254,255,0,252,
1421 1,0,0,248,1,0,0,120,
1422 0,0,0,0,254,255,255,255,
1423 0,0,128,0,0,0,128,0,
1424 255,255,255,255,0,0,0,0,
1425 0,0,0,0,0,0,0,128,
1426 255,255,255,255,0,0,0,0,
1427 0,0,0,0,0,0,0,0,
1428 128,0,0,0,0,0,0,0,
1429 0,1,1,0,1,1,0,0,
1430 0,0,0,0,0,0,0,0,
1431 0,0,0,0,0,0,0,0,
1432 1,0,0,0,128,0,0,0,
1433 128,128,128,128,0,0,128,0,
1434 28,28,28,28,28,28,28,28,
1435 28,28,0,0,0,0,0,128,
1436 0,26,26,26,26,26,26,18,
1437 18,18,18,18,18,18,18,18,
1438 18,18,18,18,18,18,18,18,
1439 18,18,18,128,128,0,128,16,
1440 0,26,26,26,26,26,26,18,
1441 18,18,18,18,18,18,18,18,
1442 18,18,18,18,18,18,18,18,
1443 18,18,18,128,128,0,0,0,
1444 0,0,0,0,0,1,0,0,
1445 0,0,0,0,0,0,0,0,
1446 0,0,0,0,0,0,0,0,
1447 0,0,0,0,0,0,0,0,
1448 1,0,0,0,0,0,0,0,
1449 0,0,18,0,0,0,0,0,
1450 0,0,20,20,0,18,0,0,
1451 0,20,18,0,0,0,0,0,
1452 18,18,18,18,18,18,18,18,
1453 18,18,18,18,18,18,18,18,
1454 18,18,18,18,18,18,18,0,
1455 18,18,18,18,18,18,18,18,
1456 18,18,18,18,18,18,18,18,
1457 18,18,18,18,18,18,18,18,
1458 18,18,18,18,18,18,18,0,
1459 18,18,18,18,18,18,18,18
1460 };
1461
1462
1463
1464
1465 #ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement strerror() for old-fashioned C libraries (SunOS4 is one example)
that do not supply the function themselves. It is compiled only when
HAVE_STRERROR is not defined, and looks the message up in the sys_errlist
table that such libraries export.

Argument:   n    the error number to translate
Returns:    sys_errlist[n], or a fixed "unknown error number" string when n
            is negative or not less than sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1483 #endif /* HAVE_STRERROR */
1484
1485
1486
1487 /*************************************************
1488 * Print newline configuration *
1489 *************************************************/
1490
1491 /*
1492 Arguments:
1493 rc the return code from PCRE_CONFIG_NEWLINE
1494 isc TRUE if called from "-C newline"
1495 Returns: nothing
1496 */
1497
1498 static void
1499 print_newline_config(int rc, BOOL isc)
1500 {
1501 const char *s = NULL;
1502 if (!isc) printf(" Newline sequence is ");
1503 switch(rc)
1504 {
1505 case CHAR_CR: s = "CR"; break;
1506 case CHAR_LF: s = "LF"; break;
1507 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1508 case -1: s = "ANY"; break;
1509 case -2: s = "ANYCRLF"; break;
1510
1511 default:
1512 printf("a non-standard value: 0x%04x\n", rc);
1513 return;
1514 }
1515
1516 printf("%s\n", s);
1517 }
1518
1519
1520
1521 /*************************************************
1522 * JIT memory callback *
1523 *************************************************/
1524
1525 static pcre_jit_stack* jit_callback(void *arg)
1526 {
1527 jit_was_used = TRUE;
1528 return (pcre_jit_stack *)arg;
1529 }
1530
1531
1532 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1533 /*************************************************
1534 * Convert UTF-8 string to value *
1535 *************************************************/
1536
1537 /* This function takes one or more bytes that represents a UTF-8 character,
1538 and returns the value of the character.
1539
1540 Argument:
1541 utf8bytes a pointer to the byte vector
1542 vptr a pointer to an int to receive the value
1543
1544 Returns: > 0 => the number of bytes consumed
1545 -6 to 0 => malformed UTF-8 character at offset = (-return)
1546 */
1547
1548 static int
1549 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1550 {
1551 pcre_uint32 c = *utf8bytes++;
1552 pcre_uint32 d = c;
1553 int i, j, s;
1554
1555 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1556 {
1557 if ((d & 0x80) == 0) break;
1558 d <<= 1;
1559 }
1560
1561 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1562 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1563
1564 /* i now has a value in the range 1-5 */
1565
1566 s = 6*i;
1567 d = (c & utf8_table3[i]) << s;
1568
1569 for (j = 0; j < i; j++)
1570 {
1571 c = *utf8bytes++;
1572 if ((c & 0xc0) != 0x80) return -(j+1);
1573 s -= 6;
1574 d |= (c & 0x3f) << s;
1575 }
1576
1577 /* Check that encoding was the correct unique one */
1578
1579 for (j = 0; j < utf8_table1_size; j++)
1580 if (d <= (pcre_uint32)utf8_table1[j]) break;
1581 if (j != i) return -(i+1);
1582
1583 /* Valid value */
1584
1585 *vptr = d;
1586 return i+1;
1587 }
1588 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1589
1590
1591
1592 #if defined SUPPORT_PCRE8 && !defined NOUTF
1593 /*************************************************
1594 * Convert character value to UTF-8 *
1595 *************************************************/
1596
1597 /* This function takes an integer value in the range 0 - 0x7fffffff
1598 and encodes it as a UTF-8 character in 0 to 6 bytes.
1599
1600 Arguments:
1601 cvalue the character value
1602 utf8bytes pointer to buffer for result - at least 6 bytes long
1603
1604 Returns: number of characters placed in the buffer
1605 */
1606
1607 static int
1608 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1609 {
1610 register int i, j;
1611 if (cvalue > 0x7fffffffu)
1612 return -1;
1613 for (i = 0; i < utf8_table1_size; i++)
1614 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1615 utf8bytes += i;
1616 for (j = i; j > 0; j--)
1617 {
1618 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1619 cvalue >>= 6;
1620 }
1621 *utf8bytes = utf8_table2[i] | cvalue;
1622 return i + 1;
1623 }
1624 #endif
1625
1626
1627 #ifdef SUPPORT_PCRE16
1628 /*************************************************
1629 * Convert a string to 16-bit *
1630 *************************************************/
1631
1632 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1633 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1634 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1635 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1636 result is always left in buffer16.
1637
1638 Note that this function does not object to surrogate values. This is
1639 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1640 for the purpose of testing that they are correctly faulted.
1641
1642 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1643 in UTF-8 so that values greater than 255 can be handled.
1644
1645 Arguments:
1646 data TRUE if converting a data line; FALSE for a regex
1647 p points to a byte string
1648 utf true if UTF-8 (to be converted to UTF-16)
1649 len number of bytes in the string (excluding trailing zero)
1650
1651 Returns: number of 16-bit data items used (excluding trailing zero)
1652 OR -1 if a UTF-8 string is malformed
1653 OR -2 if a value > 0x10ffff is encountered
1654 OR -3 if a value > 0xffff is encountered when not in UTF mode
1655 */
1656
1657 static int
1658 to16(int data, pcre_uint8 *p, int utf, int len)
1659 {
1660 pcre_uint16 *pp;
1661
1662 if (buffer16_size < 2*len + 2)
1663 {
1664 if (buffer16 != NULL) free(buffer16);
1665 buffer16_size = 2*len + 2;
1666 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1667 if (buffer16 == NULL)
1668 {
1669 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1670 exit(1);
1671 }
1672 }
1673
1674 pp = buffer16;
1675
1676 if (!utf && !data)
1677 {
1678 while (len-- > 0) *pp++ = *p++;
1679 }
1680
1681 else
1682 {
1683 pcre_uint32 c = 0;
1684 while (len > 0)
1685 {
1686 int chlen = utf82ord(p, &c);
1687 if (chlen <= 0) return -1;
1688 if (c > 0x10ffff) return -2;
1689 p += chlen;
1690 len -= chlen;
1691 if (c < 0x10000) *pp++ = c; else
1692 {
1693 if (!utf) return -3;
1694 c -= 0x10000;
1695 *pp++ = 0xD800 | (c >> 10);
1696 *pp++ = 0xDC00 | (c & 0x3ff);
1697 }
1698 }
1699 }
1700
1701 *pp = 0;
1702 return pp - buffer16;
1703 }
1704 #endif
1705
1706 #ifdef SUPPORT_PCRE32
1707 /*************************************************
1708 * Convert a string to 32-bit *
1709 *************************************************/
1710
1711 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1712 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1713 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1714 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1715 result is always left in buffer32.
1716
1717 Note that this function does not object to surrogate values. This is
1718 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1719 for the purpose of testing that they are correctly faulted.
1720
1721 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1722 in UTF-8 so that values greater than 255 can be handled.
1723
1724 Arguments:
1725 data TRUE if converting a data line; FALSE for a regex
1726 p points to a byte string
1727 utf true if UTF-8 (to be converted to UTF-32)
1728 len number of bytes in the string (excluding trailing zero)
1729
1730 Returns: number of 32-bit data items used (excluding trailing zero)
1731 OR -1 if a UTF-8 string is malformed
1732 OR -2 if a value > 0x10ffff is encountered
1733 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1734 */
1735
1736 static int
1737 to32(int data, pcre_uint8 *p, int utf, int len)
1738 {
1739 pcre_uint32 *pp;
1740
1741 if (buffer32_size < 4*len + 4)
1742 {
1743 if (buffer32 != NULL) free(buffer32);
1744 buffer32_size = 4*len + 4;
1745 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1746 if (buffer32 == NULL)
1747 {
1748 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1749 exit(1);
1750 }
1751 }
1752
1753 pp = buffer32;
1754
1755 if (!utf && !data)
1756 {
1757 while (len-- > 0) *pp++ = *p++;
1758 }
1759
1760 else
1761 {
1762 pcre_uint32 c = 0;
1763 while (len > 0)
1764 {
1765 int chlen = utf82ord(p, &c);
1766 if (chlen <= 0) return -1;
1767 if (utf)
1768 {
1769 if (c > 0x10ffff) return -2;
1770 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1771 }
1772
1773 p += chlen;
1774 len -= chlen;
1775 *pp++ = c;
1776 }
1777 }
1778
1779 *pp = 0;
1780 return pp - buffer32;
1781 }
1782
1783 /* Check that a 32-bit character string is valid UTF-32.
1784
1785 Arguments:
1786 string points to the string
1787 length length of string, or -1 if the string is zero-terminated
1788
1789 Returns: TRUE if the string is a valid UTF-32 string
1790 FALSE otherwise
1791 */
1792
1793 #ifdef NEVER
1794
1795 #ifdef SUPPORT_UTF
1796 static BOOL
1797 valid_utf32(pcre_uint32 *string, int length)
1798 {
1799 register pcre_uint32 *p;
1800 register pcre_uint32 c;
1801
1802 for (p = string; length-- > 0; p++)
1803 {
1804 c = *p;
1805
1806 if (c > 0x10ffffu)
1807 return FALSE;
1808
1809 /* A surrogate */
1810 if ((c & 0xfffff800u) == 0xd800u)
1811 return FALSE;
1812
1813 /* Non-character */
1814 if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1815 return FALSE;
1816 }
1817
1818 return TRUE;
1819 }
1820 #endif /* SUPPORT_UTF */
1821
1822 #endif /* NEVER */
1823
1824
1825 #endif
1826
1827 /*************************************************
1828 * Read or extend an input line *
1829 *************************************************/
1830
1831 /* Input lines are read into buffer, but both patterns and data lines can be
1832 continued over multiple input lines. In addition, if the buffer fills up, we
1833 want to automatically expand it so as to be able to handle extremely large
1834 lines that are needed for certain stress tests. When the input buffer is
1835 expanded, the other two buffers must also be expanded likewise, and the
1836 contents of pbuffer, which are a copy of the input for callouts, must be
1837 preserved (for when expansion happens for a data line). This is not the most
1838 optimal way of handling this, but hey, this is just a test program!
1839
1840 Arguments:
1841 f the file to read
1842 start where in buffer to start (this *must* be within buffer)
1843 prompt for stdin or readline()
1844
1845 Returns: pointer to the start of new data
1846 could be a copy of start, or could be moved
1847 NULL if no data read and EOF reached
1848 */
1849
1850 static pcre_uint8 *
1851 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1852 {
1853 pcre_uint8 *here = start;
1854
1855 for (;;)
1856 {
1857 size_t rlen = (size_t)(buffer_size - (here - buffer));
1858
1859 if (rlen > 1000)
1860 {
1861 int dlen;
1862
1863 /* If libreadline or libedit support is required, use readline() to read a
1864 line if the input is a terminal. Note that readline() removes the trailing
1865 newline, so we must put it back again, to be compatible with fgets(). */
1866
1867 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1868 if (isatty(fileno(f)))
1869 {
1870 size_t len;
1871 char *s = readline(prompt);
1872 if (s == NULL) return (here == start)? NULL : start;
1873 len = strlen(s);
1874 if (len > 0) add_history(s);
1875 if (len > rlen - 1) len = rlen - 1;
1876 memcpy(here, s, len);
1877 here[len] = '\n';
1878 here[len+1] = 0;
1879 free(s);
1880 }
1881 else
1882 #endif
1883
1884 /* Read the next line by normal means, prompting if the file is stdin. */
1885
1886 {
1887 if (f == stdin) printf("%s", prompt);
1888 if (fgets((char *)here, rlen, f) == NULL)
1889 return (here == start)? NULL : start;
1890 }
1891
1892 dlen = (int)strlen((char *)here);
1893 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1894 here += dlen;
1895 }
1896
1897 else
1898 {
1899 int new_buffer_size = 2*buffer_size;
1900 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1901 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1902
1903 if (new_buffer == NULL || new_pbuffer == NULL)
1904 {
1905 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1906 exit(1);
1907 }
1908
1909 memcpy(new_buffer, buffer, buffer_size);
1910 memcpy(new_pbuffer, pbuffer, buffer_size);
1911
1912 buffer_size = new_buffer_size;
1913
1914 start = new_buffer + (start - buffer);
1915 here = new_buffer + (here - buffer);
1916
1917 free(buffer);
1918 free(pbuffer);
1919
1920 buffer = new_buffer;
1921 pbuffer = new_pbuffer;
1922 }
1923 }
1924
1925 return NULL; /* Control never gets here */
1926 }
1927
1928
1929
1930 /*************************************************
1931 * Read number from string *
1932 *************************************************/
1933
1934 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1935 around with conditional compilation, just do the job by hand. It is only used
1936 for unpicking arguments, so just keep it simple.
1937
1938 Arguments:
1939 str string to be converted
1940 endptr where to put the end pointer
1941
1942 Returns: the value, as an int
1943 */
1944
1945 static int
1946 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1947 {
1948 int result = 0;
1949 while(*str != 0 && isspace(*str)) str++;
1950 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1951 *endptr = str;
1952 return(result);
1953 }
1954
1955
1956
1957 /*************************************************
1958 * Print one character *
1959 *************************************************/
1960
1961 /* Print a single character either literally, or as a hex escape. */
1962
1963 static int pchar(pcre_uint32 c, FILE *f)
1964 {
1965 int n = 0;
1966 if (PRINTOK(c))
1967 {
1968 if (f != NULL) fprintf(f, "%c", c);
1969 return 1;
1970 }
1971
1972 if (c < 0x100)
1973 {
1974 if (use_utf)
1975 {
1976 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1977 return 6;
1978 }
1979 else
1980 {
1981 if (f != NULL) fprintf(f, "\\x%02x", c);
1982 return 4;
1983 }
1984 }
1985
1986 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1987 return n >= 0 ? n : 0;
1988 }
1989
1990
1991
1992 #ifdef SUPPORT_PCRE8
1993 /*************************************************
1994 * Print 8-bit character string *
1995 *************************************************/
1996
1997 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1998 If handed a NULL file, just counts chars without printing. */
1999
2000 static int pchars(pcre_uint8 *p, int length, FILE *f)
2001 {
2002 pcre_uint32 c = 0;
2003 int yield = 0;
2004
2005 if (length < 0)
2006 length = strlen((char *)p);
2007
2008 while (length-- > 0)
2009 {
2010 #if !defined NOUTF
2011 if (use_utf)
2012 {
2013 int rc = utf82ord(p, &c);
2014 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2015 {
2016 length -= rc - 1;
2017 p += rc;
2018 yield += pchar(c, f);
2019 continue;
2020 }
2021 }
2022 #endif
2023 c = *p++;
2024 yield += pchar(c, f);
2025 }
2026
2027 return yield;
2028 }
2029 #endif
2030
2031
2032
2033 #ifdef SUPPORT_PCRE16
2034 /*************************************************
2035 * Find length of 0-terminated 16-bit string *
2036 *************************************************/
2037
2038 static int strlen16(PCRE_SPTR16 p)
2039 {
2040 int len = 0;
2041 while (*p++ != 0) len++;
2042 return len;
2043 }
2044 #endif /* SUPPORT_PCRE16 */
2045
2046
2047
2048 #ifdef SUPPORT_PCRE32
2049 /*************************************************
2050 * Find length of 0-terminated 32-bit string *
2051 *************************************************/
2052
2053 static int strlen32(PCRE_SPTR32 p)
2054 {
2055 int len = 0;
2056 while (*p++ != 0) len++;
2057 return len;
2058 }
2059 #endif /* SUPPORT_PCRE32 */
2060
2061
2062
2063 #ifdef SUPPORT_PCRE16
2064 /*************************************************
2065 * Print 16-bit character string *
2066 *************************************************/
2067
2068 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2069 If handed a NULL file, just counts chars without printing. */
2070
2071 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2072 {
2073 int yield = 0;
2074
2075 if (length < 0)
2076 length = strlen16(p);
2077
2078 while (length-- > 0)
2079 {
2080 pcre_uint32 c = *p++ & 0xffff;
2081 #if !defined NOUTF
2082 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2083 {
2084 int d = *p & 0xffff;
2085 if (d >= 0xDC00 && d < 0xDFFF)
2086 {
2087 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2088 length--;
2089 p++;
2090 }
2091 }
2092 #endif
2093 yield += pchar(c, f);
2094 }
2095
2096 return yield;
2097 }
2098 #endif /* SUPPORT_PCRE16 */
2099
2100
2101
2102 #ifdef SUPPORT_PCRE32
2103 /*************************************************
2104 * Print 32-bit character string *
2105 *************************************************/
2106
2107 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2108 If handed a NULL file, just counts chars without printing. */
2109
2110 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2111 {
2112 int yield = 0;
2113
2114 (void)(utf); /* Avoid compiler warning */
2115
2116 if (length < 0)
2117 length = strlen32(p);
2118
2119 while (length-- > 0)
2120 {
2121 pcre_uint32 c = *p++;
2122 yield += pchar(c, f);
2123 }
2124
2125 return yield;
2126 }
2127 #endif /* SUPPORT_PCRE32 */
2128
2129
2130
2131 #ifdef SUPPORT_PCRE8
2132 /*************************************************
2133 * Read a capture name (8-bit) and check it *
2134 *************************************************/
2135
2136 static pcre_uint8 *
2137 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2138 {
2139 pcre_uint8 *npp = *pp;
2140 while (isalnum(*p)) *npp++ = *p++;
2141 *npp++ = 0;
2142 *npp = 0;
2143 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2144 {
2145 fprintf(outfile, "no parentheses with name \"");
2146 PCHARSV(*pp, 0, -1, outfile);
2147 fprintf(outfile, "\"\n");
2148 }
2149
2150 *pp = npp;
2151 return p;
2152 }
2153 #endif /* SUPPORT_PCRE8 */
2154
2155
2156
2157 #ifdef SUPPORT_PCRE16
2158 /*************************************************
2159 * Read a capture name (16-bit) and check it *
2160 *************************************************/
2161
2162 /* Note that the text being read is 8-bit. */
2163
2164 static pcre_uint8 *
2165 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2166 {
2167 pcre_uint16 *npp = *pp;
2168 while (isalnum(*p)) *npp++ = *p++;
2169 *npp++ = 0;
2170 *npp = 0;
2171 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2172 {
2173 fprintf(outfile, "no parentheses with name \"");
2174 PCHARSV(*pp, 0, -1, outfile);
2175 fprintf(outfile, "\"\n");
2176 }
2177 *pp = npp;
2178 return p;
2179 }
2180 #endif /* SUPPORT_PCRE16 */
2181
2182
2183
2184 #ifdef SUPPORT_PCRE32
2185 /*************************************************
2186 * Read a capture name (32-bit) and check it *
2187 *************************************************/
2188
2189 /* Note that the text being read is 8-bit. */
2190
2191 static pcre_uint8 *
2192 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2193 {
2194 pcre_uint32 *npp = *pp;
2195 while (isalnum(*p)) *npp++ = *p++;
2196 *npp++ = 0;
2197 *npp = 0;
2198 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2199 {
2200 fprintf(outfile, "no parentheses with name \"");
2201 PCHARSV(*pp, 0, -1, outfile);
2202 fprintf(outfile, "\"\n");
2203 }
2204 *pp = npp;
2205 return p;
2206 }
2207 #endif /* SUPPORT_PCRE32 */
2208
2209
2210
2211 /*************************************************
2212 * Callout function *
2213 *************************************************/
2214
2215 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2216 the match. Yield zero unless more callouts than the fail count, or the callout
2217 data is not zero. */
2218
2219 static int callout(pcre_callout_block *cb)
2220 {
2221 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2222 int i, pre_start, post_start, subject_length;
2223
2224 if (callout_extra)
2225 {
2226 fprintf(f, "Callout %d: last capture = %d\n",
2227 cb->callout_number, cb->capture_last);
2228
2229 for (i = 0; i < cb->capture_top * 2; i += 2)
2230 {
2231 if (cb->offset_vector[i] < 0)
2232 fprintf(f, "%2d: <unset>\n", i/2);
2233 else
2234 {
2235 fprintf(f, "%2d: ", i/2);
2236 PCHARSV(cb->subject, cb->offset_vector[i],
2237 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2238 fprintf(f, "\n");
2239 }
2240 }
2241 }
2242
2243 /* Re-print the subject in canonical form, the first time or if giving full
2244 datails. On subsequent calls in the same match, we use pchars just to find the
2245 printed lengths of the substrings. */
2246
2247 if (f != NULL) fprintf(f, "--->");
2248
2249 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2250 PCHARS(post_start, cb->subject, cb->start_match,
2251 cb->current_position - cb->start_match, f);
2252
2253 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2254
2255 PCHARSV(cb->subject, cb->current_position,
2256 cb->subject_length - cb->current_position, f);
2257
2258 if (f != NULL) fprintf(f, "\n");
2259
2260 /* Always print appropriate indicators, with callout number if not already
2261 shown. For automatic callouts, show the pattern offset. */
2262
2263 if (cb->callout_number == 255)
2264 {
2265 fprintf(outfile, "%+3d ", cb->pattern_position);
2266 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2267 }
2268 else
2269 {
2270 if (callout_extra) fprintf(outfile, " ");
2271 else fprintf(outfile, "%3d ", cb->callout_number);
2272 }
2273
2274 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2275 fprintf(outfile, "^");
2276
2277 if (post_start > 0)
2278 {
2279 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2280 fprintf(outfile, "^");
2281 }
2282
2283 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2284 fprintf(outfile, " ");
2285
2286 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2287 pbuffer + cb->pattern_position);
2288
2289 fprintf(outfile, "\n");
2290 first_callout = 0;
2291
2292 if (cb->mark != last_callout_mark)
2293 {
2294 if (cb->mark == NULL)
2295 fprintf(outfile, "Latest Mark: <unset>\n");
2296 else
2297 {
2298 fprintf(outfile, "Latest Mark: ");
2299 PCHARSV(cb->mark, 0, -1, outfile);
2300 putc('\n', outfile);
2301 }
2302 last_callout_mark = cb->mark;
2303 }
2304
2305 if (cb->callout_data != NULL)
2306 {
2307 int callout_data = *((int *)(cb->callout_data));
2308 if (callout_data != 0)
2309 {
2310 fprintf(outfile, "Callout data = %d\n", callout_data);
2311 return callout_data;
2312 }
2313 }
2314
2315 return (cb->callout_number != callout_fail_id)? 0 :
2316 (++callout_count >= callout_fail_count)? 1 : 0;
2317 }
2318
2319
2320 /*************************************************
2321 * Local malloc functions *
2322 *************************************************/
2323
2324 /* Alternative malloc function, to test functionality and save the size of a
2325 compiled re, which is the first store request that pcre_compile() makes. The
2326 show_malloc variable is set only during matching. */
2327
2328 static void *new_malloc(size_t size)
2329 {
2330 void *block = malloc(size);
2331 gotten_store = size;
2332 if (first_gotten_store == 0) first_gotten_store = size;
2333 if (show_malloc)
2334 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2335 return block;
2336 }
2337
2338 static void new_free(void *block)
2339 {
2340 if (show_malloc)
2341 fprintf(outfile, "free %p\n", block);
2342 free(block);
2343 }
2344
2345 /* For recursion malloc/free, to test stacking calls */
2346
2347 static void *stack_malloc(size_t size)
2348 {
2349 void *block = malloc(size);
2350 if (show_malloc)
2351 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2352 return block;
2353 }
2354
2355 static void stack_free(void *block)
2356 {
2357 if (show_malloc)
2358 fprintf(outfile, "stack_free %p\n", block);
2359 free(block);
2360 }
2361
2362
2363 /*************************************************
2364 * Call pcre_fullinfo() *
2365 *************************************************/
2366
2367 /* Get one piece of information from the pcre_fullinfo() function. When only
2368 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2369 value, but the code is defensive.
2370
2371 Arguments:
2372 re compiled regex
2373 study study data
2374 option PCRE_INFO_xxx option
2375 ptr where to put the data
2376
2377 Returns: 0 when OK, < 0 on error
2378 */
2379
2380 static int
2381 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2382 {
2383 int rc;
2384
2385 if (pcre_mode == PCRE32_MODE)
2386 #ifdef SUPPORT_PCRE32
2387 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2388 #else
2389 rc = PCRE_ERROR_BADMODE;
2390 #endif
2391 else if (pcre_mode == PCRE16_MODE)
2392 #ifdef SUPPORT_PCRE16
2393 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2394 #else
2395 rc = PCRE_ERROR_BADMODE;
2396 #endif
2397 else
2398 #ifdef SUPPORT_PCRE8
2399 rc = pcre_fullinfo(re, study, option, ptr);
2400 #else
2401 rc = PCRE_ERROR_BADMODE;
2402 #endif
2403
2404 if (rc < 0)
2405 {
2406 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2407 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2408 if (rc == PCRE_ERROR_BADMODE)
2409 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2410 "%d-bit mode\n", 8 * CHAR_SIZE,
2411 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2412 }
2413
2414 return rc;
2415 }
2416
2417
2418
2419 /*************************************************
2420 * Swap byte functions *
2421 *************************************************/
2422
2423 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2424 value, respectively.
2425
2426 Arguments:
2427 value any number
2428
2429 Returns: the byte swapped value
2430 */
2431
2432 static pcre_uint32
2433 swap_uint32(pcre_uint32 value)
2434 {
2435 return ((value & 0x000000ff) << 24) |
2436 ((value & 0x0000ff00) << 8) |
2437 ((value & 0x00ff0000) >> 8) |
2438 (value >> 24);
2439 }
2440
2441 static pcre_uint16
2442 swap_uint16(pcre_uint16 value)
2443 {
2444 return (value >> 8) | (value << 8);
2445 }
2446
2447
2448
2449 /*************************************************
2450 * Flip bytes in a compiled pattern *
2451 *************************************************/
2452
2453 /* This function is called if the 'F' option was present on a pattern that is
2454 to be written to a file. We flip the bytes of all the integer fields in the
2455 regex data block and the study block. In 16-bit mode this also flips relevant
2456 bytes in the pattern itself. This is to make it possible to test PCRE's
2457 ability to reload byte-flipped patterns, e.g. those compiled on a different
2458 architecture. */
2459
2460 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2461 static void
2462 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2463 {
2464 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2465 #ifdef SUPPORT_PCRE16
2466 int op;
2467 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2468 int length = re->name_count * re->name_entry_size;
2469 #ifdef SUPPORT_UTF
2470 BOOL utf = (re->options & PCRE_UTF16) != 0;
2471 BOOL utf16_char = FALSE;
2472 #endif /* SUPPORT_UTF */
2473 #endif /* SUPPORT_PCRE16 */
2474
2475 /* Always flip the bytes in the main data block and study blocks. */
2476
2477 re->magic_number = REVERSED_MAGIC_NUMBER;
2478 re->size = swap_uint32(re->size);
2479 re->options = swap_uint32(re->options);
2480 re->flags = swap_uint16(re->flags);
2481 re->top_bracket = swap_uint16(re->top_bracket);
2482 re->top_backref = swap_uint16(re->top_backref);
2483 re->first_char = swap_uint16(re->first_char);
2484 re->req_char = swap_uint16(re->req_char);
2485 re->name_table_offset = swap_uint16(re->name_table_offset);
2486 re->name_entry_size = swap_uint16(re->name_entry_size);
2487 re->name_count = swap_uint16(re->name_count);
2488
2489 if (extra != NULL)
2490 {
2491 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2492 rsd->size = swap_uint32(rsd->size);
2493 rsd->flags = swap_uint32(rsd->flags);
2494 rsd->minlength = swap_uint32(rsd->minlength);
2495 }
2496
2497 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2498 in the name table, if present, and then in the pattern itself. */
2499
2500 #ifdef SUPPORT_PCRE16
2501 if (pcre_mode != PCRE16_MODE) return;
2502
2503 while(TRUE)
2504 {
2505 /* Swap previous characters. */
2506 while (length-- > 0)
2507 {
2508 *ptr = swap_uint16(*ptr);
2509 ptr++;
2510 }
2511 #ifdef SUPPORT_UTF
2512 if (utf16_char)
2513 {
2514 if ((ptr[-1] & 0xfc00) == 0xd800)
2515 {
2516 /* We know that there is only one extra character in UTF-16. */
2517 *ptr = swap_uint16(*ptr);
2518 ptr++;
2519 }
2520 }
2521 utf16_char = FALSE;
2522 #endif /* SUPPORT_UTF */
2523
2524 /* Get next opcode. */
2525
2526 length = 0;
2527 op = *ptr;
2528 *ptr++ = swap_uint16(op);
2529
2530 switch (op)
2531 {
2532 case OP_END:
2533 return;
2534
2535 #ifdef SUPPORT_UTF
2536 case OP_CHAR:
2537 case OP_CHARI:
2538 case OP_NOT:
2539 case OP_NOTI:
2540 case OP_STAR:
2541 case OP_MINSTAR:
2542 case OP_PLUS:
2543 case OP_MINPLUS:
2544 case OP_QUERY:
2545 case OP_MINQUERY:
2546 case OP_UPTO:
2547 case OP_MINUPTO:
2548 case OP_EXACT:
2549 case OP_POSSTAR:
2550 case OP_POSPLUS:
2551 case OP_POSQUERY:
2552 case OP_POSUPTO:
2553 case OP_STARI:
2554 case OP_MINSTARI:
2555 case OP_PLUSI:
2556 case OP_MINPLUSI:
2557 case OP_QUERYI:
2558 case OP_MINQUERYI:
2559 case OP_UPTOI:
2560 case OP_MINUPTOI:
2561 case OP_EXACTI:
2562 case OP_POSSTARI:
2563 case OP_POSPLUSI:
2564 case OP_POSQUERYI:
2565 case OP_POSUPTOI:
2566 case OP_NOTSTAR:
2567 case OP_NOTMINSTAR:
2568 case OP_NOTPLUS:
2569 case OP_NOTMINPLUS:
2570 case OP_NOTQUERY:
2571 case OP_NOTMINQUERY:
2572 case OP_NOTUPTO:
2573 case OP_NOTMINUPTO:
2574 case OP_NOTEXACT:
2575 case OP_NOTPOSSTAR:
2576 case OP_NOTPOSPLUS:
2577 case OP_NOTPOSQUERY:
2578 case OP_NOTPOSUPTO:
2579 case OP_NOTSTARI:
2580 case OP_NOTMINSTARI:
2581 case OP_NOTPLUSI:
2582 case OP_NOTMINPLUSI:
2583 case OP_NOTQUERYI:
2584 case OP_NOTMINQUERYI:
2585 case OP_NOTUPTOI:
2586 case OP_NOTMINUPTOI:
2587 case OP_NOTEXACTI:
2588 case OP_NOTPOSSTARI:
2589 case OP_NOTPOSPLUSI:
2590 case OP_NOTPOSQUERYI:
2591 case OP_NOTPOSUPTOI:
2592 if (utf) utf16_char = TRUE;
2593 #endif
2594 /* Fall through. */
2595
2596 default:
2597 length = OP_lengths16[op] - 1;
2598 break;
2599
2600 case OP_CLASS:
2601 case OP_NCLASS:
2602 /* Skip the character bit map. */
2603 ptr += 32/sizeof(pcre_uint16);
2604 length = 0;
2605 break;
2606
2607 case OP_XCLASS:
2608 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2609 if (LINK_SIZE > 1)
2610 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2611 - (1 + LINK_SIZE + 1));
2612 else
2613 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2614
2615 /* Reverse the size of the XCLASS instance. */
2616 *ptr = swap_uint16(*ptr);
2617 ptr++;
2618 if (LINK_SIZE > 1)
2619 {
2620 *ptr = swap_uint16(*ptr);
2621 ptr++;
2622 }
2623
2624 op = *ptr;
2625 *ptr = swap_uint16(op);
2626 ptr++;
2627 if ((op & XCL_MAP) != 0)
2628 {
2629 /* Skip the character bit map. */
2630 ptr += 32/sizeof(pcre_uint16);
2631 length -= 32/sizeof(pcre_uint16);
2632 }
2633 break;
2634 }
2635 }
2636 /* Control should never reach here in 16 bit mode. */
2637 #endif /* SUPPORT_PCRE16 */
2638 }
2639 #endif /* SUPPORT_PCRE[8|16] */
2640
2641
2642
2643 #if defined SUPPORT_PCRE32
2644 static void
2645 regexflip_32(pcre *ere, pcre_extra *extra)
2646 {
2647 real_pcre32 *re = (real_pcre32 *)ere;
2648 int op;
2649 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2650 int length = re->name_count * re->name_entry_size;
2651
2652 /* Always flip the bytes in the main data block and study blocks. */
2653
2654 re->magic_number = REVERSED_MAGIC_NUMBER;
2655 re->size = swap_uint32(re->size);
2656 re->options = swap_uint32(re->options);
2657 re->flags = swap_uint16(re->flags);
2658 re->top_bracket = swap_uint16(re->top_bracket);
2659 re->top_backref = swap_uint16(re->top_backref);
2660 re->first_char = swap_uint32(re->first_char);
2661 re->req_char = swap_uint32(re->req_char);
2662 re->name_table_offset = swap_uint16(re->name_table_offset);
2663 re->name_entry_size = swap_uint16(re->name_entry_size);
2664 re->name_count = swap_uint16(re->name_count);
2665
2666 if (extra != NULL)
2667 {
2668 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2669 rsd->size = swap_uint32(rsd->size);
2670 rsd->flags = swap_uint32(rsd->flags);
2671 rsd->minlength = swap_uint32(rsd->minlength);
2672 }
2673
2674 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2675 the pattern itself. */
2676
2677 while(TRUE)
2678 {
2679 /* Swap previous characters. */
2680 while (length-- > 0)
2681 {
2682 *ptr = swap_uint32(*ptr);
2683 ptr++;
2684 }
2685
2686 /* Get next opcode. */
2687
2688 length = 0;
2689 op = *ptr;
2690 *ptr++ = swap_uint32(op);
2691
2692 switch (op)
2693 {
2694 case OP_END:
2695 return;
2696
2697 default:
2698 length = OP_lengths32[op] - 1;
2699 break;
2700
2701 case OP_CLASS:
2702 case OP_NCLASS:
2703 /* Skip the character bit map. */
2704 ptr += 32/sizeof(pcre_uint32);
2705 length = 0;
2706 break;
2707
2708 case OP_XCLASS:
2709 /* LINK_SIZE can only be 1 in 32-bit mode. */
2710 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2711
2712 /* Reverse the size of the XCLASS instance. */
2713 *ptr = swap_uint32(*ptr);
2714 ptr++;
2715
2716 op = *ptr;
2717 *ptr = swap_uint32(op);
2718 ptr++;
2719 if ((op & XCL_MAP) != 0)
2720 {
2721 /* Skip the character bit map. */
2722 ptr += 32/sizeof(pcre_uint32);
2723 length -= 32/sizeof(pcre_uint32);
2724 }
2725 break;
2726 }
2727 }
2728 /* Control should never reach here in 32 bit mode. */
2729 }
2730
2731 #endif /* SUPPORT_PCRE32 */
2732
2733
2734
2735 static void
2736 regexflip(pcre *ere, pcre_extra *extra)
2737 {
2738 #if defined SUPPORT_PCRE32
2739 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2740 regexflip_32(ere, extra);
2741 #endif
2742 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2743 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2744 regexflip8_or_16(ere, extra);
2745 #endif
2746 }
2747
2748
2749
2750 /*************************************************
2751 * Check match or recursion limit *
2752 *************************************************/
2753
2754 static int
2755 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2756 int start_offset, int options, int *use_offsets, int use_size_offsets,
2757 int flag, unsigned long int *limit, int errnumber, const char *msg)
2758 {
2759 int count;
2760 int min = 0;
2761 int mid = 64;
2762 int max = -1;
2763
2764 extra->flags |= flag;
2765
2766 for (;;)
2767 {
2768 *limit = mid;
2769
2770 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2771 use_offsets, use_size_offsets);
2772
2773 if (count == errnumber)
2774 {
2775 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2776 min = mid;
2777 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2778 }
2779
2780 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2781 count == PCRE_ERROR_PARTIAL)
2782 {
2783 if (mid == min + 1)
2784 {
2785 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2786 break;
2787 }
2788 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2789 max = mid;
2790 mid = (min + mid)/2;
2791 }
2792 else break; /* Some other error */
2793 }
2794
2795 extra->flags &= ~flag;
2796 return count;
2797 }
2798
2799
2800
2801 /*************************************************
2802 * Case-independent strncmp() function *
2803 *************************************************/
2804
2805 /*
2806 Arguments:
2807 s first string
2808 t second string
2809 n number of characters to compare
2810
2811 Returns: < 0, = 0, or > 0, according to the comparison
2812 */
2813
2814 static int
2815 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2816 {
2817 while (n--)
2818 {
2819 int c = tolower(*s++) - tolower(*t++);
2820 if (c) return c;
2821 }
2822 return 0;
2823 }
2824
2825
2826
2827 /*************************************************
2828 * Check newline indicator *
2829 *************************************************/
2830
2831 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2832 a message and return 0 if there is no match.
2833
2834 Arguments:
2835 p points after the leading '<'
2836 f file for error message
2837
2838 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2839 */
2840
2841 static int
2842 check_newline(pcre_uint8 *p, FILE *f)
2843 {
2844 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2845 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2846 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2847 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2848 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2849 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2850 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2851 fprintf(f, "Unknown newline type at: <%s\n", p);
2852 return 0;
2853 }
2854
2855
2856
2857 /*************************************************
2858 * Usage function *
2859 *************************************************/
2860
/* Writes the command-line help text to stdout. Lines for optional features
appear only when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
  " -C show PCRE compile-time options and exit\n"
  " -C arg show a specific compile-time option\n"
  " and exit with its value. The arg can be:\n"
  " linksize internal link size [2, 3, 4]\n"
  " pcre8 8 bit library support enabled [0, 1]\n"
  " pcre16 16 bit library support enabled [0, 1]\n"
  " pcre32 32 bit library support enabled [0, 1]\n"
  " utf Unicode Transformation Format supported [0, 1]\n"
  " ucp Unicode Properties supported [0, 1]\n"
  " jit Just-in-time compiler supported [0, 1]\n"
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
  " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
  " -i show information about compiled patterns\n"
  " -M find MATCH_LIMIT minimum for each subject\n"
  " -m output memory used information\n"
  " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
  " -S <n> set stack size to <n> megabytes\n"
  " -s force each pattern to be studied at basic level\n"
  " -s+ force each pattern to be studied, using JIT if available\n"
  " -s++ ditto, verifying when JIT was actually used\n"
  " -s+n force each pattern to be studied, using JIT if available,\n"
  " where 1 <= n <= 7 selects JIT options\n"
  " -s++n ditto, verifying when JIT was actually used\n"
  " -t time compilation and execution\n"
  " -t <n> time compilation and execution, repeating <n> times\n"
  " -tm time execution (matching) only\n"
  " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2915
2916
2917
2918 /*************************************************
2919 * Main Program *
2920 *************************************************/
2921
2922 /* Read lines from named file or stdin and write to named file or stdout; lines
2923 consist of a regular expression, in delimiters and optionally followed by
2924 options, followed by a set of test data, terminated by an empty line. */
2925
2926 int main(int argc, char **argv)
2927 {
2928 FILE *infile = stdin;
2929 const char *version;
2930 int options = 0;
2931 int study_options = 0;
2932 int default_find_match_limit = FALSE;
2933 int op = 1;
2934 int timeit = 0;
2935 int timeitm = 0;
2936 int showinfo = 0;
2937 int showstore = 0;
2938 int force_study = -1;
2939 int force_study_options = 0;
2940 int quiet = 0;
2941 int size_offsets = 45;
2942 int size_offsets_max;
2943 int *offsets = NULL;
2944 int debug = 0;
2945 int done = 0;
2946 int all_use_dfa = 0;
2947 int verify_jit = 0;
2948 int yield = 0;
2949 int stack_size;
2950 pcre_uint8 *dbuffer = NULL;
2951 size_t dbuffer_size = 1u << 14;
2952
2953 #if !defined NOPOSIX
2954 int posix = 0;
2955 #endif
2956 #if !defined NODFA
2957 int *dfa_workspace = NULL;
2958 #endif
2959
2960 pcre_jit_stack *jit_stack = NULL;
2961
2962 /* These vectors store, end-to-end, a list of zero-terminated captured
2963 substring names, each list itself being terminated by an empty name. Assume
2964 that 1024 is plenty long enough for the few names we'll be testing. It is
2965 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2966 for the actual memory, to ensure alignment. */
2967
2968 pcre_uint32 copynames[1024];
2969 pcre_uint32 getnames[1024];
2970
2971 #ifdef SUPPORT_PCRE32
2972 pcre_uint32 *cn32ptr;
2973 pcre_uint32 *gn32ptr;
2974 #endif
2975
2976 #ifdef SUPPORT_PCRE16
2977 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2978 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2979 pcre_uint16 *cn16ptr;
2980 pcre_uint16 *gn16ptr;
2981 #endif
2982
2983 #ifdef SUPPORT_PCRE8
2984 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2985 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2986 pcre_uint8 *cn8ptr;
2987 pcre_uint8 *gn8ptr;
2988 #endif
2989
2990 /* Get buffers from malloc() so that valgrind will check their misuse when
2991 debugging. They grow automatically when very long lines are read. The 16-
2992 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2993
2994 buffer = (pcre_uint8 *)malloc(buffer_size);
2995 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2996
2997 /* The outfile variable is static so that new_malloc can use it. */
2998
2999 outfile = stdout;
3000
3001 /* The following _setmode() stuff is some Windows magic that tells its runtime
3002 library to translate CRLF into a single LF character. At least, that's what
3003 I've been told: never having used Windows I take this all on trust. Originally
3004 it set 0x8000, but then I was advised that _O_BINARY was better. */
3005
3006 #if defined(_WIN32) || defined(WIN32)
3007 _setmode( _fileno( stdout ), _O_BINARY );
3008 #endif
3009
3010 /* Get the version number: both pcre_version() and pcre16_version() give the
3011 same answer. We just need to ensure that we call one that is available. */
3012
3013 #if defined SUPPORT_PCRE8
3014 version = pcre_version();
3015 #elif defined SUPPORT_PCRE16
3016 version = pcre16_version();
3017 #elif defined SUPPORT_PCRE32
3018 version = pcre32_version();
3019 #endif
3020
3021 /* Scan options */
3022
3023 while (argc > 1 && argv[op][0] == '-')
3024 {
3025 pcre_uint8 *endptr;
3026 char *arg = argv[op];
3027
3028 if (strcmp(arg, "-m") == 0) showstore = 1;
3029 else if (strcmp(arg, "-s") == 0) force_study = 0;
3030
3031 else if (strncmp(arg, "-s+", 3) == 0)
3032 {
3033 arg += 3;
3034 if (*arg == '+') { arg++; verify_jit = TRUE; }
3035 force_study = 1;
3036 if (*arg == 0)
3037 force_study_options = jit_study_bits[6];
3038 else if (*arg >= '1' && *arg <= '7')
3039 force_study_options = jit_study_bits[*arg - '1'];
3040 else goto BAD_ARG;
3041 }
3042 else if (strcmp(arg, "-8") == 0)
3043 {
3044 #ifdef SUPPORT_PCRE8
3045 pcre_mode = PCRE8_MODE;
3046 #else
3047 printf("** This version of PCRE was built without 8-bit support\n");
3048 exit(1);
3049 #endif
3050 }
3051 else if (strcmp(arg, "-16") == 0)
3052 {
3053 #ifdef SUPPORT_PCRE16
3054 pcre_mode = PCRE16_MODE;
3055 #else
3056 printf("** This version of PCRE was built without 16-bit support\n");
3057 exit(1);
3058 #endif
3059 }
3060 else if (strcmp(arg, "-32") == 0)
3061 {
3062 #ifdef SUPPORT_PCRE32
3063 pcre_mode = PCRE32_MODE;
3064 #else
3065 printf("** This version of PCRE was built without 32-bit support\n");
3066 exit(1);
3067 #endif
3068 }
3069 else if (strcmp(arg, "-q") == 0) quiet = 1;
3070 else if (strcmp(arg, "-b") == 0) debug = 1;
3071 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3072 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3073 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3074 #if !defined NODFA
3075 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3076 #endif
3077 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3078 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3079 *endptr == 0))
3080 {
3081 op++;
3082 argc--;
3083 }
3084 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3085 {
3086 int both = arg[2] == 0;
3087 int temp;
3088 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3089 *endptr == 0))
3090 {
3091 timeitm = temp;
3092 op++;
3093 argc--;
3094 }
3095 else timeitm = LOOPREPEAT;
3096 if (both) timeit = timeitm;
3097 }
3098 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3099 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3100 *endptr == 0))
3101 {
3102 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3103 printf("PCRE: -S not supported on this OS\n");
3104 exit(1);
3105 #else
3106 int rc;
3107 struct rlimit rlim;
3108 getrlimit(RLIMIT_STACK, &rlim);
3109 rlim.rlim_cur = stack_size * 1024 * 1024;
3110 rc = setrlimit(RLIMIT_STACK, &rlim);
3111 if (rc != 0)
3112 {
3113 printf("PCRE: setrlimit() failed with error %d\n", rc);
3114 exit(1);
3115 }
3116 op++;
3117 argc--;
3118 #endif
3119 }
3120 #if !defined NOPOSIX
3121 else if (strcmp(arg, "-p") == 0) posix = 1;
3122 #endif
3123 else if (strcmp(arg, "-C") == 0)
3124 {
3125 int rc;
3126 unsigned long int lrc;
3127
3128 if (argc > 2)
3129 {
3130 if (strcmp(argv[op + 1], "linksize") == 0)
3131 {
3132 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3133 printf("%d\n", rc);
3134 yield = rc;
3135 }
3136 else if (strcmp(argv[op + 1], "pcre8") == 0)
3137 {
3138 #ifdef SUPPORT_PCRE8
3139 printf("1\n");
3140 yield = 1;
3141 #else
3142 printf("0\n");
3143 yield = 0;
3144 #endif
3145 }
3146 else if (strcmp(argv[op + 1], "pcre16") == 0)
3147 {
3148 #ifdef SUPPORT_PCRE16
3149 printf("1\n");
3150 yield = 1;
3151 #else
3152 printf("0\n");
3153 yield = 0;
3154 #endif
3155 }
3156 else if (strcmp(argv[op + 1], "pcre32") == 0)
3157 {
3158 #ifdef SUPPORT_PCRE32
3159 printf("1\n");
3160 yield = 1;
3161 #else
3162 printf("0\n");
3163 yield = 0;
3164 #endif
3165 goto EXIT;
3166 }
3167 if (strcmp(argv[op + 1], "utf") == 0)
3168 {
3169 #ifdef SUPPORT_PCRE8
3170 if (pcre_mode == PCRE8_MODE)
3171 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3172 #endif
3173 #ifdef SUPPORT_PCRE16
3174 if (pcre_mode == PCRE16_MODE)
3175 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3176 #endif
3177 #ifdef SUPPORT_PCRE32
3178 if (pcre_mode == PCRE32_MODE)
3179 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3180 #endif
3181 printf("%d\n", rc);
3182 yield = rc;
3183 goto EXIT;
3184 }
3185 else if (strcmp(argv[op + 1], "ucp") == 0)
3186 {
3187 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3188 printf("%d\n", rc);
3189 yield = rc;
3190 }
3191 else if (strcmp(argv[op + 1], "jit") == 0)
3192 {
3193 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3194 printf("%d\n", rc);
3195 yield = rc;
3196 }
3197 else if (strcmp(argv[op + 1], "newline") == 0)
3198 {
3199 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3200 print_newline_config(rc, TRUE);
3201 }
3202 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3203 {
3204 #ifdef EBCDIC
3205 printf("1\n");
3206 yield = 1;
3207 #else
3208 printf("0\n");
3209 #endif
3210 }
3211 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3212 {
3213 #ifdef EBCDIC
3214 printf("0x%02x\n", CHAR_LF);
3215 #else
3216 printf("0\n");
3217 #endif
3218 }
3219 else
3220 {
3221 printf("Unknown -C option: %s\n", argv[op + 1]);
3222 }
3223 goto EXIT;
3224 }
3225
3226 /* No argument for -C: output all configuration information. */
3227
3228 printf("PCRE version %s\n", version);
3229 printf("Compiled with\n");
3230
3231 #ifdef EBCDIC
3232 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3233 #endif
3234
3235 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3236 are set, either both UTFs are supported or both are not supported. */
3237
3238 #ifdef SUPPORT_PCRE8
3239 printf(" 8-bit support\n");
3240 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3241 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3242 #endif
3243 #ifdef SUPPORT_PCRE16
3244 printf(" 16-bit support\n");
3245 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3246 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3247 #endif
3248 #ifdef SUPPORT_PCRE32
3249 printf(" 32-bit support\n");
3250 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3251 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3252 #endif
3253
3254 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3255 printf(" %sUnicode properties support\n", rc? "" : "No ");
3256 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3257 if (rc)
3258 {
3259 const char *arch;
3260 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3261 printf(" Just-in-time compiler support: %s\n", arch);
3262 }
3263 else
3264 printf(" No just-in-time compiler support\n");
3265 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3266 print_newline_config(rc, FALSE);
3267 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3268 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3269 "all Unicode newlines");
3270 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3271 printf(" Internal link size = %d\n", rc);
3272 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3273 printf(" POSIX malloc threshold = %d\n", rc);
3274 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3275 printf(" Default match limit = %ld\n", lrc);
3276 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3277 printf(" Default recursion depth limit = %ld\n", lrc);
3278 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3279 printf(" Match recursion uses %s", rc? "stack" : "heap");
3280 if (showstore)
3281 {
3282 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3283 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3284 }
3285 printf("\n");
3286 goto EXIT;
3287 }
3288 else if (strcmp(arg, "-help") == 0 ||
3289 strcmp(arg, "--help") == 0)
3290 {
3291 usage();
3292 goto EXIT;
3293 }
3294 else
3295 {
3296 BAD_ARG:
3297 printf("** Unknown or malformed option %s\n", arg);
3298 usage();
3299 yield = 1;
3300 goto EXIT;
3301 }
3302 op++;
3303 argc--;
3304 }
3305
3306 /* Get the store for the offsets vector, and remember what it was */
3307
3308 size_offsets_max = size_offsets;
3309 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3310 if (offsets == NULL)
3311 {
3312 printf("** Failed to get %d bytes of memory for offsets vector\n",
3313 (int)(size_offsets_max * sizeof(int)));
3314 yield = 1;
3315 goto EXIT;
3316 }
3317
3318 /* Sort out the input and output files */
3319
3320 if (argc > 1)
3321 {
3322 infile = fopen(argv[op], INPUT_MODE);
3323 if (infile == NULL)
3324 {
3325 printf("** Failed to open %s\n", argv[op]);
3326 yield = 1;
3327 goto EXIT;
3328 }
3329 }
3330
3331 if (argc > 2)
3332 {
3333 outfile = fopen(argv[op+1], OUTPUT_MODE);
3334 if (outfile == NULL)
3335 {
3336 printf("** Failed to open %s\n", argv[op+1]);
3337 yield = 1;
3338 goto EXIT;
3339 }
3340 }
3341
3342 /* Set alternative malloc function */
3343
3344 #ifdef SUPPORT_PCRE8
3345 pcre_malloc = new_malloc;
3346 pcre_free = new_free;
3347 pcre_stack_malloc = stack_malloc;
3348 pcre_stack_free = stack_free;
3349 #endif
3350
3351 #ifdef SUPPORT_PCRE16
3352 pcre16_malloc = new_malloc;
3353 pcre16_free = new_free;
3354 pcre16_stack_malloc = stack_malloc;
3355 pcre16_stack_free = stack_free;
3356 #endif
3357
3358 #ifdef SUPPORT_PCRE32
3359 pcre32_malloc = new_malloc;
3360 pcre32_free = new_free;
3361 pcre32_stack_malloc = stack_malloc;
3362 pcre32_stack_free = stack_free;
3363 #endif
3364
3365 /* Heading line unless quiet, then prompt for first regex if stdin */
3366
3367 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3368
3369 /* Main loop */
3370
3371 while (!done)
3372 {
3373 pcre *re = NULL;
3374 pcre_extra *extra = NULL;
3375
3376 #if !defined NOPOSIX /* There are still compilers that require no indent */
3377 regex_t preg;
3378 int do_posix = 0;
3379 #endif
3380
3381 const char *error;
3382 pcre_uint8 *markptr;
3383 pcre_uint8 *p, *pp, *ppp;
3384 pcre_uint8 *to_file = NULL;
3385 const pcre_uint8 *tables = NULL;
3386 unsigned long int get_options;
3387 unsigned long int true_size, true_study_size = 0;
3388 size_t size, regex_gotten_store;
3389 int do_allcaps = 0;
3390 int do_mark = 0;
3391 int do_study = 0;
3392 int no_force_study = 0;
3393 int do_debug = debug;
3394 int do_G = 0;
3395 int do_g = 0;
3396 int do_showinfo = showinfo;
3397 int do_showrest = 0;
3398 int do_showcaprest = 0;
3399 int do_flip = 0;
3400 int erroroffset, len, delimiter, poffset;
3401
3402 #if !defined NODFA
3403 int dfa_matched = 0;
3404 #endif
3405
3406 use_utf = 0;
3407 debug_lengths = 1;
3408
3409 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3410 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3411 fflush(outfile);
3412
3413 p = buffer;
3414 while (isspace(*p)) p++;
3415 if (*p == 0) continue;
3416
3417 /* See if the pattern is to be loaded pre-compiled from a file. */
3418
3419 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3420 {
3421 pcre_uint32 magic;
3422 pcre_uint8 sbuf[8];
3423 FILE *f;
3424
3425 p++;
3426 if (*p == '!')
3427 {
3428 do_debug = TRUE;
3429 do_showinfo = TRUE;
3430 p++;
3431 }
3432
3433 pp = p + (int)strlen((char *)p);
3434 while (isspace(pp[-1])) pp--;
3435 *pp = 0;
3436
3437 f = fopen((char *)p, "rb");
3438 if (f == NULL)
3439 {
3440 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3441 continue;
3442 }
3443
3444 first_gotten_store = 0;
3445 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3446
3447 true_size =
3448 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3449 true_study_size =
3450 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3451
3452 re = (pcre *)new_malloc(true_size);
3453 if (re == NULL)
3454 {
3455 printf("** Failed to get %d bytes of memory for pcre object\n",
3456 (int)true_size);
3457 yield = 1;
3458 goto EXIT;
3459 }
3460 regex_gotten_store = first_gotten_store;
3461
3462 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3463
3464 magic = REAL_PCRE_MAGIC(re);
3465 if (magic != MAGIC_NUMBER)
3466 {
3467 if (swap_uint32(magic) == MAGIC_NUMBER)
3468 {
3469 do_flip = 1;
3470 }
3471 else
3472 {
3473 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3474 new_free(re);
3475 fclose(f);
3476 continue;
3477 }
3478 }
3479
3480 /* We hide the byte-invert info for little and big endian tests. */
3481 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3482 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3483
3484 /* Now see if there is any following study data. */
3485
3486 if (true_study_size != 0)
3487 {
3488 pcre_study_data *psd;
3489
3490 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3491 extra->flags = PCRE_EXTRA_STUDY_DATA;
3492
3493 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3494 extra->study_data = psd;
3495
3496 if (fread(psd, 1, true_study_size, f) != true_study_size)
3497 {
3498 FAIL_READ:
3499 fprintf(outfile, "Failed to read data from %s\n", p);
3500 if (extra != NULL)
3501 {
3502 PCRE_FREE_STUDY(extra);
3503 }
3504 new_free(re);
3505 fclose(f);
3506 continue;
3507 }
3508 fprintf(outfile, "Study data loaded from %s\n", p);
3509 do_study = 1; /* To get the data output if requested */
3510 }
3511 else fprintf(outfile, "No study data\n");
3512
3513 /* Flip the necessary bytes. */
3514 if (do_flip)
3515 {
3516 int rc;
3517 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3518 if (rc == PCRE_ERROR_BADMODE)
3519 {
3520 pcre_uint16 flags_in_host_byte_order;
3521 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3522 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3523 else
3524 flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3525 /* Simulate the result of the function call below. */
3526 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3527 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3528 PCRE_INFO_OPTIONS);
3529 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3530 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3531 new_free(re);
3532 fclose(f);
3533 continue;
3534 }
3535 }
3536
3537 /* Need to know if UTF-8 for printing data strings. */
3538
3539 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3540 {
3541 new_free(re);
3542 fclose(f);
3543 continue;
3544 }
3545 use_utf = (get_options & PCRE_UTF8) != 0;
3546
3547 fclose(f);
3548 goto SHOW_INFO;
3549 }
3550
3551 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3552 the pattern; if it isn't complete, read more. */
3553
3554 delimiter = *p++;
3555
3556 if (isalnum(delimiter) || delimiter == '\\')
3557 {
3558 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3559 goto SKIP_DATA;
3560 }
3561
3562 pp = p;
3563 poffset = (int)(p - buffer);
3564
3565 for(;;)
3566 {
3567 while (*pp != 0)
3568 {
3569 if (*pp == '\\' && pp[1] != 0) pp++;
3570 else if (*pp == delimiter) break;
3571 pp++;
3572 }
3573 if (*pp != 0) break;
3574 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3575 {
3576 fprintf(outfile, "** Unexpected EOF\n");
3577 done = 1;
3578 goto CONTINUE;
3579 }
3580 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3581 }
3582
3583 /* The buffer may have moved while being extended; reset the start of data
3584 pointer to the correct relative point in the buffer. */
3585
3586 p = buffer + poffset;
3587
3588 /* If the first character after the delimiter is backslash, make
3589 the pattern end with backslash. This is purely to provide a way
3590 of testing for the error message when a pattern ends with backslash. */
3591
3592 if (pp[1] == '\\') *pp++ = '\\';
3593
3594 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3595 for callouts. */
3596
3597 *pp++ = 0;
3598 strcpy((char *)pbuffer, (char *)p);
3599
3600 /* Look for options after final delimiter */
3601
3602 options = 0;
3603 study_options = force_study_options;
3604 log_store = showstore; /* default from command line */
3605
3606 while (*pp != 0)
3607 {
3608 switch (*pp++)
3609 {
3610 case 'f': options |= PCRE_FIRSTLINE; break;
3611 case 'g': do_g = 1; break;
3612 case 'i': options |= PCRE_CASELESS; break;
3613 case 'm': options |= PCRE_MULTILINE; break;
3614 case 's': options |= PCRE_DOTALL; break;
3615 case 'x': options |= PCRE_EXTENDED; break;
3616
3617 case '+':
3618 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3619 break;
3620
3621 case '=': do_allcaps = 1; break;
3622 case 'A': options |= PCRE_ANCHORED; break;
3623 case 'B': do_debug = 1; break;
3624 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3625 case 'D': do_debug = do_showinfo = 1; break;
3626 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3627 case 'F': do_flip = 1; break;
3628 case 'G': do_G = 1; break;
3629 case 'I': do_showinfo = 1; break;
3630 case 'J': options |= PCRE_DUPNAMES; break;
3631 case 'K': do_mark = 1; break;
3632 case 'M': log_store = 1; break;
3633 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3634
3635 #if !defined NOPOSIX
3636 case 'P': do_posix = 1; break;
3637 #endif
3638
3639 case 'S':
3640 do_study = 1;
3641 for (;;)
3642 {
3643 switch (*pp++)
3644 {
3645 case 'S':
3646 do_study = 0;
3647 no_force_study = 1;
3648 break;
3649
3650 case '!':
3651 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3652 break;
3653
3654 case '+':
3655 if (*pp == '+')
3656 {
3657 verify_jit = TRUE;
3658 pp++;
3659 }
3660 if (*pp >= '1' && *pp <= '7')
3661 study_options |= jit_study_bits[*pp++ - '1'];
3662 else
3663 study_options |= jit_study_bits[6];
3664 break;
3665
3666 case '-':
3667 study_options &= ~PCRE_STUDY_ALLJIT;
3668 break;
3669
3670 default:
3671 pp--;
3672 goto ENDLOOP;
3673 }
3674 }
3675 ENDLOOP:
3676 break;
3677
3678 case 'U': options |= PCRE_UNGREEDY; break;
3679 case 'W': options |= PCRE_UCP; break;
3680 case 'X': options |= PCRE_EXTRA; break;
3681 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3682 case 'Z': debug_lengths = 0; break;
3683 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3684 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3685
3686 case 'T':
3687 switch (*pp++)
3688 {
3689 case '0': tables = tables0; break;
3690 case '1': tables = tables1; break;
3691
3692 case '\r':
3693 case '\n':
3694 case ' ':
3695 case 0:
3696 fprintf(outfile, "** Missing table number after /T\n");
3697 goto SKIP_DATA;
3698
3699 default:
3700 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3701 goto SKIP_DATA;
3702 }
3703 break;
3704
3705 case 'L':
3706 ppp = pp;
3707 /* The '\r' test here is so that it works on Windows. */
3708 /* The '0' test is just in case this is an unterminated line. */
3709 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3710 *ppp = 0;
3711 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3712 {
3713 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3714 goto SKIP_DATA;
3715 }
3716 locale_set = 1;
3717 tables = PCRE_MAKETABLES;
3718 pp = ppp;
3719 break;
3720
3721 case '>':
3722 to_file = pp;
3723 while (*pp != 0) pp++;
3724 while (isspace(pp[-1])) pp--;
3725 *pp = 0;
3726 break;
3727
3728 case '<':
3729 {
3730 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3731 {
3732 options |= PCRE_JAVASCRIPT_COMPAT;
3733 pp += 3;
3734 }
3735 else
3736 {
3737 int x = check_newline(pp, outfile);
3738 if (x == 0) goto SKIP_DATA;
3739 options |= x;
3740 while (*pp++ != '>');
3741 }
3742 }
3743 break;
3744
3745 case '\r': /* So that it works in Windows */
3746 case '\n':
3747 case ' ':
3748 break;
3749
3750 default:
3751 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3752 goto SKIP_DATA;
3753 }
3754 }
3755
3756 /* Handle compiling via the POSIX interface, which doesn't support the
3757 timing, showing, or debugging options, nor the ability to pass over
3758 local character tables. Neither does it have 16-bit support. */
3759
3760 #if !defined NOPOSIX
3761 if (posix || do_posix)
3762 {
3763 int rc;
3764 int cflags = 0;
3765
3766 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3767 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3768 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3769 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3770 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3771 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3772 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3773
3774 first_gotten_store = 0;
3775 rc = regcomp(&preg, (char *)p, cflags);
3776
3777 /* Compilation failed; go back for another re, skipping to blank line
3778 if non-interactive. */
3779
3780 if (rc != 0)
3781 {
3782 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3783 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3784 goto SKIP_DATA;
3785 }
3786 }
3787
3788 /* Handle compiling via the native interface */
3789
3790 else
3791 #endif /* !defined NOPOSIX */
3792
3793 {
3794 /* In 16- or 32-bit mode, convert the input. */
3795
3796 #ifdef SUPPORT_PCRE16
3797 if (pcre_mode == PCRE16_MODE)
3798 {
3799 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3800 {
3801 case -1:
3802 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3803 "converted to UTF-16\n");
3804 goto SKIP_DATA;
3805
3806 case -2:
3807 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3808 "cannot be converted to UTF-16\n");
3809 goto SKIP_DATA;
3810
3811 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3812 fprintf(outfile, "**Failed: character value greater than 0xffff "
3813 "cannot be converted to 16-bit in non-UTF mode\n");
3814 goto SKIP_DATA;
3815
3816 default:
3817 break;
3818 }
3819 p = (pcre_uint8 *)buffer16;
3820 }
3821 #endif
3822
3823 #ifdef SUPPORT_PCRE32
3824 if (pcre_mode == PCRE32_MODE)
3825 {
3826 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3827 {
3828 case -1:
3829 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3830 "converted to UTF-32\n");
3831 goto SKIP_DATA;
3832
3833 case -2:
3834 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3835 "cannot be converted to UTF-32\n");
3836 goto SKIP_DATA;
3837
3838 case -3:
3839 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3840 goto SKIP_DATA;
3841
3842 default:
3843 break;
3844 }
3845 p = (pcre_uint8 *)buffer32;
3846 }
3847 #endif
3848
3849 /* Compile many times when timing */
3850
3851 if (timeit > 0)
3852 {
3853 register int i;
3854 clock_t time_taken;
3855 clock_t start_time = clock();
3856 for (i = 0; i < timeit; i++)
3857 {
3858 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3859 if (re != NULL) free(re);
3860 }
3861 time_taken = clock() - start_time;
3862 fprintf(outfile, "Compile time %.4f milliseconds\n",
3863 (((double)time_taken * 1000.0) / (double)timeit) /
3864 (double)CLOCKS_PER_SEC);
3865 }
3866
3867 first_gotten_store = 0;
3868 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3869
3870 /* Compilation failed; go back for another re, skipping to blank line
3871 if non-interactive. */
3872
3873 if (re == NULL)
3874 {
3875 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3876 SKIP_DATA:
3877 if (infile != stdin)
3878 {
3879 for (;;)
3880 {
3881 if (extend_inputline(infile, buffer, NULL) == NULL)
3882 {
3883 done = 1;
3884 goto CONTINUE;
3885 }
3886 len = (int)strlen((char *)buffer);
3887 while (len > 0 && isspace(buffer[len-1])) len--;
3888 if (len == 0) break;
3889 }
3890 fprintf(outfile, "\n");
3891 }
3892 goto CONTINUE;
3893 }
3894
3895 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3896 within the regex; check for this so that we know how to process the data
3897 lines. */
3898
3899 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3900 goto SKIP_DATA;
3901 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3902
3903 /* Extract the size for possible writing before possibly flipping it,
3904 and remember the store that was got. */
3905
3906 true_size = REAL_PCRE_SIZE(re);
3907 regex_gotten_store = first_gotten_store;
3908
3909 /* Output code size information if requested */
3910
3911 if (log_store)
3912 {
3913 int name_count, name_entry_size, real_pcre_size;
3914
3915 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3916 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3917 real_pcre_size = 0;
3918 #ifdef SUPPORT_PCRE8
3919 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3920 real_pcre_size = sizeof(real_pcre);
3921 #endif
3922 #ifdef SUPPORT_PCRE16
3923 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3924 real_pcre_size = sizeof(real_pcre16);
3925 #endif
3926 #ifdef SUPPORT_PCRE32
3927 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3928 real_pcre_size = sizeof(real_pcre32);
3929 #endif
3930 fprintf(outfile, "Memory allocation (code space): %d\n",
3931 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3932 }
3933
3934 /* If -s or /S was present, study the regex to generate additional info to
3935 help with the matching, unless the pattern has the SS option, which
3936 suppresses the effect of /S (used for a few test patterns where studying is
3937 never sensible). */
3938
3939 if (do_study || (force_study >= 0 && !no_force_study))
3940 {
3941 if (timeit > 0)
3942 {
3943 register int i;
3944 clock_t time_taken;
3945 clock_t start_time = clock();
3946 for (i = 0; i < timeit; i++)
3947 {
3948 PCRE_STUDY(extra, re, study_options, &error);
3949 }
3950 time_taken = clock() - start_time;
3951 if (extra != NULL)
3952 {
3953 PCRE_FREE_STUDY(extra);
3954 }
3955 fprintf(outfile, " Study time %.4f milliseconds\n",
3956 (((double)time_taken * 1000.0) / (double)timeit) /
3957 (double)CLOCKS_PER_SEC);
3958 }
3959 PCRE_STUDY(extra, re, study_options, &error);
3960 if (error != NULL)
3961 fprintf(outfile, "Failed to study: %s\n", error);
3962 else if (extra != NULL)
3963 {
3964 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3965 if (log_store)
3966 {
3967 size_t jitsize;
3968 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3969 jitsize != 0)
3970 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3971 }
3972 }
3973 }
3974
3975 /* If /K was present, we set up for handling MARK data. */
3976
3977 if (do_mark)
3978 {
3979 if (extra == NULL)
3980 {
3981 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3982 extra->flags = 0;
3983 }
3984 extra->mark = &markptr;
3985 extra->flags |= PCRE_EXTRA_MARK;
3986 }
3987
3988 /* Extract and display information from the compiled data if required. */
3989
3990 SHOW_INFO:
3991
3992 if (do_debug)
3993 {
3994 fprintf(outfile, "------------------------------------------------------------------\n");
3995 PCRE_PRINTINT(re, outfile, debug_lengths);
3996 }
3997
3998 /* We already have the options in get_options (see above) */
3999
4000 if (do_showinfo)
4001 {
4002 unsigned long int all_options;
4003 pcre_uint32 first_char, need_char;
4004 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4005 hascrorlf, maxlookbehind;
4006 int nameentrysize, namecount;
4007 const pcre_uint8 *nametable;
4008
4009 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4010 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4011 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4012 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4013 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4014 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4015 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4016 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4017 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4018 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4019 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4020 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4021 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4022 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4023 != 0)
4024 goto SKIP_DATA;
4025
4026 if (size != regex_gotten_store) fprintf(outfile,
4027 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4028 (int)size, (int)regex_gotten_store);
4029
4030 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4031 if (backrefmax > 0)
4032 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4033
4034 if (namecount > 0)
4035 {
4036 fprintf(outfile, "Named capturing subpatterns:\n");
4037 while (namecount-- > 0)
4038 {
4039 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4040 int length = (int)STRLEN(nametable + imm2_size);
4041 fprintf(outfile, " ");
4042 PCHARSV(nametable, imm2_size, length, outfile);
4043 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4044 #ifdef SUPPORT_PCRE32
4045 if (pcre_mode == PCRE32_MODE)
4046 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4047 #endif
4048 #ifdef SUPPORT_PCRE16
4049 if (pcre_mode == PCRE16_MODE)
4050 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4051 #endif
4052 #ifdef SUPPORT_PCRE8
4053 if (pcre_mode == PCRE8_MODE)
4054 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4055 #endif
4056 nametable += nameentrysize * CHAR_SIZE;
4057 }
4058 }
4059
4060 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4061 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4062
4063 all_options = REAL_PCRE_OPTIONS(re);
4064 if (do_flip) all_options = swap_uint32(all_options);
4065
4066 if (get_options == 0) fprintf(outfile, "No options\n");
4067 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4068 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4069 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4070 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4071 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4072 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4073 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4074 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4075 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4076 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4077 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4078 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4079 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4080 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4081 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4082 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4083 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4084 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4085
4086 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4087
4088 switch (get_options & PCRE_NEWLINE_BITS)
4089 {
4090 case PCRE_NEWLINE_CR:
4091 fprintf(outfile, "Forced newline sequence: CR\n");
4092 break;
4093
4094 case PCRE_NEWLINE_LF:
4095 fprintf(outfile, "Forced newline sequence: LF\n");
4096 break;
4097
4098 case PCRE_NEWLINE_CRLF:
4099 fprintf(outfile, "Forced newline sequence: CRLF\n");
4100 break;
4101
4102 case PCRE_NEWLINE_ANYCRLF:
4103 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4104 break;
4105
4106 case PCRE_NEWLINE_ANY:
4107 fprintf(outfile, "Forced newline sequence: ANY\n");
4108 break;
4109
4110 default:
4111 break;
4112 }
4113
4114 if (first_char_set == 2)
4115 {
4116 fprintf(outfile, "First char at start or follows newline\n");
4117 }
4118 else if (first_char_set == 1)
4119 {
4120 const char *caseless =
4121 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4122 "" : " (caseless)";
4123
4124 if (PRINTOK(first_char))
4125 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4126 else
4127 {
4128 fprintf(outfile, "First char = ");
4129 pchar(first_char, outfile);
4130 fprintf(outfile, "%s\n", caseless);
4131 }
4132 }
4133 else
4134 {
4135 fprintf(outfile, "No first char\n");
4136 }
4137
4138 if (need_char_set == 0)
4139 {
4140 fprintf(outfile, "No need char\n");
4141 }
4142 else
4143 {
4144 const char *caseless =
4145 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4146 "" : " (caseless)";
4147
4148 if (PRINTOK(need_char))
4149 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4150 else
4151 {
4152 fprintf(outfile, "Need char = ");
4153 pchar(need_char, outfile);
4154 fprintf(outfile, "%s\n", caseless);
4155 }
4156 }
4157
4158 if (maxlookbehind > 0)
4159 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4160
4161 /* Don't output study size; at present it is in any case a fixed
4162 value, but it varies, depending on the computer architecture, and
4163 so messes up the test suite. (And with the /F option, it might be
4164 flipped.) If study was forced by an external -s, don't show this
4165 information unless -i or -d was also present. This means that, except
4166 when auto-callouts are involved, the output from runs with and without
4167 -s should be identical. */
4168
4169 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4170 {
4171 if (extra == NULL)
4172 fprintf(outfile, "Study returned NULL\n");
4173 else
4174 {
4175 pcre_uint8 *start_bits = NULL;
4176 int minlength;
4177
4178 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4179 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4180
4181 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4182 {
4183 if (start_bits == NULL)
4184 fprintf(outfile, "No set of starting bytes\n");
4185 else
4186 {
4187 int i;
4188 int c = 24;
4189 fprintf(outfile, "Starting byte set: ");
4190 for (i = 0; i < 256; i++)
4191 {
4192 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4193 {
4194 if (c > 75)
4195 {
4196 fprintf(outfile, "\n ");
4197 c = 2;
4198 }
4199 if (PRINTOK(i) && i != ' ')
4200 {
4201 fprintf(outfile, "%c ", i);
4202 c += 2;
4203 }
4204 else
4205 {
4206 fprintf(outfile, "\\x%02x ", i);
4207 c += 5;
4208 }
4209 }
4210 }
4211 fprintf(outfile, "\n");
4212 }
4213 }
4214 }
4215
4216 /* Show this only if the JIT was set by /S, not by -s. */
4217
4218 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4219 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4220 {
4221 int jit;
4222 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4223 {
4224 if (jit)
4225 fprintf(outfile, "JIT study was successful\n");
4226 else
4227 #ifdef SUPPORT_JIT
4228 fprintf(outfile, "JIT study was not successful\n");
4229 #else
4230 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4231 #endif
4232 }
4233 }
4234 }
4235 }
4236
4237 /* If the '>' option was present, we write out the regex to a file, and
4238 that is all. The first 8 bytes of the file are the regex length and then
4239 the study length, in big-endian order. */
4240
4241 if (to_file != NULL)
4242 {
4243 FILE *f = fopen((char *)to_file, "wb");
4244 if (f == NULL)
4245 {
4246 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4247 }
4248 else
4249 {
4250 pcre_uint8 sbuf[8];
4251
4252 if (do_flip) regexflip(re, extra);
4253 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4254 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4255 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4256 sbuf[3] = (pcre_uint8)((true_size) & 255);
4257 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4258 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4259 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4260 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4261
4262 if (fwrite(sbuf, 1, 8, f) < 8 ||
4263 fwrite(re, 1, true_size, f) < true_size)
4264 {
4265 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4266 }
4267 else
4268 {
4269 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4270
4271 /* If there is study data, write it. */
4272
4273 if (extra != NULL)
4274 {
4275 if (fwrite(extra->study_data, 1, true_study_size, f) <
4276 true_study_size)
4277 {
4278 fprintf(outfile, "Write error on %s: %s\n", to_file,
4279 strerror(errno));
4280 }
4281 else fprintf(outfile, "Study data written to %s\n", to_file);
4282 }
4283 }
4284 fclose(f);
4285 }
4286
4287 new_free(re);
4288 if (extra != NULL)
4289 {
4290 PCRE_FREE_STUDY(extra);
4291 }
4292 if (locale_set)
4293 {
4294 new_free((void *)tables);
4295 setlocale(LC_CTYPE, "C");
4296 locale_set = 0;
4297 }
4298 continue; /* With next regex */
4299 }
4300 } /* End of non-POSIX compile */
4301
4302 /* Read data lines and test them */
4303
4304 for (;;)
4305 {
4306 #ifdef SUPPORT_PCRE8
4307 pcre_uint8 *q8;
4308 #endif
4309 #ifdef SUPPORT_PCRE16
4310 pcre_uint16 *q16;
4311 #endif
4312 #ifdef SUPPORT_PCRE32
4313 pcre_uint32 *q32;
4314 #endif
4315 pcre_uint8 *bptr;
4316 int *use_offsets = offsets;
4317 int use_size_offsets = size_offsets;
4318 int callout_data = 0;
4319 int callout_data_set = 0;
4320 int count;
4321 pcre_uint32 c;
4322 int copystrings = 0;
4323 int find_match_limit = default_find_match_limit;
4324 int getstrings = 0;
4325 int getlist = 0;
4326 int gmatched = 0;
4327 int start_offset = 0;
4328 int start_offset_sign = 1;
4329 int g_notempty = 0;
4330 int use_dfa = 0;
4331
4332 *copynames = 0;
4333 *getnames = 0;
4334
4335 #ifdef SUPPORT_PCRE32
4336 cn32ptr = copynames;
4337 gn32ptr = getnames;
4338 #endif
4339 #ifdef SUPPORT_PCRE16
4340 cn16ptr = copynames16;
4341 gn16ptr = getnames16;
4342 #endif
4343 #ifdef SUPPORT_PCRE8
4344 cn8ptr = copynames8;
4345 gn8ptr = getnames8;
4346 #endif
4347
4348 SET_PCRE_CALLOUT(callout);
4349 first_callout = 1;
4350 last_callout_mark = NULL;
4351 callout_extra = 0;
4352 callout_count = 0;
4353 callout_fail_count = 999999;
4354 callout_fail_id = -1;
4355 show_malloc = 0;
4356 options = 0;
4357
4358 if (extra != NULL) extra->flags &=
4359 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4360
4361 len = 0;
4362 for (;;)
4363 {
4364 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4365 {
4366 if (len > 0) /* Reached EOF without hitting a newline */
4367 {
4368 fprintf(outfile, "\n");
4369 break;
4370 }
4371 done = 1;
4372 goto CONTINUE;
4373 }
4374 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4375 len = (int)strlen((char *)buffer);
4376 if (buffer[len-1] == '\n') break;
4377 }
4378
4379 while (len > 0 && isspace(buffer[len-1])) len--;
4380 buffer[len] = 0;
4381 if (len == 0) break;
4382
4383 p = buffer;
4384 while (isspace(*p)) p++;
4385
4386 #ifndef NOUTF
4387 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4388 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4389 if (use_utf)
4390 {
4391 pcre_uint8 *q;
4392 pcre_uint32 cc;
4393 int n = 1;
4394
4395 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4396 if (n <= 0)
4397 {
4398 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4399 goto NEXT_DATA;
4400 }
4401 }
4402 #endif
4403
4404 #ifdef SUPPORT_VALGRIND
4405 /* Mark the dbuffer as addressable but undefined again. */
4406 if (dbuffer != NULL)
4407 {
4408 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4409 }
4410 #endif
4411
4412 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4413 the number of pcre_uchar units that will be needed. */
4414 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4415 {
4416 dbuffer_size *= 2;
4417 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4418 if (dbuffer == NULL)
4419 {
4420 fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size);
4421 exit(1);
4422 }
4423 }
4424
4425 #ifdef SUPPORT_PCRE8
4426 q8 = (pcre_uint8 *) dbuffer;
4427 #endif
4428 #ifdef SUPPORT_PCRE16
4429 q16 = (pcre_uint16 *) dbuffer;
4430 #endif
4431 #ifdef SUPPORT_PCRE32
4432 q32 = (pcre_uint32 *) dbuffer;
4433 #endif
4434
4435 while ((c = *p++) != 0)
4436 {
4437 int i = 0;
4438 int n = 0;
4439
4440 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4441 In non-UTF mode, allow the value of the byte to fall through to later,
4442 where values greater than 127 are turned into UTF-8 when running in
4443 16-bit or 32-bit mode. */
4444
4445 if (c != '\\')
4446 {
4447 #ifndef NOUTF
4448 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4449 #endif
4450 }
4451
4452 /* Handle backslash escapes */
4453
4454 else switch ((c = *p++))
4455 {
4456 case 'a': c = 7; break;
4457 case 'b': c = '\b'; break;
4458 case 'e': c = 27; break;
4459 case 'f': c = '\f'; break;
4460 case 'n': c = '\n'; break;
4461 case 'r': c = '\r'; break;
4462 case 't': c = '\t'; break;
4463 case 'v': c = '\v'; break;
4464
4465 case '0': case '1': case '2': case '3':
4466 case '4': case '5': case '6': case '7':
4467 c -= '0';
4468 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4469 c = c * 8 + *p++ - '0';
4470 break;
4471
4472 case 'x':
4473 if (*p == '{')
4474 {
4475 pcre_uint8 *pt = p;
4476 c = 0;
4477
4478 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4479 when isxdigit() is a macro that refers to its argument more than
4480 once. This is banned by the C Standard, but apparently happens in at
4481 least one MacOS environment. */
4482
4483 for (pt++; isxdigit(*pt); pt++)
4484 {
4485 if (++i == 9)
4486 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4487 "using only the first eight.\n");
4488 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4489 }
4490 if (*pt == '}')
4491 {
4492 p = pt + 1;
4493 break;
4494 }
4495 /* Not correct form for \x{...}; fall through */
4496 }
4497
4498 /* \x without {} always defines just one byte in 8-bit mode. This
4499 allows UTF-8 characters to be constructed byte by byte, and also allows
4500 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4501 Otherwise, pass it down to later code so that it can be turned into
4502 UTF-8 when running in 16/32-bit mode. */
4503
4504 c = 0;
4505 while (i++ < 2 && isxdigit(*p))
4506 {
4507 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4508 p++;
4509 }
4510 #if !defined NOUTF && defined SUPPORT_PCRE8
4511 if (use_utf && (pcre_mode == PCRE8_MODE))
4512 {
4513 *q8++ = c;
4514 continue;
4515 }
4516 #endif
4517 break;
4518
4519 case 0: /* \ followed by EOF allows for an empty line */
4520 p--;
4521 continue;
4522
4523 case '>':
4524 if (*p == '-')
4525 {
4526 start_offset_sign = -1;
4527 p++;
4528 }
4529 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4530 start_offset *= start_offset_sign;
4531 continue;
4532
4533 case 'A': /* Option setting */
4534 options |= PCRE_ANCHORED;
4535 continue;
4536
4537 case 'B':
4538 options |= PCRE_NOTBOL;
4539 continue;
4540
4541 case 'C':
4542 if (isdigit(*p)) /* Set copy string */
4543 {
4544 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4545 copystrings |= 1 << n;
4546 }
4547 else if (isalnum(*p))
4548 {
4549 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4550 }
4551 else if (*p == '+')
4552 {
4553 callout_extra = 1;
4554 p++;
4555 }
4556 else if (*p == '-')
4557 {
4558 SET_PCRE_CALLOUT(NULL);
4559 p++;
4560 }
4561 else if (*p == '!')
4562 {
4563 callout_fail_id = 0;
4564 p++;
4565 while(isdigit(*p))
4566 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4567 callout_fail_count = 0;
4568 if (*p == '!')
4569 {
4570 p++;
4571 while(isdigit(*p))
4572 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4573 }
4574 }
4575 else if (*p == '*')
4576 {
4577 int sign = 1;
4578 callout_data = 0;
4579 if (*(++p) == '-') { sign = -1; p++; }
4580 while(isdigit(*p))
4581 callout_data = callout_data * 10 + *p++ - '0';
4582 callout_data *= sign;
4583 callout_data_set = 1;
4584 }
4585 continue;
4586
4587 #if !defined NODFA
4588 case 'D':
4589 #if !defined NOPOSIX
4590 if (posix || do_posix)
4591 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4592 else
4593 #endif
4594 use_dfa = 1;
4595 continue;
4596 #endif
4597
4598 #if !defined NODFA
4599 case 'F':
4600 options |= PCRE_DFA_SHORTEST;
4601 continue;
4602 #endif
4603
4604 case 'G':
4605 if (isdigit(*p))
4606 {
4607 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4608 getstrings |= 1 << n;
4609 }
4610 else if (isalnum(*p))
4611 {
4612 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4613 }
4614 continue;
4615
4616 case 'J':
4617 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4618 if (extra != NULL
4619 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4620 && extra->executable_jit != NULL)
4621 {
4622 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4623 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4624 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4625 }
4626 continue;
4627
4628 case 'L':
4629 getlist = 1;
4630 continue;
4631
4632 case 'M':
4633 find_match_limit = 1;
4634 continue;
4635
4636 case 'N':
4637 if ((options & PCRE_NOTEMPTY) != 0)
4638 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4639 else
4640 options |= PCRE_NOTEMPTY;
4641 continue;
4642
4643 case 'O':
4644 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4645 if (n > size_offsets_max)
4646 {
4647 size_offsets_max = n;
4648 free(offsets);
4649 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4650 if (offsets == NULL)
4651 {
4652 printf("** Failed to get %d bytes of memory for offsets vector\n",
4653 (int)(size_offsets_max * sizeof(int)));
4654 yield = 1;
4655 goto EXIT;
4656 }
4657 }
4658 use_size_offsets = n;
4659 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4660 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4661 continue;
4662
4663 case 'P':
4664 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4665 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4666 continue;
4667
4668 case 'Q':
4669 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4670 if (extra == NULL)
4671 {
4672 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4673 extra->flags = 0;
4674 }
4675 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4676 extra->match_limit_recursion = n;
4677 continue;
4678
4679 case 'q':
4680 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4681 if (extra == NULL)
4682 {
4683 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4684 extra->flags = 0;
4685 }
4686 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4687 extra->match_limit = n;
4688 continue;
4689
4690 #if !defined NODFA
4691 case 'R':
4692 options |= PCRE_DFA_RESTART;
4693 continue;
4694 #endif
4695
4696 case 'S':
4697 show_malloc = 1;
4698 continue;
4699
4700 case 'Y':
4701 options |= PCRE_NO_START_OPTIMIZE;
4702 continue;
4703
4704 case 'Z':
4705 options |= PCRE_NOTEOL;
4706 continue;
4707
4708 case '?':
4709 options |= PCRE_NO_UTF8_CHECK;
4710 continue;
4711
4712 case '<':
4713 {
4714 int x = check_newline(p, outfile);
4715 if (x == 0) goto NEXT_DATA;
4716 options |= x;
4717 while (*p++ != '>');
4718 }
4719 continue;
4720 }
4721
4722 /* We now have a character value in c that may be greater than 255.
4723 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4724 than 127 in UTF mode must have come from \x{...} or octal constructs
4725 because values from \x.. get this far only in non-UTF mode. */
4726
4727 #ifdef SUPPORT_PCRE8
4728 if (pcre_mode == PCRE8_MODE)
4729 {
4730 #ifndef NOUTF
4731 if (use_utf)
4732 {
4733 if (c > 0x7fffffff)
4734 {
4735 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4736 "and so cannot be converted to UTF-8\n", c);
4737 goto NEXT_DATA;
4738 }
4739 q8 += ord2utf8(c, q8);
4740 }
4741 else
4742 #endif
4743 {
4744 if (c > 0xffu)
4745 {
4746 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4747 "and UTF-8 mode is not enabled.\n", c);
4748 fprintf(outfile, "** Truncation will probably give the wrong "
4749 "result.\n");
4750 }
4751 *q8++ = c;
4752 }
4753 }
4754 #endif
4755 #ifdef SUPPORT_PCRE16
4756 if (pcre_mode == PCRE16_MODE)
4757 {
4758 #ifndef NOUTF
4759 if (use_utf)
4760 {
4761 if (c > 0x10ffffu)
4762 {
4763 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4764 "0x10ffff and so cannot be converted to UTF-16\n", c);
4765 goto NEXT_DATA;
4766 }
4767 else if (c >= 0x10000u)
4768 {
4769 c-= 0x10000u;
4770 *q16++ = 0xD800 | (c >> 10);
4771 *q16++ = 0xDC00 | (c & 0x3ff);
4772 }
4773 else
4774 *q16++ = c;
4775 }
4776 else
4777 #endif
4778 {
4779 if (c > 0xffffu)
4780 {
4781 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4782 "and UTF-16 mode is not enabled.\n", c);
4783 fprintf(outfile, "** Truncation will probably give the wrong "
4784 "result.\n");
4785 }
4786
4787 *q16++ = c;
4788 }
4789 }
4790 #endif
4791 #ifdef SUPPORT_PCRE32
4792 if (pcre_mode == PCRE32_MODE)
4793 {
4794 *q32++ = c;
4795 }
4796 #endif
4797
4798 }
4799
4800 /* Reached end of subject string */
4801
4802 #ifdef SUPPORT_PCRE8
4803 if (pcre_mode == PCRE8_MODE)
4804 {
4805 *q8 = 0;
4806 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4807 }
4808 #endif
4809 #ifdef SUPPORT_PCRE16
4810 if (pcre_mode == PCRE16_MODE)
4811 {
4812 *q16 = 0;
4813 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4814 }
4815 #endif
4816 #ifdef SUPPORT_PCRE32
4817 if (pcre_mode == PCRE32_MODE)
4818 {
4819 *q32 = 0;
4820 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4821 }
4822 #endif
4823
4824 /* If we're compiling with explicit valgrind support, mark the data from after
4825 its end to the end of the buffer as unaddressable, so that a read over the end
4826 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4827 If we're not building with valgrind support, move the data to the end of the
4828 buffer so that a read over the end might at least cause a crash.
4829 If we are using the POSIX interface, we must include the terminating zero. */
4830
4831 bptr = dbuffer;
4832
4833 #if !defined NOPOSIX
4834 if (posix || do_posix)
4835 {
4836 #ifdef SUPPORT_VALGRIND
4837 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4838 #else
4839 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4840 bptr += dbuffer_size - len - 1;
4841 #endif
4842 }
4843 else
4844 #endif
4845 {
4846 #ifdef SUPPORT_VALGRIND
4847 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4848 #else
4849 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4850 #endif
4851 }
4852
4853 if ((all_use_dfa || use_dfa) && find_match_limit)
4854 {
4855 printf("**Match limit not relevant for DFA matching: ignored\n");
4856 find_match_limit = 0;
4857 }
4858
4859 /* Handle matching via the POSIX interface, which does not
4860 support timing or playing with the match limit or callout data. */
4861
4862 #if !defined NOPOSIX
4863 if (posix || do_posix)
4864 {
4865 int rc;
4866 int eflags = 0;
4867 regmatch_t *pmatch = NULL;
4868 if (use_size_offsets > 0)
4869 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4870 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4871 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4872 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4873
4874 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4875
4876 if (rc != 0)
4877 {
4878 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4879 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4880 }
4881 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4882 {
4883 fprintf(outfile, "Matched with REG_NOSUB\n");
4884 }
4885 else
4886 {
4887 size_t i;
4888 for (i = 0; i < (size_t)use_size_offsets; i++)
4889 {
4890 if (pmatch[i].rm_so >= 0)
4891 {
4892 fprintf(outfile, "%2d: ", (int)i);
4893 PCHARSV(dbuffer, pmatch[i].rm_so,
4894 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4895 fprintf(outfile, "\n");
4896 if (do_showcaprest || (i == 0 && do_showrest))
4897 {
4898 fprintf(outfile, "%2d+ ", (int)i);
4899 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4900 outfile);
4901 fprintf(outfile, "\n");
4902 }
4903 }
4904 }
4905 }
4906 free(pmatch);
4907 goto NEXT_DATA;
4908 }
4909
4910 #endif /* !defined NOPOSIX */
4911
4912 /* Handle matching via the native interface - repeats for /g and /G */
4913
4914 /* Ensure that there is a JIT callback if we want to verify that JIT was
4915 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4916
4917 if (verify_jit && jit_stack == NULL && extra != NULL)
4918 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4919
4920 for (;; gmatched++) /* Loop for /g or /G */
4921 {
4922 markptr = NULL;
4923 jit_was_used = FALSE;
4924
4925 if (timeitm > 0)
4926 {
4927 register int i;
4928 clock_t time_taken;
4929 clock_t start_time = clock();
4930
4931 #if !defined NODFA
4932 if (all_use_dfa || use_dfa)
4933 {
4934 if ((options & PCRE_DFA_RESTART) != 0)
4935 {
4936 fprintf(outfile, "Timing DFA restarts is not supported\n");
4937 break;
4938 }
4939 if (dfa_workspace == NULL)
4940 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4941 for (i = 0; i < timeitm; i++)
4942 {
4943 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4944 (options | g_notempty), use_offsets, use_size_offsets,
4945 dfa_workspace, DFA_WS_DIMENSION);
4946 }
4947 }
4948 else
4949 #endif
4950
4951 for (i = 0; i < timeitm; i++)
4952 {
4953 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4954 (options | g_notempty), use_offsets, use_size_offsets);
4955 }
4956 time_taken = clock() - start_time;
4957 fprintf(outfile, "Execute time %.4f milliseconds\n",
4958 (((double)time_taken * 1000.0) / (double)timeitm) /
4959 (double)CLOCKS_PER_SEC);
4960 }
4961
4962 /* If find_match_limit is set, we want to do repeated matches with
4963 varying limits in order to find the minimum value for the match limit and
4964 for the recursion limit. The match limits are relevant only to the normal
4965 running of pcre_exec(), so disable the JIT optimization. This makes it
4966 possible to run the same set of tests with and without JIT externally
4967 requested. */
4968
4969 if (find_match_limit)
4970 {
4971 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4972 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4973 extra->flags = 0;
4974
4975 (void)check_match_limit(re, extra, bptr, len, start_offset,
4976 options|g_notempty, use_offsets, use_size_offsets,
4977 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4978 PCRE_ERROR_MATCHLIMIT, "match()");
4979
4980 count = check_match_limit(re, extra, bptr, len, start_offset,
4981 options|g_notempty, use_offsets, use_size_offsets,
4982 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4983 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4984 }
4985
4986 /* If callout_data is set, use the interface with additional data */
4987
4988 else if (callout_data_set)
4989 {
4990 if (extra == NULL)
4991 {
4992 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4993 extra->flags = 0;
4994 }
4995 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4996 extra->callout_data = &callout_data;
4997 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4998 options | g_notempty, use_offsets, use_size_offsets);
4999 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5000 }
5001
5002 /* The normal case is just to do the match once, with the default
5003 value of match_limit. */
5004
5005 #if !defined NODFA
5006 else if (all_use_dfa || use_dfa)
5007 {
5008 if (dfa_workspace == NULL)
5009 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5010 if (dfa_matched++ == 0)
5011 dfa_workspace[0] = -1; /* To catch bad restart */
5012 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5013 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5014 DFA_WS_DIMENSION);
5015 if (count == 0)
5016 {
5017 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5018 count = use_size_offsets/2;
5019 }
5020 }
5021 #endif
5022
5023 else
5024 {
5025 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5026 options | g_notempty, use_offsets, use_size_offsets);
5027 if (count == 0)
5028 {
5029 fprintf(outfile, "Matched, but too many substrings\n");
5030 count = use_size_offsets/3;
5031 }
5032 }
5033
5034 /* Matched */
5035
5036 if (count >= 0)
5037 {
5038 int i, maxcount;
5039 void *cnptr, *gnptr;
5040
5041 #if !defined NODFA
5042 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5043 #endif
5044 maxcount = use_size_offsets/3;
5045
5046 /* This is a check against a lunatic return value. */
5047
5048 if (count > maxcount)
5049 {
5050 fprintf(outfile,
5051 "** PCRE error: returned count %d is too big for offset size %d\n",
5052 count, use_size_offsets);
5053 count = use_size_offsets/3;
5054 if (do_g || do_G)
5055 {
5056 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5057 do_g = do_G = FALSE; /* Break g/G loop */
5058 }
5059 }
5060
5061 /* do_allcaps requests showing of all captures in the pattern, to check
5062 unset ones at the end. */
5063
5064 if (do_allcaps)
5065 {
5066 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5067 goto SKIP_DATA;
5068 count++; /* Allow for full match */
5069 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5070 }
5071
5072 /* Output the captured substrings */
5073
5074 for (i = 0; i < count * 2; i += 2)
5075 {
5076 if (use_offsets[i] < 0)
5077 {
5078 if (use_offsets[i] != -1)
5079 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5080 use_offsets[i], i);
5081 if (use_offsets[i+1] != -1)
5082 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5083 use_offsets[i+1], i+1);
5084 fprintf(outfile, "%2d: <unset>\n", i/2);
5085 }
5086 else
5087 {
5088 fprintf(outfile, "%2d: ", i/2);
5089 PCHARSV(bptr, use_offsets[i],
5090 use_offsets[i+1] - use_offsets[i], outfile);
5091 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5092 fprintf(outfile, "\n");
5093 if (do_showcaprest || (i == 0 && do_showrest))
5094 {
5095 fprintf(outfile, "%2d+ ", i/2);
5096 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5097 outfile);
5098 fprintf(outfile, "\n");
5099 }
5100 }
5101 }
5102
5103 if (markptr != NULL)
5104 {
5105 fprintf(outfile, "MK: ");
5106 PCHARSV(markptr, 0, -1, outfile);
5107 fprintf(outfile, "\n");
5108 }
5109
5110 for (i = 0; i < 32; i++)
5111 {
5112 if ((copystrings & (1 << i)) != 0)
5113 {
5114 int rc;
5115 char copybuffer[256];
5116 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5117 copybuffer, sizeof(copybuffer));
5118 if (rc < 0)
5119 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5120 else
5121 {
5122 fprintf(outfile, "%2dC ", i);
5123 PCHARSV(copybuffer, 0, rc, outfile);
5124 fprintf(outfile, " (%d)\n", rc);
5125 }
5126 }
5127 }
5128
5129 cnptr = copynames;
5130 for (;;)
5131 {
5132 int rc;
5133 char copybuffer[256];
5134
5135 #ifdef SUPPORT_PCRE32
5136 if (pcre_mode == PCRE32_MODE)
5137 {
5138 if (*(pcre_uint32 *)cnptr == 0) break;
5139 }
5140 #endif
5141 #ifdef SUPPORT_PCRE16
5142 if (pcre_mode == PCRE16_MODE)
5143 {
5144 if (*(pcre_uint16 *)cnptr == 0) break;
5145 }
5146 #endif
5147 #ifdef SUPPORT_PCRE8
5148 if (pcre_mode == PCRE8_MODE)
5149 {
5150 if (*(pcre_uint8 *)cnptr == 0) break;
5151 }
5152 #endif
5153
5154 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5155 cnptr, copybuffer, sizeof(copybuffer));
5156
5157 if (rc < 0)
5158 {
5159 fprintf(outfile, "copy substring ");
5160 PCHARSV(cnptr, 0, -1, outfile);
5161 fprintf(outfile, " failed %d\n", rc);
5162 }
5163 else
5164 {
5165 fprintf(outfile, " C ");
5166 PCHARSV(copybuffer, 0, rc, outfile);
5167 fprintf(outfile, " (%d) ", rc);
5168 PCHARSV(cnptr, 0, -1, outfile);
5169 putc('\n', outfile);
5170 }
5171
5172 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5173 }
5174
5175 for (i = 0; i < 32; i++)
5176 {
5177 if ((getstrings & (1 << i)) != 0)
5178 {
5179 int rc;
5180 const char *substring;
5181 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5182 if (rc < 0)
5183 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5184 else
5185 {
5186 fprintf(outfile, "%2dG ", i);
5187 PCHARSV(substring, 0, rc, outfile);
5188 fprintf(outfile, " (%d)\n", rc);
5189 PCRE_FREE_SUBSTRING(substring);
5190 }
5191 }
5192 }
5193
5194 gnptr = getnames;
5195 for (;;)
5196 {
5197 int rc;
5198 const char *substring;
5199
5200 #ifdef SUPPORT_PCRE32
5201 if (pcre_mode == PCRE32_MODE)
5202 {
5203 if (*(pcre_uint32 *)gnptr == 0) break;
5204 }
5205 #endif
5206 #ifdef SUPPORT_PCRE16
5207 if (pcre_mode == PCRE16_MODE)
5208 {
5209 if (*(pcre_uint16 *)gnptr == 0) break;
5210 }
5211 #endif
5212 #ifdef SUPPORT_PCRE8
5213 if (pcre_mode == PCRE8_MODE)
5214 {
5215 if (*(pcre_uint8 *)gnptr == 0) break;
5216 }
5217 #endif
5218
5219 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5220 gnptr, &substring);
5221 if (rc < 0)
5222 {
5223 fprintf(outfile, "get substring ");
5224 PCHARSV(gnptr, 0, -1, outfile);
5225 fprintf(outfile, " failed %d\n", rc);
5226 }
5227 else
5228 {
5229 fprintf(outfile, " G ");
5230 PCHARSV(substring, 0, rc, outfile);
5231 fprintf(outfile, " (%d) ", rc);
5232 PCHARSV(gnptr, 0, -1, outfile);
5233 PCRE_FREE_SUBSTRING(substring);
5234 putc('\n', outfile);
5235 }
5236
5237 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5238 }
5239
5240 if (getlist)
5241 {
5242 int rc;
5243 const char **stringlist;
5244 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5245 if (rc < 0)
5246 fprintf(outfile, "get substring list failed %d\n", rc);
5247 else
5248 {
5249 for (i = 0; i < count; i++)
5250 {
5251 fprintf(outfile, "%2dL ", i);
5252 PCHARSV(stringlist[i], 0, -1, outfile);
5253 putc('\n', outfile);
5254 }
5255 if (stringlist[i] != NULL)
5256 fprintf(outfile, "string list not terminated by NULL\n");
5257 PCRE_FREE_SUBSTRING_LIST(stringlist);
5258 }
5259 }
5260 }
5261
5262 /* There was a partial match */
5263
5264 else if (count == PCRE_ERROR_PARTIAL)
5265 {
5266 if (markptr == NULL) fprintf(outfile, "Partial match");
5267 else
5268 {
5269 fprintf(outfile, "Partial match, mark=");
5270 PCHARSV(markptr, 0, -1, outfile);
5271 }
5272 if (use_size_offsets > 1)
5273 {
5274 fprintf(outfile, ": ");
5275 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5276 outfile);
5277 }
5278 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5279 fprintf(outfile, "\n");
5280 break; /* Out of the /g loop */
5281 }
5282
5283 /* Failed to match. If this is a /g or /G loop and we previously set
5284 g_notempty after a null match, this is not necessarily the end. We want
5285 to advance the start offset, and continue. We won't be at the end of the
5286 string - that was checked before setting g_notempty.
5287
5288 Complication arises in the case when the newline convention is "any",
5289 "crlf", or "anycrlf". If the previous match was at the end of a line
5290 terminated by CRLF, an advance of one character just passes the \r,
5291 whereas we should prefer the longer newline sequence, as does the code in
5292 pcre_exec(). Fudge the offset value to achieve this. We check for a
5293 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5294 find the default.
5295
5296 Otherwise, in the case of UTF-8 matching, the advance must be one
5297 character, not one byte. */
5298
5299 else
5300 {
5301 if (g_notempty != 0)
5302 {
5303 int onechar = 1;
5304 unsigned int obits = REAL_PCRE_OPTIONS(re);
5305 use_offsets[0] = start_offset;
5306 if ((obits & PCRE_NEWLINE_BITS) == 0)
5307 {
5308 int d;
5309 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5310 /* Note that these values are always the ASCII ones, even in
5311 EBCDIC environments. CR = 13, NL = 10. */
5312 obits = (d == 13)? PCRE_NEWLINE_CR :
5313 (d == 10)? PCRE_NEWLINE_LF :
5314 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5315 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5316 (d == -1)? PCRE_NEWLINE_ANY : 0;
5317 }
5318 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5319 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5320 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5321 &&
5322 start_offset < len - 1 && (
5323 #ifdef SUPPORT_PCRE8
5324 (pcre_mode == PCRE8_MODE &&
5325 bptr[start_offset] == '\r' &&
5326 bptr[start_offset + 1] == '\n') ||
5327 #endif
5328 #ifdef SUPPORT_PCRE16
5329 (pcre_mode == PCRE16_MODE &&
5330 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5331 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5332 #endif
5333 #ifdef SUPPORT_PCRE32
5334 (pcre_mode == PCRE32_MODE &&
5335 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5336 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5337 #endif
5338 0))
5339 onechar++;
5340 else if (use_utf)
5341 {
5342 while (start_offset + onechar < len)
5343 {
5344 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5345 onechar++;
5346 }
5347 }
5348 use_offsets[1] = start_offset + onechar;
5349 }
5350 else
5351 {
5352 switch(count)
5353 {
5354 case PCRE_ERROR_NOMATCH:
5355 if (gmatched == 0)
5356 {
5357 if (markptr == NULL)
5358 {
5359 fprintf(outfile, "No match");
5360 }
5361 else
5362 {
5363 fprintf(outfile, "No match, mark = ");
5364 PCHARSV(markptr, 0, -1, outfile);
5365 }
5366 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5367 putc('\n', outfile);
5368 }
5369 break;
5370
5371 case PCRE_ERROR_BADUTF8:
5372 case PCRE_ERROR_SHORTUTF8:
5373 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5374 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5375 8 * CHAR_SIZE);
5376 if (use_size_offsets >= 2)
5377 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5378 use_offsets[1]);
5379 fprintf(outfile, "\n");
5380 break;
5381
5382 case PCRE_ERROR_BADUTF8_OFFSET:
5383 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5384 8 * CHAR_SIZE);
5385 break;
5386
5387 default:
5388 if (count < 0 &&
5389 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5390 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5391 else
5392 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5393 break;
5394 }
5395
5396 break; /* Out of the /g loop */
5397 }
5398 }
5399
5400 /* If not /g or /G we are done */
5401
5402 if (!do_g && !do_G) break;
5403
5404 /* If we have matched an empty string, first check to see if we are at
5405 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5406 Perl's /g option does. This turns out to be rather cunning. First we set
5407 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5408 same point. If this fails (picked up above) we advance to the next
5409 character. */
5410
5411 g_notempty = 0;
5412
5413 if (use_offsets[0] == use_offsets[1])
5414 {
5415 if (use_offsets[0] == len) break;
5416 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5417 }
5418
5419 /* For /g, update the start offset, leaving the rest alone */
5420
5421 if (do_g) start_offset = use_offsets[1];
5422
5423 /* For /G, update the pointer and length */
5424
5425 else
5426 {
5427 bptr += use_offsets[1] * CHAR_SIZE;
5428 len -= use_offsets[1];
5429 }
5430 } /* End of loop for /g and /G */
5431
5432 NEXT_DATA: continue;
5433 } /* End of loop for data lines */
5434
5435 CONTINUE:
5436
5437 #if !defined NOPOSIX
5438 if (posix || do_posix) regfree(&preg);
5439 #endif
5440
5441 if (re != NULL) new_free(re);
5442 if (extra != NULL)
5443 {
5444 PCRE_FREE_STUDY(extra);
5445 }
5446 if (locale_set)
5447 {
5448 new_free((void *)tables);
5449 setlocale(LC_CTYPE, "C");
5450 locale_set = 0;
5451 }
5452 if (jit_stack != NULL)
5453 {
5454 PCRE_JIT_STACK_FREE(jit_stack);
5455 jit_stack = NULL;
5456 }
5457 }
5458
5459 if (infile == stdin) fprintf(outfile, "\n");
5460
5461 EXIT:
5462
5463 if (infile != NULL && infile != stdin) fclose(infile);
5464 if (outfile != NULL && outfile != stdout) fclose(outfile);
5465
5466 free(buffer);
5467 free(dbuffer);
5468 free(pbuffer);
5469 free(offsets);
5470
5471 #ifdef SUPPORT_PCRE16
5472 if (buffer16 != NULL) free(buffer16);
5473 #endif
5474 #ifdef SUPPORT_PCRE32
5475 if (buffer32 != NULL) free(buffer32);
5476 #endif
5477
5478 #if !defined NODFA
5479 if (dfa_workspace != NULL)
5480 free(dfa_workspace);
5481 #endif
5482
5483 return yield;
5484 }
5485
5486 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5