/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1357 - (show annotations)
Tue Aug 27 15:49:16 2013 UTC (6 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 166760 byte(s)
Add -T and -TM to pcretest.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #ifdef __VMS
125 #include <ssdef.h>
126 void vms_setsymbol( char *, char *, int );
127 #endif
128
129
130 #define PRIV(name) name
131
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
136
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
140
141 #include "pcre.h"
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
149 #ifdef SUPPORT_PCRE8
150 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152 #ifdef SUPPORT_PCRE16
153 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154 #endif
155 #ifdef SUPPORT_PCRE32
156 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157 #endif
158
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
162
163 #define PCRE_INCLUDED
164
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
167
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
173
/* PRINTABLE(c) is non-zero when c lies in the fixed range of characters that
are output as-is (rather than as a hex value) when the locale has not been
explicitly changed. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) uses isprint() when locale_set is non-zero, and PRINTABLE()
otherwise. isprint() is only defined for values representable as unsigned char
(and EOF), so anything outside 0..255 - for example a 16-bit or 32-bit
character value - is rejected before the call instead of being passed on. Note
that the argument may be evaluated more than once. */

#define PRINTOK(c) \
  (locale_set ? ((unsigned long)(c) <= 255 && isprint((int)(c))) : PRINTABLE(c))
181
182 /* Posix support is disabled in 16 or 32 bit only mode. */
183 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184 #define NOPOSIX
185 #endif
186
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
189 Makefile. */
190
191 #if !defined NOPOSIX
192 #include "pcreposix.h"
193 #endif
194
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
199
200 #ifndef SUPPORT_UTF
201 #ifndef NOUTF
202 #define NOUTF
203 #endif
204 #endif
205
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
208 only from one place and is handled differently). I couldn't dream up any way of
209 using a single macro to do this in a generic way, because of the many different
210 argument requirements. We know that at least one of SUPPORT_PCRE8,
211 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212 individual mode; then use these in the definitions of generic macros.
213
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217 offset is in units of this size. If the string were given as base+offset in one
218 argument, the casting might be incorrectly applied. */
219
220 #ifdef SUPPORT_PCRE8
221
222 #define PCHARS8(lv, p, offset, len, f) \
223 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224
225 #define PCHARSV8(p, offset, len, f) \
226 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227
228 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229 p = read_capture_name8(p, cn8, re)
230
231 #define STRLEN8(p) ((int)strlen((char *)p))
232
233 #define SET_PCRE_CALLOUT8(callout) \
234 pcre_callout = callout
235
236 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
237 pcre_assign_jit_stack(extra, callback, userdata)
238
239 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
240 re = pcre_compile((char *)pat, options, error, erroffset, tables)
241
242 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
243 namesptr, cbuffer, size) \
244 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
245 (char *)namesptr, cbuffer, size)
246
247 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
248 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
249
250 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
251 offsets, size_offsets, workspace, size_workspace) \
252 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
253 offsets, size_offsets, workspace, size_workspace)
254
255 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
256 offsets, size_offsets) \
257 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
258 offsets, size_offsets)
259
260 #define PCRE_FREE_STUDY8(extra) \
261 pcre_free_study(extra)
262
263 #define PCRE_FREE_SUBSTRING8(substring) \
264 pcre_free_substring(substring)
265
266 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
267 pcre_free_substring_list(listptr)
268
269 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
270 getnamesptr, subsptr) \
271 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
272 (char *)getnamesptr, subsptr)
273
274 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
275 n = pcre_get_stringnumber(re, (char *)ptr)
276
277 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
278 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
279
280 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
281 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
282
283 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
284 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
285
286 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
287 pcre_printint(re, outfile, debug_lengths)
288
289 #define PCRE_STUDY8(extra, re, options, error) \
290 extra = pcre_study(re, options, error)
291
292 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
293 pcre_jit_stack_alloc(startsize, maxsize)
294
295 #define PCRE_JIT_STACK_FREE8(stack) \
296 pcre_jit_stack_free(stack)
297
298 #define pcre8_maketables pcre_maketables
299
300 #endif /* SUPPORT_PCRE8 */
301
302 /* -----------------------------------------------------------*/
303
304 #ifdef SUPPORT_PCRE16
305
306 #define PCHARS16(lv, p, offset, len, f) \
307 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
308
309 #define PCHARSV16(p, offset, len, f) \
310 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
311
312 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
313 p = read_capture_name16(p, cn16, re)
314
315 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
316
317 #define SET_PCRE_CALLOUT16(callout) \
318 pcre16_callout = (int (*)(pcre16_callout_block *))callout
319
320 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
321 pcre16_assign_jit_stack((pcre16_extra *)extra, \
322 (pcre16_jit_callback)callback, userdata)
323
324 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
325 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
326 tables)
327
328 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
329 namesptr, cbuffer, size) \
330 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
331 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
332
333 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
334 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
335 (PCRE_UCHAR16 *)cbuffer, size/2)
336
337 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338 offsets, size_offsets, workspace, size_workspace) \
339 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
340 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
341 workspace, size_workspace)
342
343 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets) \
345 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
346 len, start_offset, options, offsets, size_offsets)
347
348 #define PCRE_FREE_STUDY16(extra) \
349 pcre16_free_study((pcre16_extra *)extra)
350
351 #define PCRE_FREE_SUBSTRING16(substring) \
352 pcre16_free_substring((PCRE_SPTR16)substring)
353
354 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
355 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
356
357 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
358 getnamesptr, subsptr) \
359 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
360 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
361
/* Look up the number of a named group with the 16-bit library and store it in
n. NOTE(review): the compiled pattern is taken from a variable called "re" that
must be in scope where the macro is used, not from a macro argument, and the rc
argument is not referenced. The pattern pointer is cast to pcre16 * in the same
way as in the other 16-bit macros. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
364
365 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
366 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
367 (PCRE_SPTR16 *)(void*)subsptr)
368
369 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
370 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
371 (PCRE_SPTR16 **)(void*)listptr)
372
373 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
374 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
375 tables)
376
377 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
378 pcre16_printint(re, outfile, debug_lengths)
379
380 #define PCRE_STUDY16(extra, re, options, error) \
381 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
382
383 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
384 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
385
386 #define PCRE_JIT_STACK_FREE16(stack) \
387 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
388
389 #endif /* SUPPORT_PCRE16 */
390
391 /* -----------------------------------------------------------*/
392
393 #ifdef SUPPORT_PCRE32
394
395 #define PCHARS32(lv, p, offset, len, f) \
396 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
397
398 #define PCHARSV32(p, offset, len, f) \
399 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
400
401 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
402 p = read_capture_name32(p, cn32, re)
403
404 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
405
406 #define SET_PCRE_CALLOUT32(callout) \
407 pcre32_callout = (int (*)(pcre32_callout_block *))callout
408
409 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
410 pcre32_assign_jit_stack((pcre32_extra *)extra, \
411 (pcre32_jit_callback)callback, userdata)
412
413 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
414 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
415 tables)
416
/* Copy a named captured substring using the 32-bit library, storing the result
code in rc. The size argument is the size of cbuffer in bytes (the 8-bit macro
passes it on unchanged); it is divided by 4 here to express it in 32-bit code
units, just as the 16-bit macro divides by 2. Dividing by only 2 would tell
the library that the buffer is twice as large as it really is. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
421
/* Copy captured substring number i using the 32-bit library, storing the
result code in rc. The size argument is the size of cbuffer in bytes (the 8-bit
macro passes it on unchanged); it is divided by 4 here to express it in 32-bit
code units, just as the 16-bit macro divides by 2. Dividing by only 2 would
tell the library that the buffer is twice as large as it really is. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
425
426 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427 offsets, size_offsets, workspace, size_workspace) \
428 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
429 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
430 workspace, size_workspace)
431
432 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets) \
434 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
435 len, start_offset, options, offsets, size_offsets)
436
437 #define PCRE_FREE_STUDY32(extra) \
438 pcre32_free_study((pcre32_extra *)extra)
439
440 #define PCRE_FREE_SUBSTRING32(substring) \
441 pcre32_free_substring((PCRE_SPTR32)substring)
442
443 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
444 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
445
446 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
447 getnamesptr, subsptr) \
448 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
449 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
450
/* Look up the number of a named group with the 32-bit library and store it in
n. NOTE(review): the compiled pattern is taken from a variable called "re" that
must be in scope where the macro is used, not from a macro argument, and the rc
argument is not referenced. The pattern pointer is cast to pcre32 * in the same
way as in the other 32-bit macros, so that the argument type matches the
library function. */

#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
453
454 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
455 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
456 (PCRE_SPTR32 *)(void*)subsptr)
457
458 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
459 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
460 (PCRE_SPTR32 **)(void*)listptr)
461
462 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
463 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
464 tables)
465
466 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
467 pcre32_printint(re, outfile, debug_lengths)
468
469 #define PCRE_STUDY32(extra, re, options, error) \
470 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
471
472 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
473 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
474
475 #define PCRE_JIT_STACK_FREE32(stack) \
476 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
477
478 #endif /* SUPPORT_PCRE32 */
479
480
481 /* ----- More than one mode is supported; a runtime test is needed, except for
482 pcre_config(), and the JIT stack functions, when it doesn't matter which
483 available version is called. ----- */
484
/* Identifiers for the library that is selected at run time. The numeric values
matter: CHAR_SIZE is computed as (1 << pcre_mode), so they must remain 0, 1
and 2 in this order. */

enum {
  PCRE8_MODE = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
490
491 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
492 defined (SUPPORT_PCRE32)) >= 2
493
494 #define CHAR_SIZE (1 << pcre_mode)
495
496 /* There doesn't seem to be an easy way of writing these macros that can cope
497 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
498 cases separately. */
499
500 /* ----- All three modes supported ----- */
501
502 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
503
504 #define PCHARS(lv, p, offset, len, f) \
505 if (pcre_mode == PCRE32_MODE) \
506 PCHARS32(lv, p, offset, len, f); \
507 else if (pcre_mode == PCRE16_MODE) \
508 PCHARS16(lv, p, offset, len, f); \
509 else \
510 PCHARS8(lv, p, offset, len, f)
511
512 #define PCHARSV(p, offset, len, f) \
513 if (pcre_mode == PCRE32_MODE) \
514 PCHARSV32(p, offset, len, f); \
515 else if (pcre_mode == PCRE16_MODE) \
516 PCHARSV16(p, offset, len, f); \
517 else \
518 PCHARSV8(p, offset, len, f)
519
520 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
521 if (pcre_mode == PCRE32_MODE) \
522 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
523 else if (pcre_mode == PCRE16_MODE) \
524 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
525 else \
526 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
527
528 #define SET_PCRE_CALLOUT(callout) \
529 if (pcre_mode == PCRE32_MODE) \
530 SET_PCRE_CALLOUT32(callout); \
531 else if (pcre_mode == PCRE16_MODE) \
532 SET_PCRE_CALLOUT16(callout); \
533 else \
534 SET_PCRE_CALLOUT8(callout)
535
536 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
537
538 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
539 if (pcre_mode == PCRE32_MODE) \
540 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
541 else if (pcre_mode == PCRE16_MODE) \
542 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
543 else \
544 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
545
546 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
547 if (pcre_mode == PCRE32_MODE) \
548 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
549 else if (pcre_mode == PCRE16_MODE) \
550 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
551 else \
552 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
553
554 #define PCRE_CONFIG pcre_config
555
556 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size) \
558 if (pcre_mode == PCRE32_MODE) \
559 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size); \
561 else if (pcre_mode == PCRE16_MODE) \
562 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size); \
564 else \
565 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
566 namesptr, cbuffer, size)
567
568 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
569 if (pcre_mode == PCRE32_MODE) \
570 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
571 else if (pcre_mode == PCRE16_MODE) \
572 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
573 else \
574 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
575
576 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace) \
578 if (pcre_mode == PCRE32_MODE) \
579 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace); \
581 else if (pcre_mode == PCRE16_MODE) \
582 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace); \
584 else \
585 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets, workspace, size_workspace)
587
588 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets) \
590 if (pcre_mode == PCRE32_MODE) \
591 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets); \
593 else if (pcre_mode == PCRE16_MODE) \
594 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets); \
596 else \
597 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
598 offsets, size_offsets)
599
600 #define PCRE_FREE_STUDY(extra) \
601 if (pcre_mode == PCRE32_MODE) \
602 PCRE_FREE_STUDY32(extra); \
603 else if (pcre_mode == PCRE16_MODE) \
604 PCRE_FREE_STUDY16(extra); \
605 else \
606 PCRE_FREE_STUDY8(extra)
607
608 #define PCRE_FREE_SUBSTRING(substring) \
609 if (pcre_mode == PCRE32_MODE) \
610 PCRE_FREE_SUBSTRING32(substring); \
611 else if (pcre_mode == PCRE16_MODE) \
612 PCRE_FREE_SUBSTRING16(substring); \
613 else \
614 PCRE_FREE_SUBSTRING8(substring)
615
616 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
617 if (pcre_mode == PCRE32_MODE) \
618 PCRE_FREE_SUBSTRING_LIST32(listptr); \
619 else if (pcre_mode == PCRE16_MODE) \
620 PCRE_FREE_SUBSTRING_LIST16(listptr); \
621 else \
622 PCRE_FREE_SUBSTRING_LIST8(listptr)
623
624 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr) \
626 if (pcre_mode == PCRE32_MODE) \
627 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr); \
629 else if (pcre_mode == PCRE16_MODE) \
630 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr); \
632 else \
633 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
634 getnamesptr, subsptr)
635
636 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
637 if (pcre_mode == PCRE32_MODE) \
638 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
639 else if (pcre_mode == PCRE16_MODE) \
640 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
641 else \
642 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
643
644 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
645 if (pcre_mode == PCRE32_MODE) \
646 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
647 else if (pcre_mode == PCRE16_MODE) \
648 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
649 else \
650 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
651
652 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
653 if (pcre_mode == PCRE32_MODE) \
654 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
655 else if (pcre_mode == PCRE16_MODE) \
656 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
657 else \
658 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
659
660 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
661 (pcre_mode == PCRE32_MODE ? \
662 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
663 : pcre_mode == PCRE16_MODE ? \
664 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
665 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
666
667 #define PCRE_JIT_STACK_FREE(stack) \
668 if (pcre_mode == PCRE32_MODE) \
669 PCRE_JIT_STACK_FREE32(stack); \
670 else if (pcre_mode == PCRE16_MODE) \
671 PCRE_JIT_STACK_FREE16(stack); \
672 else \
673 PCRE_JIT_STACK_FREE8(stack)
674
675 #define PCRE_MAKETABLES \
676 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
677
678 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
679 if (pcre_mode == PCRE32_MODE) \
680 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
681 else if (pcre_mode == PCRE16_MODE) \
682 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
683 else \
684 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
685
686 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
687 if (pcre_mode == PCRE32_MODE) \
688 PCRE_PRINTINT32(re, outfile, debug_lengths); \
689 else if (pcre_mode == PCRE16_MODE) \
690 PCRE_PRINTINT16(re, outfile, debug_lengths); \
691 else \
692 PCRE_PRINTINT8(re, outfile, debug_lengths)
693
694 #define PCRE_STUDY(extra, re, options, error) \
695 if (pcre_mode == PCRE32_MODE) \
696 PCRE_STUDY32(extra, re, options, error); \
697 else if (pcre_mode == PCRE16_MODE) \
698 PCRE_STUDY16(extra, re, options, error); \
699 else \
700 PCRE_STUDY8(extra, re, options, error)
701
702
703 /* ----- Two out of three modes are supported ----- */
704
705 #else
706
707 /* We can use some macro trickery to make a single set of definitions work in
708 the three different cases. */
709
710 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
711
712 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
713 #define BITONE 32
714 #define BITTWO 16
715
716 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
717
718 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
719 #define BITONE 32
720 #define BITTWO 8
721
722 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
723
724 #else
725 #define BITONE 16
726 #define BITTWO 8
727 #endif
728
729 #define glue(a,b) a##b
730 #define G(a,b) glue(a,b)
731
732
733 /* ----- Common macros for two-mode cases ----- */
734
735 #define PCHARS(lv, p, offset, len, f) \
736 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737 G(PCHARS,BITONE)(lv, p, offset, len, f); \
738 else \
739 G(PCHARS,BITTWO)(lv, p, offset, len, f)
740
741 #define PCHARSV(p, offset, len, f) \
742 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743 G(PCHARSV,BITONE)(p, offset, len, f); \
744 else \
745 G(PCHARSV,BITTWO)(p, offset, len, f)
746
747 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
748 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
750 else \
751 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
752
753 #define SET_PCRE_CALLOUT(callout) \
754 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
755 G(SET_PCRE_CALLOUT,BITONE)(callout); \
756 else \
757 G(SET_PCRE_CALLOUT,BITTWO)(callout)
758
759 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
760 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
761
762 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
763 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
765 else \
766 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
767
/* Two-mode dispatch macros. Each one tests pcre_mode against the mode constant
built from BITONE and expands to the BITONE or the BITTWO variant of the same
operation. The statement-like macros are wrapped in do { ... } while (0) so
that every invocation is exactly one statement: it needs the usual trailing
semicolon and cannot interact with an if/else at the point of use. */

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
    else \
      G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables); \
  } while (0)

/* PCRE_CONFIG is a function name, not a statement; it always refers to the
BITONE library's config function. */

#define PCRE_CONFIG G(G(pcre,BITONE),_config)

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_STUDY,BITONE)(extra); \
    else \
      G(PCRE_FREE_STUDY,BITTWO)(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
    else \
      G(PCRE_FREE_SUBSTRING,BITTWO)(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
    else \
      G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
    else \
      G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
    else \
      G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr); \
  } while (0)
853
/* Expression macro: yields the JIT stack allocated by the BITONE or BITTWO
library, depending on pcre_mode. The whole conditional expression is
parenthesised so that operators next to the invocation (a comparison, a cast,
another ?:) cannot bind to only one of its arms. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
858
/* Statement macro: frees a JIT stack through the BITONE or BITTWO variant,
depending on pcre_mode. Wrapped in do { ... } while (0) so that it always
behaves as a single statement. */

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
    else \
      G(PCRE_JIT_STACK_FREE,BITTWO)(stack); \
  } while (0)
864
/* Expression macro: calls the maketables function of the BITONE or BITTWO
library, depending on pcre_mode, and yields its result. The outer parentheses
keep the conditional expression intact when the macro is used inside a larger
expression such as a comparison against NULL. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
868
/* Remaining two-mode statement macros. As with the others, pcre_mode selects
the BITONE or BITTWO variant, and do { ... } while (0) makes each invocation a
single statement that is terminated by the caller's semicolon. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
    else \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
    else \
      G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_STUDY,BITONE)(extra, re, options, error); \
    else \
      G(PCRE_STUDY,BITTWO)(extra, re, options, error); \
  } while (0)
886
887 #endif /* Two out of three modes */
888
889 /* ----- End of cases where more than one mode is supported ----- */
890
891
892 /* ----- Only 8-bit mode is supported ----- */
893
894 #elif defined SUPPORT_PCRE8
/* Single-mode build (8-bit only): every generic name is a plain alias for its
8-bit counterpart, so none of these definitions tests pcre_mode. Note that
PCRE_CONFIG names the library function itself, whereas PCRE_MAKETABLES expands
to a complete call. CHAR_SIZE is 1 in this mode. */
#define CHAR_SIZE 1
#define PCHARS PCHARS8
#define PCHARSV PCHARSV8
#define READ_CAPTURE_NAME READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
#define STRLEN STRLEN8
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE PCRE_COMPILE8
#define PCRE_CONFIG pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC PCRE_DFA_EXEC8
#define PCRE_EXEC PCRE_EXEC8
#define PCRE_FREE_STUDY PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT PCRE_PRINTINT8
#define PCRE_STUDY PCRE_STUDY8
921
922 /* ----- Only 16-bit mode is supported ----- */
923
924 #elif defined SUPPORT_PCRE16
/* Single-mode build (16-bit only): every generic name is a plain alias for its
16-bit counterpart, so none of these definitions tests pcre_mode. Note that
PCRE_CONFIG names the library function itself, whereas PCRE_MAKETABLES expands
to a complete call. CHAR_SIZE is 2 in this mode. */
#define CHAR_SIZE 2
#define PCHARS PCHARS16
#define PCHARSV PCHARSV16
#define READ_CAPTURE_NAME READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
#define STRLEN STRLEN16
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE PCRE_COMPILE16
#define PCRE_CONFIG pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC PCRE_DFA_EXEC16
#define PCRE_EXEC PCRE_EXEC16
#define PCRE_FREE_STUDY PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT PCRE_PRINTINT16
#define PCRE_STUDY PCRE_STUDY16
951
952 /* ----- Only 32-bit mode is supported ----- */
953
954 #elif defined SUPPORT_PCRE32
/* Single-mode build (32-bit only): every generic name is a plain alias for its
32-bit counterpart, so none of these definitions tests pcre_mode. Note that
PCRE_CONFIG names the library function itself, whereas PCRE_MAKETABLES expands
to a complete call. CHAR_SIZE is 4 in this mode. */
#define CHAR_SIZE 4
#define PCHARS PCHARS32
#define PCHARSV PCHARSV32
#define READ_CAPTURE_NAME READ_CAPTURE_NAME32
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
#define STRLEN STRLEN32
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
#define PCRE_COMPILE PCRE_COMPILE32
#define PCRE_CONFIG pcre32_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
#define PCRE_DFA_EXEC PCRE_DFA_EXEC32
#define PCRE_EXEC PCRE_EXEC32
#define PCRE_FREE_STUDY PCRE_FREE_STUDY32
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
#define PCRE_MAKETABLES pcre32_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
#define PCRE_PRINTINT PCRE_PRINTINT32
#define PCRE_STUDY PCRE_STUDY32
981
982 #endif
983
984 /* ----- End of mode-specific function call macros ----- */
985
986
987 /* Other parameters */
988
/* If the C library headers have not defined CLOCKS_PER_SEC, fall back to
CLK_TCK when that exists, and otherwise assume 100 ticks per second. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* DFA_WS_DIMENSION is defined only when DFA support has not been disabled with
NODFA. NOTE(review): presumably the number of elements in the workspace vector
passed to DFA matching - confirm at the point of use. */

#if !defined NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
1004
1005 /* Static variables */
1006
/* File-scope state shared by the functions in this file. Variables declared
without an initializer have static storage duration and therefore start out as
zero (or NULL for the FILE pointer). */

static FILE *outfile;
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int jit_was_used;           /* set to TRUE by jit_callback() */
static int locale_set = 0;
static int show_malloc;
static int use_utf;
static size_t gotten_store;
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;

/* The buffers grow automatically if very long input lines are encountered.
buffer_size is the initial size; both pointers start out NULL, so the buffers
must be allocated elsewhere before first use. */

static int buffer_size = 50000;
static pcre_uint8 *buffer = NULL;
static pcre_uint8 *pbuffer = NULL;
1028
1029 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1030
1031 #ifdef COMPILE_PCRE16
1032 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1033 #endif
1034
1035 #ifdef COMPILE_PCRE32
1036 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1037 #endif
1038
1039 /* We need buffers for building 16/32-bit strings, and the tables of operator
1040 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1041 pattern for saving/reloading testing. Luckily, the data for these tables is
1042 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1043 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1044 LINK_SIZE is also used later in this program. */
1045
/* 16-bit support: IMM2_SIZE and LINK_SIZE are redefined before OP_LENGTHS is
expanded, so that the table below is built with the 16-bit values. A LINK_SIZE
of 2 becomes 1, and a LINK_SIZE of 3 or 4 becomes 2; any other incoming value
stops the compilation. buffer16 is the output buffer that to16() allocates and
fills; buffer16_size records its current size in bytes. */

#ifdef SUPPORT_PCRE16
#undef IMM2_SIZE
#define IMM2_SIZE 1

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
1064
/* 32-bit support: IMM2_SIZE and LINK_SIZE are both forced to 1 before
OP_LENGTHS is expanded for the 32-bit table. Because this block comes after the
16-bit one, LINK_SIZE is left at 1 for the rest of the file whenever 32-bit
support is compiled in. buffer32 is the output buffer that to32() allocates and
fills; buffer32_size records its current size in bytes. */

#ifdef SUPPORT_PCRE32
#undef IMM2_SIZE
#define IMM2_SIZE 1
#undef LINK_SIZE
#define LINK_SIZE 1

static int buffer32_size = 0;
static pcre_uint32 *buffer32 = NULL;
static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE32 */
1075
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be changed by an option. If there is no 8-bit support, there
must be 16- or 32-bit support, so default to 16-bit mode when it is available
and to 32-bit mode otherwise. */

#if defined SUPPORT_PCRE8
static int pcre_mode = PCRE8_MODE;
#elif defined SUPPORT_PCRE16
static int pcre_mode = PCRE16_MODE;
#elif defined SUPPORT_PCRE32
static int pcre_mode = PCRE32_MODE;
#endif
1087
1088 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1089
1090 static int jit_study_bits[] =
1091 {
1092 PCRE_STUDY_JIT_COMPILE,
1093 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1094 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1095 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1096 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1097 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1098 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1099 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1100 };
1101
1102 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1103 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1104
/* Textual explanations for runtime error codes. The number in each trailing
comment is the entry's index in the array. NULL entries mark codes whose text is
not taken from this table (see the individual notes). NOTE(review): the index
appears to be the negated PCRE error code - confirm at the lookup site. */

static const char *errtexts[] = {
  NULL,                                                /*  0: no error */
  NULL,                                                /*  1: NOMATCH is handled specially */
  "NULL argument passed",                              /*  2 */
  "bad option value",                                  /*  3 */
  "magic number missing",                              /*  4 */
  "unknown opcode - pattern overwritten?",             /*  5 */
  "no more memory",                                    /*  6 */
  NULL,                                                /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                              /*  8 */
  "callout error code",                                /*  9 */
  NULL,                                                /* 10: BADUTF8/16 is handled specially */
  NULL,                                                /* 11: BADUTF8/16 offset is handled specially */
  NULL,                                                /* 12: PARTIAL is handled specially */
  "not used - internal error",                         /* 13 */
  "internal error - pattern overwritten?",             /* 14 */
  "bad count value",                                   /* 15 */
  "item unsupported for DFA matching",                 /* 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* 17 */
  "match limit not supported for DFA matching",        /* 18 */
  "workspace size exceeded in DFA matching",           /* 19 */
  "too much recursion for DFA matching",               /* 20 */
  "recursion limit exceeded",                          /* 21 */
  "not used - internal error",                         /* 22 */
  "invalid combination of newline options",            /* 23 */
  "bad offset value",                                  /* 24 */
  NULL,                                                /* 25: SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",     /* 26 */
  "JIT stack limit reached",                           /* 27 */
  "pattern compiled in wrong mode: 8-bit/16-bit error",  /* 28 */
  "pattern compiled with other endianness",            /* 29 */
  "invalid data in workspace for DFA restart",         /* 30 */
  "bad JIT option",                                    /* 31 */
  "bad length"                                         /* 32 */
};
1142
1143
1144 /*************************************************
1145 * Alternate character tables *
1146 *************************************************/
1147
1148 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1149 using the default tables of the library. However, the T option can be used to
1150 select alternate sets of tables, for different kinds of testing. Note also that
1151 the L (locale) option also adjusts the tables. */
1152
1153 /* This is the set of tables distributed as default with PCRE. It recognizes
1154 only ASCII characters. */
1155
/* Layout of tables0 (1088 bytes in all): 256 bytes of lower-casing data, 256
bytes of case-flipping data, ten 32-byte class bit maps, and 256 bytes of
per-character type flags. */

static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

/* space */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word (digits, letters, and underscore) */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph (33 to 126) */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print (32 to 126) */
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl (0 to 31, and 127) */
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/

0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1324
1325 /* This is a set of tables that came originally from a Windows user. It seems to
1326 be at least an approximation of ISO 8859. In particular, there are characters
1327 greater than 128 that are marked as spaces, letters, etc. */
1328
/* tables1 holds 1088 bytes, the same total as tables0, and its data falls into
the same four sections: 256 bytes of lower-casing data, 256 bytes of
case-flipping data, ten 32-byte class bit maps, and 256 bytes of character type
flags. The section markers below follow that layout. Unlike tables0, several
entries for characters above 127 are non-trivial (for example, 192-214 lower
case to 224-246). */

static const pcre_uint8 tables1[] = {

/* Lower casing table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps: ten maps of 32 bytes each, in the same order as in
tables0 (space, xdigit, digit, upper, lower, word, graph, print, punct,
cntrl). */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,

/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,

/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,

/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,

/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,

/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,

/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,

/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,

/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,

/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type flags (256 bytes), written in decimal; the bit meanings are
those listed for the corresponding table in tables0 (1 = white space,
2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or '_',
128 = regular expression metacharacter or binary zero). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1467
1468
1469
1470
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror() on old systems (e.g. SunOS4) whose C library lacks
it but does export the sys_errlist[] message table and its size, sys_nerr.

Argument:   n   an error number
Returns:    the sys_errlist[] entry for n when 0 <= n < sys_nerr, and the fixed
            text "unknown error number" otherwise
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1490
1491
1492
1493 /*************************************************
1494 * Print newline configuration *
1495 *************************************************/
1496
1497 /*
1498 Arguments:
1499 rc the return code from PCRE_CONFIG_NEWLINE
1500 isc TRUE if called from "-C newline"
1501 Returns: nothing
1502 */
1503
1504 static void
1505 print_newline_config(int rc, BOOL isc)
1506 {
1507 const char *s = NULL;
1508 if (!isc) printf(" Newline sequence is ");
1509 switch(rc)
1510 {
1511 case CHAR_CR: s = "CR"; break;
1512 case CHAR_LF: s = "LF"; break;
1513 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1514 case -1: s = "ANY"; break;
1515 case -2: s = "ANYCRLF"; break;
1516
1517 default:
1518 printf("a non-standard value: 0x%04x\n", rc);
1519 return;
1520 }
1521
1522 printf("%s\n", s);
1523 }
1524
1525
1526
1527 /*************************************************
1528 * JIT memory callback *
1529 *************************************************/
1530
1531 static pcre_jit_stack* jit_callback(void *arg)
1532 {
1533 jit_was_used = TRUE;
1534 return (pcre_jit_stack *)arg;
1535 }
1536
1537
1538 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1539 /*************************************************
1540 * Convert UTF-8 string to value *
1541 *************************************************/
1542
1543 /* This function takes one or more bytes that represents a UTF-8 character,
1544 and returns the value of the character.
1545
1546 Argument:
1547 utf8bytes a pointer to the byte vector
1548 vptr a pointer to an int to receive the value
1549
1550 Returns: > 0 => the number of bytes consumed
1551 -6 to 0 => malformed UTF-8 character at offset = (-return)
1552 */
1553
1554 static int
1555 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1556 {
1557 pcre_uint32 c = *utf8bytes++;
1558 pcre_uint32 d = c;
1559 int i, j, s;
1560
1561 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1562 {
1563 if ((d & 0x80) == 0) break;
1564 d <<= 1;
1565 }
1566
1567 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1568 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1569
1570 /* i now has a value in the range 1-5 */
1571
1572 s = 6*i;
1573 d = (c & utf8_table3[i]) << s;
1574
1575 for (j = 0; j < i; j++)
1576 {
1577 c = *utf8bytes++;
1578 if ((c & 0xc0) != 0x80) return -(j+1);
1579 s -= 6;
1580 d |= (c & 0x3f) << s;
1581 }
1582
1583 /* Check that encoding was the correct unique one */
1584
1585 for (j = 0; j < utf8_table1_size; j++)
1586 if (d <= (pcre_uint32)utf8_table1[j]) break;
1587 if (j != i) return -(i+1);
1588
1589 /* Valid value */
1590
1591 *vptr = d;
1592 return i+1;
1593 }
1594 #endif /* NOUTF || SUPPORT_PCRE16 */
1595
1596
1597
#if defined SUPPORT_PCRE8 && !defined NOUTF
/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* Encodes a value in the range 0 - 0x7fffffff as a UTF-8 character of 1 to 6
bytes.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             OR -1 if cvalue is greater than 0x7fffffff (nothing is written)
*/

static int
ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
{
int extra, pos;

if (cvalue > 0x7fffffffu) return -1;

/* Find how many continuation bytes are needed. */

extra = 0;
while (extra < utf8_table1_size && cvalue > (pcre_uint32)utf8_table1[extra])
  extra++;

/* Fill in the continuation bytes from the last one backwards, six bits at a
time; whatever remains goes into the first byte together with its marker. */

for (pos = extra; pos > 0; pos--)
  {
  utf8bytes[pos] = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }
utf8bytes[0] = utf8_table2[extra] | cvalue;

return extra + 1;
}
#endif
1631
1632
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Converts a byte string into 16-bit units, leaving the zero-terminated result
in buffer16. The buffer is re-allocated to 2*len + 2 bytes, that is len + 1
16-bit units, whenever it is smaller than that. This is always enough, because
no input byte gives rise to more than one output unit: in non-UTF pattern mode
each byte becomes one unit, and when decoding UTF-8 only sequences of at least
four bytes produce two units (a surrogate pair).

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;      /* bytes, including the terminating zero unit */

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  pcre_uint32 c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is possible only in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: widen each byte to one 16-bit unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1711
1712 #ifdef SUPPORT_PCRE32
1713 /*************************************************
1714 * Convert a string to 32-bit *
1715 *************************************************/
1716
1717 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1718 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1719 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1720 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1721 result is always left in buffer32.
1722
1723 Note that this function does not object to surrogate values. This is
1724 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1725 for the purpose of testing that they are correctly faulted.
1726
1727 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1728 in UTF-8 so that values greater than 255 can be handled.
1729
1730 Arguments:
1731 data TRUE if converting a data line; FALSE for a regex
1732 p points to a byte string
1733 utf true if UTF-8 (to be converted to UTF-32)
1734 len number of bytes in the string (excluding trailing zero)
1735
1736 Returns: number of 32-bit data items used (excluding trailing zero)
1737 OR -1 if a UTF-8 string is malformed
1738 OR -2 if a value > 0x10ffff is encountered
1739 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1740 */
1741
1742 static int
1743 to32(int data, pcre_uint8 *p, int utf, int len)
1744 {
1745 pcre_uint32 *pp;
1746
1747 if (buffer32_size < 4*len + 4)
1748 {
1749 if (buffer32 != NULL) free(buffer32);
1750 buffer32_size = 4*len + 4;
1751 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1752 if (buffer32 == NULL)
1753 {
1754 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1755 exit(1);
1756 }
1757 }
1758
1759 pp = buffer32;
1760
1761 if (!utf && !data)
1762 {
1763 while (len-- > 0) *pp++ = *p++;
1764 }
1765
1766 else
1767 {
1768 pcre_uint32 c = 0;
1769 while (len > 0)
1770 {
1771 int chlen = utf82ord(p, &c);
1772 if (chlen <= 0) return -1;
1773 if (utf)
1774 {
1775 if (c > 0x10ffff) return -2;
1776 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1777 }
1778
1779 p += chlen;
1780 len -= chlen;
1781 *pp++ = c;
1782 }
1783 }
1784
1785 *pp = 0;
1786 return pp - buffer32;
1787 }
1788
1789 /* Check that a 32-bit character string is valid UTF-32.
1790
1791 Arguments:
1792 string points to the string
1793 length length of string, or -1 if the string is zero-terminated
1794
1795 Returns: TRUE if the string is a valid UTF-32 string
1796 FALSE otherwise
1797 */
1798
1799 #ifdef NEVER /* Not used */
1800 #ifdef SUPPORT_UTF
1801 static BOOL
1802 valid_utf32(pcre_uint32 *string, int length)
1803 {
1804 register pcre_uint32 *p;
1805 register pcre_uint32 c;
1806
1807 for (p = string; length-- > 0; p++)
1808 {
1809 c = *p;
1810 if (c > 0x10ffffu) return FALSE; /* Too big */
1811 if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1812 }
1813
1814 return TRUE;
1815 }
1816 #endif /* SUPPORT_UTF */
1817 #endif /* NEVER */
1818 #endif /* SUPPORT_PCRE32 */
1819
1820
1821 /*************************************************
1822 * Read or extend an input line *
1823 *************************************************/
1824
1825 /* Input lines are read into buffer, but both patterns and data lines can be
1826 continued over multiple input lines. In addition, if the buffer fills up, we
1827 want to automatically expand it so as to be able to handle extremely large
1828 lines that are needed for certain stress tests. When the input buffer is
1829 expanded, the other two buffers must also be expanded likewise, and the
1830 contents of pbuffer, which are a copy of the input for callouts, must be
1831 preserved (for when expansion happens for a data line). This is not the most
1832 optimal way of handling this, but hey, this is just a test program!
1833
1834 Arguments:
1835 f the file to read
1836 start where in buffer to start (this *must* be within buffer)
1837 prompt for stdin or readline()
1838
1839 Returns: pointer to the start of new data
1840 could be a copy of start, or could be moved
1841 NULL if no data read and EOF reached
1842 */
1843
1844 static pcre_uint8 *
1845 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1846 {
1847 pcre_uint8 *here = start;
1848
1849 for (;;)
1850 {
1851 size_t rlen = (size_t)(buffer_size - (here - buffer));
1852
1853 if (rlen > 1000)
1854 {
1855 int dlen;
1856
1857 /* If libreadline or libedit support is required, use readline() to read a
1858 line if the input is a terminal. Note that readline() removes the trailing
1859 newline, so we must put it back again, to be compatible with fgets(). */
1860
1861 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1862 if (isatty(fileno(f)))
1863 {
1864 size_t len;
1865 char *s = readline(prompt);
1866 if (s == NULL) return (here == start)? NULL : start;
1867 len = strlen(s);
1868 if (len > 0) add_history(s);
1869 if (len > rlen - 1) len = rlen - 1;
1870 memcpy(here, s, len);
1871 here[len] = '\n';
1872 here[len+1] = 0;
1873 free(s);
1874 }
1875 else
1876 #endif
1877
1878 /* Read the next line by normal means, prompting if the file is stdin. */
1879
1880 {
1881 if (f == stdin) printf("%s", prompt);
1882 if (fgets((char *)here, rlen, f) == NULL)
1883 return (here == start)? NULL : start;
1884 }
1885
1886 dlen = (int)strlen((char *)here);
1887 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1888 here += dlen;
1889 }
1890
1891 else
1892 {
1893 int new_buffer_size = 2*buffer_size;
1894 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1895 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1896
1897 if (new_buffer == NULL || new_pbuffer == NULL)
1898 {
1899 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1900 exit(1);
1901 }
1902
1903 memcpy(new_buffer, buffer, buffer_size);
1904 memcpy(new_pbuffer, pbuffer, buffer_size);
1905
1906 buffer_size = new_buffer_size;
1907
1908 start = new_buffer + (start - buffer);
1909 here = new_buffer + (here - buffer);
1910
1911 free(buffer);
1912 free(pbuffer);
1913
1914 buffer = new_buffer;
1915 pbuffer = new_pbuffer;
1916 }
1917 }
1918
1919 /* Control never gets here */
1920 }
1921
1922
1923
1924 /*************************************************
1925 * Read number from string *
1926 *************************************************/
1927
1928 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1929 around with conditional compilation, just do the job by hand. It is only used
1930 for unpicking arguments, so just keep it simple.
1931
1932 Arguments:
1933 str string to be converted
1934 endptr where to put the end pointer
1935
1936 Returns: the value, as an int (there is no overflow check)
1937 */
1938
1939 static int
1940 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1941 {
1942 int result = 0;
1943 while(*str != 0 && isspace(*str)) str++;
1944 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1945 *endptr = str;
1946 return(result);
1947 }
1948
1949
1950
1951 /*************************************************
1952 * Print one character *
1953 *************************************************/
1954
1955 /* Print a single character either literally, or as a hex escape. */
1956
1957 static int pchar(pcre_uint32 c, FILE *f)
1958 {
1959 int n = 0;
1960 if (PRINTOK(c))
1961 {
1962 if (f != NULL) fprintf(f, "%c", c);
1963 return 1;
1964 }
1965
1966 if (c < 0x100)
1967 {
1968 if (use_utf)
1969 {
1970 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1971 return 6;
1972 }
1973 else
1974 {
1975 if (f != NULL) fprintf(f, "\\x%02x", c);
1976 return 4;
1977 }
1978 }
1979
1980 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1981 return n >= 0 ? n : 0;
1982 }
1983
1984
1985
1986 #ifdef SUPPORT_PCRE8
1987 /*************************************************
1988 * Print 8-bit character string *
1989 *************************************************/
1990
1991 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1992 If handed a NULL file, just counts chars without printing. */
1993
1994 static int pchars(pcre_uint8 *p, int length, FILE *f)
1995 {
1996 pcre_uint32 c = 0;
1997 int yield = 0;
1998
1999 if (length < 0)
2000 length = strlen((char *)p);
2001
2002 while (length-- > 0)
2003 {
2004 #if !defined NOUTF
2005 if (use_utf)
2006 {
2007 int rc = utf82ord(p, &c);
2008 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2009 {
2010 length -= rc - 1;
2011 p += rc;
2012 yield += pchar(c, f);
2013 continue;
2014 }
2015 }
2016 #endif
2017 c = *p++;
2018 yield += pchar(c, f);
2019 }
2020
2021 return yield;
2022 }
2023 #endif
2024
2025
2026
2027 #ifdef SUPPORT_PCRE16
2028 /*************************************************
2029 * Find length of 0-terminated 16-bit string *
2030 *************************************************/
2031
2032 static int strlen16(PCRE_SPTR16 p)
2033 {
2034 PCRE_SPTR16 pp = p;
2035 while (*pp != 0) pp++;
2036 return (int)(pp - p);
2037 }
2038 #endif /* SUPPORT_PCRE16 */
2039
2040
2041
2042 #ifdef SUPPORT_PCRE32
2043 /*************************************************
2044 * Find length of 0-terminated 32-bit string *
2045 *************************************************/
2046
2047 static int strlen32(PCRE_SPTR32 p)
2048 {
2049 PCRE_SPTR32 pp = p;
2050 while (*pp != 0) pp++;
2051 return (int)(pp - p);
2052 }
2053 #endif /* SUPPORT_PCRE32 */
2054
2055
2056
2057 #ifdef SUPPORT_PCRE16
2058 /*************************************************
2059 * Print 16-bit character string *
2060 *************************************************/
2061
2062 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2063 If handed a NULL file, just counts chars without printing. */
2064
2065 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2066 {
2067 int yield = 0;
2068
2069 if (length < 0)
2070 length = strlen16(p);
2071
2072 while (length-- > 0)
2073 {
2074 pcre_uint32 c = *p++ & 0xffff;
2075 #if !defined NOUTF
2076 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2077 {
2078 int d = *p & 0xffff;
2079 if (d >= 0xDC00 && d <= 0xDFFF)
2080 {
2081 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2082 length--;
2083 p++;
2084 }
2085 }
2086 #endif
2087 yield += pchar(c, f);
2088 }
2089
2090 return yield;
2091 }
2092 #endif /* SUPPORT_PCRE16 */
2093
2094
2095
2096 #ifdef SUPPORT_PCRE32
2097 /*************************************************
2098 * Print 32-bit character string *
2099 *************************************************/
2100
2101 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2102 If handed a NULL file, just counts chars without printing. */
2103
2104 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2105 {
2106 int yield = 0;
2107
2108 (void)(utf); /* Avoid compiler warning */
2109
2110 if (length < 0)
2111 length = strlen32(p);
2112
2113 while (length-- > 0)
2114 {
2115 pcre_uint32 c = *p++;
2116 yield += pchar(c, f);
2117 }
2118
2119 return yield;
2120 }
2121 #endif /* SUPPORT_PCRE32 */
2122
2123
2124
2125 #ifdef SUPPORT_PCRE8
2126 /*************************************************
2127 * Read a capture name (8-bit) and check it *
2128 *************************************************/
2129
2130 static pcre_uint8 *
2131 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132 {
2133 pcre_uint8 *npp = *pp;
2134 while (isalnum(*p)) *npp++ = *p++;
2135 *npp++ = 0;
2136 *npp = 0;
2137 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138 {
2139 fprintf(outfile, "no parentheses with name \"");
2140 PCHARSV(*pp, 0, -1, outfile);
2141 fprintf(outfile, "\"\n");
2142 }
2143
2144 *pp = npp;
2145 return p;
2146 }
2147 #endif /* SUPPORT_PCRE8 */
2148
2149
2150
2151 #ifdef SUPPORT_PCRE16
2152 /*************************************************
2153 * Read a capture name (16-bit) and check it *
2154 *************************************************/
2155
2156 /* Note that the text being read is 8-bit. */
2157
2158 static pcre_uint8 *
2159 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160 {
2161 pcre_uint16 *npp = *pp;
2162 while (isalnum(*p)) *npp++ = *p++;
2163 *npp++ = 0;
2164 *npp = 0;
2165 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166 {
2167 fprintf(outfile, "no parentheses with name \"");
2168 PCHARSV(*pp, 0, -1, outfile);
2169 fprintf(outfile, "\"\n");
2170 }
2171 *pp = npp;
2172 return p;
2173 }
2174 #endif /* SUPPORT_PCRE16 */
2175
2176
2177
2178 #ifdef SUPPORT_PCRE32
2179 /*************************************************
2180 * Read a capture name (32-bit) and check it *
2181 *************************************************/
2182
2183 /* Note that the text being read is 8-bit. */
2184
2185 static pcre_uint8 *
2186 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2187 {
2188 pcre_uint32 *npp = *pp;
2189 while (isalnum(*p)) *npp++ = *p++;
2190 *npp++ = 0;
2191 *npp = 0;
2192 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2193 {
2194 fprintf(outfile, "no parentheses with name \"");
2195 PCHARSV(*pp, 0, -1, outfile);
2196 fprintf(outfile, "\"\n");
2197 }
2198 *pp = npp;
2199 return p;
2200 }
2201 #endif /* SUPPORT_PCRE32 */
2202
2203
2204
2205 /*************************************************
2206 * Callout function *
2207 *************************************************/
2208
2209 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2210 the match. Yield zero unless more callouts than the fail count, or the callout
2211 data is not zero. */
2212
2213 static int callout(pcre_callout_block *cb)
2214 {
2215 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2216 int i, pre_start, post_start, subject_length;
2217
2218 if (callout_extra)
2219 {
2220 fprintf(f, "Callout %d: last capture = %d\n",
2221 cb->callout_number, cb->capture_last);
2222
2223 for (i = 0; i < cb->capture_top * 2; i += 2)
2224 {
2225 if (cb->offset_vector[i] < 0)
2226 fprintf(f, "%2d: <unset>\n", i/2);
2227 else
2228 {
2229 fprintf(f, "%2d: ", i/2);
2230 PCHARSV(cb->subject, cb->offset_vector[i],
2231 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2232 fprintf(f, "\n");
2233 }
2234 }
2235 }
2236
2237 /* Re-print the subject in canonical form, the first time or if giving full
2238 datails. On subsequent calls in the same match, we use pchars just to find the
2239 printed lengths of the substrings. */
2240
2241 if (f != NULL) fprintf(f, "--->");
2242
2243 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2244 PCHARS(post_start, cb->subject, cb->start_match,
2245 cb->current_position - cb->start_match, f);
2246
2247 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2248
2249 PCHARSV(cb->subject, cb->current_position,
2250 cb->subject_length - cb->current_position, f);
2251
2252 if (f != NULL) fprintf(f, "\n");
2253
2254 /* Always print appropriate indicators, with callout number if not already
2255 shown. For automatic callouts, show the pattern offset. */
2256
2257 if (cb->callout_number == 255)
2258 {
2259 fprintf(outfile, "%+3d ", cb->pattern_position);
2260 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2261 }
2262 else
2263 {
2264 if (callout_extra) fprintf(outfile, " ");
2265 else fprintf(outfile, "%3d ", cb->callout_number);
2266 }
2267
2268 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2269 fprintf(outfile, "^");
2270
2271 if (post_start > 0)
2272 {
2273 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2274 fprintf(outfile, "^");
2275 }
2276
2277 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2278 fprintf(outfile, " ");
2279
2280 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2281 pbuffer + cb->pattern_position);
2282
2283 fprintf(outfile, "\n");
2284 first_callout = 0;
2285
2286 if (cb->mark != last_callout_mark)
2287 {
2288 if (cb->mark == NULL)
2289 fprintf(outfile, "Latest Mark: <unset>\n");
2290 else
2291 {
2292 fprintf(outfile, "Latest Mark: ");
2293 PCHARSV(cb->mark, 0, -1, outfile);
2294 putc('\n', outfile);
2295 }
2296 last_callout_mark = cb->mark;
2297 }
2298
2299 if (cb->callout_data != NULL)
2300 {
2301 int callout_data = *((int *)(cb->callout_data));
2302 if (callout_data != 0)
2303 {
2304 fprintf(outfile, "Callout data = %d\n", callout_data);
2305 return callout_data;
2306 }
2307 }
2308
2309 return (cb->callout_number != callout_fail_id)? 0 :
2310 (++callout_count >= callout_fail_count)? 1 : 0;
2311 }
2312
2313
2314 /*************************************************
2315 * Local malloc functions *
2316 *************************************************/
2317
2318 /* Alternative malloc function, to test functionality and save the size of a
2319 compiled re, which is the first store request that pcre_compile() makes. The
2320 show_malloc variable is set only during matching. */
2321
2322 static void *new_malloc(size_t size)
2323 {
2324 void *block = malloc(size);
2325 gotten_store = size;
2326 if (first_gotten_store == 0) first_gotten_store = size;
2327 if (show_malloc)
2328 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2329 return block;
2330 }
2331
2332 static void new_free(void *block)
2333 {
2334 if (show_malloc)
2335 fprintf(outfile, "free %p\n", block);
2336 free(block);
2337 }
2338
2339 /* For recursion malloc/free, to test stacking calls */
2340
2341 static void *stack_malloc(size_t size)
2342 {
2343 void *block = malloc(size);
2344 if (show_malloc)
2345 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346 return block;
2347 }
2348
2349 static void stack_free(void *block)
2350 {
2351 if (show_malloc)
2352 fprintf(outfile, "stack_free %p\n", block);
2353 free(block);
2354 }
2355
2356
2357 /*************************************************
2358 * Call pcre_fullinfo() *
2359 *************************************************/
2360
2361 /* Get one piece of information from the pcre_fullinfo() function. When only
2362 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2363 value, but the code is defensive.
2364
2365 Arguments:
2366 re compiled regex
2367 study study data
2368 option PCRE_INFO_xxx option
2369 ptr where to put the data
2370
2371 Returns: 0 when OK, < 0 on error
2372 */
2373
2374 static int
2375 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376 {
2377 int rc;
2378
2379 if (pcre_mode == PCRE32_MODE)
2380 #ifdef SUPPORT_PCRE32
2381 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2382 #else
2383 rc = PCRE_ERROR_BADMODE;
2384 #endif
2385 else if (pcre_mode == PCRE16_MODE)
2386 #ifdef SUPPORT_PCRE16
2387 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388 #else
2389 rc = PCRE_ERROR_BADMODE;
2390 #endif
2391 else
2392 #ifdef SUPPORT_PCRE8
2393 rc = pcre_fullinfo(re, study, option, ptr);
2394 #else
2395 rc = PCRE_ERROR_BADMODE;
2396 #endif
2397
2398 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2399 {
2400 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2401 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2402 if (rc == PCRE_ERROR_BADMODE)
2403 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2404 "%d-bit mode\n", 8 * CHAR_SIZE,
2405 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2406 }
2407
2408 return rc;
2409 }
2410
2411
2412
2413 /*************************************************
2414 * Swap byte functions *
2415 *************************************************/
2416
2417 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418 value, respectively.
2419
2420 Arguments:
2421 value any number
2422
2423 Returns: the byte swapped value
2424 */
2425
2426 static pcre_uint32
2427 swap_uint32(pcre_uint32 value)
2428 {
2429 return ((value & 0x000000ff) << 24) |
2430 ((value & 0x0000ff00) << 8) |
2431 ((value & 0x00ff0000) >> 8) |
2432 (value >> 24);
2433 }
2434
2435 static pcre_uint16
2436 swap_uint16(pcre_uint16 value)
2437 {
2438 return (value >> 8) | (value << 8);
2439 }
2440
2441
2442
2443 /*************************************************
2444 * Flip bytes in a compiled pattern *
2445 *************************************************/
2446
2447 /* This function is called if the 'F' option was present on a pattern that is
2448 to be written to a file. We flip the bytes of all the integer fields in the
2449 regex data block and the study block. In 16-bit mode this also flips relevant
2450 bytes in the pattern itself. This is to make it possible to test PCRE's
2451 ability to reload byte-flipped patterns, e.g. those compiled on a different
2452 architecture. */
2453
2454 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2455 static void
2456 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2457 {
2458 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2459 #ifdef SUPPORT_PCRE16
2460 int op;
2461 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2462 int length = re->name_count * re->name_entry_size;
2463 #ifdef SUPPORT_UTF
2464 BOOL utf = (re->options & PCRE_UTF16) != 0;
2465 BOOL utf16_char = FALSE;
2466 #endif /* SUPPORT_UTF */
2467 #endif /* SUPPORT_PCRE16 */
2468
2469 /* Always flip the bytes in the main data block and study blocks. */
2470
2471 re->magic_number = REVERSED_MAGIC_NUMBER;
2472 re->size = swap_uint32(re->size);
2473 re->options = swap_uint32(re->options);
2474 re->flags = swap_uint32(re->flags);
2475 re->limit_match = swap_uint32(re->limit_match);
2476 re->limit_recursion = swap_uint32(re->limit_recursion);
2477 re->first_char = swap_uint16(re->first_char);
2478 re->req_char = swap_uint16(re->req_char);
2479 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2480 re->top_bracket = swap_uint16(re->top_bracket);
2481 re->top_backref = swap_uint16(re->top_backref);
2482 re->name_table_offset = swap_uint16(re->name_table_offset);
2483 re->name_entry_size = swap_uint16(re->name_entry_size);
2484 re->name_count = swap_uint16(re->name_count);
2485 re->ref_count = swap_uint16(re->ref_count);
2486
2487 if (extra != NULL)
2488 {
2489 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2490 rsd->size = swap_uint32(rsd->size);
2491 rsd->flags = swap_uint32(rsd->flags);
2492 rsd->minlength = swap_uint32(rsd->minlength);
2493 }
2494
2495 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2496 in the name table, if present, and then in the pattern itself. */
2497
2498 #ifdef SUPPORT_PCRE16
2499 if (pcre_mode != PCRE16_MODE) return;
2500
2501 while(TRUE)
2502 {
2503 /* Swap previous characters. */
2504 while (length-- > 0)
2505 {
2506 *ptr = swap_uint16(*ptr);
2507 ptr++;
2508 }
2509 #ifdef SUPPORT_UTF
2510 if (utf16_char)
2511 {
2512 if ((ptr[-1] & 0xfc00) == 0xd800)
2513 {
2514 /* We know that there is only one extra character in UTF-16. */
2515 *ptr = swap_uint16(*ptr);
2516 ptr++;
2517 }
2518 }
2519 utf16_char = FALSE;
2520 #endif /* SUPPORT_UTF */
2521
2522 /* Get next opcode. */
2523
2524 length = 0;
2525 op = *ptr;
2526 *ptr++ = swap_uint16(op);
2527
2528 switch (op)
2529 {
2530 case OP_END:
2531 return;
2532
2533 #ifdef SUPPORT_UTF
2534 case OP_CHAR:
2535 case OP_CHARI:
2536 case OP_NOT:
2537 case OP_NOTI:
2538 case OP_STAR:
2539 case OP_MINSTAR:
2540 case OP_PLUS:
2541 case OP_MINPLUS:
2542 case OP_QUERY:
2543 case OP_MINQUERY:
2544 case OP_UPTO:
2545 case OP_MINUPTO:
2546 case OP_EXACT:
2547 case OP_POSSTAR:
2548 case OP_POSPLUS:
2549 case OP_POSQUERY:
2550 case OP_POSUPTO:
2551 case OP_STARI:
2552 case OP_MINSTARI:
2553 case OP_PLUSI:
2554 case OP_MINPLUSI:
2555 case OP_QUERYI:
2556 case OP_MINQUERYI:
2557 case OP_UPTOI:
2558 case OP_MINUPTOI:
2559 case OP_EXACTI:
2560 case OP_POSSTARI:
2561 case OP_POSPLUSI:
2562 case OP_POSQUERYI:
2563 case OP_POSUPTOI:
2564 case OP_NOTSTAR:
2565 case OP_NOTMINSTAR:
2566 case OP_NOTPLUS:
2567 case OP_NOTMINPLUS:
2568 case OP_NOTQUERY:
2569 case OP_NOTMINQUERY:
2570 case OP_NOTUPTO:
2571 case OP_NOTMINUPTO:
2572 case OP_NOTEXACT:
2573 case OP_NOTPOSSTAR:
2574 case OP_NOTPOSPLUS:
2575 case OP_NOTPOSQUERY:
2576 case OP_NOTPOSUPTO:
2577 case OP_NOTSTARI:
2578 case OP_NOTMINSTARI:
2579 case OP_NOTPLUSI:
2580 case OP_NOTMINPLUSI:
2581 case OP_NOTQUERYI:
2582 case OP_NOTMINQUERYI:
2583 case OP_NOTUPTOI:
2584 case OP_NOTMINUPTOI:
2585 case OP_NOTEXACTI:
2586 case OP_NOTPOSSTARI:
2587 case OP_NOTPOSPLUSI:
2588 case OP_NOTPOSQUERYI:
2589 case OP_NOTPOSUPTOI:
2590 if (utf) utf16_char = TRUE;
2591 #endif
2592 /* Fall through. */
2593
2594 default:
2595 length = OP_lengths16[op] - 1;
2596 break;
2597
2598 case OP_CLASS:
2599 case OP_NCLASS:
2600 /* Skip the character bit map. */
2601 ptr += 32/sizeof(pcre_uint16);
2602 length = 0;
2603 break;
2604
2605 case OP_XCLASS:
2606 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2607 if (LINK_SIZE > 1)
2608 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2609 - (1 + LINK_SIZE + 1));
2610 else
2611 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2612
2613 /* Reverse the size of the XCLASS instance. */
2614 *ptr = swap_uint16(*ptr);
2615 ptr++;
2616 if (LINK_SIZE > 1)
2617 {
2618 *ptr = swap_uint16(*ptr);
2619 ptr++;
2620 }
2621
2622 op = *ptr;
2623 *ptr = swap_uint16(op);
2624 ptr++;
2625 if ((op & XCL_MAP) != 0)
2626 {
2627 /* Skip the character bit map. */
2628 ptr += 32/sizeof(pcre_uint16);
2629 length -= 32/sizeof(pcre_uint16);
2630 }
2631 break;
2632 }
2633 }
2634 /* Control should never reach here in 16 bit mode. */
2635 #endif /* SUPPORT_PCRE16 */
2636 }
2637 #endif /* SUPPORT_PCRE[8|16] */
2638
2639
2640
2641 #if defined SUPPORT_PCRE32
2642 static void
2643 regexflip_32(pcre *ere, pcre_extra *extra)
2644 {
2645 real_pcre32 *re = (real_pcre32 *)ere;
2646 int op;
2647 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2648 int length = re->name_count * re->name_entry_size;
2649
2650 /* Always flip the bytes in the main data block and study blocks. */
2651
2652 re->magic_number = REVERSED_MAGIC_NUMBER;
2653 re->size = swap_uint32(re->size);
2654 re->options = swap_uint32(re->options);
2655 re->flags = swap_uint32(re->flags);
2656 re->limit_match = swap_uint32(re->limit_match);
2657 re->limit_recursion = swap_uint32(re->limit_recursion);
2658 re->first_char = swap_uint32(re->first_char);
2659 re->req_char = swap_uint32(re->req_char);
2660 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2661 re->top_bracket = swap_uint16(re->top_bracket);
2662 re->top_backref = swap_uint16(re->top_backref);
2663 re->name_table_offset = swap_uint16(re->name_table_offset);
2664 re->name_entry_size = swap_uint16(re->name_entry_size);
2665 re->name_count = swap_uint16(re->name_count);
2666 re->ref_count = swap_uint16(re->ref_count);
2667
2668 if (extra != NULL)
2669 {
2670 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2671 rsd->size = swap_uint32(rsd->size);
2672 rsd->flags = swap_uint32(rsd->flags);
2673 rsd->minlength = swap_uint32(rsd->minlength);
2674 }
2675
2676 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2677 the pattern itself. */
2678
2679 while(TRUE)
2680 {
2681 /* Swap previous characters. */
2682 while (length-- > 0)
2683 {
2684 *ptr = swap_uint32(*ptr);
2685 ptr++;
2686 }
2687
2688 /* Get next opcode. */
2689
2690 length = 0;
2691 op = *ptr;
2692 *ptr++ = swap_uint32(op);
2693
2694 switch (op)
2695 {
2696 case OP_END:
2697 return;
2698
2699 default:
2700 length = OP_lengths32[op] - 1;
2701 break;
2702
2703 case OP_CLASS:
2704 case OP_NCLASS:
2705 /* Skip the character bit map. */
2706 ptr += 32/sizeof(pcre_uint32);
2707 length = 0;
2708 break;
2709
2710 case OP_XCLASS:
2711 /* LINK_SIZE can only be 1 in 32-bit mode. */
2712 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2713
2714 /* Reverse the size of the XCLASS instance. */
2715 *ptr = swap_uint32(*ptr);
2716 ptr++;
2717
2718 op = *ptr;
2719 *ptr = swap_uint32(op);
2720 ptr++;
2721 if ((op & XCL_MAP) != 0)
2722 {
2723 /* Skip the character bit map. */
2724 ptr += 32/sizeof(pcre_uint32);
2725 length -= 32/sizeof(pcre_uint32);
2726 }
2727 break;
2728 }
2729 }
2730 /* Control should never reach here in 32 bit mode. */
2731 }
2732
2733 #endif /* SUPPORT_PCRE32 */
2734
2735
2736
2737 static void
2738 regexflip(pcre *ere, pcre_extra *extra)
2739 {
2740 #if defined SUPPORT_PCRE32
2741 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2742 regexflip_32(ere, extra);
2743 #endif
2744 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2745 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2746 regexflip8_or_16(ere, extra);
2747 #endif
2748 }
2749
2750
2751
2752 /*************************************************
2753 * Check match or recursion limit *
2754 *************************************************/
2755
2756 static int
2757 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2758 int start_offset, int options, int *use_offsets, int use_size_offsets,
2759 int flag, unsigned long int *limit, int errnumber, const char *msg)
2760 {
2761 int count;
2762 int min = 0;
2763 int mid = 64;
2764 int max = -1;
2765
2766 extra->flags |= flag;
2767
2768 for (;;)
2769 {
2770 *limit = mid;
2771
2772 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2773 use_offsets, use_size_offsets);
2774
2775 if (count == errnumber)
2776 {
2777 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2778 min = mid;
2779 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2780 }
2781
2782 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2783 count == PCRE_ERROR_PARTIAL)
2784 {
2785 if (mid == min + 1)
2786 {
2787 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2788 break;
2789 }
2790 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2791 max = mid;
2792 mid = (min + mid)/2;
2793 }
2794 else break; /* Some other error */
2795 }
2796
2797 extra->flags &= ~flag;
2798 return count;
2799 }
2800
2801
2802
2803 /*************************************************
2804 * Case-independent strncmp() function *
2805 *************************************************/
2806
2807 /*
2808 Arguments:
2809 s first string
2810 t second string
2811 n number of characters to compare
2812
2813 Returns: < 0, = 0, or > 0, according to the comparison
2814 */
2815
2816 static int
2817 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2818 {
2819 while (n--)
2820 {
2821 int c = tolower(*s++) - tolower(*t++);
2822 if (c) return c;
2823 }
2824 return 0;
2825 }
2826
2827
2828
2829 /*************************************************
2830 * Check newline indicator *
2831 *************************************************/
2832
2833 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2834 a message and return 0 if there is no match.
2835
2836 Arguments:
2837 p points after the leading '<'
2838 f file for error message
2839
2840 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2841 */
2842
2843 static int
2844 check_newline(pcre_uint8 *p, FILE *f)
2845 {
2846 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2847 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2848 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2849 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2850 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2851 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2852 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2853 fprintf(f, "Unknown newline type at: <%s\n", p);
2854 return 0;
2855 }
2856
2857
2858
2859 /*************************************************
2860 * Usage function *
2861 *************************************************/
2862
/* Write the command line summary to stdout. Lines for optional features are
included only when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);
fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " pcre32 32 bit library support enabled [0, 1]\n", stdout);
fputs(" utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n", stdout);
fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
fputs(" -T same as -t, but show total times at the end\n"
      " -TM same as -tm, but show total time at the end\n", stdout);
}
2919
2920
2921
2922 /*************************************************
2923 * Main Program *
2924 *************************************************/
2925
2926 /* Read lines from named file or stdin and write to named file or stdout; lines
2927 consist of a regular expression, in delimiters and optionally followed by
2928 options, followed by a set of test data, terminated by an empty line. */
2929
2930 int main(int argc, char **argv)
2931 {
2932 FILE *infile = stdin;
2933 const char *version;
2934 int options = 0;
2935 int study_options = 0;
2936 int default_find_match_limit = FALSE;
2937 int op = 1;
2938 int timeit = 0;
2939 int timeitm = 0;
2940 int showtotaltimes = 0;
2941 int showinfo = 0;
2942 int showstore = 0;
2943 int force_study = -1;
2944 int force_study_options = 0;
2945 int quiet = 0;
2946 int size_offsets = 45;
2947 int size_offsets_max;
2948 int *offsets = NULL;
2949 int debug = 0;
2950 int done = 0;
2951 int all_use_dfa = 0;
2952 int verify_jit = 0;
2953 int yield = 0;
2954 int stack_size;
2955 pcre_uint8 *dbuffer = NULL;
2956 size_t dbuffer_size = 1u << 14;
2957 clock_t total_compile_time = 0;
2958 clock_t total_study_time = 0;
2959 clock_t total_match_time = 0;
2960
2961 #if !defined NOPOSIX
2962 int posix = 0;
2963 #endif
2964 #if !defined NODFA
2965 int *dfa_workspace = NULL;
2966 #endif
2967
2968 pcre_jit_stack *jit_stack = NULL;
2969
2970 /* These vectors store, end-to-end, a list of zero-terminated captured
2971 substring names, each list itself being terminated by an empty name. Assume
2972 that 1024 is plenty long enough for the few names we'll be testing. It is
2973 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2974 for the actual memory, to ensure alignment. */
2975
2976 pcre_uint32 copynames[1024];
2977 pcre_uint32 getnames[1024];
2978
2979 #ifdef SUPPORT_PCRE32
2980 pcre_uint32 *cn32ptr;
2981 pcre_uint32 *gn32ptr;
2982 #endif
2983
2984 #ifdef SUPPORT_PCRE16
2985 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2986 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2987 pcre_uint16 *cn16ptr;
2988 pcre_uint16 *gn16ptr;
2989 #endif
2990
2991 #ifdef SUPPORT_PCRE8
2992 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2993 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2994 pcre_uint8 *cn8ptr;
2995 pcre_uint8 *gn8ptr;
2996 #endif
2997
2998 /* Get buffers from malloc() so that valgrind will check their misuse when
2999 debugging. They grow automatically when very long lines are read. The 16-
3000 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3001
3002 buffer = (pcre_uint8 *)malloc(buffer_size);
3003 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3004
3005 /* The outfile variable is static so that new_malloc can use it. */
3006
3007 outfile = stdout;
3008
3009 /* The following _setmode() stuff is some Windows magic that tells its runtime
3010 library to translate CRLF into a single LF character. At least, that's what
3011 I've been told: never having used Windows I take this all on trust. Originally
3012 it set 0x8000, but then I was advised that _O_BINARY was better. */
3013
3014 #if defined(_WIN32) || defined(WIN32)
3015 _setmode( _fileno( stdout ), _O_BINARY );
3016 #endif
3017
3018 /* Get the version number: both pcre_version() and pcre16_version() give the
3019 same answer. We just need to ensure that we call one that is available. */
3020
3021 #if defined SUPPORT_PCRE8
3022 version = pcre_version();
3023 #elif defined SUPPORT_PCRE16
3024 version = pcre16_version();
3025 #elif defined SUPPORT_PCRE32
3026 version = pcre32_version();
3027 #endif
3028
3029 /* Scan options */
3030
3031 while (argc > 1 && argv[op][0] == '-')
3032 {
3033 pcre_uint8 *endptr;
3034 char *arg = argv[op];
3035
3036 if (strcmp(arg, "-m") == 0) showstore = 1;
3037 else if (strcmp(arg, "-s") == 0) force_study = 0;
3038
3039 else if (strncmp(arg, "-s+", 3) == 0)
3040 {
3041 arg += 3;
3042 if (*arg == '+') { arg++; verify_jit = TRUE; }
3043 force_study = 1;
3044 if (*arg == 0)
3045 force_study_options = jit_study_bits[6];
3046 else if (*arg >= '1' && *arg <= '7')
3047 force_study_options = jit_study_bits[*arg - '1'];
3048 else goto BAD_ARG;
3049 }
3050 else if (strcmp(arg, "-8") == 0)
3051 {
3052 #ifdef SUPPORT_PCRE8
3053 pcre_mode = PCRE8_MODE;
3054 #else
3055 printf("** This version of PCRE was built without 8-bit support\n");
3056 exit(1);
3057 #endif
3058 }
3059 else if (strcmp(arg, "-16") == 0)
3060 {
3061 #ifdef SUPPORT_PCRE16
3062 pcre_mode = PCRE16_MODE;
3063 #else
3064 printf("** This version of PCRE was built without 16-bit support\n");
3065 exit(1);
3066 #endif
3067 }
3068 else if (strcmp(arg, "-32") == 0)
3069 {
3070 #ifdef SUPPORT_PCRE32
3071 pcre_mode = PCRE32_MODE;
3072 #else
3073 printf("** This version of PCRE was built without 32-bit support\n");
3074 exit(1);
3075 #endif
3076 }
3077 else if (strcmp(arg, "-q") == 0) quiet = 1;
3078 else if (strcmp(arg, "-b") == 0) debug = 1;
3079 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3080 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3081 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3082 #if !defined NODFA
3083 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3084 #endif
3085 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3086 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3087 *endptr == 0))
3088 {
3089 op++;
3090 argc--;
3091 }
3092 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3093 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3094 {
3095 int temp;
3096 int both = arg[2] == 0;
3097 showtotaltimes = arg[1] == 'T';
3098 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3099 *endptr == 0))
3100 {
3101 timeitm = temp;
3102 op++;
3103 argc--;
3104 }
3105 else timeitm = LOOPREPEAT;
3106 if (both) timeit = timeitm;
3107 }
3108 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3109 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3110 *endptr == 0))
3111 {
3112 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3113 printf("PCRE: -S not supported on this OS\n");
3114 exit(1);
3115 #else
3116 int rc;
3117 struct rlimit rlim;
3118 getrlimit(RLIMIT_STACK, &rlim);
3119 rlim.rlim_cur = stack_size * 1024 * 1024;
3120 rc = setrlimit(RLIMIT_STACK, &rlim);
3121 if (rc != 0)
3122 {
3123 printf("PCRE: setrlimit() failed with error %d\n", rc);
3124 exit(1);
3125 }
3126 op++;
3127 argc--;
3128 #endif
3129 }
3130 #if !defined NOPOSIX
3131 else if (strcmp(arg, "-p") == 0) posix = 1;
3132 #endif
3133 else if (strcmp(arg, "-C") == 0)
3134 {
3135 int rc;
3136 unsigned long int lrc;
3137
3138 if (argc > 2)
3139 {
3140 if (strcmp(argv[op + 1], "linksize") == 0)
3141 {
3142 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3143 printf("%d\n", rc);
3144 yield = rc;
3145
3146 #ifdef __VMS
3147 vms_setsymbol("LINKSIZE",0,yield );
3148 #endif
3149 }
3150 else if (strcmp(argv[op + 1], "pcre8") == 0)
3151 {
3152 #ifdef SUPPORT_PCRE8
3153 printf("1\n");
3154 yield = 1;
3155 #else
3156 printf("0\n");
3157 yield = 0;
3158 #endif
3159 #ifdef __VMS
3160 vms_setsymbol("PCRE8",0,yield );
3161 #endif
3162 }
3163 else if (strcmp(argv[op + 1], "pcre16") == 0)
3164 {
3165 #ifdef SUPPORT_PCRE16
3166 printf("1\n");
3167 yield = 1;
3168 #else
3169 printf("0\n");
3170 yield = 0;
3171 #endif
3172 #ifdef __VMS
3173 vms_setsymbol("PCRE16",0,yield );
3174 #endif
3175 }
3176 else if (strcmp(argv[op + 1], "pcre32") == 0)
3177 {
3178 #ifdef SUPPORT_PCRE32
3179 printf("1\n");
3180 yield = 1;
3181 #else
3182 printf("0\n");
3183 yield = 0;
3184 #endif
3185 #ifdef __VMS
3186 vms_setsymbol("PCRE32",0,yield );
3187 #endif
3188 }
3189 else if (strcmp(argv[op + 1], "utf") == 0)
3190 {
3191 #ifdef SUPPORT_PCRE8
3192 if (pcre_mode == PCRE8_MODE)
3193 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3194 #endif
3195 #ifdef SUPPORT_PCRE16
3196 if (pcre_mode == PCRE16_MODE)
3197 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3198 #endif
3199 #ifdef SUPPORT_PCRE32
3200 if (pcre_mode == PCRE32_MODE)
3201 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3202 #endif
3203 printf("%d\n", rc);
3204 yield = rc;
3205 #ifdef __VMS
3206 vms_setsymbol("UTF",0,yield );
3207 #endif
3208 }
3209 else if (strcmp(argv[op + 1], "ucp") == 0)
3210 {
3211 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3212 printf("%d\n", rc);
3213 yield = rc;
3214 }
3215 else if (strcmp(argv[op + 1], "jit") == 0)
3216 {
3217 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3218 printf("%d\n", rc);
3219 yield = rc;
3220 }
3221 else if (strcmp(argv[op + 1], "newline") == 0)
3222 {
3223 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3224 print_newline_config(rc, TRUE);
3225 }
3226 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3227 {
3228 #ifdef EBCDIC
3229 printf("1\n");
3230 yield = 1;
3231 #else
3232 printf("0\n");
3233 #endif
3234 }
3235 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3236 {
3237 #ifdef EBCDIC
3238 printf("0x%02x\n", CHAR_LF);
3239 #else
3240 printf("0\n");
3241 #endif
3242 }
3243 else
3244 {
3245 printf("Unknown -C option: %s\n", argv[op + 1]);
3246 }
3247 goto EXIT;
3248 }
3249
3250 /* No argument for -C: output all configuration information. */
3251
3252 printf("PCRE version %s\n", version);
3253 printf("Compiled with\n");
3254
3255 #ifdef EBCDIC
3256 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3257 #endif
3258
3259 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3260 are set, either both UTFs are supported or both are not supported. */
3261
3262 #ifdef SUPPORT_PCRE8
3263 printf(" 8-bit support\n");
3264 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3265 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3266 #endif
3267 #ifdef SUPPORT_PCRE16
3268 printf(" 16-bit support\n");
3269 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3270 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3271 #endif
3272 #ifdef SUPPORT_PCRE32
3273 printf(" 32-bit support\n");
3274 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3275 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3276 #endif
3277
3278 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3279 printf(" %sUnicode properties support\n", rc? "" : "No ");
3280 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3281 if (rc)
3282 {
3283 const char *arch;
3284 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3285 printf(" Just-in-time compiler support: %s\n", arch);
3286 }
3287 else
3288 printf(" No just-in-time compiler support\n");
3289 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3290 print_newline_config(rc, FALSE);
3291 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3292 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3293 "all Unicode newlines");
3294 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3295 printf(" Internal link size = %d\n", rc);
3296 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3297 printf(" POSIX malloc threshold = %d\n", rc);
3298 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3299 printf(" Default match limit = %ld\n", lrc);
3300 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3301 printf(" Default recursion depth limit = %ld\n", lrc);
3302 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3303 printf(" Match recursion uses %s", rc? "stack" : "heap");
3304 if (showstore)
3305 {
3306 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3307 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3308 }
3309 printf("\n");
3310 goto EXIT;
3311 }
3312 else if (strcmp(arg, "-help") == 0 ||
3313 strcmp(arg, "--help") == 0)
3314 {
3315 usage();
3316 goto EXIT;
3317 }
3318 else
3319 {
3320 BAD_ARG:
3321 printf("** Unknown or malformed option %s\n", arg);
3322 usage();
3323 yield = 1;
3324 goto EXIT;
3325 }
3326 op++;
3327 argc--;
3328 }
3329
3330 /* Get the store for the offsets vector, and remember what it was */
3331
3332 size_offsets_max = size_offsets;
3333 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3334 if (offsets == NULL)
3335 {
3336 printf("** Failed to get %d bytes of memory for offsets vector\n",
3337 (int)(size_offsets_max * sizeof(int)));
3338 yield = 1;
3339 goto EXIT;
3340 }
3341
3342 /* Sort out the input and output files */
3343
3344 if (argc > 1)
3345 {
3346 infile = fopen(argv[op], INPUT_MODE);
3347 if (infile == NULL)
3348 {
3349 printf("** Failed to open %s\n", argv[op]);
3350 yield = 1;
3351 goto EXIT;
3352 }
3353 }
3354
3355 if (argc > 2)
3356 {
3357 outfile = fopen(argv[op+1], OUTPUT_MODE);
3358 if (outfile == NULL)
3359 {
3360 printf("** Failed to open %s\n", argv[op+1]);
3361 yield = 1;
3362 goto EXIT;
3363 }
3364 }
3365
3366 /* Set alternative malloc function */
3367
3368 #ifdef SUPPORT_PCRE8
3369 pcre_malloc = new_malloc;
3370 pcre_free = new_free;
3371 pcre_stack_malloc = stack_malloc;
3372 pcre_stack_free = stack_free;
3373 #endif
3374
3375 #ifdef SUPPORT_PCRE16
3376 pcre16_malloc = new_malloc;
3377 pcre16_free = new_free;
3378 pcre16_stack_malloc = stack_malloc;
3379 pcre16_stack_free = stack_free;
3380 #endif
3381
3382 #ifdef SUPPORT_PCRE32
3383 pcre32_malloc = new_malloc;
3384 pcre32_free = new_free;
3385 pcre32_stack_malloc = stack_malloc;
3386 pcre32_stack_free = stack_free;
3387 #endif
3388
3389 /* Heading line unless quiet, then prompt for first regex if stdin */
3390
3391 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3392
3393 /* Main loop */
3394
3395 while (!done)
3396 {
3397 pcre *re = NULL;
3398 pcre_extra *extra = NULL;
3399
3400 #if !defined NOPOSIX /* There are still compilers that require no indent */
3401 regex_t preg;
3402 int do_posix = 0;
3403 #endif
3404
3405 const char *error;
3406 pcre_uint8 *markptr;
3407 pcre_uint8 *p, *pp, *ppp;
3408 pcre_uint8 *to_file = NULL;
3409 const pcre_uint8 *tables = NULL;
3410 unsigned long int get_options;
3411 unsigned long int true_size, true_study_size = 0;
3412 size_t size, regex_gotten_store;
3413 int do_allcaps = 0;
3414 int do_mark = 0;
3415 int do_study = 0;
3416 int no_force_study = 0;
3417 int do_debug = debug;
3418 int do_G = 0;
3419 int do_g = 0;
3420 int do_showinfo = showinfo;
3421 int do_showrest = 0;
3422 int do_showcaprest = 0;
3423 int do_flip = 0;
3424 int erroroffset, len, delimiter, poffset;
3425
3426 #if !defined NODFA
3427 int dfa_matched = 0;
3428 #endif
3429
3430 use_utf = 0;
3431 debug_lengths = 1;
3432
3433 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3434 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3435 fflush(outfile);
3436
3437 p = buffer;
3438 while (isspace(*p)) p++;
3439 if (*p == 0) continue;
3440
3441 /* See if the pattern is to be loaded pre-compiled from a file. */
3442
3443 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3444 {
3445 pcre_uint32 magic;
3446 pcre_uint8 sbuf[8];
3447 FILE *f;
3448
3449 p++;
3450 if (*p == '!')
3451 {
3452 do_debug = TRUE;
3453 do_showinfo = TRUE;
3454 p++;
3455 }
3456
3457 pp = p + (int)strlen((char *)p);
3458 while (isspace(pp[-1])) pp--;
3459 *pp = 0;
3460
3461 f = fopen((char *)p, "rb");
3462 if (f == NULL)
3463 {
3464 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3465 continue;
3466 }
3467
3468 first_gotten_store = 0;
3469 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3470
3471 true_size =
3472 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3473 true_study_size =
3474 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3475
3476 re = (pcre *)new_malloc(true_size);
3477 if (re == NULL)
3478 {
3479 printf("** Failed to get %d bytes of memory for pcre object\n",
3480 (int)true_size);
3481 yield = 1;
3482 goto EXIT;
3483 }
3484 regex_gotten_store = first_gotten_store;
3485
3486 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3487
3488 magic = REAL_PCRE_MAGIC(re);
3489 if (magic != MAGIC_NUMBER)
3490 {
3491 if (swap_uint32(magic) == MAGIC_NUMBER)
3492 {
3493 do_flip = 1;
3494 }
3495 else
3496 {
3497 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3498 new_free(re);
3499 fclose(f);
3500 continue;
3501 }
3502 }
3503
3504 /* We hide the byte-invert info for little and big endian tests. */
3505 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3506 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3507
3508 /* Now see if there is any following study data. */
3509
3510 if (true_study_size != 0)
3511 {
3512 pcre_study_data *psd;
3513
3514 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3515 extra->flags = PCRE_EXTRA_STUDY_DATA;
3516
3517 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3518 extra->study_data = psd;
3519
3520 if (fread(psd, 1, true_study_size, f) != true_study_size)
3521 {
3522 FAIL_READ:
3523 fprintf(outfile, "Failed to read data from %s\n", p);
3524 if (extra != NULL)
3525 {
3526 PCRE_FREE_STUDY(extra);
3527 }
3528 new_free(re);
3529 fclose(f);
3530 continue;
3531 }
3532 fprintf(outfile, "Study data loaded from %s\n", p);
3533 do_study = 1; /* To get the data output if requested */
3534 }
3535 else fprintf(outfile, "No study data\n");
3536
3537 /* Flip the necessary bytes. */
3538 if (do_flip)
3539 {
3540 int rc;
3541 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3542 if (rc == PCRE_ERROR_BADMODE)
3543 {
3544 pcre_uint32 flags_in_host_byte_order;
3545 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3546 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3547 else
3548 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3549 /* Simulate the result of the function call below. */
3550 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3551 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3552 PCRE_INFO_OPTIONS);
3553 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3554 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3555 new_free(re);
3556 fclose(f);
3557 continue;
3558 }
3559 }
3560
3561 /* Need to know if UTF-8 for printing data strings. */
3562
3563 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3564 {
3565 new_free(re);
3566 fclose(f);
3567 continue;
3568 }
3569 use_utf = (get_options & PCRE_UTF8) != 0;
3570
3571 fclose(f);
3572 goto SHOW_INFO;
3573 }
3574
3575 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3576 the pattern; if it isn't complete, read more. */
3577
3578 delimiter = *p++;
3579
3580 if (isalnum(delimiter) || delimiter == '\\')
3581 {
3582 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3583 goto SKIP_DATA;
3584 }
3585
3586 pp = p;
3587 poffset = (int)(p - buffer);
3588
3589 for(;;)
3590 {
3591 while (*pp != 0)
3592 {
3593 if (*pp == '\\' && pp[1] != 0) pp++;
3594 else if (*pp == delimiter) break;
3595 pp++;
3596 }
3597 if (*pp != 0) break;
3598 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3599 {
3600 fprintf(outfile, "** Unexpected EOF\n");
3601 done = 1;
3602 goto CONTINUE;
3603 }
3604 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3605 }
3606
3607 /* The buffer may have moved while being extended; reset the start of data
3608 pointer to the correct relative point in the buffer. */
3609
3610 p = buffer + poffset;
3611
3612 /* If the first character after the delimiter is backslash, make
3613 the pattern end with backslash. This is purely to provide a way
3614 of testing for the error message when a pattern ends with backslash. */
3615
3616 if (pp[1] == '\\') *pp++ = '\\';
3617
3618 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3619 for callouts. */
3620
3621 *pp++ = 0;
3622 strcpy((char *)pbuffer, (char *)p);
3623
3624 /* Look for options after final delimiter */
3625
3626 options = 0;
3627 study_options = force_study_options;
3628 log_store = showstore; /* default from command line */
3629
3630 while (*pp != 0)
3631 {
3632 switch (*pp++)
3633 {
3634 case 'f': options |= PCRE_FIRSTLINE; break;
3635 case 'g': do_g = 1; break;
3636 case 'i': options |= PCRE_CASELESS; break;
3637 case 'm': options |= PCRE_MULTILINE; break;
3638 case 's': options |= PCRE_DOTALL; break;
3639 case 'x': options |= PCRE_EXTENDED; break;
3640
3641 case '+':
3642 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3643 break;
3644
3645 case '=': do_allcaps = 1; break;
3646 case 'A': options |= PCRE_ANCHORED; break;
3647 case 'B': do_debug = 1; break;
3648 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3649 case 'D': do_debug = do_showinfo = 1; break;
3650 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3651 case 'F': do_flip = 1; break;
3652 case 'G': do_G = 1; break;
3653 case 'I': do_showinfo = 1; break;
3654 case 'J': options |= PCRE_DUPNAMES; break;
3655 case 'K': do_mark = 1; break;
3656 case 'M': log_store = 1; break;
3657 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3658
3659 #if !defined NOPOSIX
3660 case 'P': do_posix = 1; break;
3661 #endif
3662
3663 case 'S':
3664 do_study = 1;
3665 for (;;)
3666 {
3667 switch (*pp++)
3668 {
3669 case 'S':
3670 do_study = 0;
3671 no_force_study = 1;
3672 break;
3673
3674 case '!':
3675 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3676 break;
3677
3678 case '+':
3679 if (*pp == '+')
3680 {
3681 verify_jit = TRUE;
3682 pp++;
3683 }
3684 if (*pp >= '1' && *pp <= '7')
3685 study_options |= jit_study_bits[*pp++ - '1'];
3686 else
3687 study_options |= jit_study_bits[6];
3688 break;
3689
3690 case '-':
3691 study_options &= ~PCRE_STUDY_ALLJIT;
3692 break;
3693
3694 default:
3695 pp--;
3696 goto ENDLOOP;
3697 }
3698 }
3699 ENDLOOP:
3700 break;
3701
3702 case 'U': options |= PCRE_UNGREEDY; break;
3703 case 'W': options |= PCRE_UCP; break;
3704 case 'X': options |= PCRE_EXTRA; break;
3705 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3706 case 'Z': debug_lengths = 0; break;
3707 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3708 case '9': options |= PCRE_NEVER_UTF; break;
3709 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3710
3711 case 'T':
3712 switch (*pp++)
3713 {
3714 case '0': tables = tables0; break;
3715 case '1': tables = tables1; break;
3716
3717 case '\r':
3718 case '\n':
3719 case ' ':
3720 case 0:
3721 fprintf(outfile, "** Missing table number after /T\n");
3722 goto SKIP_DATA;
3723
3724 default:
3725 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3726 goto SKIP_DATA;
3727 }
3728 break;
3729
3730 case 'L':
3731 ppp = pp;
3732 /* The '\r' test here is so that it works on Windows. */
3733 /* The '0' test is just in case this is an unterminated line. */
3734 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3735 *ppp = 0;
3736 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3737 {
3738 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3739 goto SKIP_DATA;
3740 }
3741 locale_set = 1;
3742 tables = PCRE_MAKETABLES;
3743 pp = ppp;
3744 break;
3745
3746 case '>':
3747 to_file = pp;
3748 while (*pp != 0) pp++;
3749 while (isspace(pp[-1])) pp--;
3750 *pp = 0;
3751 break;
3752
3753 case '<':
3754 {
3755 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3756 {
3757 options |= PCRE_JAVASCRIPT_COMPAT;
3758 pp += 3;
3759 }
3760 else
3761 {
3762 int x = check_newline(pp, outfile);
3763 if (x == 0) goto SKIP_DATA;
3764 options |= x;
3765 while (*pp++ != '>');
3766 }
3767 }
3768 break;
3769
3770 case '\r': /* So that it works in Windows */
3771 case '\n':
3772 case ' ':
3773 break;
3774
3775 default:
3776 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3777 goto SKIP_DATA;
3778 }
3779 }
3780
3781 /* Handle compiling via the POSIX interface, which doesn't support the
3782 timing, showing, or debugging options, nor the ability to pass over
3783 local character tables. Neither does it have 16-bit support. */
3784
3785 #if !defined NOPOSIX
3786 if (posix || do_posix)
3787 {
3788 int rc;
3789 int cflags = 0;
3790
3791 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3792 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3793 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3794 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3795 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3796 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3797 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3798
3799 first_gotten_store = 0;
3800 rc = regcomp(&preg, (char *)p, cflags);
3801
3802 /* Compilation failed; go back for another re, skipping to blank line
3803 if non-interactive. */
3804
3805 if (rc != 0)
3806 {
3807 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3808 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3809 goto SKIP_DATA;
3810 }
3811 }
3812
3813 /* Handle compiling via the native interface */
3814
3815 else
3816 #endif /* !defined NOPOSIX */
3817
3818 {
3819 /* In 16- or 32-bit mode, convert the input. */
3820
3821 #ifdef SUPPORT_PCRE16
3822 if (pcre_mode == PCRE16_MODE)
3823 {
3824 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3825 {
3826 case -1:
3827 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3828 "converted to UTF-16\n");
3829 goto SKIP_DATA;
3830
3831 case -2:
3832 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3833 "cannot be converted to UTF-16\n");
3834 goto SKIP_DATA;
3835
3836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3837 fprintf(outfile, "**Failed: character value greater than 0xffff "
3838 "cannot be converted to 16-bit in non-UTF mode\n");
3839 goto SKIP_DATA;
3840
3841 default:
3842 break;
3843 }
3844 p = (pcre_uint8 *)buffer16;
3845 }
3846 #endif
3847
3848 #ifdef SUPPORT_PCRE32
3849 if (pcre_mode == PCRE32_MODE)
3850 {
3851 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3852 {
3853 case -1:
3854 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3855 "converted to UTF-32\n");
3856 goto SKIP_DATA;
3857
3858 case -2:
3859 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3860 "cannot be converted to UTF-32\n");
3861 goto SKIP_DATA;
3862
3863 case -3:
3864 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3865 goto SKIP_DATA;
3866
3867 default:
3868 break;
3869 }
3870 p = (pcre_uint8 *)buffer32;
3871 }
3872 #endif
3873
3874 /* Compile many times when timing */
3875
3876 if (timeit > 0)
3877 {
3878 register int i;
3879 clock_t time_taken;
3880 clock_t start_time = clock();
3881 for (i = 0; i < timeit; i++)
3882 {
3883 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3884 if (re != NULL) free(re);
3885 }
3886 total_compile_time += (time_taken = clock() - start_time);
3887 fprintf(outfile, "Compile time %.4f milliseconds\n",
3888 (((double)time_taken * 1000.0) / (double)timeit) /
3889 (double)CLOCKS_PER_SEC);
3890 }
3891
3892 first_gotten_store = 0;
3893 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3894
3895 /* Compilation failed; go back for another re, skipping to blank line
3896 if non-interactive. */
3897
3898 if (re == NULL)
3899 {
3900 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3901 SKIP_DATA:
3902 if (infile != stdin)
3903 {
3904 for (;;)
3905 {
3906 if (extend_inputline(infile, buffer, NULL) == NULL)
3907 {
3908 done = 1;
3909 goto CONTINUE;
3910 }
3911 len = (int)strlen((char *)buffer);
3912 while (len > 0 && isspace(buffer[len-1])) len--;
3913 if (len == 0) break;
3914 }
3915 fprintf(outfile, "\n");
3916 }
3917 goto CONTINUE;
3918 }
3919
3920 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3921 within the regex; check for this so that we know how to process the data
3922 lines. */
3923
3924 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3925 goto SKIP_DATA;
3926 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3927
3928 /* Extract the size for possible writing before possibly flipping it,
3929 and remember the store that was got. */
3930
3931 true_size = REAL_PCRE_SIZE(re);
3932 regex_gotten_store = first_gotten_store;
3933
3934 /* Output code size information if requested */
3935
3936 if (log_store)
3937 {
3938 int name_count, name_entry_size, real_pcre_size;
3939
3940 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3941 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3942 real_pcre_size = 0;
3943 #ifdef SUPPORT_PCRE8
3944 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3945 real_pcre_size = sizeof(real_pcre);
3946 #endif
3947 #ifdef SUPPORT_PCRE16
3948 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3949 real_pcre_size = sizeof(real_pcre16);
3950 #endif
3951 #ifdef SUPPORT_PCRE32
3952 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3953 real_pcre_size = sizeof(real_pcre32);
3954 #endif
3955 fprintf(outfile, "Memory allocation (code space): %d\n",
3956 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3957 }
3958
3959 /* If -s or /S was present, study the regex to generate additional info to
3960 help with the matching, unless the pattern has the SS option, which
3961 suppresses the effect of /S (used for a few test patterns where studying is
3962 never sensible). */
3963
3964 if (do_study || (force_study >= 0 && !no_force_study))
3965 {
3966 if (timeit > 0)
3967 {
3968 register int i;
3969 clock_t time_taken;
3970 clock_t start_time = clock();
3971 for (i = 0; i < timeit; i++)
3972 {
3973 PCRE_STUDY(extra, re, study_options, &error);
3974 }
3975 total_study_time = (time_taken = clock() - start_time);
3976 if (extra != NULL)
3977 {
3978 PCRE_FREE_STUDY(extra);
3979 }
3980 fprintf(outfile, " Study time %.4f milliseconds\n",
3981 (((double)time_taken * 1000.0) / (double)timeit) /
3982 (double)CLOCKS_PER_SEC);
3983 }
3984 PCRE_STUDY(extra, re, study_options, &error);
3985 if (error != NULL)
3986 fprintf(outfile, "Failed to study: %s\n", error);
3987 else if (extra != NULL)
3988 {
3989 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3990 if (log_store)
3991 {
3992 size_t jitsize;
3993 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3994 jitsize != 0)
3995 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3996 }
3997 }
3998 }
3999
4000 /* If /K was present, we set up for handling MARK data. */
4001
4002 if (do_mark)
4003 {
4004 if (extra == NULL)
4005 {
4006 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4007 extra->flags = 0;
4008 }
4009 extra->mark = &markptr;
4010 extra->flags |= PCRE_EXTRA_MARK;
4011 }
4012
4013 /* Extract and display information from the compiled data if required. */
4014
4015 SHOW_INFO:
4016
4017 if (do_debug)
4018 {
4019 fprintf(outfile, "------------------------------------------------------------------\n");
4020 PCRE_PRINTINT(re, outfile, debug_lengths);
4021 }
4022
4023 /* We already have the options in get_options (see above) */
4024
4025 if (do_showinfo)
4026 {
4027 unsigned long int all_options;
4028 pcre_uint32 first_char, need_char;
4029 pcre_uint32 match_limit, recursion_limit;
4030 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4031 hascrorlf, maxlookbehind, match_empty;
4032 int nameentrysize, namecount;
4033 const pcre_uint8 *nametable;
4034
4035 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4036 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4037 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4038 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4039 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4040 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4041 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4042 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4043 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4044 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4045 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4046 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4047 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4048 new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4049 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4050 != 0)
4051 goto SKIP_DATA;
4052
4053 if (size != regex_gotten_store) fprintf(outfile,
4054 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4055 (int)size, (int)regex_gotten_store);
4056
4057 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4058
4059 if (backrefmax > 0)
4060 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4061
4062 if (maxlookbehind > 0)
4063 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4064
4065 if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4066 fprintf(outfile, "Match limit = %u\n", match_limit);
4067
4068 if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4069 fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4070
4071 if (namecount > 0)
4072 {
4073 fprintf(outfile, "Named capturing subpatterns:\n");
4074 while (namecount-- > 0)
4075 {
4076 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4077 int length = (int)STRLEN(nametable + imm2_size);
4078 fprintf(outfile, " ");
4079 PCHARSV(nametable, imm2_size, length, outfile);
4080 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4081 #ifdef SUPPORT_PCRE32
4082 if (pcre_mode == PCRE32_MODE)
4083 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4084 #endif
4085 #ifdef SUPPORT_PCRE16
4086 if (pcre_mode == PCRE16_MODE)
4087 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4088 #endif
4089 #ifdef SUPPORT_PCRE8
4090 if (pcre_mode == PCRE8_MODE)
4091 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4092 #endif
4093 nametable += nameentrysize * CHAR_SIZE;
4094 }
4095 }
4096
4097 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4098 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4099 if (match_empty) fprintf(outfile, "May match empty string\n");
4100
4101 all_options = REAL_PCRE_OPTIONS(re);
4102 if (do_flip) all_options = swap_uint32(all_options);
4103
4104 if (get_options == 0) fprintf(outfile, "No options\n");
4105 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4106 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4107 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4108 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4109 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4110 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4111 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4112 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4113 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4114 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4115 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4116 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4117 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4118 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4119 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4120 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4121 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4122 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4123 ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4124
4125 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4126
4127 switch (get_options & PCRE_NEWLINE_BITS)
4128 {
4129 case PCRE_NEWLINE_CR:
4130 fprintf(outfile, "Forced newline sequence: CR\n");
4131 break;
4132
4133 case PCRE_NEWLINE_LF:
4134 fprintf(outfile, "Forced newline sequence: LF\n");
4135 break;
4136
4137 case PCRE_NEWLINE_CRLF:
4138 fprintf(outfile, "Forced newline sequence: CRLF\n");
4139 break;
4140
4141 case PCRE_NEWLINE_ANYCRLF:
4142 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4143 break;
4144
4145 case PCRE_NEWLINE_ANY:
4146 fprintf(outfile, "Forced newline sequence: ANY\n");
4147 break;
4148
4149 default:
4150 break;
4151 }
4152
4153 if (first_char_set == 2)
4154 {
4155 fprintf(outfile, "First char at start or follows newline\n");
4156 }
4157 else if (first_char_set == 1)
4158 {
4159 const char *caseless =
4160 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4161 "" : " (caseless)";
4162
4163 if (PRINTOK(first_char))
4164 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4165 else
4166 {
4167 fprintf(outfile, "First char = ");
4168 pchar(first_char, outfile);
4169 fprintf(outfile, "%s\n", caseless);
4170 }
4171 }
4172 else
4173 {
4174 fprintf(outfile, "No first char\n");
4175 }
4176
4177 if (need_char_set == 0)
4178 {
4179 fprintf(outfile, "No need char\n");
4180 }
4181 else
4182 {
4183 const char *caseless =
4184 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4185 "" : " (caseless)";
4186
4187 if (PRINTOK(need_char))
4188 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4189 else
4190 {
4191 fprintf(outfile, "Need char = ");
4192 pchar(need_char, outfile);
4193 fprintf(outfile, "%s\n", caseless);
4194 }
4195 }
4196
4197 /* Don't output study size; at present it is in any case a fixed
4198 value, but it varies, depending on the computer architecture, and
4199 so messes up the test suite. (And with the /F option, it might be
4200 flipped.) If study was forced by an external -s, don't show this
4201 information unless -i or -d was also present. This means that, except
4202 when auto-callouts are involved, the output from runs with and without
4203 -s should be identical. */
4204
4205 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4206 {
4207 if (extra == NULL)
4208 fprintf(outfile, "Study returned NULL\n");
4209 else
4210 {
4211 pcre_uint8 *start_bits = NULL;
4212 int minlength;
4213
4214 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4215 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4216
4217 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4218 {
4219 if (start_bits == NULL)
4220 fprintf(outfile, "No set of starting bytes\n");
4221 else
4222 {
4223 int i;
4224 int c = 24;
4225 fprintf(outfile, "Starting byte set: ");
4226 for (i = 0; i < 256; i++)
4227 {
4228 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4229 {
4230 if (c > 75)
4231 {
4232 fprintf(outfile, "\n ");
4233 c = 2;
4234 }
4235 if (PRINTOK(i) && i != ' ')
4236 {
4237 fprintf(outfile, "%c ", i);
4238 c += 2;
4239 }
4240 else
4241 {
4242 fprintf(outfile, "\\x%02x ", i);
4243 c += 5;
4244 }
4245 }
4246 }
4247 fprintf(outfile, "\n");
4248 }
4249 }
4250 }
4251
4252 /* Show this only if the JIT was set by /S, not by -s. */
4253
4254 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4255 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4256 {
4257 int jit;
4258 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4259 {
4260 if (jit)
4261 fprintf(outfile, "JIT study was successful\n");
4262 else
4263 #ifdef SUPPORT_JIT
4264 fprintf(outfile, "JIT study was not successful\n");
4265 #else
4266 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4267 #endif
4268 }
4269 }
4270 }
4271 }
4272
4273 /* If the '>' option was present, we write out the regex to a file, and
4274 that is all. The first 8 bytes of the file are the regex length and then
4275 the study length, in big-endian order. */
4276
4277 if (to_file != NULL)
4278 {
4279 FILE *f = fopen((char *)to_file, "wb");
4280 if (f == NULL)
4281 {
4282 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4283 }
4284 else
4285 {
4286 pcre_uint8 sbuf[8];
4287
4288 if (do_flip) regexflip(re, extra);
4289 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4290 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4291 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4292 sbuf[3] = (pcre_uint8)((true_size) & 255);
4293 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4294 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4295 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4296 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4297
4298 if (fwrite(sbuf, 1, 8, f) < 8 ||
4299 fwrite(re, 1, true_size, f) < true_size)
4300 {
4301 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4302 }
4303 else
4304 {
4305 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4306
4307 /* If there is study data, write it. */
4308
4309 if (extra != NULL)
4310 {
4311 if (fwrite(extra->study_data, 1, true_study_size, f) <
4312 true_study_size)
4313 {
4314 fprintf(outfile, "Write error on %s: %s\n", to_file,
4315 strerror(errno));
4316 }
4317 else fprintf(outfile, "Study data written to %s\n", to_file);
4318 }
4319 }
4320 fclose(f);
4321 }
4322
4323 new_free(re);
4324 if (extra != NULL)
4325 {
4326 PCRE_FREE_STUDY(extra);
4327 }
4328 if (locale_set)
4329 {
4330 new_free((void *)tables);
4331 setlocale(LC_CTYPE, "C");
4332 locale_set = 0;
4333 }
4334 continue; /* With next regex */
4335 }
4336 } /* End of non-POSIX compile */
4337
4338 /* Read data lines and test them */
4339
4340 for (;;)
4341 {
4342 #ifdef SUPPORT_PCRE8
4343 pcre_uint8 *q8;
4344 #endif
4345 #ifdef SUPPORT_PCRE16
4346 pcre_uint16 *q16;
4347 #endif
4348 #ifdef SUPPORT_PCRE32
4349 pcre_uint32 *q32;
4350 #endif
4351 pcre_uint8 *bptr;
4352 int *use_offsets = offsets;
4353 int use_size_offsets = size_offsets;
4354 int callout_data = 0;
4355 int callout_data_set = 0;
4356 int count;
4357 pcre_uint32 c;
4358 int copystrings = 0;
4359 int find_match_limit = default_find_match_limit;
4360 int getstrings = 0;
4361 int getlist = 0;
4362 int gmatched = 0;
4363 int start_offset = 0;
4364 int start_offset_sign = 1;
4365 int g_notempty = 0;
4366 int use_dfa = 0;
4367
4368 *copynames = 0;
4369 *getnames = 0;
4370
4371 #ifdef SUPPORT_PCRE32
4372 cn32ptr = copynames;
4373 gn32ptr = getnames;
4374 #endif
4375 #ifdef SUPPORT_PCRE16
4376 cn16ptr = copynames16;
4377 gn16ptr = getnames16;
4378 #endif
4379 #ifdef SUPPORT_PCRE8
4380 cn8ptr = copynames8;
4381 gn8ptr = getnames8;
4382 #endif
4383
4384 SET_PCRE_CALLOUT(callout);
4385 first_callout = 1;
4386 last_callout_mark = NULL;
4387 callout_extra = 0;
4388 callout_count = 0;
4389 callout_fail_count = 999999;
4390 callout_fail_id = -1;
4391 show_malloc = 0;
4392 options = 0;
4393
4394 if (extra != NULL) extra->flags &=
4395 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4396
4397 len = 0;
4398 for (;;)
4399 {
4400 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4401 {
4402 if (len > 0) /* Reached EOF without hitting a newline */
4403 {
4404 fprintf(outfile, "\n");
4405 break;
4406 }
4407 done = 1;
4408 goto CONTINUE;
4409 }
4410 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4411 len = (int)strlen((char *)buffer);
4412 if (buffer[len-1] == '\n') break;
4413 }
4414
4415 while (len > 0 && isspace(buffer[len-1])) len--;
4416 buffer[len] = 0;
4417 if (len == 0) break;
4418
4419 p = buffer;
4420 while (isspace(*p)) p++;
4421
4422 #ifndef NOUTF
4423 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4424 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4425
4426 if (use_utf)
4427 {
4428 pcre_uint8 *q;
4429 pcre_uint32 cc;
4430 int n = 1;
4431
4432 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4433 if (n <= 0)
4434 {
4435 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4436 goto NEXT_DATA;
4437 }
4438 }
4439 #endif
4440
4441 #ifdef SUPPORT_VALGRIND
4442 /* Mark the dbuffer as addressable but undefined again. */
4443
4444 if (dbuffer != NULL)
4445 {
4446 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4447 }
4448 #endif
4449
4450 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4451 the number of pcre_uchar units that will be needed. */
4452
4453 while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4454 {
4455 dbuffer_size *= 2;
4456 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4457 if (dbuffer == NULL)
4458 {
4459 fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4460 exit(1);
4461 }
4462 }
4463
4464 #ifdef SUPPORT_PCRE8
4465 q8 = (pcre_uint8 *) dbuffer;
4466 #endif
4467 #ifdef SUPPORT_PCRE16
4468 q16 = (pcre_uint16 *) dbuffer;
4469 #endif
4470 #ifdef SUPPORT_PCRE32
4471 q32 = (pcre_uint32 *) dbuffer;
4472 #endif
4473
4474 while ((c = *p++) != 0)
4475 {
4476 int i = 0;
4477 int n = 0;
4478
4479 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4480 In non-UTF mode, allow the value of the byte to fall through to later,
4481 where values greater than 127 are turned into UTF-8 when running in
4482 16-bit or 32-bit mode. */
4483
4484 if (c != '\\')
4485 {
4486 #ifndef NOUTF
4487 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4488 #endif
4489 }
4490
4491 /* Handle backslash escapes */
4492
4493 else switch ((c = *p++))
4494 {
4495 case 'a': c = 7; break;
4496 case 'b': c = '\b'; break;
4497 case 'e': c = 27; break;
4498 case 'f': c = '\f'; break;
4499 case 'n': c = '\n'; break;
4500 case 'r': c = '\r'; break;
4501 case 't': c = '\t'; break;
4502 case 'v': c = '\v'; break;
4503
4504 case '0': case '1': case '2': case '3':
4505 case '4': case '5': case '6': case '7':
4506 c -= '0';
4507 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4508 c = c * 8 + *p++ - '0';
4509 break;
4510
4511 case 'x':
4512 if (*p == '{')
4513 {
4514 pcre_uint8 *pt = p;
4515 c = 0;
4516
4517 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4518 when isxdigit() is a macro that refers to its argument more than
4519 once. This is banned by the C Standard, but apparently happens in at
4520 least one MacOS environment. */
4521
4522 for (pt++; isxdigit(*pt); pt++)
4523 {
4524 if (++i == 9)
4525 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4526 "using only the first eight.\n");
4527 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4528 }
4529 if (*pt == '}')
4530 {
4531 p = pt + 1;
4532 break;
4533 }
4534 /* Not correct form for \x{...}; fall through */
4535 }
4536
4537 /* \x without {} always defines just one byte in 8-bit mode. This
4538 allows UTF-8 characters to be constructed byte by byte, and also allows
4539 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4540 Otherwise, pass it down to later code so that it can be turned into
4541 UTF-8 when running in 16/32-bit mode. */
4542
4543 c = 0;
4544 while (i++ < 2 && isxdigit(*p))
4545 {
4546 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4547 p++;
4548 }
4549 #if !defined NOUTF && defined SUPPORT_PCRE8
4550 if (use_utf && (pcre_mode == PCRE8_MODE))
4551 {
4552 *q8++ = c;
4553 continue;
4554 }
4555 #endif
4556 break;
4557
4558 case 0: /* \ followed by EOF allows for an empty line */
4559 p--;
4560 continue;
4561
4562 case '>':
4563 if (*p == '-')
4564 {
4565 start_offset_sign = -1;
4566 p++;
4567 }
4568 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4569 start_offset *= start_offset_sign;
4570 continue;
4571
4572 case 'A': /* Option setting */
4573 options |= PCRE_ANCHORED;
4574 continue;
4575
4576 case 'B':
4577 options |= PCRE_NOTBOL;
4578 continue;
4579
4580 case 'C':
4581 if (isdigit(*p)) /* Set copy string */
4582 {
4583 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4584 copystrings |= 1 << n;
4585 }
4586 else if (isalnum(*p))
4587 {
4588 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4589 }
4590 else if (*p == '+')
4591 {
4592 callout_extra = 1;
4593 p++;
4594 }
4595 else if (*p == '-')
4596 {
4597 SET_PCRE_CALLOUT(NULL);
4598 p++;
4599 }
4600 else if (*p == '!')
4601 {
4602 callout_fail_id = 0;
4603 p++;
4604 while(isdigit(*p))
4605 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4606 callout_fail_count = 0;
4607 if (*p == '!')
4608 {
4609 p++;
4610 while(isdigit(*p))
4611 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4612 }
4613 }
4614 else if (*p == '*')
4615 {
4616 int sign = 1;
4617 callout_data = 0;
4618 if (*(++p) == '-') { sign = -1; p++; }
4619 while(isdigit(*p))
4620 callout_data = callout_data * 10 + *p++ - '0';
4621 callout_data *= sign;
4622 callout_data_set = 1;
4623 }
4624 continue;
4625
4626 #if !defined NODFA
4627 case 'D':
4628 #if !defined NOPOSIX
4629 if (posix || do_posix)
4630 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4631 else
4632 #endif
4633 use_dfa = 1;
4634 continue;
4635 #endif
4636
4637 #if !defined NODFA
4638 case 'F':
4639 options |= PCRE_DFA_SHORTEST;
4640 continue;
4641 #endif
4642
4643 case 'G':
4644 if (isdigit(*p))
4645 {
4646 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4647 getstrings |= 1 << n;
4648 }
4649 else if (isalnum(*p))
4650 {
4651 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4652 }
4653 continue;
4654
4655 case 'J':
4656 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4657 if (extra != NULL
4658 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4659 && extra->executable_jit != NULL)
4660 {
4661 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4662 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4663 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4664 }
4665 continue;
4666
4667 case 'L':
4668 getlist = 1;
4669 continue;
4670
4671 case 'M':
4672 find_match_limit = 1;
4673 continue;
4674
4675 case 'N':
4676 if ((options & PCRE_NOTEMPTY) != 0)
4677 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4678 else
4679 options |= PCRE_NOTEMPTY;
4680 continue;
4681
4682 case 'O':
4683 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4684 if (n > size_offsets_max)
4685 {
4686 size_offsets_max = n;
4687 free(offsets);
4688 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4689 if (offsets == NULL)
4690 {
4691 printf("** Failed to get %d bytes of memory for offsets vector\n",
4692 (int)(size_offsets_max * sizeof(int)));
4693 yield = 1;
4694 goto EXIT;
4695 }
4696 }
4697 use_size_offsets = n;
4698 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4699 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4700 continue;
4701
4702 case 'P':
4703 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4704 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4705 continue;
4706
4707 case 'Q':
4708 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4709 if (extra == NULL)
4710 {
4711 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4712 extra->flags = 0;
4713 }
4714 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4715 extra->match_limit_recursion = n;
4716 continue;
4717
4718 case 'q':
4719 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4720 if (extra == NULL)
4721 {
4722 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4723 extra->flags = 0;
4724 }
4725 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4726 extra->match_limit = n;
4727 continue;
4728
4729 #if !defined NODFA
4730 case 'R':
4731 options |= PCRE_DFA_RESTART;
4732 continue;
4733 #endif
4734
4735 case 'S':
4736 show_malloc = 1;
4737 continue;
4738
4739 case 'Y':
4740 options |= PCRE_NO_START_OPTIMIZE;
4741 continue;
4742
4743 case 'Z':
4744 options |= PCRE_NOTEOL;
4745 continue;
4746
4747 case '?':
4748 options |= PCRE_NO_UTF8_CHECK;
4749 continue;
4750
4751 case '<':
4752 {
4753 int x = check_newline(p, outfile);
4754 if (x == 0) goto NEXT_DATA;
4755 options |= x;
4756 while (*p++ != '>');
4757 }
4758 continue;
4759 }
4760
4761 /* We now have a character value in c that may be greater than 255.
4762 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4763 than 127 in UTF mode must have come from \x{...} or octal constructs
4764 because values from \x.. get this far only in non-UTF mode. */
4765
4766 #ifdef SUPPORT_PCRE8
4767 if (pcre_mode == PCRE8_MODE)
4768 {
4769 #ifndef NOUTF
4770 if (use_utf)
4771 {
4772 if (c > 0x7fffffff)
4773 {
4774 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4775 "and so cannot be converted to UTF-8\n", c);
4776 goto NEXT_DATA;
4777 }
4778 q8 += ord2utf8(c, q8);
4779 }
4780 else
4781 #endif
4782 {
4783 if (c > 0xffu)
4784 {
4785 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4786 "and UTF-8 mode is not enabled.\n", c);
4787 fprintf(outfile, "** Truncation will probably give the wrong "
4788 "result.\n");
4789 }
4790 *q8++ = c;
4791 }
4792 }
4793 #endif
4794 #ifdef SUPPORT_PCRE16
4795 if (pcre_mode == PCRE16_MODE)
4796 {
4797 #ifndef NOUTF
4798 if (use_utf)
4799 {
4800 if (c > 0x10ffffu)
4801 {
4802 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4803 "0x10ffff and so cannot be converted to UTF-16\n", c);
4804 goto NEXT_DATA;
4805 }
4806 else if (c >= 0x10000u)
4807 {
4808 c-= 0x10000u;
4809 *q16++ = 0xD800 | (c >> 10);
4810 *q16++ = 0xDC00 | (c & 0x3ff);
4811 }
4812 else
4813 *q16++ = c;
4814 }
4815 else
4816 #endif
4817 {
4818 if (c > 0xffffu)
4819 {
4820 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4821 "and UTF-16 mode is not enabled.\n", c);
4822 fprintf(outfile, "** Truncation will probably give the wrong "
4823 "result.\n");
4824 }
4825
4826 *q16++ = c;
4827 }
4828 }
4829 #endif
4830 #ifdef SUPPORT_PCRE32
4831 if (pcre_mode == PCRE32_MODE)
4832 {
4833 *q32++ = c;
4834 }
4835 #endif
4836
4837 }
4838
4839 /* Reached end of subject string */
4840
4841 #ifdef SUPPORT_PCRE8
4842 if (pcre_mode == PCRE8_MODE)
4843 {
4844 *q8 = 0;
4845 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4846 }
4847 #endif
4848 #ifdef SUPPORT_PCRE16
4849 if (pcre_mode == PCRE16_MODE)
4850 {
4851 *q16 = 0;
4852 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4853 }
4854 #endif
4855 #ifdef SUPPORT_PCRE32
4856 if (pcre_mode == PCRE32_MODE)
4857 {
4858 *q32 = 0;
4859 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4860 }
4861 #endif
4862
4863 /* If we're compiling with explicit valgrind support, mark the data from after
4864 its end to the end of the buffer as unaddressable, so that a read over the end
4865 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4866 If we're not building with valgrind support, move the data to the end of the
4867 buffer instead, so that a read over the end might at least cause a crash.
4868 If we are using the POSIX interface, we must include the terminating zero. */
4869
4870 bptr = dbuffer;
4871
4872 #if !defined NOPOSIX
4873 if (posix || do_posix)
4874 {
4875 #ifdef SUPPORT_VALGRIND
4876 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4877 #else
4878 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4879 bptr += dbuffer_size - len - 1;
4880 #endif
4881 }
4882 else
4883 #endif
4884 {
4885 #ifdef SUPPORT_VALGRIND
4886 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4887 #else
4888 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4889 #endif
4890 }
4891
4892 if ((all_use_dfa || use_dfa) && find_match_limit)
4893 {
4894 printf("**Match limit not relevant for DFA matching: ignored\n");
4895 find_match_limit = 0;
4896 }
4897
4898 /* Handle matching via the POSIX interface, which does not
4899 support timing or playing with the match limit or callout data. */
4900
4901 #if !defined NOPOSIX
4902 if (posix || do_posix)
4903 {
4904 int rc;
4905 int eflags = 0;
4906 regmatch_t *pmatch = NULL;
4907 if (use_size_offsets > 0)
4908 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4909 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4910 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4911 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4912
4913 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4914
4915 if (rc != 0)
4916 {
4917 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4918 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4919 }
4920 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4921 {
4922 fprintf(outfile, "Matched with REG_NOSUB\n");
4923 }
4924 else
4925 {
4926 size_t i;
4927 for (i = 0; i < (size_t)use_size_offsets; i++)
4928 {
4929 if (pmatch[i].rm_so >= 0)
4930 {
4931 fprintf(outfile, "%2d: ", (int)i);
4932 PCHARSV(dbuffer, pmatch[i].rm_so,
4933 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4934 fprintf(outfile, "\n");
4935 if (do_showcaprest || (i == 0 && do_showrest))
4936 {
4937 fprintf(outfile, "%2d+ ", (int)i);
4938 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4939 outfile);
4940 fprintf(outfile, "\n");
4941 }
4942 }
4943 }
4944 }
4945 free(pmatch);
4946 goto NEXT_DATA;
4947 }
4948
4949 #endif /* !defined NOPOSIX */
4950
4951 /* Handle matching via the native interface - repeats for /g and /G */
4952
4953 /* Ensure that there is a JIT callback if we want to verify that JIT was
4954 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4955
4956 if (verify_jit && jit_stack == NULL && extra != NULL)
4957 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4958
4959 for (;; gmatched++) /* Loop for /g or /G */
4960 {
4961 markptr = NULL;
4962 jit_was_used = FALSE;
4963
4964 if (timeitm > 0)
4965 {
4966 register int i;
4967 clock_t time_taken;
4968 clock_t start_time = clock();
4969
4970 #if !defined NODFA
4971 if (all_use_dfa || use_dfa)
4972 {
4973 if ((options & PCRE_DFA_RESTART) != 0)
4974 {
4975 fprintf(outfile, "Timing DFA restarts is not supported\n");
4976 break;
4977 }
4978 if (dfa_workspace == NULL)
4979 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4980 for (i = 0; i < timeitm; i++)
4981 {
4982 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4983 (options | g_notempty), use_offsets, use_size_offsets,
4984 dfa_workspace, DFA_WS_DIMENSION);
4985 }
4986 }
4987 else
4988 #endif
4989
4990 for (i = 0; i < timeitm; i++)
4991 {
4992 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4993 (options | g_notempty), use_offsets, use_size_offsets);
4994 }
4995 total_match_time += (time_taken = clock() - start_time);
4996 fprintf(outfile, "Execute time %.4f milliseconds\n",
4997 (((double)time_taken * 1000.0) / (double)timeitm) /
4998 (double)CLOCKS_PER_SEC);
4999 }
5000
5001 /* If find_match_limit is set, we want to do repeated matches with
5002 varying limits in order to find the minimum value for the match limit and
5003 for the recursion limit. The match limits are relevant only to the normal
5004 running of pcre_exec(), so disable the JIT optimization. This makes it
5005 possible to run the same set of tests with and without JIT externally
5006 requested. */
5007
5008 if (find_match_limit)
5009 {
5010 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5011 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5012 extra->flags = 0;
5013
5014 (void)check_match_limit(re, extra, bptr, len, start_offset,
5015 options|g_notempty, use_offsets, use_size_offsets,
5016 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5017 PCRE_ERROR_MATCHLIMIT, "match()");
5018
5019 count = check_match_limit(re, extra, bptr, len, start_offset,
5020 options|g_notempty, use_offsets, use_size_offsets,
5021 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5022 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5023 }
5024
5025 /* If callout_data is set, use the interface with additional data */
5026
5027 else if (callout_data_set)
5028 {
5029 if (extra == NULL)
5030 {
5031 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5032 extra->flags = 0;
5033 }
5034 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5035 extra->callout_data = &callout_data;
5036 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5037 options | g_notempty, use_offsets, use_size_offsets);
5038 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5039 }
5040
5041 /* The normal case is just to do the match once, with the default
5042 value of match_limit. */
5043
5044 #if !defined NODFA
5045 else if (all_use_dfa || use_dfa)
5046 {
5047 if (dfa_workspace == NULL)
5048 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5049 if (dfa_matched++ == 0)
5050 dfa_workspace[0] = -1; /* To catch bad restart */
5051 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5052 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5053 DFA_WS_DIMENSION);
5054 if (count == 0)
5055 {
5056 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5057 count = use_size_offsets/2;
5058 }
5059 }
5060 #endif
5061
5062 else
5063 {
5064 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5065 options | g_notempty, use_offsets, use_size_offsets);
5066 if (count == 0)
5067 {
5068 fprintf(outfile, "Matched, but too many substrings\n");
5069 /* 2 is a special case; match can be returned */
5070 count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5071 }
5072 }
5073
5074 /* Matched */
5075
5076 if (count >= 0)
5077 {
5078 int i, maxcount;
5079 void *cnptr, *gnptr;
5080
5081 #if !defined NODFA
5082 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5083 #endif
5084 /* 2 is a special case; match can be returned */
5085 maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5086
5087 /* This is a check against a lunatic return value. */
5088
5089 if (count > maxcount)
5090 {
5091 fprintf(outfile,
5092 "** PCRE error: returned count %d is too big for offset size %d\n",
5093 count, use_size_offsets);
5094 count = use_size_offsets/3;
5095 if (do_g || do_G)
5096 {
5097 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5098 do_g = do_G = FALSE; /* Break g/G loop */
5099 }
5100 }
5101
5102 /* do_allcaps requests showing of all captures in the pattern, to check
5103 unset ones at the end. */
5104
5105 if (do_allcaps)
5106 {
5107 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5108 goto SKIP_DATA;
5109 count++; /* Allow for full match */
5110 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5111 }
5112
5113 /* Output the captured substrings */
5114
5115 for (i = 0; i < count * 2; i += 2)
5116 {
5117 if (use_offsets[i] < 0)
5118 {
5119 if (use_offsets[i] != -1)
5120 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5121 use_offsets[i], i);
5122 if (use_offsets[i+1] != -1)
5123 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5124 use_offsets[i+1], i+1);
5125 fprintf(outfile, "%2d: <unset>\n", i/2);
5126 }
5127 else
5128 {
5129 fprintf(outfile, "%2d: ", i/2);
5130 PCHARSV(bptr, use_offsets[i],
5131 use_offsets[i+1] - use_offsets[i], outfile);
5132 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5133 fprintf(outfile, "\n");
5134 if (do_showcaprest || (i == 0 && do_showrest))
5135 {
5136 fprintf(outfile, "%2d+ ", i/2);
5137 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5138 outfile);
5139 fprintf(outfile, "\n");
5140 }
5141 }
5142 }
5143
5144 if (markptr != NULL)
5145 {
5146 fprintf(outfile, "MK: ");
5147 PCHARSV(markptr, 0, -1, outfile);
5148 fprintf(outfile, "\n");
5149 }
5150
5151 for (i = 0; i < 32; i++)
5152 {
5153 if ((copystrings & (1 << i)) != 0)
5154 {
5155 int rc;
5156 char copybuffer[256];
5157 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5158 copybuffer, sizeof(copybuffer));
5159 if (rc < 0)
5160 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5161 else
5162 {
5163 fprintf(outfile, "%2dC ", i);
5164 PCHARSV(copybuffer, 0, rc, outfile);
5165 fprintf(outfile, " (%d)\n", rc);
5166 }
5167 }
5168 }
5169
5170 cnptr = copynames;
5171 for (;;)
5172 {
5173 int rc;
5174 char copybuffer[256];
5175
5176 #ifdef SUPPORT_PCRE32
5177 if (pcre_mode == PCRE32_MODE)
5178 {
5179 if (*(pcre_uint32 *)cnptr == 0) break;
5180 }
5181 #endif
5182 #ifdef SUPPORT_PCRE16
5183 if (pcre_mode == PCRE16_MODE)
5184 {
5185 if (*(pcre_uint16 *)cnptr == 0) break;
5186 }
5187 #endif
5188 #ifdef SUPPORT_PCRE8
5189 if (pcre_mode == PCRE8_MODE)
5190 {
5191 if (*(pcre_uint8 *)cnptr == 0) break;
5192 }
5193 #endif
5194
5195 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5196 cnptr, copybuffer, sizeof(copybuffer));
5197
5198 if (rc < 0)
5199 {
5200 fprintf(outfile, "copy substring ");
5201 PCHARSV(cnptr, 0, -1, outfile);
5202 fprintf(outfile, " failed %d\n", rc);
5203 }
5204 else
5205 {
5206 fprintf(outfile, " C ");
5207 PCHARSV(copybuffer, 0, rc, outfile);
5208 fprintf(outfile, " (%d) ", rc);
5209 PCHARSV(cnptr, 0, -1, outfile);
5210 putc('\n', outfile);
5211 }
5212
5213 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5214 }
5215
5216 for (i = 0; i < 32; i++)
5217 {
5218 if ((getstrings & (1 << i)) != 0)
5219 {
5220 int rc;
5221 const char *substring;
5222 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5223 if (rc < 0)
5224 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5225 else
5226 {
5227 fprintf(outfile, "%2dG ", i);
5228 PCHARSV(substring, 0, rc, outfile);
5229 fprintf(outfile, " (%d)\n", rc);
5230 PCRE_FREE_SUBSTRING(substring);
5231 }
5232 }
5233 }
5234
5235 gnptr = getnames;
5236 for (;;)
5237 {
5238 int rc;
5239 const char *substring;
5240
5241 #ifdef SUPPORT_PCRE32
5242 if (pcre_mode == PCRE32_MODE)
5243 {
5244 if (*(pcre_uint32 *)gnptr == 0) break;
5245 }
5246 #endif
5247 #ifdef SUPPORT_PCRE16
5248 if (pcre_mode == PCRE16_MODE)
5249 {
5250 if (*(pcre_uint16 *)gnptr == 0) break;
5251 }
5252 #endif
5253 #ifdef SUPPORT_PCRE8
5254 if (pcre_mode == PCRE8_MODE)
5255 {
5256 if (*(pcre_uint8 *)gnptr == 0) break;
5257 }
5258 #endif
5259
5260 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5261 gnptr, &substring);
5262 if (rc < 0)
5263 {
5264 fprintf(outfile, "get substring ");
5265 PCHARSV(gnptr, 0, -1, outfile);
5266 fprintf(outfile, " failed %d\n", rc);
5267 }
5268 else
5269 {
5270 fprintf(outfile, " G ");
5271 PCHARSV(substring, 0, rc, outfile);
5272 fprintf(outfile, " (%d) ", rc);
5273 PCHARSV(gnptr, 0, -1, outfile);
5274 PCRE_FREE_SUBSTRING(substring);
5275 putc('\n', outfile);
5276 }
5277
5278 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5279 }
5280
5281 if (getlist)
5282 {
5283 int rc;
5284 const char **stringlist;
5285 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5286 if (rc < 0)
5287 fprintf(outfile, "get substring list failed %d\n", rc);
5288 else
5289 {
5290 for (i = 0; i < count; i++)
5291 {
5292 fprintf(outfile, "%2dL ", i);
5293 PCHARSV(stringlist[i], 0, -1, outfile);
5294 putc('\n', outfile);
5295 }
5296 if (stringlist[i] != NULL)
5297 fprintf(outfile, "string list not terminated by NULL\n");
5298 PCRE_FREE_SUBSTRING_LIST(stringlist);
5299 }
5300 }
5301 }
5302
5303 /* There was a partial match. If the bumpalong point is not the same as
5304 the first inspected character, show the offset explicitly. */
5305
5306 else if (count == PCRE_ERROR_PARTIAL)
5307 {
5308 fprintf(outfile, "Partial match");
5309 if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5310 fprintf(outfile, " at offset %d", use_offsets[2]);
5311 if (markptr != NULL)
5312 {
5313 fprintf(outfile, ", mark=");
5314 PCHARSV(markptr, 0, -1, outfile);
5315 }
5316 if (use_size_offsets > 1)
5317 {
5318 fprintf(outfile, ": ");
5319 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5320 outfile);
5321 }
5322 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5323 fprintf(outfile, "\n");
5324 break; /* Out of the /g loop */
5325 }
5326
5327 /* Failed to match. If this is a /g or /G loop and we previously set
5328 g_notempty after a null match, this is not necessarily the end. We want
5329 to advance the start offset, and continue. We won't be at the end of the
5330 string - that was checked before setting g_notempty.
5331
5332 Complication arises in the case when the newline convention is "any",
5333 "crlf", or "anycrlf". If the previous match was at the end of a line
5334 terminated by CRLF, an advance of one character just passes the \r,
5335 whereas we should prefer the longer newline sequence, as does the code in
5336 pcre_exec(). Fudge the offset value to achieve this. We check for a
5337 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5338 find the default.
5339
5340 Otherwise, in the case of UTF-8 matching, the advance must be one
5341 character, not one byte. */
5342
5343 else
5344 {
5345 if (g_notempty != 0)
5346 {
5347 int onechar = 1;
5348 unsigned int obits = REAL_PCRE_OPTIONS(re);
5349 use_offsets[0] = start_offset;
5350 if ((obits & PCRE_NEWLINE_BITS) == 0)
5351 {
5352 int d;
5353 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5354 /* Note that these values are always the ASCII ones, even in
5355 EBCDIC environments. CR = 13, NL = 10. */
5356 obits = (d == 13)? PCRE_NEWLINE_CR :
5357 (d == 10)? PCRE_NEWLINE_LF :
5358 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5359 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5360 (d == -1)? PCRE_NEWLINE_ANY : 0;
5361 }
5362 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5363 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5364 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5365 &&
5366 start_offset < len - 1 && (
5367 #ifdef SUPPORT_PCRE8
5368 (pcre_mode == PCRE8_MODE &&
5369 bptr[start_offset] == '\r' &&
5370 bptr[start_offset + 1] == '\n') ||
5371 #endif
5372 #ifdef SUPPORT_PCRE16
5373 (pcre_mode == PCRE16_MODE &&
5374 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5375 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5376 #endif
5377 #ifdef SUPPORT_PCRE32
5378 (pcre_mode == PCRE32_MODE &&
5379 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5380 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5381 #endif
5382 0))
5383 onechar++;
5384 else if (use_utf)
5385 {
5386 while (start_offset + onechar < len)
5387 {
5388 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5389 onechar++;
5390 }
5391 }
5392 use_offsets[1] = start_offset + onechar;
5393 }
5394 else
5395 {
5396 switch(count)
5397 {
5398 case PCRE_ERROR_NOMATCH:
5399 if (gmatched == 0)
5400 {
5401 if (markptr == NULL)
5402 {
5403 fprintf(outfile, "No match");
5404 }
5405 else
5406 {
5407 fprintf(outfile, "No match, mark = ");
5408 PCHARSV(markptr, 0, -1, outfile);
5409 }
5410 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5411 putc('\n', outfile);
5412 }
5413 break;
5414
5415 case PCRE_ERROR_BADUTF8:
5416 case PCRE_ERROR_SHORTUTF8:
5417 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5418 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5419 8 * CHAR_SIZE);
5420 if (use_size_offsets >= 2)
5421 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5422 use_offsets[1]);
5423 fprintf(outfile, "\n");
5424 break;
5425
5426 case PCRE_ERROR_BADUTF8_OFFSET:
5427 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5428 8 * CHAR_SIZE);
5429 break;
5430
5431 default:
5432 if (count < 0 &&
5433 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5434 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5435 else
5436 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5437 break;
5438 }
5439
5440 break; /* Out of the /g loop */
5441 }
5442 }
5443
5444 /* If not /g or /G we are done */
5445
5446 if (!do_g && !do_G) break;
5447
5448 /* If we have matched an empty string, first check to see if we are at
5449 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5450 Perl's /g option does. This turns out to be rather cunning. First we set
5451 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5452 same point. If this fails (picked up above) we advance to the next
5453 character. */
5454
5455 g_notempty = 0;
5456
5457 if (use_offsets[0] == use_offsets[1])
5458 {
5459 if (use_offsets[0] == len) break;
5460 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5461 }
5462
5463 /* For /g, update the start offset, leaving the rest alone */
5464
5465 if (do_g) start_offset = use_offsets[1];
5466
5467 /* For /G, update the pointer and length */
5468
5469 else
5470 {
5471 bptr += use_offsets[1] * CHAR_SIZE;
5472 len -= use_offsets[1];
5473 }
5474 } /* End of loop for /g and /G */
5475
5476 NEXT_DATA: continue;
5477 } /* End of loop for data lines */
5478
5479 CONTINUE:
5480
5481 #if !defined NOPOSIX
5482 if (posix || do_posix) regfree(&preg);
5483 #endif
5484
5485 if (re != NULL) new_free(re);
5486 if (extra != NULL)
5487 {
5488 PCRE_FREE_STUDY(extra);
5489 }
5490 if (locale_set)
5491 {
5492 new_free((void *)tables);
5493 setlocale(LC_CTYPE, "C");
5494 locale_set = 0;
5495 }
5496 if (jit_stack != NULL)
5497 {
5498 PCRE_JIT_STACK_FREE(jit_stack);
5499 jit_stack = NULL;
5500 }
5501 }
5502
5503 if (infile == stdin) fprintf(outfile, "\n");
5504
5505 if (showtotaltimes)
5506 {
5507 fprintf(outfile, "--------------------------------------\n");
5508 if (timeit > 0)
5509 {
5510 fprintf(outfile, "Total compile time %.4f milliseconds\n",
5511 (((double)total_compile_time * 1000.0) / (double)timeit) /
5512 (double)CLOCKS_PER_SEC);
5513 fprintf(outfile, "Total study time %.4f milliseconds\n",
5514 (((double)total_study_time * 1000.0) / (double)timeit) /
5515 (double)CLOCKS_PER_SEC);
5516 }
5517 fprintf(outfile, "Total execute time %.4f milliseconds\n",
5518 (((double)total_match_time * 1000.0) / (double)timeitm) /
5519 (double)CLOCKS_PER_SEC);
5520 }
5521
5522 EXIT:
5523
5524 if (infile != NULL && infile != stdin) fclose(infile);
5525 if (outfile != NULL && outfile != stdout) fclose(outfile);
5526
5527 free(buffer);
5528 free(dbuffer);
5529 free(pbuffer);
5530 free(offsets);
5531
5532 #ifdef SUPPORT_PCRE16
5533 if (buffer16 != NULL) free(buffer16);
5534 #endif
5535 #ifdef SUPPORT_PCRE32
5536 if (buffer32 != NULL) free(buffer32);
5537 #endif
5538
5539 #if !defined NODFA
5540 if (dfa_workspace != NULL)
5541 free(dfa_workspace);
5542 #endif
5543
5544 #if defined(__VMS)
5545 yield = SS$_NORMAL; /* Return values via DCL symbols */
5546 #endif
5547
5548 return yield;
5549 }
5550
5551 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5