/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1450 - (show annotations)
Fri Jan 17 17:50:51 2014 UTC (5 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 170063 byte(s)
Fix bug in tests when 16/32 bits and --enable-bsr-anycrlf are both set. Extend 
pcretest to show the \R default.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
/* Platform-specific setup for the modes with which pcretest opens its input
and output (INPUT_MODE and OUTPUT_MODE), plus a few Windows name shims.

  Windows:        input "r",  output "wb"
  z/OS (native):  input "r",  output "w"
  anything else:  input "rb", output "wb"
*/

#if defined(WIN32) || defined(_WIN32)

#include <io.h>                 /* Needed for _setmode() */
#include <fcntl.h>              /* Needed for _O_BINARY */

#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore. Some environments already
provide the plain names, so each one is defined only when it is missing. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* Borland Builder 5 under Windows has setmode() instead of _setmode(); this
mapping was supplied by a user. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else  /* Not Windows */

#include <sys/time.h>           /* Both of these headers are */
#include <sys/resource.h>       /* required for setrlimit(). */

#ifdef NATIVE_ZOS               /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#endif  /* Windows / not Windows */
123
124 #ifdef __VMS
125 #include <ssdef.h>
126 void vms_setsymbol( char *, char *, int );
127 #endif
128
129
130 #define PRIV(name) name
131
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
136
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
140
141 #include "pcre.h"
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
/* Prototypes for the per-width pcre_printint() variants. Only the variants
whose library width is enabled (SUPPORT_PCRE8 / 16 / 32) are declared. */

#if defined(SUPPORT_PCRE8)
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE16)
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE32)
extern void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
158
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
162
163 #define PCRE_INCLUDED
164
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
167
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
173
/* PRINTABLE(c): locale-independent test for whether c can be output as-is.
The accepted range is 64..254 when EBCDIC is defined and 32..126 otherwise.
The argument is evaluated more than once. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c): non-zero when c may be output as a literal character. When
locale_set is non-zero the decision is delegated to isprint(); otherwise the
fixed PRINTABLE() range is used.

isprint() has undefined behaviour unless its argument is EOF or representable
as an unsigned char, so values outside 0..255 (negative values, or code points
above 0xff) are rejected before isprint() is called. isprint(EOF) is zero, so
rejecting EOF here gives the same answer. The argument is evaluated more than
once. */

#define PRINTOK(c) \
  (locale_set ? ((unsigned long)(c) <= 0xffUL && isprint((int)(c))) \
              : PRINTABLE(c))
181
182 /* Posix support is disabled in 16 or 32 bit only mode. */
183 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184 #define NOPOSIX
185 #endif
186
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
189 Makefile. */
190
191 #if !defined NOPOSIX
192 #include "pcreposix.h"
193 #endif
194
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
199
/* Force NOUTF on whenever the library was built without SUPPORT_UTF, unless
NOUTF has already been defined. */
#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
205
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16|32]_xxx functions (except pcre[16|32]_fullinfo, which is
208 called only from one place and is handled differently). I couldn't dream up any
209 way of using a single macro to do this in a generic way, because of the many
210 different argument requirements. We know that at least one of SUPPORT_PCRE8,
211 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212 individual mode; then use these in the definitions of generic macros.
213
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16, or 32 bit data;
217 the offset is in units of this size. If the string were given as base+offset in
218 one argument, the casting might be incorrectly applied. */
219
220 #ifdef SUPPORT_PCRE8
221
/* ---- 8-bit mode leaf macros ----
Each macro forwards to one pcre_xxx() library function or pcretest helper.
String arguments are cast to char pointers where the call takes them, and any
result is assigned to the variable named by the macro's first argument. */

/* Hand the string at base address p plus offset (counted in 8-bit units) to
pchars(), keeping its result in lv. */
#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

/* The same call as PCHARS8 with the result explicitly discarded. */
#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* Of the three name buffers only cn8 is forwarded; cn16 and cn32 are accepted
and ignored. */
#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

/* strlen() of p, narrowed to int. */
#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, namesptr, \
    cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, \
    options, offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, getnamesptr, \
    subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE(review): the expansion names `re` directly, so it uses whatever `re`
is in scope where the macro is expanded; the rc parameter is never referenced.
Confirm against the callers before changing. */
#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

/* The two JIT stack macros are plain expressions, not assignments. */
#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) pcre_jit_stack_free(stack)

/* Gives the 8-bit table builder a width-qualified name alongside
pcre16_maketables and pcre32_maketables. */
#define pcre8_maketables pcre_maketables
299
300 #endif /* SUPPORT_PCRE8 */
301
302 /* -----------------------------------------------------------*/
303
304 #ifdef SUPPORT_PCRE16
305
/* ---- 16-bit mode leaf macros ----
pcretest keeps its variables in the 8-bit library's types, so each macro casts
its arguments to the pcre16 types on the way in and casts returned pointers
back to the 8-bit types on the way out. */

/* Hand the string at base address p plus offset (counted in 16-bit units) to
pchars16(), keeping its result in lv. */
#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* The same call as PCHARS16 with the result explicitly discarded. */
#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* Of the three name buffers only cn16 is forwarded. */
#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

/* Result of strlen16() on p, narrowed to int. */
#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, \
    erroffset, tables)

/* size is halved before it is handed to the library together with the buffer
cast to PCRE_UCHAR16 *. */
#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, namesptr, \
    cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, \
    offsets, count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

/* size is halved here as well. */
#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, \
    size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* The output pointer goes through void * on its way to the 16-bit type. */
#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, getnamesptr, \
    subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, \
    offsets, count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE(review): the expansion names `re` directly, so it uses whatever `re`
is in scope where the macro is expanded; the rc parameter is never referenced.
Confirm against the callers before changing. */
#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, \
    (pcre16_extra *)extra, tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

/* The two JIT stack macros are plain expressions, not assignments. */
#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
388
389 #endif /* SUPPORT_PCRE16 */
390
391 /* -----------------------------------------------------------*/
392
393 #ifdef SUPPORT_PCRE32
394
/* ---- 32-bit mode leaf macros (string helpers, callout, JIT stack, compile)
Arguments are cast to the pcre32 types on the way in; the compiled pattern is
cast back to pcre * on the way out. */

/* Hand the string at base address p plus offset (counted in 32-bit units) to
pchars32(), keeping its result in lv. Unlike the 8- and 16-bit forms this one
also passes use_utf, which is taken from the scope where the macro is
expanded. */
#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

/* The same call as PCHARS32 with the result explicitly discarded. */
#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

/* Of the three name buffers only cn32 is forwarded. */
#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  p = read_capture_name32(p, cn32, re)

/* Result of strlen32() on p, narrowed to int. */
#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))

#define SET_PCRE_CALLOUT32(callout) \
  pcre32_callout = (int (*)(pcre32_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  pcre32_assign_jit_stack((pcre32_extra *)extra, \
    (pcre32_jit_callback)callback, userdata)

#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, \
    erroffset, tables)
416
/* Copy a named substring into cbuffer using the 32-bit library.

The 8-bit form of this macro passes size to the library unchanged and the
16-bit form halves it, i.e. size is the capacity of cbuffer in bytes. The
32-bit functions receive cbuffer as PCRE_UCHAR32 *, so the capacity in 32-bit
units is size divided by 4. Dividing by 2 (as the 16-bit form does) would
tell the library the buffer is twice as large as it really is. size is
parenthesised so that an expression argument is divided as a whole. */
#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)

/* Copy substring number i into cbuffer using the 32-bit library. The byte
capacity size is converted to 32-bit units in the same way as above. */
#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
425
/* ---- 32-bit mode leaf macros (matching, substring extraction, study, JIT)
Arguments are cast to the pcre32 types on the way in; returned pointers are
cast back to the 8-bit library's types on the way out. */

#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
    (PCRE_SPTR32)bptr, len, start_offset, options, offsets, \
    size_offsets, workspace, size_workspace)

#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, \
    (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY32(extra) pcre32_free_study((pcre32_extra *)extra)

#define PCRE_FREE_SUBSTRING32(substring) \
  pcre32_free_substring((PCRE_SPTR32)substring)

#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  pcre32_free_substring_list((PCRE_SPTR32 *)listptr)

/* The output pointer goes through void * on its way to the 32-bit type. */
#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, getnamesptr, \
    subsptr) \
  rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, \
    offsets, count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)

/* NOTE(review): the expansion names `re` directly, so it uses whatever `re`
is in scope where the macro is expanded; the rc parameter is never referenced.
Confirm against the callers before changing. */
#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)

#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_SPTR32 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
    (PCRE_SPTR32 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, \
    (pcre32_extra *)extra, tables)

#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  pcre32_printint(re, outfile, debug_lengths)

#define PCRE_STUDY32(extra, re, options, error) \
  extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)

/* The two JIT stack macros are plain expressions, not assignments. */
#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE32(stack) \
  pcre32_jit_stack_free((pcre32_jit_stack *)stack)
477
478 #endif /* SUPPORT_PCRE32 */
479
480
481 /* ----- More than one mode is supported; a runtime test is needed, except for
482 pcre_config(), and the JIT stack functions, when it doesn't matter which
483 available version is called. ----- */
484
/* Identifiers for the three library widths. The numeric values are written
out explicitly; they are the same 0, 1, 2 that implicit enumeration assigns. */
enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
490
491 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
492 defined (SUPPORT_PCRE32)) >= 2
493
/* 1 shifted left by the current value of pcre_mode. */
#define CHAR_SIZE (1 << (pcre_mode))
495
496 /* There doesn't seem to be an easy way of writing these macros that can cope
497 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
498 cases separately. */
499
500 /* ----- All three modes supported ----- */
501
502 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
503
/* Generic macros for a build with all three widths enabled. Each one tests
pcre_mode at run time: PCRE32_MODE selects the ...32 macro, PCRE16_MODE the
...16 macro, and any other value the ...8 macro.

The statement-like macros are wrapped in do { ... } while (0) with braced
branches, so that an invocation followed by a semicolon is exactly one
statement. STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES yield a value and
are therefore written as conditional expressions. PCRE_CONFIG is simply the
8-bit function. */

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCHARS32(lv, p, offset, len, f); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCHARS16(lv, p, offset, len, f); \
    } else { \
      PCHARS8(lv, p, offset, len, f); \
    } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCHARSV32(p, offset, len, f); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCHARSV16(p, offset, len, f); \
    } else { \
      PCHARSV8(p, offset, len, f); \
    } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
    } else if (pcre_mode == PCRE16_MODE) { \
      READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
    } else { \
      READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re); \
    } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      SET_PCRE_CALLOUT32(callout); \
    } else if (pcre_mode == PCRE16_MODE) { \
      SET_PCRE_CALLOUT16(callout); \
    } else { \
      SET_PCRE_CALLOUT8(callout); \
    } \
  } while (0)

#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? STRLEN32(p) : \
   pcre_mode == PCRE16_MODE ? STRLEN16(p) : \
                              STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    } else { \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
    } \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    } else { \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
    } \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else { \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    } else { \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
    } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else { \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else { \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_FREE_STUDY32(extra); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_FREE_STUDY16(extra); \
    } else { \
      PCRE_FREE_STUDY8(extra); \
    } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_FREE_SUBSTRING32(substring); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_FREE_SUBSTRING16(substring); \
    } else { \
      PCRE_FREE_SUBSTRING8(substring); \
    } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_FREE_SUBSTRING_LIST32(listptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    } else { \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
    } \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else { \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    } else { \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    } else { \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    } else { \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
    } \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE32_MODE ? PCRE_JIT_STACK_ALLOC32(startsize, maxsize) : \
   pcre_mode == PCRE16_MODE ? PCRE_JIT_STACK_ALLOC16(startsize, maxsize) : \
                              PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_JIT_STACK_FREE32(stack); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_JIT_STACK_FREE16(stack); \
    } else { \
      PCRE_JIT_STACK_FREE8(stack); \
    } \
  } while (0)

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : \
   pcre_mode == PCRE16_MODE ? pcre16_maketables() : \
                              pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    } else { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
    } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_PRINTINT32(re, outfile, debug_lengths); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    } else { \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
    } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_STUDY32(extra, re, options, error); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_STUDY16(extra, re, options, error); \
    } else { \
      PCRE_STUDY8(extra, re, options, error); \
    } \
  } while (0)
701
702
703 /* ----- Two out of three modes are supported ----- */
704
705 #else
706
707 /* We can use some macro trickery to make a single set of definitions work in
708 the three different cases. */
709
710 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
711
/* Choose the two widths that the generic two-mode macros dispatch between.
BITONE is the wider of the two enabled widths and BITTWO the narrower. The
final #else is reached when neither of the first two pairs is enabled, and
selects 16 and 8. */

#if defined SUPPORT_PCRE32 && defined SUPPORT_PCRE16   /* 32 + 16, no 8 */
#define BITONE 32
#define BITTWO 16

#elif defined SUPPORT_PCRE32 && defined SUPPORT_PCRE8  /* 32 + 8, no 16 */
#define BITONE 32
#define BITTWO 8

#else                                                  /* 16 + 8, no 32 */
#define BITONE 16
#define BITTWO 8
#endif

/* Token pasting in two steps: G() expands its arguments first and then hands
them to glue(), which performs the actual ## concatenation. */
#define glue(a,b) a##b
#define G(a,b) glue(a,b)
731
732
733 /* ----- Common macros for two-mode cases ----- */
734
735 #define PCHARS(lv, p, offset, len, f) \
736 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737 G(PCHARS,BITONE)(lv, p, offset, len, f); \
738 else \
739 G(PCHARS,BITTWO)(lv, p, offset, len, f)
740
741 #define PCHARSV(p, offset, len, f) \
742 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743 G(PCHARSV,BITONE)(p, offset, len, f); \
744 else \
745 G(PCHARSV,BITTWO)(p, offset, len, f)
746
747 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
748 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
750 else \
751 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
752
753 #define SET_PCRE_CALLOUT(callout) \
754 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
755 G(SET_PCRE_CALLOUT,BITONE)(callout); \
756 else \
757 G(SET_PCRE_CALLOUT,BITTWO)(callout)
758
759 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
760 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
761
762 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
763 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
765 else \
766 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
767
768 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
769 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
770 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
771 else \
772 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
773
774 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
775
776 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
777 namesptr, cbuffer, size) \
778 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
779 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
780 namesptr, cbuffer, size); \
781 else \
782 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
783 namesptr, cbuffer, size)
784
785 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
786 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
788 else \
789 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
790
791 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
792 offsets, size_offsets, workspace, size_workspace) \
793 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
794 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
795 offsets, size_offsets, workspace, size_workspace); \
796 else \
797 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
798 offsets, size_offsets, workspace, size_workspace)
799
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_EXEC according
to pcre_mode. The do { } while (0) wrapper makes the expansion a single
statement. */

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)
808
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_FREE_STUDY
according to pcre_mode. The do { } while (0) wrapper makes the expansion a
single statement, so callers need not brace it under an if. */

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_STUDY,BITONE)(extra); \
    else \
      G(PCRE_FREE_STUDY,BITTWO)(extra); \
  } while (0)
814
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_FREE_SUBSTRING
according to pcre_mode. The do { } while (0) wrapper makes the expansion a
single statement. */

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
    else \
      G(PCRE_FREE_SUBSTRING,BITTWO)(substring); \
  } while (0)
820
/* Statement macro: picks the BITONE or BITTWO variant of
PCRE_FREE_SUBSTRING_LIST according to pcre_mode. The do { } while (0) wrapper
makes the expansion a single statement. */

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
    else \
      G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr); \
  } while (0)
826
/* Statement macro: picks the BITONE or BITTWO variant of
PCRE_GET_NAMED_SUBSTRING according to pcre_mode. The do { } while (0) wrapper
makes the expansion a single statement. */

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)
835
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_GET_STRINGNUMBER
according to pcre_mode. The do { } while (0) wrapper makes the expansion a
single statement. */

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
    else \
      G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr); \
  } while (0)
841
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_GET_SUBSTRING
according to pcre_mode. The do { } while (0) wrapper makes the expansion a
single statement. */

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)
847
/* Statement macro: picks the BITONE or BITTWO variant of
PCRE_GET_SUBSTRING_LIST according to pcre_mode. The do { } while (0) wrapper
makes the expansion a single statement. */

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
    else \
      G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr); \
  } while (0)
853
/* Expression macro: yields the result of the BITONE or BITTWO variant of
PCRE_JIT_STACK_ALLOC, chosen by pcre_mode. The whole conditional is
parenthesized so that operators next to the macro at the point of use (a cast,
a comparison, ...) apply to the result rather than binding into the ?: */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
   : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
858
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_JIT_STACK_FREE
according to pcre_mode. The do { } while (0) wrapper makes the expansion a
single statement. */

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
    else \
      G(PCRE_JIT_STACK_FREE,BITTWO)(stack); \
  } while (0)
864
/* Expression macro: calls the _maketables function of the BITONE or BITTWO
library, chosen by pcre_mode, and yields its result. The whole conditional is
parenthesized so that operators next to the macro at the point of use apply to
the result rather than binding into the ?: */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
868
/* Statement macro: picks the BITONE or BITTWO variant of
PCRE_PATTERN_TO_HOST_BYTE_ORDER according to pcre_mode. The do { } while (0)
wrapper makes the expansion a single statement. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
    else \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables); \
  } while (0)
874
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_PRINTINT
according to pcre_mode. The do { } while (0) wrapper makes the expansion a
single statement. */

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
    else \
      G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths); \
  } while (0)
880
/* Statement macro: picks the BITONE or BITTWO variant of PCRE_STUDY according
to pcre_mode. The do { } while (0) wrapper makes the expansion a single
statement. */

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_STUDY,BITONE)(extra, re, options, error); \
    else \
      G(PCRE_STUDY,BITTWO)(extra, re, options, error); \
  } while (0)
886
887 #endif /* Two out of three modes */
888
889 /* ----- End of cases where more than one mode is supported ----- */
890
891
892 /* ----- Only 8-bit mode is supported ----- */
893
894 #elif defined SUPPORT_PCRE8
/* With only one width built in there is nothing to dispatch on: each generic
name below maps straight onto its 8-bit macro or library function, and
CHAR_SIZE is fixed at 1. */
895 #define CHAR_SIZE 1
896 #define PCHARS PCHARS8
897 #define PCHARSV PCHARSV8
898 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
899 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
900 #define STRLEN STRLEN8
901 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
902 #define PCRE_COMPILE PCRE_COMPILE8
903 #define PCRE_CONFIG pcre_config
904 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
905 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
906 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
907 #define PCRE_EXEC PCRE_EXEC8
908 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
909 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
910 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
911 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
912 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
913 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
914 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
915 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
916 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
917 #define PCRE_MAKETABLES pcre_maketables()
918 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
919 #define PCRE_PRINTINT PCRE_PRINTINT8
920 #define PCRE_STUDY PCRE_STUDY8
921
922 /* ----- Only 16-bit mode is supported ----- */
923
924 #elif defined SUPPORT_PCRE16
/* With only one width built in there is nothing to dispatch on: each generic
name below maps straight onto its 16-bit macro or library function, and
CHAR_SIZE is fixed at 2. */
925 #define CHAR_SIZE 2
926 #define PCHARS PCHARS16
927 #define PCHARSV PCHARSV16
928 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
929 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
930 #define STRLEN STRLEN16
931 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
932 #define PCRE_COMPILE PCRE_COMPILE16
933 #define PCRE_CONFIG pcre16_config
934 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
935 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
936 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
937 #define PCRE_EXEC PCRE_EXEC16
938 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
939 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
940 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
941 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
942 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
943 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
944 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
945 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
946 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
947 #define PCRE_MAKETABLES pcre16_maketables()
948 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
949 #define PCRE_PRINTINT PCRE_PRINTINT16
950 #define PCRE_STUDY PCRE_STUDY16
951
952 /* ----- Only 32-bit mode is supported ----- */
953
954 #elif defined SUPPORT_PCRE32
/* With only one width built in there is nothing to dispatch on: each generic
name below maps straight onto its 32-bit macro or library function, and
CHAR_SIZE is fixed at 4. */
955 #define CHAR_SIZE 4
956 #define PCHARS PCHARS32
957 #define PCHARSV PCHARSV32
958 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
959 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
960 #define STRLEN STRLEN32
961 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
962 #define PCRE_COMPILE PCRE_COMPILE32
963 #define PCRE_CONFIG pcre32_config
964 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
965 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
966 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
967 #define PCRE_EXEC PCRE_EXEC32
968 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
969 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
970 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
971 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
972 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
973 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
974 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
975 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
976 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
977 #define PCRE_MAKETABLES pcre32_maketables()
978 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
979 #define PCRE_PRINTINT PCRE_PRINTINT32
980 #define PCRE_STUDY PCRE_STUDY32
981
982 #endif
983
984 /* ----- End of mode-specific function call macros ----- */
985
986
987 /* Other parameters */
988
/* If nothing has defined CLOCKS_PER_SEC, fall back to CLK_TCK when that
exists, and to 100 as a last resort. */
989 #ifndef CLOCKS_PER_SEC
990 #ifdef CLK_TCK
991 #define CLOCKS_PER_SEC CLK_TCK
992 #else
993 #define CLOCKS_PER_SEC 100
994 #endif
995 #endif
996
/* Defined only when NODFA is not set. NOTE(review): presumably the dimension
of the workspace vector used for DFA matching - confirm at the use site. */
997 #if !defined NODFA
998 #define DFA_WS_DIMENSION 1000
999 #endif
1000
1001 /* This is the default loop count for timing. */
1002
1003 #define LOOPREPEAT 500000
1004
1005 /* Static variables */
1006
/* File-scope state. Only log_store, locale_set and last_callout_mark carry an
explicit initializer; the others start out as zero/NULL, as every static
object does. */
1007 static FILE *outfile;
1008 static int log_store = 0;
1009 static int callout_count;
1010 static int callout_extra;
1011 static int callout_fail_count;
1012 static int callout_fail_id;
1013 static int debug_lengths;
1014 static int first_callout;
1015 static int jit_was_used; /* set to TRUE by jit_callback() */
1016 static int locale_set = 0;
1017 static int show_malloc;
1018 static int use_utf;
1019 static const unsigned char *last_callout_mark = NULL;
1020
1021 /* The buffers grow automatically if very long input lines are encountered. */
1022
1023 static int buffer_size = 50000;
1024 static pcre_uint8 *buffer = NULL;
1025 static pcre_uint8 *pbuffer = NULL;
1026
1027 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1028
1029 #ifdef COMPILE_PCRE16
1030 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1031 #endif
1032
1033 #ifdef COMPILE_PCRE32
1034 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1035 #endif
1036
1037 /* We need buffers for building 16/32-bit strings, and the tables of operator
1038 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1039 pattern for saving/reloading testing. Luckily, the data for these tables is
1040 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1041 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1042 LINK_SIZE is also used later in this program. */
1043
1044 #ifdef SUPPORT_PCRE16
/* Redefine IMM2_SIZE and LINK_SIZE before OP_LENGTHS is expanded below, so
that the 16-bit length table is built with the adjusted values. LINK_SIZE 2
becomes 1, and 3 or 4 becomes 2; any other value stops the build. */
1045 #undef IMM2_SIZE
1046 #define IMM2_SIZE 1
1047
1048 #if LINK_SIZE == 2
1049 #undef LINK_SIZE
1050 #define LINK_SIZE 1
1051 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1052 #undef LINK_SIZE
1053 #define LINK_SIZE 2
1054 #else
1055 #error LINK_SIZE must be either 2, 3, or 4
1056 #endif
1057
/* buffer16 is allocated and grown by to16(); buffer16_size is its current size
in bytes (it is the value handed to malloc there). */
1058 static int buffer16_size = 0;
1059 static pcre_uint16 *buffer16 = NULL;
1060 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1061 #endif /* SUPPORT_PCRE16 */
1062
1063 #ifdef SUPPORT_PCRE32
/* Redefine IMM2_SIZE and LINK_SIZE (both to 1) before OP_LENGTHS is expanded
below, so that the 32-bit length table is built with the adjusted values. When
16-bit support is compiled in as well, these definitions replace the ones made
for it just above. */
1064 #undef IMM2_SIZE
1065 #define IMM2_SIZE 1
1066 #undef LINK_SIZE
1067 #define LINK_SIZE 1
1068
/* buffer32 is allocated and grown by to32(); buffer32_size is its current size
in bytes (it is the value handed to malloc there). */
1069 static int buffer32_size = 0;
1070 static pcre_uint32 *buffer32 = NULL;
1071 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1072 #endif /* SUPPORT_PCRE32 */
1073
1074 /* Choose the default operating mode. If we have 8-bit support, default to it;
1075 if there is also 16- or 32-bit support, it can be changed by an option. If
1076 there is no 8-bit support, default to 16-bit mode, or failing that to 32-bit. */
1077
1078 #if defined SUPPORT_PCRE8
1079 static int pcre_mode = PCRE8_MODE;
1080 #elif defined SUPPORT_PCRE16
1081 static int pcre_mode = PCRE16_MODE;
1082 #elif defined SUPPORT_PCRE32
1083 static int pcre_mode = PCRE32_MODE;
1084 #endif
1085
1086 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1087
1088 static int jit_study_bits[] =
1089 {
1090 PCRE_STUDY_JIT_COMPILE,
1091 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1092 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1093 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1094 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1095 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1096 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1097 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1098 };
1099
1100 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1101 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1102
1103 /* Textual explanations for runtime error codes */
1104
/* Message table for run-time error codes. The entry comments give the layout:
slot 0 stands for "no error" and slot 1 for NOMATCH. A NULL entry marks a code
that is either never returned by the matching functions or is reported by
other means. NOTE(review): presumably indexed by the negated return code -
confirm at the use site. */
1105 static const char *errtexts[] = {
1106 NULL, /* 0 is no error */
1107 NULL, /* NOMATCH is handled specially */
1108 "NULL argument passed",
1109 "bad option value",
1110 "magic number missing",
1111 "unknown opcode - pattern overwritten?",
1112 "no more memory",
1113 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1114 "match limit exceeded",
1115 "callout error code",
1116 NULL, /* BADUTF8/16 is handled specially */
1117 NULL, /* BADUTF8/16 offset is handled specially */
1118 NULL, /* PARTIAL is handled specially */
1119 "not used - internal error",
1120 "internal error - pattern overwritten?",
1121 "bad count value",
1122 "item unsupported for DFA matching",
1123 "backreference condition or recursion test not supported for DFA matching",
1124 "match limit not supported for DFA matching",
1125 "workspace size exceeded in DFA matching",
1126 "too much recursion for DFA matching",
1127 "recursion limit exceeded",
1128 "not used - internal error",
1129 "invalid combination of newline options",
1130 "bad offset value",
1131 NULL, /* SHORTUTF8/16 is handled specially */
1132 "nested recursion at the same subject position",
1133 "JIT stack limit reached",
1134 "pattern compiled in wrong mode: 8-bit/16-bit error",
1135 "pattern compiled with other endianness",
1136 "invalid data in workspace for DFA restart",
1137 "bad JIT option",
1138 "bad length"
1139 };
1140
1141
1142 /*************************************************
1143 * Alternate character tables *
1144 *************************************************/
1145
1146 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1147 using the default tables of the library. However, the T option can be used to
1148 select alternate sets of tables, for different kinds of testing. Note also that
1149 the L (locale) option also adjusts the tables. */
1150
1151 /* This is the set of tables distributed as default with PCRE. It recognizes
1152 only ASCII characters. */
1153
1154 static const pcre_uint8 tables0[] = {
1155
1156 /* This table is a lower casing table. */
1157
1158 0, 1, 2, 3, 4, 5, 6, 7,
1159 8, 9, 10, 11, 12, 13, 14, 15,
1160 16, 17, 18, 19, 20, 21, 22, 23,
1161 24, 25, 26, 27, 28, 29, 30, 31,
1162 32, 33, 34, 35, 36, 37, 38, 39,
1163 40, 41, 42, 43, 44, 45, 46, 47,
1164 48, 49, 50, 51, 52, 53, 54, 55,
1165 56, 57, 58, 59, 60, 61, 62, 63,
1166 64, 97, 98, 99,100,101,102,103,
1167 104,105,106,107,108,109,110,111,
1168 112,113,114,115,116,117,118,119,
1169 120,121,122, 91, 92, 93, 94, 95,
1170 96, 97, 98, 99,100,101,102,103,
1171 104,105,106,107,108,109,110,111,
1172 112,113,114,115,116,117,118,119,
1173 120,121,122,123,124,125,126,127,
1174 128,129,130,131,132,133,134,135,
1175 136,137,138,139,140,141,142,143,
1176 144,145,146,147,148,149,150,151,
1177 152,153,154,155,156,157,158,159,
1178 160,161,162,163,164,165,166,167,
1179 168,169,170,171,172,173,174,175,
1180 176,177,178,179,180,181,182,183,
1181 184,185,186,187,188,189,190,191,
1182 192,193,194,195,196,197,198,199,
1183 200,201,202,203,204,205,206,207,
1184 208,209,210,211,212,213,214,215,
1185 216,217,218,219,220,221,222,223,
1186 224,225,226,227,228,229,230,231,
1187 232,233,234,235,236,237,238,239,
1188 240,241,242,243,244,245,246,247,
1189 248,249,250,251,252,253,254,255,
1190
1191 /* This table is a case flipping table. */
1192
1193 0, 1, 2, 3, 4, 5, 6, 7,
1194 8, 9, 10, 11, 12, 13, 14, 15,
1195 16, 17, 18, 19, 20, 21, 22, 23,
1196 24, 25, 26, 27, 28, 29, 30, 31,
1197 32, 33, 34, 35, 36, 37, 38, 39,
1198 40, 41, 42, 43, 44, 45, 46, 47,
1199 48, 49, 50, 51, 52, 53, 54, 55,
1200 56, 57, 58, 59, 60, 61, 62, 63,
1201 64, 97, 98, 99,100,101,102,103,
1202 104,105,106,107,108,109,110,111,
1203 112,113,114,115,116,117,118,119,
1204 120,121,122, 91, 92, 93, 94, 95,
1205 96, 65, 66, 67, 68, 69, 70, 71,
1206 72, 73, 74, 75, 76, 77, 78, 79,
1207 80, 81, 82, 83, 84, 85, 86, 87,
1208 88, 89, 90,123,124,125,126,127,
1209 128,129,130,131,132,133,134,135,
1210 136,137,138,139,140,141,142,143,
1211 144,145,146,147,148,149,150,151,
1212 152,153,154,155,156,157,158,159,
1213 160,161,162,163,164,165,166,167,
1214 168,169,170,171,172,173,174,175,
1215 176,177,178,179,180,181,182,183,
1216 184,185,186,187,188,189,190,191,
1217 192,193,194,195,196,197,198,199,
1218 200,201,202,203,204,205,206,207,
1219 208,209,210,211,212,213,214,215,
1220 216,217,218,219,220,221,222,223,
1221 224,225,226,227,228,229,230,231,
1222 232,233,234,235,236,237,238,239,
1223 240,241,242,243,244,245,246,247,
1224 248,249,250,251,252,253,254,255,
1225
1226 /* This table contains bit maps for various character classes. Each map is 32
1227 bytes long and the bits run from the least significant end of each byte. The
1228 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1229 graph, print, punct, and cntrl. Other classes are built from combinations. */
1230
1231 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1232 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1233 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1234 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235
1236 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1237 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1238 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1239 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1240
1241 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1243 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1244 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245
1246 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1247 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1248 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1249 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1250
1251 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1252 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1253 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1255
1256 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1257 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1258 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260
1261 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1262 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1263 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1264 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1265
1266 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1267 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1268 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270
1271 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1272 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1275
1276 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1277 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1278 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1280
1281 /* This table identifies various classes of character by individual bits:
1282 0x01 white space character
1283 0x02 letter
1284 0x04 decimal digit
1285 0x08 hexadecimal digit
1286 0x10 alphanumeric or '_'
1287 0x80 regular expression metacharacter or binary zero
1288 */
1289
1290 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1291 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1292 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1293 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1294 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1295 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1296 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1297 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1298 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1299 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1300 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1301 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1302 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1303 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1304 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1305 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1311 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1316 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1318 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1321 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1322
1323 /* This is a set of tables that came originally from a Windows user. It seems
1324 to be at least an approximation of ISO 8859. In particular, there are
1325 characters greater than 128 that are marked as spaces, letters, etc. */
1326
1327 static const pcre_uint8 tables1[] = {
1328 0,1,2,3,4,5,6,7,
1329 8,9,10,11,12,13,14,15,
1330 16,17,18,19,20,21,22,23,
1331 24,25,26,27,28,29,30,31,
1332 32,33,34,35,36,37,38,39,
1333 40,41,42,43,44,45,46,47,
1334 48,49,50,51,52,53,54,55,
1335 56,57,58,59,60,61,62,63,
1336 64,97,98,99,100,101,102,103,
1337 104,105,106,107,108,109,110,111,
1338 112,113,114,115,116,117,118,119,
1339 120,121,122,91,92,93,94,95,
1340 96,97,98,99,100,101,102,103,
1341 104,105,106,107,108,109,110,111,
1342 112,113,114,115,116,117,118,119,
1343 120,121,122,123,124,125,126,127,
1344 128,129,130,131,132,133,134,135,
1345 136,137,138,139,140,141,142,143,
1346 144,145,146,147,148,149,150,151,
1347 152,153,154,155,156,157,158,159,
1348 160,161,162,163,164,165,166,167,
1349 168,169,170,171,172,173,174,175,
1350 176,177,178,179,180,181,182,183,
1351 184,185,186,187,188,189,190,191,
1352 224,225,226,227,228,229,230,231,
1353 232,233,234,235,236,237,238,239,
1354 240,241,242,243,244,245,246,215,
1355 248,249,250,251,252,253,254,223,
1356 224,225,226,227,228,229,230,231,
1357 232,233,234,235,236,237,238,239,
1358 240,241,242,243,244,245,246,247,
1359 248,249,250,251,252,253,254,255,
1360 0,1,2,3,4,5,6,7,
1361 8,9,10,11,12,13,14,15,
1362 16,17,18,19,20,21,22,23,
1363 24,25,26,27,28,29,30,31,
1364 32,33,34,35,36,37,38,39,
1365 40,41,42,43,44,45,46,47,
1366 48,49,50,51,52,53,54,55,
1367 56,57,58,59,60,61,62,63,
1368 64,97,98,99,100,101,102,103,
1369 104,105,106,107,108,109,110,111,
1370 112,113,114,115,116,117,118,119,
1371 120,121,122,91,92,93,94,95,
1372 96,65,66,67,68,69,70,71,
1373 72,73,74,75,76,77,78,79,
1374 80,81,82,83,84,85,86,87,
1375 88,89,90,123,124,125,126,127,
1376 128,129,130,131,132,133,134,135,
1377 136,137,138,139,140,141,142,143,
1378 144,145,146,147,148,149,150,151,
1379 152,153,154,155,156,157,158,159,
1380 160,161,162,163,164,165,166,167,
1381 168,169,170,171,172,173,174,175,
1382 176,177,178,179,180,181,182,183,
1383 184,185,186,187,188,189,190,191,
1384 224,225,226,227,228,229,230,231,
1385 232,233,234,235,236,237,238,239,
1386 240,241,242,243,244,245,246,215,
1387 248,249,250,251,252,253,254,223,
1388 192,193,194,195,196,197,198,199,
1389 200,201,202,203,204,205,206,207,
1390 208,209,210,211,212,213,214,247,
1391 216,217,218,219,220,221,222,255,
1392 0,62,0,0,1,0,0,0,
1393 0,0,0,0,0,0,0,0,
1394 32,0,0,0,1,0,0,0,
1395 0,0,0,0,0,0,0,0,
1396 0,0,0,0,0,0,255,3,
1397 126,0,0,0,126,0,0,0,
1398 0,0,0,0,0,0,0,0,
1399 0,0,0,0,0,0,0,0,
1400 0,0,0,0,0,0,255,3,
1401 0,0,0,0,0,0,0,0,
1402 0,0,0,0,0,0,12,2,
1403 0,0,0,0,0,0,0,0,
1404 0,0,0,0,0,0,0,0,
1405 254,255,255,7,0,0,0,0,
1406 0,0,0,0,0,0,0,0,
1407 255,255,127,127,0,0,0,0,
1408 0,0,0,0,0,0,0,0,
1409 0,0,0,0,254,255,255,7,
1410 0,0,0,0,0,4,32,4,
1411 0,0,0,128,255,255,127,255,
1412 0,0,0,0,0,0,255,3,
1413 254,255,255,135,254,255,255,7,
1414 0,0,0,0,0,4,44,6,
1415 255,255,127,255,255,255,127,255,
1416 0,0,0,0,254,255,255,255,
1417 255,255,255,255,255,255,255,127,
1418 0,0,0,0,254,255,255,255,
1419 255,255,255,255,255,255,255,255,
1420 0,2,0,0,255,255,255,255,
1421 255,255,255,255,255,255,255,127,
1422 0,0,0,0,255,255,255,255,
1423 255,255,255,255,255,255,255,255,
1424 0,0,0,0,254,255,0,252,
1425 1,0,0,248,1,0,0,120,
1426 0,0,0,0,254,255,255,255,
1427 0,0,128,0,0,0,128,0,
1428 255,255,255,255,0,0,0,0,
1429 0,0,0,0,0,0,0,128,
1430 255,255,255,255,0,0,0,0,
1431 0,0,0,0,0,0,0,0,
1432 128,0,0,0,0,0,0,0,
1433 0,1,1,0,1,1,0,0,
1434 0,0,0,0,0,0,0,0,
1435 0,0,0,0,0,0,0,0,
1436 1,0,0,0,128,0,0,0,
1437 128,128,128,128,0,0,128,0,
1438 28,28,28,28,28,28,28,28,
1439 28,28,0,0,0,0,0,128,
1440 0,26,26,26,26,26,26,18,
1441 18,18,18,18,18,18,18,18,
1442 18,18,18,18,18,18,18,18,
1443 18,18,18,128,128,0,128,16,
1444 0,26,26,26,26,26,26,18,
1445 18,18,18,18,18,18,18,18,
1446 18,18,18,18,18,18,18,18,
1447 18,18,18,128,128,0,0,0,
1448 0,0,0,0,0,1,0,0,
1449 0,0,0,0,0,0,0,0,
1450 0,0,0,0,0,0,0,0,
1451 0,0,0,0,0,0,0,0,
1452 1,0,0,0,0,0,0,0,
1453 0,0,18,0,0,0,0,0,
1454 0,0,20,20,0,18,0,0,
1455 0,20,18,0,0,0,0,0,
1456 18,18,18,18,18,18,18,18,
1457 18,18,18,18,18,18,18,18,
1458 18,18,18,18,18,18,18,0,
1459 18,18,18,18,18,18,18,18,
1460 18,18,18,18,18,18,18,18,
1461 18,18,18,18,18,18,18,18,
1462 18,18,18,18,18,18,18,0,
1463 18,18,18,18,18,18,18,18
1464 };
1465
1466
1467
1468
1469 #ifndef HAVE_STRERROR
1470 /*************************************************
1471 * Provide strerror() for non-ANSI libraries *
1472 *************************************************/
1473
1474 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1475 in their libraries, but can provide the same facility by this simple
1476 alternative function. */
1477
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): looks n up in the system's sys_errlist[] table,
which has sys_nerr entries. A number outside that table yields a fixed
"unknown" message. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1487 #endif /* HAVE_STRERROR */
1488
1489
1490
1491 /*************************************************
1492 * Print newline configuration *
1493 *************************************************/
1494
1495 /*
1496 Arguments:
1497 rc the return code from PCRE_CONFIG_NEWLINE
1498 isc TRUE if called from "-C newline"
1499 Returns: nothing
1500 */
1501
1502 static void
1503 print_newline_config(int rc, BOOL isc)
1504 {
1505 const char *s = NULL;
1506 if (!isc) printf(" Newline sequence is ");
1507 switch(rc)
1508 {
1509 case CHAR_CR: s = "CR"; break;
1510 case CHAR_LF: s = "LF"; break;
1511 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1512 case -1: s = "ANY"; break;
1513 case -2: s = "ANYCRLF"; break;
1514
1515 default:
1516 printf("a non-standard value: 0x%04x\n", rc);
1517 return;
1518 }
1519
1520 printf("%s\n", s);
1521 }
1522
1523
1524
1525 /*************************************************
1526 * JIT memory callback *
1527 *************************************************/
1528
1529 static pcre_jit_stack* jit_callback(void *arg)
1530 {
1531 jit_was_used = TRUE;
1532 return (pcre_jit_stack *)arg;
1533 }
1534
1535
1536 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1537 /*************************************************
1538 * Convert UTF-8 string to value *
1539 *************************************************/
1540
1541 /* This function takes one or more bytes that represents a UTF-8 character,
1542 and returns the value of the character.
1543
1544 Argument:
1545 utf8bytes a pointer to the byte vector
1546 vptr a pointer to an int to receive the value
1547
1548 Returns: > 0 => the number of bytes consumed
1549 -6 to 0 => malformed UTF-8 character at offset = (-return)
1550 */
1551
1552 static int
1553 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1554 {
1555 pcre_uint32 c = *utf8bytes++;
1556 pcre_uint32 d = c;
1557 int i, j, s;
1558
1559 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1560 {
1561 if ((d & 0x80) == 0) break;
1562 d <<= 1;
1563 }
1564
1565 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1566 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1567
1568 /* i now has a value in the range 1-5 */
1569
1570 s = 6*i;
1571 d = (c & utf8_table3[i]) << s;
1572
1573 for (j = 0; j < i; j++)
1574 {
1575 c = *utf8bytes++;
1576 if ((c & 0xc0) != 0x80) return -(j+1);
1577 s -= 6;
1578 d |= (c & 0x3f) << s;
1579 }
1580
1581 /* Check that encoding was the correct unique one */
1582
1583 for (j = 0; j < utf8_table1_size; j++)
1584 if (d <= (pcre_uint32)utf8_table1[j]) break;
1585 if (j != i) return -(i+1);
1586
1587 /* Valid value */
1588
1589 *vptr = d;
1590 return i+1;
1591 }
1592 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1593
1594
1595
1596 #if defined SUPPORT_PCRE8 && !defined NOUTF
1597 /*************************************************
1598 * Convert character value to UTF-8 *
1599 *************************************************/
1600
1601 /* This function takes an integer value in the range 0 - 0x7fffffff
1602 and encodes it as a UTF-8 character in 0 to 6 bytes.
1603
1604 Arguments:
1605 cvalue the character value
1606 utf8bytes pointer to buffer for result - at least 6 bytes long
1607
1608 Returns: number of characters placed in the buffer
1609 */
1610
1611 static int
1612 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1613 {
1614 register int i, j;
1615 if (cvalue > 0x7fffffffu)
1616 return -1;
1617 for (i = 0; i < utf8_table1_size; i++)
1618 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1619 utf8bytes += i;
1620 for (j = i; j > 0; j--)
1621 {
1622 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1623 cvalue >>= 6;
1624 }
1625 *utf8bytes = utf8_table2[i] | cvalue;
1626 return i + 1;
1627 }
1628 #endif
1629
1630
1631 #ifdef SUPPORT_PCRE16
1632 /*************************************************
1633 * Convert a string to 16-bit *
1634 *************************************************/
1635
1636 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1637 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1638 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1639 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1640 result is always left in buffer16.
1641
1642 Note that this function does not object to surrogate values. This is
1643 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1644 for the purpose of testing that they are correctly faulted.
1645
1646 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1647 in UTF-8 so that values greater than 255 can be handled.
1648
1649 Arguments:
1650 data TRUE if converting a data line; FALSE for a regex
1651 p points to a byte string
1652 utf true if UTF-8 (to be converted to UTF-16)
1653 len number of bytes in the string (excluding trailing zero)
1654
1655 Returns: number of 16-bit data items used (excluding trailing zero)
1656 OR -1 if a UTF-8 string is malformed
1657 OR -2 if a value > 0x10ffff is encountered
1658 OR -3 if a value > 0xffff is encountered when not in UTF mode
1659 */
1660
1661 static int
1662 to16(int data, pcre_uint8 *p, int utf, int len)
1663 {
1664 pcre_uint16 *pp;
1665
1666 if (buffer16_size < 2*len + 2)
1667 {
1668 if (buffer16 != NULL) free(buffer16);
1669 buffer16_size = 2*len + 2;
1670 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1671 if (buffer16 == NULL)
1672 {
1673 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1674 exit(1);
1675 }
1676 }
1677
1678 pp = buffer16;
1679
1680 if (!utf && !data)
1681 {
1682 while (len-- > 0) *pp++ = *p++;
1683 }
1684
1685 else
1686 {
1687 pcre_uint32 c = 0;
1688 while (len > 0)
1689 {
1690 int chlen = utf82ord(p, &c);
1691 if (chlen <= 0) return -1;
1692 if (c > 0x10ffff) return -2;
1693 p += chlen;
1694 len -= chlen;
1695 if (c < 0x10000) *pp++ = c; else
1696 {
1697 if (!utf) return -3;
1698 c -= 0x10000;
1699 *pp++ = 0xD800 | (c >> 10);
1700 *pp++ = 0xDC00 | (c & 0x3ff);
1701 }
1702 }
1703 }
1704
1705 *pp = 0;
1706 return pp - buffer16;
1707 }
1708 #endif
1709
1710 #ifdef SUPPORT_PCRE32
1711 /*************************************************
1712 * Convert a string to 32-bit *
1713 *************************************************/
1714
1715 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1716 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1717 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1718 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1719 result is always left in buffer32.
1720
1721 Note that for data lines this function does not object to surrogate values.
1722 This is deliberate; it makes it possible to construct UTF-32 strings that are
1723 invalid, for the purpose of testing that they are correctly faulted. In UTF mode, a surrogate in a pattern (not a data line) is rejected with -3.
1724
1725 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1726 in UTF-8 so that values greater than 255 can be handled.
1727
1728 Arguments:
1729 data TRUE if converting a data line; FALSE for a regex
1730 p points to a byte string
1731 utf true if UTF-8 (to be converted to UTF-32)
1732 len number of bytes in the string (excluding trailing zero)
1733
1734 Returns: number of 32-bit data items used (excluding trailing zero)
1735 OR -1 if a UTF-8 string is malformed
1736 OR -2 if a value > 0x10ffff is encountered
1737 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1738 */
1739
1740 static int
1741 to32(int data, pcre_uint8 *p, int utf, int len)
1742 {
1743 pcre_uint32 *pp;
1744
1745 if (buffer32_size < 4*len + 4)
1746 {
1747 if (buffer32 != NULL) free(buffer32);
1748 buffer32_size = 4*len + 4;
1749 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1750 if (buffer32 == NULL)
1751 {
1752 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1753 exit(1);
1754 }
1755 }
1756
1757 pp = buffer32;
1758
1759 if (!utf && !data)
1760 {
1761 while (len-- > 0) *pp++ = *p++;
1762 }
1763
1764 else
1765 {
1766 pcre_uint32 c = 0;
1767 while (len > 0)
1768 {
1769 int chlen = utf82ord(p, &c);
1770 if (chlen <= 0) return -1;
1771 if (utf)
1772 {
1773 if (c > 0x10ffff) return -2;
1774 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1775 }
1776
1777 p += chlen;
1778 len -= chlen;
1779 *pp++ = c;
1780 }
1781 }
1782
1783 *pp = 0;
1784 return pp - buffer32;
1785 }
1786
1787 /* Check that a 32-bit character string is valid UTF-32.
1788
1789 Arguments:
1790 string points to the string
1791 length length of string, or -1 if the string is zero-terminated
1792
1793 Returns: TRUE if the string is a valid UTF-32 string
1794 FALSE otherwise
1795 */
1796
1797 #ifdef NEVER /* Not used */
1798 #ifdef SUPPORT_UTF
1799 static BOOL
1800 valid_utf32(pcre_uint32 *string, int length)
1801 {
1802 register pcre_uint32 *p;
1803 register pcre_uint32 c;
1804
1805 for (p = string; length-- > 0; p++)
1806 {
1807 c = *p;
1808 if (c > 0x10ffffu) return FALSE; /* Too big */
1809 if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1810 }
1811
1812 return TRUE;
1813 }
1814 #endif /* SUPPORT_UTF */
1815 #endif /* NEVER */
1816 #endif /* SUPPORT_PCRE32 */
1817
1818
1819 /*************************************************
1820 * Read or extend an input line *
1821 *************************************************/
1822
1823 /* Input lines are read into buffer, but both patterns and data lines can be
1824 continued over multiple input lines. In addition, if the buffer fills up, we
1825 want to automatically expand it so as to be able to handle extremely large
1826 lines that are needed for certain stress tests. When the input buffer is
1827 expanded, the other two buffers must also be expanded likewise, and the
1828 contents of pbuffer, which are a copy of the input for callouts, must be
1829 preserved (for when expansion happens for a data line). This is not the most
1830 optimal way of handling this, but hey, this is just a test program!
1831
1832 Arguments:
1833 f the file to read
1834 start where in buffer to start (this *must* be within buffer)
1835 prompt for stdin or readline()
1836
1837 Returns: pointer to the start of new data
1838 could be a copy of start, or could be moved
1839 NULL if no data read and EOF reached
1840 */
1841
1842 static pcre_uint8 *
1843 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1844 {
1845 pcre_uint8 *here = start;
1846
1847 for (;;)
1848 {
1849 size_t rlen = (size_t)(buffer_size - (here - buffer));
1850
1851 if (rlen > 1000)
1852 {
1853 int dlen;
1854
1855 /* If libreadline or libedit support is required, use readline() to read a
1856 line if the input is a terminal. Note that readline() removes the trailing
1857 newline, so we must put it back again, to be compatible with fgets(). */
1858
1859 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1860 if (isatty(fileno(f)))
1861 {
1862 size_t len;
1863 char *s = readline(prompt);
1864 if (s == NULL) return (here == start)? NULL : start;
1865 len = strlen(s);
1866 if (len > 0) add_history(s);
1867 if (len > rlen - 1) len = rlen - 1;
1868 memcpy(here, s, len);
1869 here[len] = '\n';
1870 here[len+1] = 0;
1871 free(s);
1872 }
1873 else
1874 #endif
1875
1876 /* Read the next line by normal means, prompting if the file is stdin. */
1877
1878 {
1879 if (f == stdin) printf("%s", prompt);
1880 if (fgets((char *)here, rlen, f) == NULL)
1881 return (here == start)? NULL : start;
1882 }
1883
1884 dlen = (int)strlen((char *)here);
1885 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1886 here += dlen;
1887 }
1888
1889 else
1890 {
1891 int new_buffer_size = 2*buffer_size;
1892 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1893 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1894
1895 if (new_buffer == NULL || new_pbuffer == NULL)
1896 {
1897 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1898 exit(1);
1899 }
1900
1901 memcpy(new_buffer, buffer, buffer_size);
1902 memcpy(new_pbuffer, pbuffer, buffer_size);
1903
1904 buffer_size = new_buffer_size;
1905
1906 start = new_buffer + (start - buffer);
1907 here = new_buffer + (here - buffer);
1908
1909 free(buffer);
1910 free(pbuffer);
1911
1912 buffer = new_buffer;
1913 pbuffer = new_pbuffer;
1914 }
1915 }
1916
1917 /* Control never gets here */
1918 }
1919
1920
1921
1922 /*************************************************
1923 * Read number from string *
1924 *************************************************/
1925
1926 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1927 around with conditional compilation, just do the job by hand. It is only used
1928 for unpicking arguments, so just keep it simple.
1929
1930 Arguments:
1931 str string to be converted
1932 endptr where to put the end pointer
1933
1934 Returns: the value, as an int
1935 */
1936
1937 static int
1938 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1939 {
1940 int result = 0;
1941 while(*str != 0 && isspace(*str)) str++;
1942 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1943 *endptr = str;
1944 return(result);
1945 }
1946
1947
1948
1949 /*************************************************
1950 * Print one character *
1951 *************************************************/
1952
1953 /* Print a single character either literally, or as a hex escape. */
1954
1955 static int pchar(pcre_uint32 c, FILE *f)
1956 {
1957 int n = 0;
1958 if (PRINTOK(c))
1959 {
1960 if (f != NULL) fprintf(f, "%c", c);
1961 return 1;
1962 }
1963
1964 if (c < 0x100)
1965 {
1966 if (use_utf)
1967 {
1968 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1969 return 6;
1970 }
1971 else
1972 {
1973 if (f != NULL) fprintf(f, "\\x%02x", c);
1974 return 4;
1975 }
1976 }
1977
1978 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1979 return n >= 0 ? n : 0;
1980 }
1981
1982
1983
1984 #ifdef SUPPORT_PCRE8
1985 /*************************************************
1986 * Print 8-bit character string *
1987 *************************************************/
1988
1989 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1990 If handed a NULL file, just counts chars without printing. */
1991
1992 static int pchars(pcre_uint8 *p, int length, FILE *f)
1993 {
1994 pcre_uint32 c = 0;
1995 int yield = 0;
1996
1997 if (length < 0)
1998 length = strlen((char *)p);
1999
2000 while (length-- > 0)
2001 {
2002 #if !defined NOUTF
2003 if (use_utf)
2004 {
2005 int rc = utf82ord(p, &c);
2006 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2007 {
2008 length -= rc - 1;
2009 p += rc;
2010 yield += pchar(c, f);
2011 continue;
2012 }
2013 }
2014 #endif
2015 c = *p++;
2016 yield += pchar(c, f);
2017 }
2018
2019 return yield;
2020 }
2021 #endif
2022
2023
2024
2025 #ifdef SUPPORT_PCRE16
2026 /*************************************************
2027 * Find length of 0-terminated 16-bit string *
2028 *************************************************/
2029
2030 static int strlen16(PCRE_SPTR16 p)
2031 {
2032 PCRE_SPTR16 pp = p;
2033 while (*pp != 0) pp++;
2034 return (int)(pp - p);
2035 }
2036 #endif /* SUPPORT_PCRE16 */
2037
2038
2039
2040 #ifdef SUPPORT_PCRE32
2041 /*************************************************
2042 * Find length of 0-terminated 32-bit string *
2043 *************************************************/
2044
2045 static int strlen32(PCRE_SPTR32 p)
2046 {
2047 PCRE_SPTR32 pp = p;
2048 while (*pp != 0) pp++;
2049 return (int)(pp - p);
2050 }
2051 #endif /* SUPPORT_PCRE32 */
2052
2053
2054
2055 #ifdef SUPPORT_PCRE16
2056 /*************************************************
2057 * Print 16-bit character string *
2058 *************************************************/
2059
2060 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2061 If handed a NULL file, just counts chars without printing. */
2062
2063 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2064 {
2065 int yield = 0;
2066
2067 if (length < 0)
2068 length = strlen16(p);
2069
2070 while (length-- > 0)
2071 {
2072 pcre_uint32 c = *p++ & 0xffff;
2073 #if !defined NOUTF
2074 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2075 {
2076 int d = *p & 0xffff;
2077 if (d >= 0xDC00 && d <= 0xDFFF)
2078 {
2079 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2080 length--;
2081 p++;
2082 }
2083 }
2084 #endif
2085 yield += pchar(c, f);
2086 }
2087
2088 return yield;
2089 }
2090 #endif /* SUPPORT_PCRE16 */
2091
2092
2093
2094 #ifdef SUPPORT_PCRE32
2095 /*************************************************
2096 * Print 32-bit character string *
2097 *************************************************/
2098
2099 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2100 If handed a NULL file, just counts chars without printing. */
2101
2102 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2103 {
2104 int yield = 0;
2105
2106 (void)(utf); /* Avoid compiler warning */
2107
2108 if (length < 0)
2109 length = strlen32(p);
2110
2111 while (length-- > 0)
2112 {
2113 pcre_uint32 c = *p++;
2114 yield += pchar(c, f);
2115 }
2116
2117 return yield;
2118 }
2119 #endif /* SUPPORT_PCRE32 */
2120
2121
2122
2123 #ifdef SUPPORT_PCRE8
2124 /*************************************************
2125 * Read a capture name (8-bit) and check it *
2126 *************************************************/
2127
2128 static pcre_uint8 *
2129 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2130 {
2131 pcre_uint8 *npp = *pp;
2132 while (isalnum(*p)) *npp++ = *p++;
2133 *npp++ = 0;
2134 *npp = 0;
2135 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2136 {
2137 fprintf(outfile, "no parentheses with name \"");
2138 PCHARSV(*pp, 0, -1, outfile);
2139 fprintf(outfile, "\"\n");
2140 }
2141
2142 *pp = npp;
2143 return p;
2144 }
2145 #endif /* SUPPORT_PCRE8 */
2146
2147
2148
2149 #ifdef SUPPORT_PCRE16
2150 /*************************************************
2151 * Read a capture name (16-bit) and check it *
2152 *************************************************/
2153
2154 /* Note that the text being read is 8-bit. */
2155
2156 static pcre_uint8 *
2157 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2158 {
2159 pcre_uint16 *npp = *pp;
2160 while (isalnum(*p)) *npp++ = *p++;
2161 *npp++ = 0;
2162 *npp = 0;
2163 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2164 {
2165 fprintf(outfile, "no parentheses with name \"");
2166 PCHARSV(*pp, 0, -1, outfile);
2167 fprintf(outfile, "\"\n");
2168 }
2169 *pp = npp;
2170 return p;
2171 }
2172 #endif /* SUPPORT_PCRE16 */
2173
2174
2175
2176 #ifdef SUPPORT_PCRE32
2177 /*************************************************
2178 * Read a capture name (32-bit) and check it *
2179 *************************************************/
2180
2181 /* Note that the text being read is 8-bit. */
2182
2183 static pcre_uint8 *
2184 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2185 {
2186 pcre_uint32 *npp = *pp;
2187 while (isalnum(*p)) *npp++ = *p++;
2188 *npp++ = 0;
2189 *npp = 0;
2190 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2191 {
2192 fprintf(outfile, "no parentheses with name \"");
2193 PCHARSV(*pp, 0, -1, outfile);
2194 fprintf(outfile, "\"\n");
2195 }
2196 *pp = npp;
2197 return p;
2198 }
2199 #endif /* SUPPORT_PCRE32 */
2200
2201
2202
2203 /*************************************************
2204 * Callout function *
2205 *************************************************/
2206
2207 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2208 the match. Yield zero unless more callouts than the fail count, or the callout
2209 data is not zero. */
2210
2211 static int callout(pcre_callout_block *cb)
2212 {
2213 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2214 int i, pre_start, post_start, subject_length;
2215
2216 if (callout_extra)
2217 {
2218 fprintf(f, "Callout %d: last capture = %d\n",
2219 cb->callout_number, cb->capture_last);
2220
2221 for (i = 0; i < cb->capture_top * 2; i += 2)
2222 {
2223 if (cb->offset_vector[i] < 0)
2224 fprintf(f, "%2d: <unset>\n", i/2);
2225 else
2226 {
2227 fprintf(f, "%2d: ", i/2);
2228 PCHARSV(cb->subject, cb->offset_vector[i],
2229 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2230 fprintf(f, "\n");
2231 }
2232 }
2233 }
2234
2235 /* Re-print the subject in canonical form, the first time or if giving full
2236 datails. On subsequent calls in the same match, we use pchars just to find the
2237 printed lengths of the substrings. */
2238
2239 if (f != NULL) fprintf(f, "--->");
2240
2241 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2242 PCHARS(post_start, cb->subject, cb->start_match,
2243 cb->current_position - cb->start_match, f);
2244
2245 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2246
2247 PCHARSV(cb->subject, cb->current_position,
2248 cb->subject_length - cb->current_position, f);
2249
2250 if (f != NULL) fprintf(f, "\n");
2251
2252 /* Always print appropriate indicators, with callout number if not already
2253 shown. For automatic callouts, show the pattern offset. */
2254
2255 if (cb->callout_number == 255)
2256 {
2257 fprintf(outfile, "%+3d ", cb->pattern_position);
2258 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2259 }
2260 else
2261 {
2262 if (callout_extra) fprintf(outfile, " ");
2263 else fprintf(outfile, "%3d ", cb->callout_number);
2264 }
2265
2266 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2267 fprintf(outfile, "^");
2268
2269 if (post_start > 0)
2270 {
2271 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2272 fprintf(outfile, "^");
2273 }
2274
2275 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2276 fprintf(outfile, " ");
2277
2278 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2279 pbuffer + cb->pattern_position);
2280
2281 fprintf(outfile, "\n");
2282 first_callout = 0;
2283
2284 if (cb->mark != last_callout_mark)
2285 {
2286 if (cb->mark == NULL)
2287 fprintf(outfile, "Latest Mark: <unset>\n");
2288 else
2289 {
2290 fprintf(outfile, "Latest Mark: ");
2291 PCHARSV(cb->mark, 0, -1, outfile);
2292 putc('\n', outfile);
2293 }
2294 last_callout_mark = cb->mark;
2295 }
2296
2297 if (cb->callout_data != NULL)
2298 {
2299 int callout_data = *((int *)(cb->callout_data));
2300 if (callout_data != 0)
2301 {
2302 fprintf(outfile, "Callout data = %d\n", callout_data);
2303 return callout_data;
2304 }
2305 }
2306
2307 return (cb->callout_number != callout_fail_id)? 0 :
2308 (++callout_count >= callout_fail_count)? 1 : 0;
2309 }
2310
2311
2312 /*************************************************
2313 * Local malloc functions *
2314 *************************************************/
2315
2316 /* Alternative malloc function, to test functionality and save the size of a
2317 compiled re, which is the first store request that pcre_compile() makes. The
2318 show_malloc variable is set only during matching. */
2319
2320 static void *new_malloc(size_t size)
2321 {
2322 void *block = malloc(size);
2323 if (show_malloc)
2324 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2325 return block;
2326 }
2327
2328 static void new_free(void *block)
2329 {
2330 if (show_malloc)
2331 fprintf(outfile, "free %p\n", block);
2332 free(block);
2333 }
2334
2335 /* For recursion malloc/free, to test stacking calls */
2336
2337 static void *stack_malloc(size_t size)
2338 {
2339 void *block = malloc(size);
2340 if (show_malloc)
2341 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2342 return block;
2343 }
2344
2345 static void stack_free(void *block)
2346 {
2347 if (show_malloc)
2348 fprintf(outfile, "stack_free %p\n", block);
2349 free(block);
2350 }
2351
2352
2353 /*************************************************
2354 * Call pcre_fullinfo() *
2355 *************************************************/
2356
2357 /* Get one piece of information from the pcre_fullinfo() function. When only
2358 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2359 value, but the code is defensive.
2360
2361 Arguments:
2362 re compiled regex
2363 study study data
2364 option PCRE_INFO_xxx option
2365 ptr where to put the data
2366
2367 Returns: 0 when OK, < 0 on error
2368 */
2369
2370 static int
2371 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2372 {
2373 int rc;
2374
2375 if (pcre_mode == PCRE32_MODE)
2376 #ifdef SUPPORT_PCRE32
2377 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2378 #else
2379 rc = PCRE_ERROR_BADMODE;
2380 #endif
2381 else if (pcre_mode == PCRE16_MODE)
2382 #ifdef SUPPORT_PCRE16
2383 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2384 #else
2385 rc = PCRE_ERROR_BADMODE;
2386 #endif
2387 else
2388 #ifdef SUPPORT_PCRE8
2389 rc = pcre_fullinfo(re, study, option, ptr);
2390 #else
2391 rc = PCRE_ERROR_BADMODE;
2392 #endif
2393
2394 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2395 {
2396 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2397 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2398 if (rc == PCRE_ERROR_BADMODE)
2399 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2400 "%d-bit mode\n", 8 * CHAR_SIZE,
2401 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2402 }
2403
2404 return rc;
2405 }
2406
2407
2408
2409 /*************************************************
2410 * Swap byte functions *
2411 *************************************************/
2412
2413 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2414 value, respectively.
2415
2416 Arguments:
2417 value any number
2418
2419 Returns: the byte swapped value
2420 */
2421
2422 static pcre_uint32
2423 swap_uint32(pcre_uint32 value)
2424 {
2425 return ((value & 0x000000ff) << 24) |
2426 ((value & 0x0000ff00) << 8) |
2427 ((value & 0x00ff0000) >> 8) |
2428 (value >> 24);
2429 }
2430
2431 static pcre_uint16
2432 swap_uint16(pcre_uint16 value)
2433 {
2434 return (value >> 8) | (value << 8);
2435 }
2436
2437
2438
2439 /*************************************************
2440 * Flip bytes in a compiled pattern *
2441 *************************************************/
2442
2443 /* This function is called if the 'F' option was present on a pattern that is
2444 to be written to a file. We flip the bytes of all the integer fields in the
2445 regex data block and the study block. In 16-bit mode this also flips relevant
2446 bytes in the pattern itself. This is to make it possible to test PCRE's
2447 ability to reload byte-flipped patterns, e.g. those compiled on a different
2448 architecture. */
2449
2450 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2451 static void
2452 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2453 {
2454 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2455 #ifdef SUPPORT_PCRE16
2456 int op;
2457 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2458 int length = re->name_count * re->name_entry_size;
2459 #ifdef SUPPORT_UTF
2460 BOOL utf = (re->options & PCRE_UTF16) != 0;
2461 BOOL utf16_char = FALSE;
2462 #endif /* SUPPORT_UTF */
2463 #endif /* SUPPORT_PCRE16 */
2464
2465 /* Always flip the bytes in the main data block and study blocks. */
2466
2467 re->magic_number = REVERSED_MAGIC_NUMBER;
2468 re->size = swap_uint32(re->size);
2469 re->options = swap_uint32(re->options);
2470 re->flags = swap_uint32(re->flags);
2471 re->limit_match = swap_uint32(re->limit_match);
2472 re->limit_recursion = swap_uint32(re->limit_recursion);
2473 re->first_char = swap_uint16(re->first_char);
2474 re->req_char = swap_uint16(re->req_char);
2475 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2476 re->top_bracket = swap_uint16(re->top_bracket);
2477 re->top_backref = swap_uint16(re->top_backref);
2478 re->name_table_offset = swap_uint16(re->name_table_offset);
2479 re->name_entry_size = swap_uint16(re->name_entry_size);
2480 re->name_count = swap_uint16(re->name_count);
2481 re->ref_count = swap_uint16(re->ref_count);
2482
2483 if (extra != NULL)
2484 {
2485 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2486 rsd->size = swap_uint32(rsd->size);
2487 rsd->flags = swap_uint32(rsd->flags);
2488 rsd->minlength = swap_uint32(rsd->minlength);
2489 }
2490
2491 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2492 in the name table, if present, and then in the pattern itself. */
2493
2494 #ifdef SUPPORT_PCRE16
2495 if (pcre_mode != PCRE16_MODE) return;
2496
2497 while(TRUE)
2498 {
2499 /* Swap previous characters. */
2500 while (length-- > 0)
2501 {
2502 *ptr = swap_uint16(*ptr);
2503 ptr++;
2504 }
2505 #ifdef SUPPORT_UTF
2506 if (utf16_char)
2507 {
2508 if ((ptr[-1] & 0xfc00) == 0xd800)
2509 {
2510 /* We know that there is only one extra character in UTF-16. */
2511 *ptr = swap_uint16(*ptr);
2512 ptr++;
2513 }
2514 }
2515 utf16_char = FALSE;
2516 #endif /* SUPPORT_UTF */
2517
2518 /* Get next opcode. */
2519
2520 length = 0;
2521 op = *ptr;
2522 *ptr++ = swap_uint16(op);
2523
2524 switch (op)
2525 {
2526 case OP_END:
2527 return;
2528
2529 #ifdef SUPPORT_UTF
2530 case OP_CHAR:
2531 case OP_CHARI:
2532 case OP_NOT:
2533 case OP_NOTI:
2534 case OP_STAR:
2535 case OP_MINSTAR:
2536 case OP_PLUS:
2537 case OP_MINPLUS:
2538 case OP_QUERY:
2539 case OP_MINQUERY:
2540 case OP_UPTO:
2541 case OP_MINUPTO:
2542 case OP_EXACT:
2543 case OP_POSSTAR:
2544 case OP_POSPLUS:
2545 case OP_POSQUERY:
2546 case OP_POSUPTO:
2547 case OP_STARI:
2548 case OP_MINSTARI:
2549 case OP_PLUSI:
2550 case OP_MINPLUSI:
2551 case OP_QUERYI:
2552 case OP_MINQUERYI:
2553 case OP_UPTOI:
2554 case OP_MINUPTOI:
2555 case OP_EXACTI:
2556 case OP_POSSTARI:
2557 case OP_POSPLUSI:
2558 case OP_POSQUERYI:
2559 case OP_POSUPTOI:
2560 case OP_NOTSTAR:
2561 case OP_NOTMINSTAR:
2562 case OP_NOTPLUS:
2563 case OP_NOTMINPLUS:
2564 case OP_NOTQUERY:
2565 case OP_NOTMINQUERY:
2566 case OP_NOTUPTO:
2567 case OP_NOTMINUPTO:
2568 case OP_NOTEXACT:
2569 case OP_NOTPOSSTAR:
2570 case OP_NOTPOSPLUS:
2571 case OP_NOTPOSQUERY:
2572 case OP_NOTPOSUPTO:
2573 case OP_NOTSTARI:
2574 case OP_NOTMINSTARI:
2575 case OP_NOTPLUSI:
2576 case OP_NOTMINPLUSI:
2577 case OP_NOTQUERYI:
2578 case OP_NOTMINQUERYI:
2579 case OP_NOTUPTOI:
2580 case OP_NOTMINUPTOI:
2581 case OP_NOTEXACTI:
2582 case OP_NOTPOSSTARI:
2583 case OP_NOTPOSPLUSI:
2584 case OP_NOTPOSQUERYI:
2585 case OP_NOTPOSUPTOI:
2586 if (utf) utf16_char = TRUE;
2587 #endif
2588 /* Fall through. */
2589
2590 default:
2591 length = OP_lengths16[op] - 1;
2592 break;
2593
2594 case OP_CLASS:
2595 case OP_NCLASS:
2596 /* Skip the character bit map. */
2597 ptr += 32/sizeof(pcre_uint16);
2598 length = 0;
2599 break;
2600
2601 case OP_XCLASS:
2602 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2603 if (LINK_SIZE > 1)
2604 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2605 - (1 + LINK_SIZE + 1));
2606 else
2607 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2608
2609 /* Reverse the size of the XCLASS instance. */
2610 *ptr = swap_uint16(*ptr);
2611 ptr++;
2612 if (LINK_SIZE > 1)
2613 {
2614 *ptr = swap_uint16(*ptr);
2615 ptr++;
2616 }
2617
2618 op = *ptr;
2619 *ptr = swap_uint16(op);
2620 ptr++;
2621 if ((op & XCL_MAP) != 0)
2622 {
2623 /* Skip the character bit map. */
2624 ptr += 32/sizeof(pcre_uint16);
2625 length -= 32/sizeof(pcre_uint16);
2626 }
2627 break;
2628 }
2629 }
2630 /* Control should never reach here in 16 bit mode. */
2631 #endif /* SUPPORT_PCRE16 */
2632 }
2633 #endif /* SUPPORT_PCRE[8|16] */
2634
2635
2636
2637 #if defined SUPPORT_PCRE32
2638 static void
2639 regexflip_32(pcre *ere, pcre_extra *extra)
2640 {
2641 real_pcre32 *re = (real_pcre32 *)ere;
2642 int op;
2643 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2644 int length = re->name_count * re->name_entry_size;
2645
2646 /* Always flip the bytes in the main data block and study blocks. */
2647
2648 re->magic_number = REVERSED_MAGIC_NUMBER;
2649 re->size = swap_uint32(re->size);
2650 re->options = swap_uint32(re->options);
2651 re->flags = swap_uint32(re->flags);
2652 re->limit_match = swap_uint32(re->limit_match);
2653 re->limit_recursion = swap_uint32(re->limit_recursion);
2654 re->first_char = swap_uint32(re->first_char);
2655 re->req_char = swap_uint32(re->req_char);
2656 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2657 re->top_bracket = swap_uint16(re->top_bracket);
2658 re->top_backref = swap_uint16(re->top_backref);
2659 re->name_table_offset = swap_uint16(re->name_table_offset);
2660 re->name_entry_size = swap_uint16(re->name_entry_size);
2661 re->name_count = swap_uint16(re->name_count);
2662 re->ref_count = swap_uint16(re->ref_count);
2663
2664 if (extra != NULL)
2665 {
2666 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2667 rsd->size = swap_uint32(rsd->size);
2668 rsd->flags = swap_uint32(rsd->flags);
2669 rsd->minlength = swap_uint32(rsd->minlength);
2670 }
2671
2672 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2673 the pattern itself. */
2674
2675 while(TRUE)
2676 {
2677 /* Swap previous characters. */
2678 while (length-- > 0)
2679 {
2680 *ptr = swap_uint32(*ptr);
2681 ptr++;
2682 }
2683
2684 /* Get next opcode. */
2685
2686 length = 0;
2687 op = *ptr;
2688 *ptr++ = swap_uint32(op);
2689
2690 switch (op)
2691 {
2692 case OP_END:
2693 return;
2694
2695 default:
2696 length = OP_lengths32[op] - 1;
2697 break;
2698
2699 case OP_CLASS:
2700 case OP_NCLASS:
2701 /* Skip the character bit map. */
2702 ptr += 32/sizeof(pcre_uint32);
2703 length = 0;
2704 break;
2705
2706 case OP_XCLASS:
2707 /* LINK_SIZE can only be 1 in 32-bit mode. */
2708 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2709
2710 /* Reverse the size of the XCLASS instance. */
2711 *ptr = swap_uint32(*ptr);
2712 ptr++;
2713
2714 op = *ptr;
2715 *ptr = swap_uint32(op);
2716 ptr++;
2717 if ((op & XCL_MAP) != 0)
2718 {
2719 /* Skip the character bit map. */
2720 ptr += 32/sizeof(pcre_uint32);
2721 length -= 32/sizeof(pcre_uint32);
2722 }
2723 break;
2724 }
2725 }
2726 /* Control should never reach here in 32 bit mode. */
2727 }
2728
2729 #endif /* SUPPORT_PCRE32 */
2730
2731
2732
2733 static void
2734 regexflip(pcre *ere, pcre_extra *extra)
2735 {
2736 #if defined SUPPORT_PCRE32
2737 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2738 regexflip_32(ere, extra);
2739 #endif
2740 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2741 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2742 regexflip8_or_16(ere, extra);
2743 #endif
2744 }
2745
2746
2747
2748 /*************************************************
2749 * Check match or recursion limit *
2750 *************************************************/
2751
2752 static int
2753 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2754 int start_offset, int options, int *use_offsets, int use_size_offsets,
2755 int flag, unsigned long int *limit, int errnumber, const char *msg)
2756 {
2757 int count;
2758 int min = 0;
2759 int mid = 64;
2760 int max = -1;
2761
2762 extra->flags |= flag;
2763
2764 for (;;)
2765 {
2766 *limit = mid;
2767
2768 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2769 use_offsets, use_size_offsets);
2770
2771 if (count == errnumber)
2772 {
2773 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2774 min = mid;
2775 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2776 }
2777
2778 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2779 count == PCRE_ERROR_PARTIAL)
2780 {
2781 if (mid == min + 1)
2782 {
2783 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2784 break;
2785 }
2786 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2787 max = mid;
2788 mid = (min + mid)/2;
2789 }
2790 else break; /* Some other error */
2791 }
2792
2793 extra->flags &= ~flag;
2794 return count;
2795 }
2796
2797
2798
2799 /*************************************************
2800 * Case-independent strncmp() function *
2801 *************************************************/
2802
2803 /*
2804 Arguments:
2805 s first string
2806 t second string
2807 n number of characters to compare
2808
2809 Returns: < 0, = 0, or > 0, according to the comparison
2810 */
2811
2812 static int
2813 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2814 {
2815 while (n--)
2816 {
2817 int c = tolower(*s++) - tolower(*t++);
2818 if (c) return c;
2819 }
2820 return 0;
2821 }
2822
2823
2824
2825 /*************************************************
2826 * Check multicharacter option *
2827 *************************************************/
2828
2829 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2830 a message and return 0 if there is no match.
2831
2832 Arguments:
2833 p points after the leading '<'
2834 f file for error message
2835 nl TRUE to check only for newline settings
2836 stype "modifier" or "escape sequence"
2837
2838 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2839 */
2840
2841 static int
2842 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2843 {
2844 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2845 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2846 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2847 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2848 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2849 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2850 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2851
2852 if (!nl)
2853 {
2854 if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2855 }
2856
2857 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2858 return 0;
2859 }
2860
2861
2862
2863 /*************************************************
2864 * Usage function *
2865 *************************************************/
2866
/* Print the command-line help text to stdout. Takes no arguments and returns
nothing. Which lines appear depends on the build: the -8/-16/-32 entries are
shown only when the corresponding library width is compiled in, -dfa and -p are
dropped when NODFA / NOPOSIX are defined, and the readline note reflects
whether SUPPORT_LIBREADLINE or SUPPORT_LIBEDIT is defined. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");

/* The option scanner in main() accepts -8 as well as -16 and -32, so list it
here under the same kind of build guard as its siblings. */

#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option and exit\n");
printf(" with its value if numeric (else 0). The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" bsr \\R type [ANYCRLF, ANY]\n");

/* "ebcdic" and "ebcdic-nl" are also handled by the -C argument parser in
main(); they were previously missing from this list. */

printf(" ebcdic compiled for EBCDIC [0, 1]\n");
printf(" ebcdic-nl EBCDIC code for LF in hex (0 if not EBCDIC)\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
printf(" -TM same as -tm, but show total time at the end\n");
}
2925
2926
2927
2928 /*************************************************
2929 * Main Program *
2930 *************************************************/
2931
2932 /* Read lines from named file or stdin and write to named file or stdout; lines
2933 consist of a regular expression, in delimiters and optionally followed by
2934 options, followed by a set of test data, terminated by an empty line. */
2935
2936 int main(int argc, char **argv)
2937 {
2938 FILE *infile = stdin;
2939 const char *version;
2940 int options = 0;
2941 int study_options = 0;
2942 int default_find_match_limit = FALSE;
2943 pcre_uint32 default_options = 0;
2944 int op = 1;
2945 int timeit = 0;
2946 int timeitm = 0;
2947 int showtotaltimes = 0;
2948 int showinfo = 0;
2949 int showstore = 0;
2950 int force_study = -1;
2951 int force_study_options = 0;
2952 int quiet = 0;
2953 int size_offsets = 45;
2954 int size_offsets_max;
2955 int *offsets = NULL;
2956 int debug = 0;
2957 int done = 0;
2958 int all_use_dfa = 0;
2959 int verify_jit = 0;
2960 int yield = 0;
2961 int stack_size;
2962 pcre_uint8 *dbuffer = NULL;
2963 pcre_uint8 lockout[24] = { 0 };
2964 size_t dbuffer_size = 1u << 14;
2965 clock_t total_compile_time = 0;
2966 clock_t total_study_time = 0;
2967 clock_t total_match_time = 0;
2968
2969 #if !defined NOPOSIX
2970 int posix = 0;
2971 #endif
2972 #if !defined NODFA
2973 int *dfa_workspace = NULL;
2974 #endif
2975
2976 pcre_jit_stack *jit_stack = NULL;
2977
2978 /* These vectors store, end-to-end, a list of zero-terminated captured
2979 substring names, each list itself being terminated by an empty name. Assume
2980 that 1024 is plenty long enough for the few names we'll be testing. It is
2981 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2982 for the actual memory, to ensure alignment. */
2983
2984 pcre_uint32 copynames[1024];
2985 pcre_uint32 getnames[1024];
2986
2987 #ifdef SUPPORT_PCRE32
2988 pcre_uint32 *cn32ptr;
2989 pcre_uint32 *gn32ptr;
2990 #endif
2991
2992 #ifdef SUPPORT_PCRE16
2993 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2994 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2995 pcre_uint16 *cn16ptr;
2996 pcre_uint16 *gn16ptr;
2997 #endif
2998
2999 #ifdef SUPPORT_PCRE8
3000 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3001 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3002 pcre_uint8 *cn8ptr;
3003 pcre_uint8 *gn8ptr;
3004 #endif
3005
3006 /* Get buffers from malloc() so that valgrind will check their misuse when
3007 debugging. They grow automatically when very long lines are read. The 16-
3008 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3009
3010 buffer = (pcre_uint8 *)malloc(buffer_size);
3011 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3012
3013 /* The outfile variable is static so that new_malloc can use it. */
3014
3015 outfile = stdout;
3016
3017 /* The following _setmode() stuff is some Windows magic that tells its runtime
3018 library to translate CRLF into a single LF character. At least, that's what
3019 I've been told: never having used Windows I take this all on trust. Originally
3020 it set 0x8000, but then I was advised that _O_BINARY was better. */
3021
3022 #if defined(_WIN32) || defined(WIN32)
3023 _setmode( _fileno( stdout ), _O_BINARY );
3024 #endif
3025
3026 /* Get the version number: both pcre_version() and pcre16_version() give the
3027 same answer. We just need to ensure that we call one that is available. */
3028
3029 #if defined SUPPORT_PCRE8
3030 version = pcre_version();
3031 #elif defined SUPPORT_PCRE16
3032 version = pcre16_version();
3033 #elif defined SUPPORT_PCRE32
3034 version = pcre32_version();
3035 #endif
3036
3037 /* Scan options */
3038
3039 while (argc > 1 && argv[op][0] == '-')
3040 {
3041 pcre_uint8 *endptr;
3042 char *arg = argv[op];
3043
3044 if (strcmp(arg, "-m") == 0) showstore = 1;
3045 else if (strcmp(arg, "-s") == 0) force_study = 0;
3046
3047 else if (strncmp(arg, "-s+", 3) == 0)
3048 {
3049 arg += 3;
3050 if (*arg == '+') { arg++; verify_jit = TRUE; }
3051 force_study = 1;
3052 if (*arg == 0)
3053 force_study_options = jit_study_bits[6];
3054 else if (*arg >= '1' && *arg <= '7')
3055 force_study_options = jit_study_bits[*arg - '1'];
3056 else goto BAD_ARG;
3057 }
3058 else if (strcmp(arg, "-8") == 0)
3059 {
3060 #ifdef SUPPORT_PCRE8
3061 pcre_mode = PCRE8_MODE;
3062 #else
3063 printf("** This version of PCRE was built without 8-bit support\n");
3064 exit(1);
3065 #endif
3066 }
3067 else if (strcmp(arg, "-16") == 0)
3068 {
3069 #ifdef SUPPORT_PCRE16
3070 pcre_mode = PCRE16_MODE;
3071 #else
3072 printf("** This version of PCRE was built without 16-bit support\n");
3073 exit(1);
3074 #endif
3075 }
3076 else if (strcmp(arg, "-32") == 0)
3077 {
3078 #ifdef SUPPORT_PCRE32
3079 pcre_mode = PCRE32_MODE;
3080 #else
3081 printf("** This version of PCRE was built without 32-bit support\n");
3082 exit(1);
3083 #endif
3084 }
3085 else if (strcmp(arg, "-q") == 0) quiet = 1;
3086 else if (strcmp(arg, "-b") == 0) debug = 1;
3087 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3088 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3089 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3090 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3091 #if !defined NODFA
3092 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3093 #endif
3094 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3095 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3096 *endptr == 0))
3097 {
3098 op++;
3099 argc--;
3100 }
3101 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3102 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3103 {
3104 int temp;
3105 int both = arg[2] == 0;
3106 showtotaltimes = arg[1] == 'T';
3107 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3108 *endptr == 0))
3109 {
3110 timeitm = temp;
3111 op++;
3112 argc--;
3113 }
3114 else timeitm = LOOPREPEAT;
3115 if (both) timeit = timeitm;
3116 }
3117 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3118 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3119 *endptr == 0))
3120 {
3121 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3122 printf("PCRE: -S not supported on this OS\n");
3123 exit(1);
3124 #else
3125 int rc;
3126 struct rlimit rlim;
3127 getrlimit(RLIMIT_STACK, &rlim);
3128 rlim.rlim_cur = stack_size * 1024 * 1024;
3129 rc = setrlimit(RLIMIT_STACK, &rlim);
3130 if (rc != 0)
3131 {
3132 printf("PCRE: setrlimit() failed with error %d\n", rc);
3133 exit(1);
3134 }
3135 op++;
3136 argc--;
3137 #endif
3138 }
3139 #if !defined NOPOSIX
3140 else if (strcmp(arg, "-p") == 0) posix = 1;
3141 #endif
3142 else if (strcmp(arg, "-C") == 0)
3143 {
3144 int rc;
3145 unsigned long int lrc;
3146
3147 if (argc > 2)
3148 {
3149 if (strcmp(argv[op + 1], "linksize") == 0)
3150 {
3151 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3152 printf("%d\n", rc);
3153 yield = rc;
3154
3155 #ifdef __VMS
3156 vms_setsymbol("LINKSIZE",0,yield );
3157 #endif
3158 }
3159 else if (strcmp(argv[op + 1], "pcre8") == 0)
3160 {
3161 #ifdef SUPPORT_PCRE8
3162 printf("1\n");
3163 yield = 1;
3164 #else
3165 printf("0\n");
3166 yield = 0;
3167 #endif
3168 #ifdef __VMS
3169 vms_setsymbol("PCRE8",0,yield );
3170 #endif
3171 }
3172 else if (strcmp(argv[op + 1], "pcre16") == 0)
3173 {
3174 #ifdef SUPPORT_PCRE16
3175 printf("1\n");
3176 yield = 1;
3177 #else
3178 printf("0\n");
3179 yield = 0;
3180 #endif
3181 #ifdef __VMS
3182 vms_setsymbol("PCRE16",0,yield );
3183 #endif
3184 }
3185 else if (strcmp(argv[op + 1], "pcre32") == 0)
3186 {
3187 #ifdef SUPPORT_PCRE32
3188 printf("1\n");
3189 yield = 1;
3190 #else
3191 printf("0\n");
3192 yield = 0;
3193 #endif
3194 #ifdef __VMS
3195 vms_setsymbol("PCRE32",0,yield );
3196 #endif
3197 }
3198 else if (strcmp(argv[op + 1], "utf") == 0)
3199 {
3200 #ifdef SUPPORT_PCRE8
3201 if (pcre_mode == PCRE8_MODE)
3202 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3203 #endif
3204 #ifdef SUPPORT_PCRE16
3205 if (pcre_mode == PCRE16_MODE)
3206 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3207 #endif
3208 #ifdef SUPPORT_PCRE32
3209 if (pcre_mode == PCRE32_MODE)
3210 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3211 #endif
3212 printf("%d\n", rc);
3213 yield = rc;
3214 #ifdef __VMS
3215 vms_setsymbol("UTF",0,yield );
3216 #endif
3217 }
3218 else if (strcmp(argv[op + 1], "ucp") == 0)
3219 {
3220 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3221 printf("%d\n", rc);
3222 yield = rc;
3223 }
3224 else if (strcmp(argv[op + 1], "jit") == 0)
3225 {
3226 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3227 printf("%d\n", rc);
3228 yield = rc;
3229 }
3230 else if (strcmp(argv[op + 1], "newline") == 0)
3231 {
3232 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3233 print_newline_config(rc, TRUE);
3234 }
3235 else if (strcmp(argv[op + 1], "bsr") == 0)
3236 {
3237 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3238 printf("%s\n", rc? "ANYCRLF" : "ANY");
3239 }
3240 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3241 {
3242 #ifdef EBCDIC
3243 printf("1\n");
3244 yield = 1;
3245 #else
3246 printf("0\n");
3247 #endif
3248 }
3249 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3250 {
3251 #ifdef EBCDIC
3252 printf("0x%02x\n", CHAR_LF);
3253 #else
3254 printf("0\n");
3255 #endif
3256 }
3257 else
3258 {
3259 printf("Unknown -C option: %s\n", argv[op + 1]);
3260 }
3261 goto EXIT;
3262 }
3263
3264 /* No argument for -C: output all configuration information. */
3265
3266 printf("PCRE version %s\n", version);
3267 printf("Compiled with\n");
3268
3269 #ifdef EBCDIC
3270 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3271 #endif
3272
3273 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3274 are set, either both UTFs are supported or both are not supported. */
3275
3276 #ifdef SUPPORT_PCRE8
3277 printf(" 8-bit support\n");
3278 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3279 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3280 #endif
3281 #ifdef SUPPORT_PCRE16
3282 printf(" 16-bit support\n");
3283 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3284 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3285 #endif
3286 #ifdef SUPPORT_PCRE32
3287 printf(" 32-bit support\n");
3288 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3289 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3290 #endif
3291
3292 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3293 printf(" %sUnicode properties support\n", rc? "" : "No ");
3294 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3295 if (rc)
3296 {
3297 const char *arch;
3298 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3299 printf(" Just-in-time compiler support: %s\n", arch);
3300 }
3301 else
3302 printf(" No just-in-time compiler support\n");
3303 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3304 print_newline_config(rc, FALSE);
3305 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3306 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3307 "all Unicode newlines");
3308 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3309 printf(" Internal link size = %d\n", rc);
3310 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3311 printf(" POSIX malloc threshold = %d\n", rc);
3312 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3313 printf(" Parentheses nest limit = %ld\n", lrc);
3314 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3315 printf(" Default match limit = %ld\n", lrc);
3316 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3317 printf(" Default recursion depth limit = %ld\n", lrc);
3318 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3319 printf(" Match recursion uses %s", rc? "stack" : "heap");
3320 if (showstore)
3321 {
3322 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3323 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3324 }
3325 printf("\n");
3326 goto EXIT;
3327 }
3328 else if (strcmp(arg, "-help") == 0 ||
3329 strcmp(arg, "--help") == 0)
3330 {
3331 usage();
3332 goto EXIT;
3333 }
3334 else
3335 {
3336 BAD_ARG:
3337 printf("** Unknown or malformed option %s\n", arg);
3338 usage();
3339 yield = 1;
3340 goto EXIT;
3341 }
3342 op++;
3343 argc--;
3344 }
3345
3346 /* Get the store for the offsets vector, and remember what it was */
3347
3348 size_offsets_max = size_offsets;
3349 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3350 if (offsets == NULL)
3351 {
3352 printf("** Failed to get %d bytes of memory for offsets vector\n",
3353 (int)(size_offsets_max * sizeof(int)));
3354 yield = 1;
3355 goto EXIT;
3356 }
3357
3358 /* Sort out the input and output files */
3359
3360 if (argc > 1)
3361 {
3362 infile = fopen(argv[op], INPUT_MODE);
3363 if (infile == NULL)
3364 {
3365 printf("** Failed to open %s\n", argv[op]);
3366 yield = 1;
3367 goto EXIT;
3368 }
3369 }
3370
3371 if (argc > 2)
3372 {
3373 outfile = fopen(argv[op+1], OUTPUT_MODE);
3374 if (outfile == NULL)
3375 {
3376 printf("** Failed to open %s\n", argv[op+1]);
3377 yield = 1;
3378 goto EXIT;
3379 }
3380 }
3381
3382 /* Set alternative malloc function */
3383
3384 #ifdef SUPPORT_PCRE8
3385 pcre_malloc = new_malloc;
3386 pcre_free = new_free;
3387 pcre_stack_malloc = stack_malloc;
3388 pcre_stack_free = stack_free;
3389 #endif
3390
3391 #ifdef SUPPORT_PCRE16
3392 pcre16_malloc = new_malloc;
3393 pcre16_free = new_free;
3394 pcre16_stack_malloc = stack_malloc;
3395 pcre16_stack_free = stack_free;
3396 #endif
3397
3398 #ifdef SUPPORT_PCRE32
3399 pcre32_malloc = new_malloc;
3400 pcre32_free = new_free;
3401 pcre32_stack_malloc = stack_malloc;
3402 pcre32_stack_free = stack_free;
3403 #endif
3404
3405 /* Heading line unless quiet */
3406
3407 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3408
3409 /* Main loop */
3410
3411 while (!done)
3412 {
3413 pcre *re = NULL;
3414 pcre_extra *extra = NULL;
3415
3416 #if !defined NOPOSIX /* There are still compilers that require no indent */
3417 regex_t preg;
3418 int do_posix = 0;
3419 #endif
3420
3421 const char *error;
3422 pcre_uint8 *markptr;
3423 pcre_uint8 *p, *pp, *ppp;
3424 pcre_uint8 *to_file = NULL;
3425 const pcre_uint8 *tables = NULL;
3426 unsigned long int get_options;
3427 unsigned long int true_size, true_study_size = 0;
3428 size_t size;
3429 int do_allcaps = 0;
3430 int do_mark = 0;
3431 int do_study = 0;
3432 int no_force_study = 0;
3433 int do_debug = debug;
3434 int do_G = 0;
3435 int do_g = 0;
3436 int do_showinfo = showinfo;
3437 int do_showrest = 0;
3438 int do_showcaprest = 0;
3439 int do_flip = 0;
3440 int erroroffset, len, delimiter, poffset;
3441
3442 #if !defined NODFA
3443 int dfa_matched = 0;
3444 #endif
3445
3446 use_utf = 0;
3447 debug_lengths = 1;
3448
3449 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3450 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3451 fflush(outfile);
3452
3453 p = buffer;
3454 while (isspace(*p)) p++;
3455 if (*p == 0) continue;
3456
3457 /* Handle option lock-out setting */
3458
3459 if (*p == '<' && p[1] == ' ')
3460 {
3461 p += 2;
3462 while (isspace(*p)) p++;
3463 if (strncmp((char *)p, "forbid ", 7) == 0)
3464 {
3465 p += 7;
3466 while (isspace(*p)) p++;
3467 pp = lockout;
3468 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3469 *pp++ = *p++;
3470 *pp = 0;
3471 }
3472 else
3473 {
3474 printf("** Unrecognized special command '%s'\n", p);
3475 yield = 1;
3476 goto EXIT;
3477 }
3478 continue;
3479 }
3480
3481 /* See if the pattern is to be loaded pre-compiled from a file. */
3482
3483 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3484 {
3485 pcre_uint32 magic;
3486 pcre_uint8 sbuf[8];
3487 FILE *f;
3488
3489 p++;
3490 if (*p == '!')
3491 {
3492 do_debug = TRUE;
3493 do_showinfo = TRUE;
3494 p++;
3495 }
3496
3497 pp = p + (int)strlen((char *)p);
3498 while (isspace(pp[-1])) pp--;
3499 *pp = 0;
3500
3501 f = fopen((char *)p, "rb");
3502 if (f == NULL)
3503 {
3504 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3505 continue;
3506 }
3507 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3508
3509 true_size =
3510 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3511 true_study_size =
3512 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3513
3514 re = (pcre *)new_malloc(true_size);
3515 if (re == NULL)
3516 {
3517 printf("** Failed to get %d bytes of memory for pcre object\n",
3518 (int)true_size);
3519 yield = 1;
3520 goto EXIT;
3521 }
3522 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3523
3524 magic = REAL_PCRE_MAGIC(re);
3525 if (magic != MAGIC_NUMBER)
3526 {
3527 if (swap_uint32(magic) == MAGIC_NUMBER)
3528 {
3529 do_flip = 1;
3530 }
3531 else
3532 {
3533 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3534 new_free(re);
3535 fclose(f);
3536 continue;
3537 }
3538 }
3539
3540 /* We hide the byte-invert info for little and big endian tests. */
3541 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3542 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3543
3544 /* Now see if there is any following study data. */
3545
3546 if (true_study_size != 0)
3547 {
3548 pcre_study_data *psd;
3549
3550 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3551 extra->flags = PCRE_EXTRA_STUDY_DATA;
3552
3553 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3554 extra->study_data = psd;
3555
3556 if (fread(psd, 1, true_study_size, f) != true_study_size)
3557 {
3558 FAIL_READ:
3559 fprintf(outfile, "Failed to read data from %s\n", p);
3560 if (extra != NULL)
3561 {
3562 PCRE_FREE_STUDY(extra);
3563 }
3564 new_free(re);
3565 fclose(f);
3566 continue;
3567 }
3568 fprintf(outfile, "Study data loaded from %s\n", p);
3569 do_study = 1; /* To get the data output if requested */
3570 }
3571 else fprintf(outfile, "No study data\n");
3572
3573 /* Flip the necessary bytes. */
3574 if (do_flip)
3575 {
3576 int rc;
3577 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3578 if (rc == PCRE_ERROR_BADMODE)
3579 {
3580 pcre_uint32 flags_in_host_byte_order;
3581 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3582 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3583 else
3584 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3585 /* Simulate the result of the function call below. */
3586 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3587 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3588 PCRE_INFO_OPTIONS);
3589 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3590 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3591 new_free(re);
3592 fclose(f);
3593 continue;
3594 }
3595 }
3596
3597 /* Need to know if UTF-8 for printing data strings. */
3598
3599 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3600 {
3601 new_free(re);
3602 fclose(f);
3603 continue;
3604 }
3605 use_utf = (get_options & PCRE_UTF8) != 0;
3606
3607 fclose(f);
3608 goto SHOW_INFO;
3609 }
3610
3611 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3612 the pattern; if it isn't complete, read more. */
3613
3614 delimiter = *p++;
3615
3616 if (isalnum(delimiter) || delimiter == '\\')
3617 {
3618 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3619 goto SKIP_DATA;
3620 }
3621
3622 pp = p;
3623 poffset = (int)(p - buffer);
3624
3625 for(;;)
3626 {
3627 while (*pp != 0)
3628 {
3629 if (*pp == '\\' && pp[1] != 0) pp++;
3630 else if (*pp == delimiter) break;
3631 pp++;
3632 }
3633 if (*pp != 0) break;
3634 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3635 {
3636 fprintf(outfile, "** Unexpected EOF\n");
3637 done = 1;
3638 goto CONTINUE;
3639 }
3640 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3641 }
3642
3643 /* The buffer may have moved while being extended; reset the start of data
3644 pointer to the correct relative point in the buffer. */
3645
3646 p = buffer + poffset;
3647
3648 /* If the first character after the delimiter is backslash, make
3649 the pattern end with backslash. This is purely to provide a way
3650 of testing for the error message when a pattern ends with backslash. */
3651
3652 if (pp[1] == '\\') *pp++ = '\\';
3653
3654 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3655 for callouts. */
3656
3657 *pp++ = 0;
3658 strcpy((char *)pbuffer, (char *)p);
3659
3660 /* Look for modifiers and options after the final delimiter. */
3661
3662 options = default_options;
3663 study_options = force_study_options;
3664 log_store = showstore; /* default from command line */
3665
3666 while (*pp != 0)
3667 {
3668 /* Check to see whether this modifier has been locked out for this file.
3669 This is complicated for the multi-character options that begin with '<'.
3670 If there is no '>' in the lockout string, all multi-character modifiers are
3671 locked out. */
3672
3673 if (strchr((char *)lockout, *pp) != NULL)
3674 {
3675 if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3676 {
3677 int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3678 if (x == 0) goto SKIP_DATA;
3679
3680 for (ppp = lockout; *ppp != 0; ppp++)
3681 {
3682 if (*ppp == '<')
3683 {
3684 int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3685 if (y == 0)
3686 {
3687 printf("** Error in modifier forbid data - giving up.\n");
3688 yield = 1;
3689 goto EXIT;
3690 }
3691 if (x == y)
3692 {
3693 ppp = pp;
3694 while (*ppp != '>') ppp++;
3695 printf("** The %.*s modifier is locked out - giving up.\n",
3696 (int)(ppp - pp + 1), pp);
3697 yield = 1;
3698 goto EXIT;
3699 }
3700 }
3701 }
3702 }
3703
3704 /* The single-character modifiers are straightforward. */
3705
3706 else
3707 {
3708 printf("** The /%c modifier is locked out - giving up.\n", *pp);
3709 yield = 1;
3710 goto EXIT;
3711 }
3712 }
3713
3714 /* The modifier is not locked out; handle it. */
3715
3716 switch (*pp++)
3717 {
3718 case 'f': options |= PCRE_FIRSTLINE; break;
3719 case 'g': do_g = 1; break;
3720 case 'i': options |= PCRE_CASELESS; break;
3721 case 'm': options |= PCRE_MULTILINE; break;
3722 case 's': options |= PCRE_DOTALL; break;
3723 case 'x': options |= PCRE_EXTENDED; break;
3724
3725 case '+':
3726 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3727 break;
3728
3729 case '=': do_allcaps = 1; break;
3730 case 'A': options |= PCRE_ANCHORED; break;
3731 case 'B': do_debug = 1; break;
3732 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3733 case 'D': do_debug = do_showinfo = 1; break;
3734 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3735 case 'F': do_flip = 1; break;
3736 case 'G': do_G = 1; break;
3737 case 'I': do_showinfo = 1; break;
3738 case 'J': options |= PCRE_DUPNAMES; break;
3739 case 'K': do_mark = 1; break;
3740 case 'M': log_store = 1; break;
3741 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3742 case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3743
3744 #if !defined NOPOSIX
3745 case 'P': do_posix = 1; break;
3746 #endif
3747
3748 case 'S':
3749 do_study = 1;
3750 for (;;)
3751 {
3752 switch (*pp++)
3753 {
3754 case 'S':
3755 do_study = 0;
3756 no_force_study = 1;
3757 break;
3758
3759 case '!':
3760 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3761 break;
3762
3763 case '+':
3764 if (*pp == '+')
3765 {
3766 verify_jit = TRUE;
3767 pp++;
3768 }
3769 if (*pp >= '1' && *pp <= '7')
3770 study_options |= jit_study_bits[*pp++ - '1'];
3771 else
3772 study_options |= jit_study_bits[6];
3773 break;
3774
3775 case '-':
3776 study_options &= ~PCRE_STUDY_ALLJIT;
3777 break;
3778
3779 default:
3780 pp--;
3781 goto ENDLOOP;
3782 }
3783 }
3784 ENDLOOP:
3785 break;
3786
3787 case 'U': options |= PCRE_UNGREEDY; break;
3788 case 'W': options |= PCRE_UCP; break;
3789 case 'X': options |= PCRE_EXTRA; break;
3790 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3791 case 'Z': debug_lengths = 0; break;
3792 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3793 case '9': options |= PCRE_NEVER_UTF; break;
3794 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3795
3796 case 'T':
3797 switch (*pp++)
3798 {
3799 case '0': tables = tables0; break;
3800 case '1': tables = tables1; break;
3801
3802 case '\r':
3803 case '\n':
3804 case ' ':
3805 case 0:
3806 fprintf(outfile, "** Missing table number after /T\n");
3807 goto SKIP_DATA;
3808
3809 default:
3810 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3811 goto SKIP_DATA;
3812 }
3813 break;
3814
3815 case 'L':
3816 ppp = pp;
3817 /* The '\r' test here is so that it works on Windows. */
3818 /* The '0' test is just in case this is an unterminated line. */
3819 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3820 *ppp = 0;
3821 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3822 {
3823 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3824 goto SKIP_DATA;
3825 }
3826 locale_set = 1;
3827 tables = PCRE_MAKETABLES;
3828 pp = ppp;
3829 break;
3830
3831 case '>':
3832 to_file = pp;
3833 while (*pp != 0) pp++;
3834 while (isspace(pp[-1])) pp--;
3835 *pp = 0;
3836 break;
3837
3838 case '<':
3839 {
3840 int x = check_mc_option(pp, outfile, FALSE, "modifier");
3841 if (x == 0) goto SKIP_DATA;
3842 options |= x;
3843 while (*pp++ != '>');
3844 }
3845 break;
3846
3847 case '\r': /* So that it works in Windows */
3848 case '\n':
3849 case ' ':
3850 break;
3851
3852 default:
3853 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3854 goto SKIP_DATA;
3855 }
3856 }
3857
3858 /* Handle compiling via the POSIX interface, which doesn't support the
3859 timing, showing, or debugging options, nor the ability to pass over
3860 local character tables. Neither does it have 16-bit support. */
3861
3862 #if !defined NOPOSIX
3863 if (posix || do_posix)
3864 {
3865 int rc;
3866 int cflags = 0;
3867
3868 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3869 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3870 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3871 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3872 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3873 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3874 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3875
3876 rc = regcomp(&preg, (char *)p, cflags);
3877
3878 /* Compilation failed; go back for another re, skipping to blank line
3879 if non-interactive. */
3880
3881 if (rc != 0)
3882 {
3883 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3884 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3885 goto SKIP_DATA;
3886 }
3887 }
3888
3889 /* Handle compiling via the native interface */
3890
3891 else
3892 #endif /* !defined NOPOSIX */
3893
3894 {
3895 /* In 16- or 32-bit mode, convert the input. */
3896
3897 #ifdef SUPPORT_PCRE16
3898 if (pcre_mode == PCRE16_MODE)
3899 {
3900 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3901 {
3902 case -1:
3903 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3904 "converted to UTF-16\n");
3905 goto SKIP_DATA;
3906
3907 case -2:
3908 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3909 "cannot be converted to UTF-16\n");
3910 goto SKIP_DATA;
3911
3912 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3913 fprintf(outfile, "**Failed: character value greater than 0xffff "
3914 "cannot be converted to 16-bit in non-UTF mode\n");
3915 goto SKIP_DATA;
3916
3917 default:
3918 break;
3919 }
3920 p = (pcre_uint8 *)buffer16;
3921 }
3922 #endif
3923
3924 #ifdef SUPPORT_PCRE32
3925 if (pcre_mode == PCRE32_MODE)
3926 {
3927 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3928 {
3929 case -1:
3930 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3931 "converted to UTF-32\n");
3932 goto SKIP_DATA;
3933
3934 case -2:
3935 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3936 "cannot be converted to UTF-32\n");
3937 goto SKIP_DATA;
3938
3939 case -3:
3940 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3941 goto SKIP_DATA;
3942
3943 default:
3944 break;
3945 }
3946 p = (pcre_uint8 *)buffer32;
3947 }
3948 #endif
3949
3950 /* Compile many times when timing */
3951
3952 if (timeit > 0)
3953 {
3954 register int i;
3955 clock_t time_taken;
3956 clock_t start_time = clock();
3957 for (i = 0; i < timeit; i++)
3958 {
3959 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3960 if (re != NULL) free(re);
3961 }
3962 total_compile_time += (time_taken = clock() - start_time);
3963 fprintf(outfile, "Compile time %.4f milliseconds\n",
3964 (((double)time_taken * 1000.0) / (double)timeit) /
3965 (double)CLOCKS_PER_SEC);
3966 }
3967
3968 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3969
3970 /* Compilation failed; go back for another re, skipping to blank line
3971 if non-interactive. */
3972
3973 if (re == NULL)
3974 {
3975 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3976 SKIP_DATA:
3977 if (infile != stdin)
3978 {
3979 for (;;)
3980 {
3981 if (extend_inputline(infile, buffer, NULL) == NULL)
3982 {
3983 done = 1;
3984 goto CONTINUE;
3985 }
3986 len = (int)strlen((char *)buffer);
3987 while (len > 0 && isspace(buffer[len-1])) len--;
3988 if (len == 0) break;
3989 }
3990 fprintf(outfile, "\n");
3991 }
3992 goto CONTINUE;
3993 }
3994
3995 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3996 within the regex; check for this so that we know how to process the data
3997 lines. */
3998
3999 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4000 goto SKIP_DATA;
4001 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4002
4003 /* Extract the size for possible writing before possibly flipping it,
4004 and remember the store that was got. */
4005
4006 true_size = REAL_PCRE_SIZE(re);
4007
4008 /* Output code size information if requested */
4009
4010 if (log_store)
4011 {
4012 int name_count, name_entry_size, real_pcre_size;
4013
4014 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4015 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4016 real_pcre_size = 0;
4017 #ifdef SUPPORT_PCRE8
4018 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4019 real_pcre_size = sizeof(real_pcre);
4020 #endif
4021 #ifdef SUPPORT_PCRE16
4022 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4023 real_pcre_size = sizeof(real_pcre16);
4024 #endif
4025 #ifdef SUPPORT_PCRE32
4026 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4027 real_pcre_size = sizeof(real_pcre32);
4028 #endif
4029 new_info(re, NULL, PCRE_INFO_SIZE, &size);
4030 fprintf(outfile, "Memory allocation (code space): %d\n",
4031 (int)(size - real_pcre_size - name_count * name_entry_size));
4032 }
4033
4034 /* If -s or /S was present, study the regex to generate additional info to
4035 help with the matching, unless the pattern has the SS option, which
4036 suppresses the effect of /S (used for a few test patterns where studying is
4037 never sensible). */
4038
4039 if (do_study || (force_study >= 0 && !no_force_study))
4040 {
4041 if (timeit > 0)
4042 {
4043 register int i;
4044 clock_t time_taken;
4045 clock_t start_time = clock();
4046 for (i = 0; i < timeit; i++)
4047 {
4048 PCRE_STUDY(extra, re, study_options, &error);
4049 }
4050 total_study_time = (time_taken = clock() - start_time);
4051 if (extra != NULL)
4052 {
4053 PCRE_FREE_STUDY(extra);
4054 }
4055 fprintf(outfile, " Study time %.4f milliseconds\n",
4056 (((double)time_taken * 1000.0) / (double)timeit) /
4057 (double)CLOCKS_PER_SEC);
4058 }
4059 PCRE_STUDY(extra, re, study_options, &error);
4060 if (error != NULL)
4061 fprintf(outfile, "Failed to study: %s\n", error);
4062 else if (extra != NULL)
4063 {
4064 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4065 if (log_store)
4066 {
4067 size_t jitsize;
4068 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4069 jitsize != 0)
4070 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4071 }
4072 }
4073 }
4074
4075 /* If /K was present, we set up for handling MARK data. */
4076
4077 if (do_mark)
4078 {
4079 if (extra == NULL)
4080 {
4081 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4082 extra->flags = 0;
4083 }
4084 extra->mark = &markptr;
4085 extra->flags |= PCRE_EXTRA_MARK;
4086 }
4087
4088 /* Extract and display information from the compiled data if required. */
4089
4090 SHOW_INFO:
4091
4092 if (do_debug)
4093 {
4094 fprintf(outfile, "------------------------------------------------------------------\n");
4095 PCRE_PRINTINT(re, outfile, debug_lengths);
4096 }
4097
4098 /* We already have the options in get_options (see above) */
4099
4100 if (do_showinfo)
4101 {
4102 unsigned long int all_options;
4103 pcre_uint32 first_char, need_char;
4104 pcre_uint32 match_limit, recursion_limit;
4105 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4106 hascrorlf, maxlookbehind, match_empty;
4107 int nameentrysize, namecount;
4108 const pcre_uint8 *nametable;
4109
4110 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4111 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4112 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4113 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4114 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4115 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4116 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4117 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4118 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4119 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4120 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4121 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4122 new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4123 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4124 != 0)
4125 goto SKIP_DATA;
4126
4127 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4128
4129 if (backrefmax > 0)
4130 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4131
4132 if (maxlookbehind > 0)
4133 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4134
4135 if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4136 fprintf(outfile, "Match limit = %u\n", match_limit);
4137
4138 if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4139 fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4140
4141 if (namecount > 0)
4142 {
4143 fprintf(outfile, "Named capturing subpatterns:\n");
4144 while (namecount-- > 0)
4145 {
4146 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4147 int length = (int)STRLEN(nametable + imm2_size);
4148 fprintf(outfile, " ");
4149 PCHARSV(nametable, imm2_size, length, outfile);
4150 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4151 #ifdef SUPPORT_PCRE32
4152 if (pcre_mode == PCRE32_MODE)
4153 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4154 #endif
4155 #ifdef SUPPORT_PCRE16
4156 if (pcre_mode == PCRE16_MODE)
4157 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4158 #endif
4159 #ifdef SUPPORT_PCRE8
4160 if (pcre_mode == PCRE8_MODE)
4161 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4162 #endif
4163 nametable += nameentrysize * CHAR_SIZE;
4164 }
4165 }
4166
4167 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4168 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4169 if (match_empty) fprintf(outfile, "May match empty string\n");
4170
4171 all_options = REAL_PCRE_OPTIONS(re);
4172 if (do_flip) all_options = swap_uint32(all_options);
4173
4174 if (get_options == 0) fprintf(outfile, "No options\n");
4175 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4176 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4177 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4178 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4179 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4180 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4181 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4182 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4183 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4184 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4185 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4186 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4187 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4188 ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
4189 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4190 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4191 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4192 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4193 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4194 ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4195
4196 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4197
4198 switch (get_options & PCRE_NEWLINE_BITS)
4199 {
4200 case PCRE_NEWLINE_CR:
4201 fprintf(outfile, "Forced newline sequence: CR\n");
4202 break;
4203
4204 case PCRE_NEWLINE_LF:
4205 fprintf(outfile, "Forced newline sequence: LF\n");
4206 break;
4207
4208 case PCRE_NEWLINE_CRLF:
4209 fprintf(outfile, "Forced newline sequence: CRLF\n");
4210 break;
4211
4212 case PCRE_NEWLINE_ANYCRLF:
4213 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4214 break;
4215
4216 case PCRE_NEWLINE_ANY:
4217 fprintf(outfile, "Forced newline sequence: ANY\n");
4218 break;
4219
4220 default:
4221 break;
4222 }
4223
4224 if (first_char_set == 2)
4225 {
4226 fprintf(outfile, "First char at start or follows newline\n");
4227 }
4228 else if (first_char_set == 1)
4229 {
4230 const char *caseless =
4231 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4232 "" : " (caseless)";
4233
4234 if (PRINTOK(first_char))
4235 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4236 else
4237 {
4238 fprintf(outfile, "First char = ");
4239 pchar(first_char, outfile);
4240 fprintf(outfile, "%s\n", caseless);
4241 }
4242 }
4243 else
4244 {
4245 fprintf(outfile, "No first char\n");
4246 }
4247
4248 if (need_char_set == 0)
4249 {
4250 fprintf(outfile, "No need char\n");
4251 }
4252 else
4253 {
4254 const char *caseless =
4255 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4256 "" : " (caseless)";
4257
4258 if (PRINTOK(need_char))
4259 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4260 else
4261 {
4262 fprintf(outfile, "Need char = ");
4263 pchar(need_char, outfile);
4264 fprintf(outfile, "%s\n", caseless);
4265 }
4266 }
4267
4268 /* Don't output study size; at present it is in any case a fixed
4269 value, but it varies, depending on the computer architecture, and
4270 so messes up the test suite. (And with the /F option, it might be
4271 flipped.) If study was forced by an external -s, don't show this
4272 information unless -i or -d was also present. This means that, except
4273 when auto-callouts are involved, the output from runs with and without
4274 -s should be identical. */
4275
4276 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4277 {
4278 if (extra == NULL)
4279 fprintf(outfile, "Study returned NULL\n");
4280 else
4281 {
4282 pcre_uint8 *start_bits = NULL;
4283 int minlength;
4284
4285 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4286 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4287
4288 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4289 {
4290 if (start_bits == NULL)
4291 fprintf(outfile, "No starting char list\n");
4292 else
4293 {
4294 int i;
4295 int c = 24;
4296 fprintf(outfile, "Starting chars: ");
4297 for (i = 0; i < 256; i++)
4298 {
4299 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4300 {
4301 if (c > 75)
4302 {
4303 fprintf(outfile, "\n ");
4304 c = 2;
4305 }
4306 if (PRINTOK(i) && i != ' ')
4307 {
4308 fprintf(outfile, "%c ", i);
4309 c += 2;
4310 }
4311 else
4312 {
4313 fprintf(outfile, "\\x%02x ", i);
4314 c += 5;
4315 }
4316 }
4317 }
4318 fprintf(outfile, "\n");
4319 }
4320 }
4321 }
4322
4323 /* Show this only if the JIT was set by /S, not by -s. */
4324
4325 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4326 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4327 {
4328 int jit;
4329 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4330 {
4331 if (jit)
4332 fprintf(outfile, "JIT study was successful\n");
4333 else
4334 #ifdef SUPPORT_JIT
4335 fprintf(outfile, "JIT study was not successful\n");
4336 #else
4337 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4338 #endif
4339 }
4340 }
4341 }
4342 }
4343
4344 /* If the '>' option was present, we write out the regex to a file, and
4345 that is all. The first 8 bytes of the file are the regex length and then
4346 the study length, in big-endian order. */
4347
4348 if (to_file != NULL)
4349 {
4350 FILE *f = fopen((char *)to_file, "wb");
4351 if (f == NULL)
4352 {
4353 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4354 }
4355 else
4356 {
4357 pcre_uint8 sbuf[8];
4358
4359 if (do_flip) regexflip(re, extra);
4360 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4361 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4362 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4363 sbuf[3] = (pcre_uint8)((true_size) & 255);
4364 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4365 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4366 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4367 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4368
4369 if (fwrite(sbuf, 1, 8, f) < 8 ||
4370 fwrite(re, 1, true_size, f) < true_size)
4371 {
4372 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4373 }
4374 else
4375 {
4376 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4377
4378 /* If there is study data, write it. */
4379
4380 if (extra != NULL)
4381 {
4382 if (fwrite(extra->study_data, 1, true_study_size, f) <
4383 true_study_size)
4384 {
4385 fprintf(outfile, "Write error on %s: %s\n", to_file,
4386 strerror(errno));
4387 }
4388 else fprintf(outfile, "Study data written to %s\n", to_file);
4389 }
4390 }
4391 fclose(f);
4392 }
4393
4394 new_free(re);
4395 if (extra != NULL)
4396 {
4397 PCRE_FREE_STUDY(extra);
4398 }
4399 if (locale_set)
4400 {
4401 new_free((void *)tables);
4402 setlocale(LC_CTYPE, "C");
4403 locale_set = 0;
4404 }
4405 continue; /* With next regex */
4406 }
4407 } /* End of non-POSIX compile */
4408
4409 /* Read data lines and test them */
4410
4411 for (;;)
4412 {
4413 #ifdef SUPPORT_PCRE8
4414 pcre_uint8 *q8;
4415 #endif
4416 #ifdef SUPPORT_PCRE16
4417 pcre_uint16 *q16;
4418 #endif
4419 #ifdef SUPPORT_PCRE32
4420 pcre_uint32 *q32;
4421 #endif
4422 pcre_uint8 *bptr;
4423 int *use_offsets = offsets;
4424 int use_size_offsets = size_offsets;
4425 int callout_data = 0;
4426 int callout_data_set = 0;
4427 int count;
4428 pcre_uint32 c;
4429 int copystrings = 0;
4430 int find_match_limit = default_find_match_limit;
4431 int getstrings = 0;
4432 int getlist = 0;
4433 int gmatched = 0;
4434 int start_offset = 0;
4435 int start_offset_sign = 1;
4436 int g_notempty = 0;
4437 int use_dfa = 0;
4438
4439 *copynames = 0;
4440 *getnames = 0;
4441
4442 #ifdef SUPPORT_PCRE32
4443 cn32ptr = copynames;
4444 gn32ptr = getnames;
4445 #endif
4446 #ifdef SUPPORT_PCRE16
4447 cn16ptr = copynames16;
4448 gn16ptr = getnames16;
4449 #endif
4450 #ifdef SUPPORT_PCRE8
4451 cn8ptr = copynames8;
4452 gn8ptr = getnames8;
4453 #endif
4454
4455 SET_PCRE_CALLOUT(callout);
4456 first_callout = 1;
4457 last_callout_mark = NULL;
4458 callout_extra = 0;
4459 callout_count = 0;
4460 callout_fail_count = 999999;
4461 callout_fail_id = -1;
4462 show_malloc = 0;
4463 options = 0;
4464
4465 if (extra != NULL) extra->flags &=
4466 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4467
4468 len = 0;
4469 for (;;)
4470 {
4471 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4472 {
4473 if (len > 0) /* Reached EOF without hitting a newline */
4474 {
4475 fprintf(outfile, "\n");
4476 break;
4477 }
4478 done = 1;
4479 goto CONTINUE;
4480 }
4481 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4482 len = (int)strlen((char *)buffer);
4483 if (buffer[len-1] == '\n') break;
4484 }
4485
4486 while (len > 0 && isspace(buffer[len-1])) len--;
4487 buffer[len] = 0;
4488 if (len == 0) break;
4489
4490 p = buffer;
4491 while (isspace(*p)) p++;
4492
4493 #ifndef NOUTF
4494 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4495 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4496
4497 if (use_utf)
4498 {
4499 pcre_uint8 *q;
4500 pcre_uint32 cc;
4501 int n = 1;
4502
4503 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4504 if (n <= 0)
4505 {
4506 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4507 goto NEXT_DATA;
4508 }
4509 }
4510 #endif
4511
4512 #ifdef SUPPORT_VALGRIND
4513 /* Mark the dbuffer as addressable but undefined again. */
4514
4515 if (dbuffer != NULL)
4516 {
4517 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4518 }
4519 #endif
4520
4521 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4522 the number of pcre_uchar units that will be needed. */
4523
4524 while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4525 {
4526 dbuffer_size *= 2;
4527 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4528 if (dbuffer == NULL)
4529 {
4530 fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4531 exit(1);
4532 }
4533 }
4534
4535 #ifdef SUPPORT_PCRE8
4536 q8 = (pcre_uint8 *) dbuffer;
4537 #endif
4538 #ifdef SUPPORT_PCRE16
4539 q16 = (pcre_uint16 *) dbuffer;
4540 #endif
4541 #ifdef SUPPORT_PCRE32
4542 q32 = (pcre_uint32 *) dbuffer;
4543 #endif
4544
4545 while ((c = *p++) != 0)
4546 {
4547 int i = 0;
4548 int n = 0;
4549
4550 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4551 In non-UTF mode, allow the value of the byte to fall through to later,
4552 where values greater than 127 are turned into UTF-8 when running in
4553 16-bit or 32-bit mode. */
4554
4555 if (c != '\\')
4556 {
4557 #ifndef NOUTF
4558 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4559 #endif
4560 }
4561
4562 /* Handle backslash escapes */
4563
4564 else switch ((c = *p++))
4565 {
4566 case 'a': c = 7; break;
4567 case 'b': c = '\b'; break;
4568 case 'e': c = 27; break;
4569 case 'f': c = '\f'; break;
4570 case 'n': c = '\n'; break;
4571 case 'r': c = '\r'; break;
4572 case 't': c = '\t'; break;
4573 case 'v': c = '\v'; break;
4574
4575 case '0': case '1': case '2': case '3':
4576 case '4': case '5': case '6': case '7':
4577 c -= '0';
4578 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4579 c = c * 8 + *p++ - '0';
4580 break;
4581
4582 case 'o':
4583 if (*p == '{')
4584 {
4585 pcre_uint8 *pt = p;
4586 c = 0;
4587 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
4588 {
4589 if (++i == 12)
4590 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
4591 "using only the first twelve.\n");
4592 else c = c * 8 + *pt - '0';
4593 }
4594 if (*pt == '}') p = pt + 1;
4595 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
4596 }
4597 break;
4598
4599 case 'x':
4600 if (*p == '{')
4601 {
4602 pcre_uint8 *pt = p;
4603 c = 0;
4604
4605 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4606 when isxdigit() is a macro that refers to its argument more than
4607 once. This is banned by the C Standard, but apparently happens in at
4608 least one MacOS environment. */
4609
4610 for (pt++; isxdigit(*pt); pt++)
4611 {
4612 if (++i == 9)
4613 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4614 "using only the first eight.\n");
4615 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4616 }
4617 if (*pt == '}')
4618 {
4619 p = pt + 1;
4620 break;
4621 }
4622 /* Not correct form for \x{...}; fall through */
4623 }
4624
4625 /* \x without {} always defines just one byte in 8-bit mode. This
4626 allows UTF-8 characters to be constructed byte by byte, and also allows
4627 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4628 Otherwise, pass it down to later code so that it can be turned into
4629 UTF-8 when running in 16/32-bit mode. */
4630
4631 c = 0;
4632 while (i++ < 2 && isxdigit(*p))
4633 {
4634 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4635 p++;
4636 }
4637 #if !defined NOUTF && defined SUPPORT_PCRE8
4638 if (use_utf && (pcre_mode == PCRE8_MODE))
4639 {
4640 *q8++ = c;
4641 continue;
4642 }
4643 #endif
4644 break;
4645
4646 case 0: /* \ followed by EOF allows for an empty line */
4647 p--;
4648 continue;
4649
4650 case '>':
4651 if (*p == '-')
4652 {
4653 start_offset_sign = -1;
4654 p++;
4655 }
4656 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4657 start_offset *= start_offset_sign;
4658 continue;
4659
4660 case 'A': /* Option setting */
4661 options |= PCRE_ANCHORED;
4662 continue;
4663
4664 case 'B':
4665 options |= PCRE_NOTBOL;
4666 continue;
4667
4668 case 'C':
4669 if (isdigit(*p)) /* Set copy string */
4670 {
4671 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4672 copystrings |= 1 << n;
4673 }
4674 else if (isalnum(*p))
4675 {
4676 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4677 }
4678 else if (*p == '+')
4679 {
4680 callout_extra = 1;
4681 p++;
4682 }
4683 else if (*p == '-')
4684 {
4685 SET_PCRE_CALLOUT(NULL);
4686 p++;
4687 }
4688 else if (*p == '!')
4689 {
4690 callout_fail_id = 0;
4691 p++;
4692 while(isdigit(*p))
4693 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4694 callout_fail_count = 0;
4695 if (*p == '!')
4696 {
4697 p++;
4698 while(isdigit(*p))
4699 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4700 }
4701 }
4702 else if (*p == '*')
4703 {
4704 int sign = 1;
4705 callout_data = 0;
4706 if (*(++p) == '-') { sign = -1; p++; }
4707 while(isdigit(*p))
4708 callout_data = callout_data * 10 + *p++ - '0';
4709 callout_data *= sign;
4710 callout_data_set = 1;
4711 }
4712 continue;
4713
4714 #if !defined NODFA
4715 case 'D':
4716 #if !defined NOPOSIX
4717 if (posix || do_posix)
4718 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4719 else
4720 #endif
4721 use_dfa = 1;
4722 continue;
4723 #endif
4724
4725 #if !defined NODFA
4726 case 'F':
4727 options |= PCRE_DFA_SHORTEST;
4728 continue;
4729 #endif
4730
4731 case 'G':
4732 if (isdigit(*p))
4733 {
4734 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4735 getstrings |= 1 << n;
4736 }
4737 else if (isalnum(*p))
4738 {
4739 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4740 }
4741 continue;
4742
4743 case 'J':
4744 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4745 if (extra != NULL
4746 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4747 && extra->executable_jit != NULL)
4748 {
4749 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4750 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4751 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4752 }
4753 continue;
4754
4755 case 'L':
4756 getlist = 1;
4757 continue;
4758
4759 case 'M':
4760 find_match_limit = 1;
4761 continue;
4762
4763 case 'N':
4764 if ((options & PCRE_NOTEMPTY) != 0)
4765 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4766 else
4767 options |= PCRE_NOTEMPTY;
4768 continue;
4769
4770 case 'O':
4771 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4772 if (n > size_offsets_max)
4773 {
4774 size_offsets_max = n;
4775 free(offsets);
4776 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4777 if (offsets == NULL)
4778 {
4779 printf("** Failed to get %d bytes of memory for offsets vector\n",
4780 (int)(size_offsets_max * sizeof(int)));
4781 yield = 1;
4782 goto EXIT;
4783 }
4784 }
4785 use_size_offsets = n;
4786 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4787 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4788 continue;
4789
4790 case 'P':
4791 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4792 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4793 continue;
4794
4795 case 'Q':
4796 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4797 if (extra == NULL)
4798 {
4799 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4800 extra->flags = 0;
4801 }
4802 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4803 extra->match_limit_recursion = n;
4804 continue;
4805
4806 case 'q':
4807 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4808 if (extra == NULL)
4809 {
4810 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4811 extra->flags = 0;
4812 }
4813 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4814 extra->match_limit = n;
4815 continue;
4816
4817 #if !defined NODFA
4818 case 'R':
4819 options |= PCRE_DFA_RESTART;
4820 continue;
4821 #endif
4822
4823 case 'S':
4824 show_malloc = 1;
4825 continue;
4826
4827 case 'Y':
4828 options |= PCRE_NO_START_OPTIMIZE;
4829 continue;
4830
4831 case 'Z':
4832 options |= PCRE_NOTEOL;
4833 continue;
4834
4835 case '?':
4836 options |= PCRE_NO_UTF8_CHECK;
4837 continue;
4838
4839 case '<':
4840 {
4841 int x = check_mc_option(p, outfile, TRUE, "escape sequence");
4842 if (x == 0) goto NEXT_DATA;
4843 options |= x;
4844 while (*p++ != '>');
4845 }
4846 continue;
4847 }
4848
4849 /* We now have a character value in c that may be greater than 255.
4850 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4851 than 127 in UTF mode must have come from \x{...} or octal constructs
4852 because values from \x.. get this far only in non-UTF mode. */
4853
4854 #ifdef SUPPORT_PCRE8
4855 if (pcre_mode == PCRE8_MODE)
4856 {
4857 #ifndef NOUTF
4858 if (use_utf)
4859 {
4860 if (c > 0x7fffffff)
4861 {
4862 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4863 "and so cannot be converted to UTF-8\n", c);
4864 goto NEXT_DATA;
4865 }
4866 q8 += ord2utf8(c, q8);
4867 }
4868 else
4869 #endif
4870 {
4871 if (c > 0xffu)
4872 {
4873 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4874 "and UTF-8 mode is not enabled.\n", c);
4875 fprintf(outfile, "** Truncation will probably give the wrong "
4876 "result.\n");
4877 }
4878 *q8++ = c;
4879 }
4880 }
4881 #endif
4882 #ifdef SUPPORT_PCRE16
4883 if (pcre_mode == PCRE16_MODE)
4884 {
4885 #ifndef NOUTF
4886 if (use_utf)
4887 {
4888 if (c > 0x10ffffu)
4889 {
4890 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4891 "0x10ffff and so cannot be converted to UTF-16\n", c);
4892 goto NEXT_DATA;
4893 }
4894 else if (c >= 0x10000u)
4895 {
4896 c-= 0x10000u;
4897 *q16++ = 0xD800 | (c >> 10);
4898 *q16++ = 0xDC00 | (c & 0x3ff);
4899 }
4900 else
4901 *q16++ = c;
4902 }
4903 else
4904 #endif
4905 {
4906 if (c > 0xffffu)
4907 {
4908 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4909 "and UTF-16 mode is not enabled.\n", c);
4910 fprintf(outfile, "** Truncation will probably give the wrong "
4911 "result.\n");
4912 }
4913
4914 *q16++ = c;
4915 }
4916 }
4917 #endif
4918 #ifdef SUPPORT_PCRE32
4919 if (pcre_mode == PCRE32_MODE)
4920 {
4921 *q32++ = c;
4922 }
4923 #endif
4924
4925 }
4926
4927 /* Reached end of subject string */
4928
4929 #ifdef SUPPORT_PCRE8
4930 if (pcre_mode == PCRE8_MODE)
4931 {
4932 *q8 = 0;
4933 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4934 }
4935 #endif
4936 #ifdef SUPPORT_PCRE16
4937 if (pcre_mode == PCRE16_MODE)
4938 {
4939 *q16 = 0;
4940 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4941 }
4942 #endif
4943 #ifdef SUPPORT_PCRE32
4944 if (pcre_mode == PCRE32_MODE)
4945 {
4946 *q32 = 0;
4947 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4948 }
4949 #endif
4950
4951 /* If we're compiling with explicit valgrind support, mark the data from after
4952 its end to the end of the buffer as unaddressable, so that a read over the end
4953 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4954 If we're not building with valgrind support, move the data to the end of the
4955 buffer instead, so that a read over the end might at least cause a crash.
4956 If we are using the POSIX interface, we must include the terminating zero. */
4957
4958 bptr = dbuffer;
4959
4960 #if !defined NOPOSIX
4961 if (posix || do_posix)
4962 {
4963 #ifdef SUPPORT_VALGRIND
4964 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4965 #else
4966 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4967 bptr += dbuffer_size - len - 1;
4968 #endif
4969 }
4970 else
4971 #endif
4972 {
4973 #ifdef SUPPORT_VALGRIND
4974 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4975 #else
4976 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4977 #endif
4978 }
4979
4980 if ((all_use_dfa || use_dfa) && find_match_limit)
4981 {
4982 printf("**Match limit not relevant for DFA matching: ignored\n");
4983 find_match_limit = 0;
4984 }
4985
4986 /* Handle matching via the POSIX interface, which does not
4987 support timing or playing with the match limit or callout data. */
4988
4989 #if !defined NOPOSIX
4990 if (posix || do_posix)
4991 {
4992 int rc;
4993 int eflags = 0;
4994 regmatch_t *pmatch = NULL;
4995 if (use_size_offsets > 0)
4996 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4997 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4998 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4999 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5000
5001 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5002
5003 if (rc != 0)
5004 {
5005 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5006 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5007 }
5008 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5009 {
5010 fprintf(outfile, "Matched with REG_NOSUB\n");
5011 }
5012 else
5013 {
5014 size_t i;
5015 for (i = 0; i < (size_t)use_size_offsets; i++)
5016 {
5017 if (pmatch[i].rm_so >= 0)
5018 {
5019 fprintf(outfile, "%2d: ", (int)i);
5020 PCHARSV(dbuffer, pmatch[i].rm_so,
5021 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5022 fprintf(outfile, "\n");
5023 if (do_showcaprest || (i == 0 && do_showrest))
5024 {
5025 fprintf(outfile, "%2d+ ", (int)i);
5026 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5027 outfile);
5028 fprintf(outfile, "\n");
5029 }
5030 }
5031 }
5032 }
5033 free(pmatch);
5034 goto NEXT_DATA;
5035 }
5036
5037 #endif /* !defined NOPOSIX */
5038
5039 /* Handle matching via the native interface - repeats for /g and /G */
5040
5041 /* Ensure that there is a JIT callback if we want to verify that JIT was
5042 actually used. If jit_stack == NULL, no stack has yet been assigned. */
5043
5044 if (verify_jit && jit_stack == NULL && extra != NULL)
5045 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5046
5047 for (;; gmatched++) /* Loop for /g or /G */
5048 {
5049 markptr = NULL;
5050 jit_was_used = FALSE;
5051
5052 if (timeitm > 0)
5053 {
5054 register int i;
5055 clock_t time_taken;
5056 clock_t start_time = clock();
5057
5058 #if !defined NODFA
5059 if (all_use_dfa || use_dfa)
5060 {
5061 if ((options & PCRE_DFA_RESTART) != 0)
5062 {
5063 fprintf(outfile, "Timing DFA restarts is not supported\n");
5064 break;
5065 }
5066 if (dfa_workspace == NULL)
5067 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5068 for (i = 0; i < timeitm; i++)
5069 {
5070 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5071 (options | g_notempty), use_offsets, use_size_offsets,
5072 dfa_workspace, DFA_WS_DIMENSION);
5073 }
5074 }
5075 else
5076 #endif
5077
5078 for (i = 0; i < timeitm; i++)
5079 {
5080 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5081 (options | g_notempty), use_offsets, use_size_offsets);
5082 }
5083 total_match_time += (time_taken = clock() - start_time);
5084 fprintf(outfile, "Execute time %.4f milliseconds\n",
5085 (((double)time_taken * 1000.0) / (double)timeitm) /
5086 (double)CLOCKS_PER_SEC);
5087 }
5088
5089 /* If find_match_limit is set, we want to do repeated matches with
5090 varying limits in order to find the minimum value for the match limit and
5091 for the recursion limit. The match limits are relevant only to the normal
5092 running of pcre_exec(), so disable the JIT optimization. This makes it
5093 possible to run the same set of tests with and without JIT externally
5094 requested. */
5095
5096 if (find_match_limit)
5097 {
5098 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5099 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5100 extra->flags = 0;
5101
5102 (void)check_match_limit(re, extra, bptr, len, start_offset,
5103 options|g_notempty, use_offsets, use_size_offsets,
5104 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5105 PCRE_ERROR_MATCHLIMIT, "match()");
5106
5107 count = check_match_limit(re, extra, bptr, len, start_offset,
5108 options|g_notempty, use_offsets, use_size_offsets,
5109 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5110 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5111 }
5112
5113 /* If callout_data is set, use the interface with additional data */
5114
5115 else if (callout_data_set)
5116 {
5117 if (extra == NULL)
5118 {
5119 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5120 extra->flags = 0;
5121 }
5122 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5123 extra->callout_data = &callout_data;
5124 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5125 options | g_notempty, use_offsets, use_size_offsets);
5126 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5127 }
5128
5129 /* The normal case is just to do the match once, with the default
5130 value of match_limit. */
5131
5132 #if !defined NODFA
5133 else if (all_use_dfa || use_dfa)
5134 {
5135 if (dfa_workspace == NULL)
5136 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5137 if (dfa_matched++ == 0)
5138 dfa_workspace[0] = -1; /* To catch bad restart */
5139 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5140 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5141 DFA_WS_DIMENSION);
5142 if (count == 0)
5143 {
5144 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5145 count = use_size_offsets/2;
5146 }
5147 }
5148 #endif
5149
5150 else
5151 {
5152 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5153 options | g_notempty, use_offsets, use_size_offsets);
5154 if (count == 0)
5155 {
5156 fprintf(outfile, "Matched, but too many substrings\n");
5157 /* 2 is a special case; match can be returned */
5158 count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5159 }
5160 }
5161
5162 /* Matched */
5163
5164 if (count >= 0)
5165 {
5166 int i, maxcount;
5167 void *cnptr, *gnptr;
5168
5169 #if !defined NODFA
5170 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5171 #endif
5172 /* 2 is a special case; match can be returned */
5173 maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5174
5175 /* This is a check against a lunatic return value. */
5176
5177 if (count > maxcount)
5178 {
5179 fprintf(outfile,
5180 "** PCRE error: returned count %d is too big for offset size %d\n",
5181 count, use_size_offsets);
5182 count = use_size_offsets/3;
5183 if (do_g || do_G)
5184 {
5185 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5186 do_g = do_G = FALSE; /* Break g/G loop */
5187 }
5188 }
5189
5190 /* do_allcaps requests showing of all captures in the pattern, to check
5191 unset ones at the end. */
5192
5193 if (do_allcaps)
5194 {
5195 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5196 goto SKIP_DATA;
5197 count++; /* Allow for full match */
5198 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5199 }
5200
5201 /* Output the captured substrings. Note that, for the matched string,
5202 the use of \K in an assertion can make the start later than the end. */
5203
5204 for (i = 0; i < count * 2; i += 2)
5205 {
5206 if (use_offsets[i] < 0)
5207 {
5208 if (use_offsets[i] != -1)
5209 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5210 use_offsets[i], i);
5211 if (use_offsets[i+1] != -1)
5212 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5213 use_offsets[i+1], i+1);
5214 fprintf(outfile, "%2d: <unset>\n", i/2);
5215 }
5216 else
5217 {
5218 int start = use_offsets[i];
5219 int end = use_offsets[i+1];
5220
5221 if (start > end)
5222 {
5223 start = use_offsets[i+1];
5224 end = use_offsets[i];
5225 fprintf(outfile, "Start of matched string is beyond its end - "
5226 "displaying from end to start.\n");
5227 }
5228
5229 fprintf(outfile, "%2d: ", i/2);
5230 PCHARSV(bptr, start, end - start, outfile);
5231 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5232 fprintf(outfile, "\n");
5233
5234 /* Note: don't use the start/end variables here because we want to
5235 show the text from what is reported as the end. */
5236
5237 if (do_showcaprest || (i == 0 && do_showrest))
5238 {
5239 fprintf(outfile, "%2d+ ", i/2);
5240 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5241 outfile);
5242 fprintf(outfile, "\n");
5243 }
5244 }
5245 }
5246
5247 if (markptr != NULL)
5248 {
5249 fprintf(outfile, "MK: ");
5250 PCHARSV(markptr, 0, -1, outfile);
5251 fprintf(outfile, "\n");
5252 }
5253
5254 for (i = 0; i < 32; i++)
5255 {
5256 if ((copystrings & (1 << i)) != 0)
5257 {
5258 int rc;
5259 char copybuffer[256];
5260 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5261 copybuffer, sizeof(copybuffer));
5262 if (rc < 0)
5263 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5264 else
5265 {
5266 fprintf(outfile, "%2dC ", i);
5267 PCHARSV(copybuffer, 0, rc, outfile);
5268 fprintf(outfile, " (%d)\n", rc);
5269 }
5270 }
5271 }
5272
5273 cnptr = copynames;
5274 for (;;)
5275 {
5276 int rc;
5277 char copybuffer[256];
5278
5279 #ifdef SUPPORT_PCRE32
5280 if (pcre_mode == PCRE32_MODE)
5281 {
5282 if (*(pcre_uint32 *)cnptr == 0) break;
5283 }
5284 #endif
5285 #ifdef SUPPORT_PCRE16
5286 if (pcre_mode == PCRE16_MODE)
5287 {
5288 if (*(pcre_uint16 *)cnptr == 0) break;
5289 }
5290 #endif
5291 #ifdef SUPPORT_PCRE8
5292 if (pcre_mode == PCRE8_MODE)
5293 {
5294 if (*(pcre_uint8 *)cnptr == 0) break;
5295 }
5296 #endif
5297
5298 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5299 cnptr, copybuffer, sizeof(copybuffer));
5300
5301 if (rc < 0)
5302 {
5303 fprintf(outfile, "copy substring ");
5304 PCHARSV(cnptr, 0, -1, outfile);
5305 fprintf(outfile, " failed %d\n", rc);
5306 }
5307 else
5308 {
5309 fprintf(outfile, " C ");
5310 PCHARSV(copybuffer, 0, rc, outfile);
5311 fprintf(outfile, " (%d) ", rc);
5312 PCHARSV(cnptr, 0, -1, outfile);
5313 putc('\n', outfile);
5314 }
5315
5316 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5317 }
5318
5319 for (i = 0; i < 32; i++)
5320 {
5321 if ((getstrings & (1 << i)) != 0)
5322 {
5323 int rc;
5324 const char *substring;
5325 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5326 if (rc < 0)
5327 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5328 else
5329 {
5330 fprintf(outfile, "%2dG ", i);
5331 PCHARSV(substring, 0, rc, outfile);
5332 fprintf(outfile, " (%d)\n", rc);
5333 PCRE_FREE_SUBSTRING(substring);
5334 }
5335 }
5336 }
5337
5338 gnptr = getnames;
5339 for (;;)
5340 {
5341 int rc;
5342 const char *substring;
5343
5344 #ifdef SUPPORT_PCRE32
5345 if (pcre_mode == PCRE32_MODE)
5346 {
5347 if (*(pcre_uint32 *)gnptr == 0) break;
5348 }
5349 #endif
5350 #ifdef SUPPORT_PCRE16
5351 if (pcre_mode == PCRE16_MODE)
5352 {
5353 if (*(pcre_uint16 *)gnptr == 0) break;
5354 }
5355 #endif
5356 #ifdef SUPPORT_PCRE8
5357 if (pcre_mode == PCRE8_MODE)
5358 {
5359 if (*(pcre_uint8 *)gnptr == 0) break;
5360 }
5361 #endif
5362
5363 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5364 gnptr, &substring);
5365 if (rc < 0)
5366 {
5367 fprintf(outfile, "get substring ");
5368 PCHARSV(gnptr, 0, -1, outfile);
5369 fprintf(outfile, " failed %d\n", rc);
5370 }
5371 else
5372 {
5373 fprintf(outfile, " G ");
5374 PCHARSV(substring, 0, rc, outfile);
5375 fprintf(outfile, " (%d) ", rc);
5376 PCHARSV(gnptr, 0, -1, outfile);
5377 PCRE_FREE_SUBSTRING(substring);
5378 putc('\n', outfile);
5379 }
5380
5381 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5382 }
5383
5384 if (getlist)
5385 {
5386 int rc;
5387 const char **stringlist;
5388 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5389 if (rc < 0)
5390 fprintf(outfile, "get substring list failed %d\n", rc);
5391 else
5392 {
5393 for (i = 0; i < count; i++)
5394 {
5395 fprintf(outfile, "%2dL ", i);
5396 PCHARSV(stringlist[i], 0, -1, outfile);
5397 putc('\n', outfile);
5398 }
5399 if (stringlist[i] != NULL)
5400 fprintf(outfile, "string list not terminated by NULL\n");
5401 PCRE_FREE_SUBSTRING_LIST(stringlist);
5402 }
5403 }
5404 }
5405
5406 /* There was a partial match. If the bumpalong point is not the same as
5407 the first inspected character, show the offset explicitly. */
5408
5409 else if (count == PCRE_ERROR_PARTIAL)
5410 {
5411 fprintf(outfile, "Partial match");
5412 if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5413 fprintf(outfile, " at offset %d", use_offsets[2]);
5414 if (markptr != NULL)
5415 {
5416 fprintf(outfile, ", mark=");
5417 PCHARSV(markptr, 0, -1, outfile);
5418 }
5419 if (use_size_offsets > 1)
5420 {
5421 fprintf(outfile, ": ");
5422 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5423 outfile);
5424 }
5425 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5426 fprintf(outfile, "\n");
5427 break; /* Out of the /g loop */
5428 }
5429
5430 /* Failed to match. If this is a /g or /G loop and we previously set
5431 g_notempty after a null match, this is not necessarily the end. We want
5432 to advance the start offset, and continue. We won't be at the end of the
5433 string - that was checked before setting g_notempty.
5434
5435 Complication arises in the case when the newline convention is "any",
5436 "crlf", or "anycrlf". If the previous match was at the end of a line
5437 terminated by CRLF, an advance of one character just passes the \r,
5438 whereas we should prefer the longer newline sequence, as does the code in
5439 pcre_exec(). Fudge the offset value to achieve this. We check for a
5440 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5441 find the default.
5442
5443 Otherwise, in the case of UTF-8 matching, the advance must be one
5444 character, not one byte. */
5445
5446 else
5447 {
5448 if (g_notempty != 0)
5449 {
5450 int onechar = 1;
5451 unsigned int obits = REAL_PCRE_OPTIONS(re);
5452 use_offsets[0] = start_offset;
5453 if ((obits & PCRE_NEWLINE_BITS) == 0)
5454 {
5455 int d;
5456 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5457 /* Note that these values are always the ASCII ones, even in
5458 EBCDIC environments. CR = 13, NL = 10. */
5459 obits = (d == 13)? PCRE_NEWLINE_CR :
5460 (d == 10)? PCRE_NEWLINE_LF :
5461 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5462 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5463 (d == -1)? PCRE_NEWLINE_ANY : 0;
5464 }
5465 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5466 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5467 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5468 &&
5469 start_offset < len - 1 && (
5470 #ifdef SUPPORT_PCRE8
5471 (pcre_mode == PCRE8_MODE &&
5472 bptr[start_offset] == '\r' &&
5473 bptr[start_offset + 1] == '\n') ||
5474 #endif
5475 #ifdef SUPPORT_PCRE16
5476 (pcre_mode == PCRE16_MODE &&
5477 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5478 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5479 #endif
5480 #ifdef SUPPORT_PCRE32
5481 (pcre_mode == PCRE32_MODE &&
5482 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5483 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5484 #endif
5485 0))
5486 onechar++;
5487 else if (use_utf)
5488 {
5489 while (start_offset + onechar < len)
5490 {
5491 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5492 onechar++;
5493 }
5494 }
5495 use_offsets[1] = start_offset + onechar;
5496 }
5497 else
5498 {
5499 switch(count)
5500 {
5501 case PCRE_ERROR_NOMATCH:
5502 if (gmatched == 0)
5503 {
5504 if (markptr == NULL)
5505 {
5506 fprintf(outfile, "No match");
5507 }
5508 else
5509 {
5510 fprintf(outfile, "No match, mark = ");
5511 PCHARSV(markptr, 0, -1, outfile);
5512 }
5513 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5514 putc('\n', outfile);
5515 }
5516 break;
5517
5518 case PCRE_ERROR_BADUTF8:
5519 case PCRE_ERROR_SHORTUTF8:
5520 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5521 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5522 8 * CHAR_SIZE);
5523 if (use_size_offsets >= 2)
5524 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5525 use_offsets[1]);
5526 fprintf(outfile, "\n");
5527 break;
5528
5529 case PCRE_ERROR_BADUTF8_OFFSET:
5530 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5531 8 * CHAR_SIZE);
5532 break;
5533
5534 default:
5535 if (count < 0 &&
5536 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5537 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5538 else
5539 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5540 break;
5541 }
5542
5543 break; /* Out of the /g loop */
5544 }
5545 }
5546
5547 /* If not /g or /G we are done */
5548
5549 if (!do_g && !do_G) break;
5550
5551 /* If we have matched an empty string, first check to see if we are at
5552 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5553 Perl's /g option does. This turns out to be rather cunning. First we set
5554 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5555 same point. If this fails (picked up above) we advance to the next
5556 character. */
5557
5558 g_notempty = 0;
5559
5560 if (use_offsets[0] == use_offsets[1])
5561 {
5562 if (use_offsets[0] == len) break;
5563 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5564 }
5565
5566 /* For /g, update the start offset, leaving the rest alone */
5567
5568 if (do_g) start_offset = use_offsets[1];
5569
5570 /* For /G, update the pointer and length */
5571
5572 else
5573 {
5574 bptr += use_offsets[1] * CHAR_SIZE;
5575 len -= use_offsets[1];
5576 }
5577 } /* End of loop for /g and /G */
5578
5579 NEXT_DATA: continue;
5580 } /* End of loop for data lines */
5581
5582 CONTINUE:
5583
5584 #if !defined NOPOSIX
5585 if (posix || do_posix) regfree(&preg);
5586 #endif
5587
5588 if (re != NULL) new_free(re);
5589 if (extra != NULL)
5590 {
5591 PCRE_FREE_STUDY(extra);
5592 }
5593 if (locale_set)
5594 {
5595 new_free((void *)tables);
5596 setlocale(LC_CTYPE, "C");
5597 locale_set = 0;
5598 }
5599 if (jit_stack != NULL)
5600 {
5601 PCRE_JIT_STACK_FREE(jit_stack);
5602 jit_stack = NULL;
5603 }
5604 }
5605
5606 if (infile == stdin) fprintf(outfile, "\n");
5607
5608 if (showtotaltimes)
5609 {
5610 fprintf(outfile, "--------------------------------------\n");
5611 if (timeit > 0)
5612 {
5613 fprintf(outfile, "Total compile time %.4f milliseconds\n",
5614 (((double)total_compile_time * 1000.0) / (double)timeit) /
5615 (double)CLOCKS_PER_SEC);
5616 fprintf(outfile, "Total study time %.4f milliseconds\n",
5617 (((double)total_study_time * 1000.0) / (double)timeit) /
5618 (double)CLOCKS_PER_SEC);
5619 }
5620 fprintf(outfile, "Total execute time %.4f milliseconds\n",
5621 (((double)total_match_time * 1000.0) / (double)timeitm) /
5622 (double)CLOCKS_PER_SEC);
5623 }
5624
5625 EXIT:
5626
5627 if (infile != NULL && infile != stdin) fclose(infile);
5628 if (outfile != NULL && outfile != stdout) fclose(outfile);
5629
5630 free(buffer);
5631 free(dbuffer);
5632 free(pbuffer);
5633 free(offsets);
5634
5635 #ifdef SUPPORT_PCRE16
5636 if (buffer16 != NULL) free(buffer16);
5637 #endif
5638 #ifdef SUPPORT_PCRE32
5639 if (buffer32 != NULL) free(buffer32);
5640 #endif
5641
5642 #if !defined NODFA
5643 if (dfa_workspace != NULL)
5644 free(dfa_workspace);
5645 #endif
5646
5647 #if defined(__VMS)
5648 yield = SS$_NORMAL; /* Return values via DCL symbols */
5649 #endif
5650
5651 return yield;
5652 }
5653
5654 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5