/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1661 - (show annotations)
Wed Jul 6 14:55:40 2016 UTC (2 weeks, 3 days ago) by ph10
File MIME type: text/plain
File size: 173569 byte(s)
Error occurred while calculating annotation data.
Ignore "show all captures" after DFA matching.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #ifdef __VMS
125 #include <ssdef.h>
126 void vms_setsymbol( char *, char *, int );
127 #endif
128
129
130 #define PRIV(name) name
131
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
136
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
140
141 #include "pcre.h"
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
149 #ifdef SUPPORT_PCRE8
150 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152 #ifdef SUPPORT_PCRE16
153 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154 #endif
155 #ifdef SUPPORT_PCRE32
156 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157 #endif
158
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
162
163 #define PCRE_INCLUDED
164
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
167
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
173
174 #ifdef EBCDIC
175 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) /* fixed EBCDIC range; evaluates c twice */
176 #else
177 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) /* fixed range 32..126; evaluates c twice */
178 #endif
179
180 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c)) /* isprint() only when locale_set is non-zero, otherwise the fixed PRINTABLE range */
181
182 /* Posix support is disabled in 16 or 32 bit only mode. */
183 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184 #define NOPOSIX
185 #endif
186
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
189 Makefile. */
190
191 #if !defined NOPOSIX
192 #include "pcreposix.h"
193 #endif
194
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
199
200 #ifndef SUPPORT_UTF
201 #ifndef NOUTF
202 #define NOUTF
203 #endif
204 #endif
205
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208 only from one place and is handled differently). I couldn't dream up any way of
209 using a single macro to do this in a generic way, because of the many different
210 argument requirements. We know that at least one of SUPPORT_PCRE8, SUPPORT_PCRE16,
211 and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
212 use these in the definitions of generic macros.
213
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217 offset is in units of this size. If the string were given as base+offset in one
218 argument, the casting might be incorrectly applied. */
219
220 #ifdef SUPPORT_PCRE8
221
222 #define PCHARS8(lv, p, offset, len, f) \
223 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224
225 #define PCHARSV8(p, offset, len, f) \
226 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227
228 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229 p = read_capture_name8(p, cn8, re)
230
231 #define STRLEN8(p) ((int)strlen((char *)p))
232
233 #define SET_PCRE_CALLOUT8(callout) \
234 pcre_callout = callout
235
236 #define SET_PCRE_STACK_GUARD8(stack_guard) \
237 pcre_stack_guard = stack_guard
238
239 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
240 pcre_assign_jit_stack(extra, callback, userdata)
241
242 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
243 re = pcre_compile((char *)pat, options, error, erroffset, tables)
244
245 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
246 namesptr, cbuffer, size) \
247 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
248 (char *)namesptr, cbuffer, size)
249
250 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
251 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
252
253 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
254 offsets, size_offsets, workspace, size_workspace) \
255 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
256 offsets, size_offsets, workspace, size_workspace)
257
258 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets) \
260 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets)
262
263 #define PCRE_FREE_STUDY8(extra) \
264 pcre_free_study(extra)
265
266 #define PCRE_FREE_SUBSTRING8(substring) \
267 pcre_free_substring(substring)
268
269 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
270 pcre_free_substring_list(listptr)
271
272 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
273 getnamesptr, subsptr) \
274 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
275 (char *)getnamesptr, subsptr)
276 /* Sets n from pcre_get_stringnumber(). NOTE(review): the rc parameter is never used; the pattern comes from a variable named re at the expansion site - confirm against callers. */
277 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
278 n = pcre_get_stringnumber(re, (char *)ptr)
279
280 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
281 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
282
283 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
284 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
285
286 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
287 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
288
289 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
290 pcre_printint(re, outfile, debug_lengths)
291
292 #define PCRE_STUDY8(extra, re, options, error) \
293 extra = pcre_study(re, options, error)
294
295 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
296 pcre_jit_stack_alloc(startsize, maxsize)
297
298 #define PCRE_JIT_STACK_FREE8(stack) \
299 pcre_jit_stack_free(stack)
300
301 #define pcre8_maketables pcre_maketables
302
303 #endif /* SUPPORT_PCRE8 */
304
305 /* -----------------------------------------------------------*/
306
307 #ifdef SUPPORT_PCRE16
308
309 #define PCHARS16(lv, p, offset, len, f) \
310 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
311
312 #define PCHARSV16(p, offset, len, f) \
313 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
314
315 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
316 p = read_capture_name16(p, cn16, re)
317
318 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
319
320 #define SET_PCRE_CALLOUT16(callout) \
321 pcre16_callout = (int (*)(pcre16_callout_block *))callout
322
323 #define SET_PCRE_STACK_GUARD16(stack_guard) \
324 pcre16_stack_guard = (int (*)(void))stack_guard
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367 /* Sets n from pcre16_get_stringnumber(). NOTE(review): rc is never used and re is taken from the expansion site; unlike the other 16-bit macros here, re is passed without a (pcre16 *) cast - confirm against callers. */
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
403
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
406
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define SET_PCRE_STACK_GUARD32(stack_guard) \
416 pcre32_stack_guard = (int (*)(void))stack_guard
417
418 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
419 pcre32_assign_jit_stack((pcre32_extra *)extra, \
420 (pcre32_jit_callback)callback, userdata)
421
422 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
423 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
424 tables)
425 /* 32-bit mode: copy a named substring into cbuffer and store the result in rc. size is taken as a byte count (the 8-bit macro passes it unchanged, the 16-bit one halves it), so it is divided by 4 to give the buffer length in 32-bit units. */
426 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
427 namesptr, cbuffer, size) \
428 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
429 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
430 /* 32-bit mode: copy captured substring i into cbuffer and store the result in rc. size is taken as a byte count (the 8-bit macro passes it unchanged, the 16-bit one halves it), so it is divided by 4 to give the buffer length in 32-bit units. */
431 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
432 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
433 (PCRE_UCHAR32 *)cbuffer, (size)/4)
434
435 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
436 offsets, size_offsets, workspace, size_workspace) \
437 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
438 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
439 workspace, size_workspace)
440
441 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
442 offsets, size_offsets) \
443 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
444 len, start_offset, options, offsets, size_offsets)
445
446 #define PCRE_FREE_STUDY32(extra) \
447 pcre32_free_study((pcre32_extra *)extra)
448
449 #define PCRE_FREE_SUBSTRING32(substring) \
450 pcre32_free_substring((PCRE_SPTR32)substring)
451
452 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
453 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
454
455 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
456 getnamesptr, subsptr) \
457 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
458 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
459 /* Sets n from pcre32_get_stringnumber(). NOTE(review): rc is never used and re is taken from the expansion site; unlike the other 32-bit macros here, re is passed without a (pcre32 *) cast - confirm against callers. */
460 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
461 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
462
463 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
464 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
465 (PCRE_SPTR32 *)(void*)subsptr)
466
467 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
468 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
469 (PCRE_SPTR32 **)(void*)listptr)
470
471 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
472 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
473 tables)
474
475 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
476 pcre32_printint(re, outfile, debug_lengths)
477
478 #define PCRE_STUDY32(extra, re, options, error) \
479 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
480
481 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
482 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
483
484 #define PCRE_JIT_STACK_FREE32(stack) \
485 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
486
487 #endif /* SUPPORT_PCRE32 */
488
489
490 /* ----- More than one mode is supported; a runtime test is needed, except for
491 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 available version is called. ----- */
493
494 enum { /* values compared against pcre_mode; CHAR_SIZE (defined below when two or more modes are built) is 1 << pcre_mode, so the order matters */
495 PCRE8_MODE, /* 0: CHAR_SIZE gives 1 */
496 PCRE16_MODE, /* 1: CHAR_SIZE gives 2 */
497 PCRE32_MODE /* 2: CHAR_SIZE gives 4 */
498 };
499
500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501 defined (SUPPORT_PCRE32)) >= 2
502
503 #define CHAR_SIZE (1 << pcre_mode)
504
505 /* There doesn't seem to be an easy way of writing these macros that can cope
506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507 cases separately. */
508
509 /* ----- All three modes supported ----- */
510
511 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
512
513 #define PCHARS(lv, p, offset, len, f) \
514 if (pcre_mode == PCRE32_MODE) \
515 PCHARS32(lv, p, offset, len, f); \
516 else if (pcre_mode == PCRE16_MODE) \
517 PCHARS16(lv, p, offset, len, f); \
518 else \
519 PCHARS8(lv, p, offset, len, f)
520
521 #define PCHARSV(p, offset, len, f) \
522 if (pcre_mode == PCRE32_MODE) \
523 PCHARSV32(p, offset, len, f); \
524 else if (pcre_mode == PCRE16_MODE) \
525 PCHARSV16(p, offset, len, f); \
526 else \
527 PCHARSV8(p, offset, len, f)
528
529 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
530 if (pcre_mode == PCRE32_MODE) \
531 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
532 else if (pcre_mode == PCRE16_MODE) \
533 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
534 else \
535 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
536
537 #define SET_PCRE_CALLOUT(callout) \
538 if (pcre_mode == PCRE32_MODE) \
539 SET_PCRE_CALLOUT32(callout); \
540 else if (pcre_mode == PCRE16_MODE) \
541 SET_PCRE_CALLOUT16(callout); \
542 else \
543 SET_PCRE_CALLOUT8(callout)
544
545 #define SET_PCRE_STACK_GUARD(stack_guard) \
546 if (pcre_mode == PCRE32_MODE) \
547 SET_PCRE_STACK_GUARD32(stack_guard); \
548 else if (pcre_mode == PCRE16_MODE) \
549 SET_PCRE_STACK_GUARD16(stack_guard); \
550 else \
551 SET_PCRE_STACK_GUARD8(stack_guard)
552
553 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
554
555 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
556 if (pcre_mode == PCRE32_MODE) \
557 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
558 else if (pcre_mode == PCRE16_MODE) \
559 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
560 else \
561 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
562
563 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
564 if (pcre_mode == PCRE32_MODE) \
565 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
566 else if (pcre_mode == PCRE16_MODE) \
567 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
568 else \
569 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
570
571 #define PCRE_CONFIG pcre_config
572
573 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
574 namesptr, cbuffer, size) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
577 namesptr, cbuffer, size); \
578 else if (pcre_mode == PCRE16_MODE) \
579 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
580 namesptr, cbuffer, size); \
581 else \
582 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
583 namesptr, cbuffer, size)
584
585 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
586 if (pcre_mode == PCRE32_MODE) \
587 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
588 else if (pcre_mode == PCRE16_MODE) \
589 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
590 else \
591 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
592
593 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
594 offsets, size_offsets, workspace, size_workspace) \
595 if (pcre_mode == PCRE32_MODE) \
596 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
597 offsets, size_offsets, workspace, size_workspace); \
598 else if (pcre_mode == PCRE16_MODE) \
599 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
600 offsets, size_offsets, workspace, size_workspace); \
601 else \
602 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
603 offsets, size_offsets, workspace, size_workspace)
604
605 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
606 offsets, size_offsets) \
607 if (pcre_mode == PCRE32_MODE) \
608 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
609 offsets, size_offsets); \
610 else if (pcre_mode == PCRE16_MODE) \
611 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
612 offsets, size_offsets); \
613 else \
614 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
615 offsets, size_offsets)
616
617 #define PCRE_FREE_STUDY(extra) \
618 if (pcre_mode == PCRE32_MODE) \
619 PCRE_FREE_STUDY32(extra); \
620 else if (pcre_mode == PCRE16_MODE) \
621 PCRE_FREE_STUDY16(extra); \
622 else \
623 PCRE_FREE_STUDY8(extra)
624
625 #define PCRE_FREE_SUBSTRING(substring) \
626 if (pcre_mode == PCRE32_MODE) \
627 PCRE_FREE_SUBSTRING32(substring); \
628 else if (pcre_mode == PCRE16_MODE) \
629 PCRE_FREE_SUBSTRING16(substring); \
630 else \
631 PCRE_FREE_SUBSTRING8(substring)
632
633 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_FREE_SUBSTRING_LIST32(listptr); \
636 else if (pcre_mode == PCRE16_MODE) \
637 PCRE_FREE_SUBSTRING_LIST16(listptr); \
638 else \
639 PCRE_FREE_SUBSTRING_LIST8(listptr)
640
641 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
642 getnamesptr, subsptr) \
643 if (pcre_mode == PCRE32_MODE) \
644 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
645 getnamesptr, subsptr); \
646 else if (pcre_mode == PCRE16_MODE) \
647 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
648 getnamesptr, subsptr); \
649 else \
650 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
651 getnamesptr, subsptr)
652
653 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
654 if (pcre_mode == PCRE32_MODE) \
655 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
656 else if (pcre_mode == PCRE16_MODE) \
657 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
658 else \
659 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
660
661 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
662 if (pcre_mode == PCRE32_MODE) \
663 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
664 else if (pcre_mode == PCRE16_MODE) \
665 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
666 else \
667 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
668
669 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
670 if (pcre_mode == PCRE32_MODE) \
671 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
672 else if (pcre_mode == PCRE16_MODE) \
673 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
674 else \
675 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
676
677 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
678 (pcre_mode == PCRE32_MODE ? \
679 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
680 : pcre_mode == PCRE16_MODE ? \
681 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
682 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
683
684 #define PCRE_JIT_STACK_FREE(stack) \
685 if (pcre_mode == PCRE32_MODE) \
686 PCRE_JIT_STACK_FREE32(stack); \
687 else if (pcre_mode == PCRE16_MODE) \
688 PCRE_JIT_STACK_FREE16(stack); \
689 else \
690 PCRE_JIT_STACK_FREE8(stack)
691
692 #define PCRE_MAKETABLES \
693 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
694
695 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
696 if (pcre_mode == PCRE32_MODE) \
697 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
698 else if (pcre_mode == PCRE16_MODE) \
699 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
700 else \
701 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
702
703 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
704 if (pcre_mode == PCRE32_MODE) \
705 PCRE_PRINTINT32(re, outfile, debug_lengths); \
706 else if (pcre_mode == PCRE16_MODE) \
707 PCRE_PRINTINT16(re, outfile, debug_lengths); \
708 else \
709 PCRE_PRINTINT8(re, outfile, debug_lengths)
710
711 #define PCRE_STUDY(extra, re, options, error) \
712 if (pcre_mode == PCRE32_MODE) \
713 PCRE_STUDY32(extra, re, options, error); \
714 else if (pcre_mode == PCRE16_MODE) \
715 PCRE_STUDY16(extra, re, options, error); \
716 else \
717 PCRE_STUDY8(extra, re, options, error)
718
719
720 /* ----- Two out of three modes are supported ----- */
721
722 #else
723
724 /* We can use some macro trickery to make a single set of definitions work in
725 the three different cases. */
726
727 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
728
729 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
730 #define BITONE 32
731 #define BITTWO 16
732
733 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
734
735 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
736 #define BITONE 32
737 #define BITTWO 8
738
739 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
740
741 #else
742 #define BITONE 16
743 #define BITTWO 8
744 #endif
745
746 #define glue(a,b) a##b /* pastes the two arguments exactly as written */
747 #define G(a,b) glue(a,b) /* extra level so that macro arguments such as BITONE are expanded before pasting */
748
749
750 /* ----- Common macros for two-mode cases ----- */
751
752 #define PCHARS(lv, p, offset, len, f) \
753 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
754 G(PCHARS,BITONE)(lv, p, offset, len, f); \
755 else \
756 G(PCHARS,BITTWO)(lv, p, offset, len, f)
757
758 #define PCHARSV(p, offset, len, f) \
759 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
760 G(PCHARSV,BITONE)(p, offset, len, f); \
761 else \
762 G(PCHARSV,BITTWO)(p, offset, len, f)
763
764 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
765 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
766 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
767 else \
768 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
769
770 #define SET_PCRE_CALLOUT(callout) \
771 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
772 G(SET_PCRE_CALLOUT,BITONE)(callout); \
773 else \
774 G(SET_PCRE_CALLOUT,BITTWO)(callout)
775
776 #define SET_PCRE_STACK_GUARD(stack_guard) \
777 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
778 G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
779 else \
780 G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
781
782 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
783 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
784
785 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
786 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
788 else \
789 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
790
791 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
792 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
793 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
794 else \
795 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
796
797 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
798
799 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
800 namesptr, cbuffer, size) \
801 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
802 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
803 namesptr, cbuffer, size); \
804 else \
805 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
806 namesptr, cbuffer, size)
807
808 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
809 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
810 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
811 else \
812 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
813
/* Two-mode dispatch on pcre_mode to PCRE_DFA_EXEC<BITONE> or
PCRE_DFA_EXEC<BITTWO>. Wrapped in do/while(0) so that the expansion is a single
statement wherever it is used. */
#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } \
  while (0)
822
/* Two-mode dispatch on pcre_mode to PCRE_EXEC<BITONE> or PCRE_EXEC<BITTWO>.
Wrapped in do/while(0) so that the expansion is a single statement wherever it
is used. */
#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  while (0)
831
/* Two-mode dispatch on pcre_mode to PCRE_FREE_STUDY<BITONE> or
PCRE_FREE_STUDY<BITTWO>. Wrapped in do/while(0) so that the expansion is a
single statement wherever it is used. */
#define PCRE_FREE_STUDY(extra) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_STUDY,BITONE)(extra); \
    else \
      G(PCRE_FREE_STUDY,BITTWO)(extra); \
    } \
  while (0)
837
/* Two-mode dispatch on pcre_mode to PCRE_FREE_SUBSTRING<BITONE> or
PCRE_FREE_SUBSTRING<BITTWO>. Wrapped in do/while(0) so that the expansion is a
single statement wherever it is used. */
#define PCRE_FREE_SUBSTRING(substring) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
    else \
      G(PCRE_FREE_SUBSTRING,BITTWO)(substring); \
    } \
  while (0)
843
/* Two-mode dispatch on pcre_mode to PCRE_FREE_SUBSTRING_LIST<BITONE> or
PCRE_FREE_SUBSTRING_LIST<BITTWO>. Wrapped in do/while(0) so that the expansion
is a single statement wherever it is used. */
#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
    else \
      G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr); \
    } \
  while (0)
849
/* Two-mode dispatch on pcre_mode to PCRE_GET_NAMED_SUBSTRING<BITONE> or
PCRE_GET_NAMED_SUBSTRING<BITTWO>. Wrapped in do/while(0) so that the expansion
is a single statement wherever it is used. */
#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  while (0)
858
/* Two-mode dispatch on pcre_mode to PCRE_GET_STRINGNUMBER<BITONE> or
PCRE_GET_STRINGNUMBER<BITTWO>. Wrapped in do/while(0) so that the expansion is
a single statement wherever it is used. */
#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
    else \
      G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr); \
    } \
  while (0)
864
/* Two-mode dispatch on pcre_mode to PCRE_GET_SUBSTRING<BITONE> or
PCRE_GET_SUBSTRING<BITTWO>. Wrapped in do/while(0) so that the expansion is a
single statement wherever it is used. */
#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  while (0)
870
/* Two-mode dispatch on pcre_mode to PCRE_GET_SUBSTRING_LIST<BITONE> or
PCRE_GET_SUBSTRING_LIST<BITTWO>. Wrapped in do/while(0) so that the expansion
is a single statement wherever it is used. */
#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
    else \
      G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr); \
    } \
  while (0)
876
/* Two-mode dispatch on pcre_mode to PCRE_JIT_STACK_ALLOC<BITONE> or
PCRE_JIT_STACK_ALLOC<BITTWO>. This is a value-yielding macro, so the whole
conditional expression is parenthesized; without the outer parentheses any
neighbouring operator that binds more tightly than ?: would split it. */
#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
881
/* Two-mode dispatch on pcre_mode to PCRE_JIT_STACK_FREE<BITONE> or
PCRE_JIT_STACK_FREE<BITTWO>. Wrapped in do/while(0) so that the expansion is a
single statement wherever it is used. */
#define PCRE_JIT_STACK_FREE(stack) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
    else \
      G(PCRE_JIT_STACK_FREE,BITTWO)(stack); \
    } \
  while (0)
887
/* Calls the _maketables() function of whichever library matches pcre_mode.
This is a value-yielding macro, so the whole conditional expression is
parenthesized; without the outer parentheses any neighbouring operator that
binds more tightly than ?: would split it. */
#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
891
/* Two-mode dispatch on pcre_mode to PCRE_PATTERN_TO_HOST_BYTE_ORDER<BITONE> or
PCRE_PATTERN_TO_HOST_BYTE_ORDER<BITTWO>. Wrapped in do/while(0) so that the
expansion is a single statement wherever it is used. */
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
    else \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables); \
    } \
  while (0)
897
/* Two-mode dispatch on pcre_mode to PCRE_PRINTINT<BITONE> or
PCRE_PRINTINT<BITTWO>. Wrapped in do/while(0) so that the expansion is a single
statement wherever it is used. */
#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
    else \
      G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths); \
    } \
  while (0)
903
/* Two-mode dispatch on pcre_mode to PCRE_STUDY<BITONE> or PCRE_STUDY<BITTWO>.
Wrapped in do/while(0) so that the expansion is a single statement wherever it
is used. */
#define PCRE_STUDY(extra, re, options, error) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_STUDY,BITONE)(extra, re, options, error); \
    else \
      G(PCRE_STUDY,BITTWO)(extra, re, options, error); \
    } \
  while (0)
909
910 #endif /* Two out of three modes */
911
912 /* ----- End of cases where more than one mode is supported ----- */
913
914
915 /* ----- Only 8-bit mode is supported ----- */
916
917 #elif defined SUPPORT_PCRE8
/* With only one mode compiled in, each generic name below is a plain alias for
its 8-bit variant; nothing here tests pcre_mode. */
918 #define CHAR_SIZE 1 /* presumably bytes per code unit - confirm at the point of use */
919 #define PCHARS PCHARS8
920 #define PCHARSV PCHARSV8
921 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
922 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
923 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
924 #define STRLEN STRLEN8
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
926 #define PCRE_COMPILE PCRE_COMPILE8
927 #define PCRE_CONFIG pcre_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
931 #define PCRE_EXEC PCRE_EXEC8
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
941 #define PCRE_MAKETABLES pcre_maketables() /* includes the call parentheses: used without () */
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
943 #define PCRE_PRINTINT PCRE_PRINTINT8
944 #define PCRE_STUDY PCRE_STUDY8
945
946 /* ----- Only 16-bit mode is supported ----- */
947
948 #elif defined SUPPORT_PCRE16
/* With only one mode compiled in, each generic name below is a plain alias for
its 16-bit variant; nothing here tests pcre_mode. */
949 #define CHAR_SIZE 2 /* presumably bytes per code unit - confirm at the point of use */
950 #define PCHARS PCHARS16
951 #define PCHARSV PCHARSV16
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
954 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
955 #define STRLEN STRLEN16
956 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
957 #define PCRE_COMPILE PCRE_COMPILE16
958 #define PCRE_CONFIG pcre16_config
959 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
960 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
961 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
962 #define PCRE_EXEC PCRE_EXEC16
963 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
964 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
965 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
966 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
967 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
968 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
969 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
970 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
971 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
972 #define PCRE_MAKETABLES pcre16_maketables() /* includes the call parentheses: used without () */
973 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
974 #define PCRE_PRINTINT PCRE_PRINTINT16
975 #define PCRE_STUDY PCRE_STUDY16
976
977 /* ----- Only 32-bit mode is supported ----- */
978
979 #elif defined SUPPORT_PCRE32
/* With only one mode compiled in, each generic name below is a plain alias for
its 32-bit variant; nothing here tests pcre_mode. */
980 #define CHAR_SIZE 4 /* presumably bytes per code unit - confirm at the point of use */
981 #define PCHARS PCHARS32
982 #define PCHARSV PCHARSV32
983 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
984 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
985 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
986 #define STRLEN STRLEN32
987 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
988 #define PCRE_COMPILE PCRE_COMPILE32
989 #define PCRE_CONFIG pcre32_config
990 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
991 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
992 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
993 #define PCRE_EXEC PCRE_EXEC32
994 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
995 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
996 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
997 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
998 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
999 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1000 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1001 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1002 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
1003 #define PCRE_MAKETABLES pcre32_maketables() /* includes the call parentheses: used without () */
1004 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1005 #define PCRE_PRINTINT PCRE_PRINTINT32
1006 #define PCRE_STUDY PCRE_STUDY32
1007
1008 #endif
1009
1010 /* ----- End of mode-specific function call macros ----- */
1011
1012
1013 /* Other parameters */
1014
/* Fallback for systems whose headers do not define CLOCKS_PER_SEC: use CLK_TCK
when that exists, otherwise assume 100 ticks per second. */
1015 #ifndef CLOCKS_PER_SEC
1016 #ifdef CLK_TCK
1017 #define CLOCKS_PER_SEC CLK_TCK
1018 #else
1019 #define CLOCKS_PER_SEC 100
1020 #endif
1021 #endif
1022
1023 #if !defined NODFA
1024 #define DFA_WS_DIMENSION 1000 /* presumably the number of workspace entries for DFA matching - confirm at the point of use */
1025 #endif
1026
1027 /* This is the default loop count for timing. */
1028
1029 #define LOOPREPEAT 500000
1030
1031 /* Static variables */
1032
1033 static FILE *outfile;
1034 static int log_store = 0;
1035 static int callout_count;
1036 static int callout_extra;
1037 static int callout_fail_count;
1038 static int callout_fail_id;
1039 static int debug_lengths;
1040 static int first_callout;
1041 static int jit_was_used; /* set to TRUE by jit_callback() */
1042 static int locale_set = 0;
1043 static int show_malloc;
1044 static int stack_guard_return;
1045 static int use_utf;
1046 static const unsigned char *last_callout_mark = NULL;
1047
1048 /* The buffers grow automatically if very long input lines are encountered. */
1049
1050 static int buffer_size = 50000; /* starting size only; see the comment above */
1051 static pcre_uint8 *buffer = NULL;
1052 static pcre_uint8 *pbuffer = NULL;
1053
1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1055
1056 #ifdef COMPILE_PCRE16
1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058 #endif
1059
1060 #ifdef COMPILE_PCRE32
1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062 #endif
1063
1064 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066 pattern for saving/reloading testing. Luckily, the data for these tables is
1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069 LINK_SIZE is also used later in this program. */
1070
1071 #ifdef SUPPORT_PCRE16
1072 #undef IMM2_SIZE
1073 #define IMM2_SIZE 1
1074
1075 #if LINK_SIZE == 2 /* an incoming LINK_SIZE of 2 becomes 1; 3 or 4 becomes 2 */
1076 #undef LINK_SIZE
1077 #define LINK_SIZE 1
1078 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1079 #undef LINK_SIZE
1080 #define LINK_SIZE 2
1081 #else
1082 #error LINK_SIZE must be either 2, 3, or 4
1083 #endif
1084
1085 static int buffer16_size = 0; /* size of buffer16 in bytes, as passed to malloc() in to16() */
1086 static pcre_uint16 *buffer16 = NULL; /* allocated and regrown on demand by to16(), which leaves its result here */
1087 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS }; /* OP_LENGTHS expanded with the IMM2_SIZE/LINK_SIZE values set just above */
1088 #endif /* SUPPORT_PCRE16 */
1089
1090 #ifdef SUPPORT_PCRE32
1091 #undef IMM2_SIZE
1092 #define IMM2_SIZE 1
1093 #undef LINK_SIZE
1094 #define LINK_SIZE 1
1095
1096 static int buffer32_size = 0; /* size of buffer32 in bytes, as passed to malloc() in to32() */
1097 static pcre_uint32 *buffer32 = NULL; /* allocated and regrown on demand by to32() */
1098 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS }; /* OP_LENGTHS expanded with the IMM2_SIZE/LINK_SIZE values set just above */
1099 #endif /* SUPPORT_PCRE32 */
1100
1101 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1102 support, it can be changed by an option. If there is no 8-bit support, there
1103 must be 16- or 32-bit support, so default to 16-bit if present, else 32-bit. */
1104
1105 #if defined SUPPORT_PCRE8
1106 static int pcre_mode = PCRE8_MODE;
1107 #elif defined SUPPORT_PCRE16
1108 static int pcre_mode = PCRE16_MODE;
1109 #elif defined SUPPORT_PCRE32
1110 static int pcre_mode = PCRE32_MODE;
1111 #endif
1112
1113 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. The seven entries
are listed in order for n = 1 to 7, and n acts as a bit mask: bit 0 selects
PCRE_STUDY_JIT_COMPILE, bit 1 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE and bit 2
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE. Presumably indexed by n-1 - confirm at the
point of use. */
1114
1115 static int jit_study_bits[] =
1116 {
1117 PCRE_STUDY_JIT_COMPILE,
1118 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1119 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1120 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1121 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1122 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1123 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1124 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1125 };
1126
1127 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1128 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) /* all three JIT study options OR-ed together */
1129
1130 /* Textual explanations for runtime error codes. A NULL entry marks a code that
is either never returned or is reported by special-case code, as noted beside
each one. Presumably the table is indexed by the negated error code (entry 1
being NOMATCH) - confirm at the point of use. */
1131
1132 static const char *errtexts[] = {
1133 NULL, /* 0 is no error */
1134 NULL, /* NOMATCH is handled specially */
1135 "NULL argument passed",
1136 "bad option value",
1137 "magic number missing",
1138 "unknown opcode - pattern overwritten?",
1139 "no more memory",
1140 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1141 "match limit exceeded",
1142 "callout error code",
1143 NULL, /* BADUTF8/16 is handled specially */
1144 NULL, /* BADUTF8/16 offset is handled specially */
1145 NULL, /* PARTIAL is handled specially */
1146 "not used - internal error",
1147 "internal error - pattern overwritten?",
1148 "bad count value",
1149 "item unsupported for DFA matching",
1150 "backreference condition or recursion test not supported for DFA matching",
1151 "match limit not supported for DFA matching",
1152 "workspace size exceeded in DFA matching",
1153 "too much recursion for DFA matching",
1154 "recursion limit exceeded",
1155 "not used - internal error",
1156 "invalid combination of newline options",
1157 "bad offset value",
1158 NULL, /* SHORTUTF8/16 is handled specially */
1159 "nested recursion at the same subject position",
1160 "JIT stack limit reached",
1161 "pattern compiled in wrong mode: 8-bit/16-bit error",
1162 "pattern compiled with other endianness",
1163 "invalid data in workspace for DFA restart",
1164 "bad JIT option",
1165 "bad length"
1166 };
1167
1168
1169 /*************************************************
1170 * Alternate character tables *
1171 *************************************************/
1172
1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174 using the default tables of the library. However, the T option can be used to
1175 select alternate sets of tables, for different kinds of testing. Note also that
1176 the L (locale) option also adjusts the tables. */
1177
1178 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 only ASCII characters. */
1180
1181 static const pcre_uint8 tables0[] = {
1182
1183 /* This table is a lower casing table. */
1184
1185 0, 1, 2, 3, 4, 5, 6, 7,
1186 8, 9, 10, 11, 12, 13, 14, 15,
1187 16, 17, 18, 19, 20, 21, 22, 23,
1188 24, 25, 26, 27, 28, 29, 30, 31,
1189 32, 33, 34, 35, 36, 37, 38, 39,
1190 40, 41, 42, 43, 44, 45, 46, 47,
1191 48, 49, 50, 51, 52, 53, 54, 55,
1192 56, 57, 58, 59, 60, 61, 62, 63,
1193 64, 97, 98, 99,100,101,102,103,
1194 104,105,106,107,108,109,110,111,
1195 112,113,114,115,116,117,118,119,
1196 120,121,122, 91, 92, 93, 94, 95,
1197 96, 97, 98, 99,100,101,102,103,
1198 104,105,106,107,108,109,110,111,
1199 112,113,114,115,116,117,118,119,
1200 120,121,122,123,124,125,126,127,
1201 128,129,130,131,132,133,134,135,
1202 136,137,138,139,140,141,142,143,
1203 144,145,146,147,148,149,150,151,
1204 152,153,154,155,156,157,158,159,
1205 160,161,162,163,164,165,166,167,
1206 168,169,170,171,172,173,174,175,
1207 176,177,178,179,180,181,182,183,
1208 184,185,186,187,188,189,190,191,
1209 192,193,194,195,196,197,198,199,
1210 200,201,202,203,204,205,206,207,
1211 208,209,210,211,212,213,214,215,
1212 216,217,218,219,220,221,222,223,
1213 224,225,226,227,228,229,230,231,
1214 232,233,234,235,236,237,238,239,
1215 240,241,242,243,244,245,246,247,
1216 248,249,250,251,252,253,254,255,
1217
1218 /* This table is a case flipping table. */
1219
1220 0, 1, 2, 3, 4, 5, 6, 7,
1221 8, 9, 10, 11, 12, 13, 14, 15,
1222 16, 17, 18, 19, 20, 21, 22, 23,
1223 24, 25, 26, 27, 28, 29, 30, 31,
1224 32, 33, 34, 35, 36, 37, 38, 39,
1225 40, 41, 42, 43, 44, 45, 46, 47,
1226 48, 49, 50, 51, 52, 53, 54, 55,
1227 56, 57, 58, 59, 60, 61, 62, 63,
1228 64, 97, 98, 99,100,101,102,103,
1229 104,105,106,107,108,109,110,111,
1230 112,113,114,115,116,117,118,119,
1231 120,121,122, 91, 92, 93, 94, 95,
1232 96, 65, 66, 67, 68, 69, 70, 71,
1233 72, 73, 74, 75, 76, 77, 78, 79,
1234 80, 81, 82, 83, 84, 85, 86, 87,
1235 88, 89, 90,123,124,125,126,127,
1236 128,129,130,131,132,133,134,135,
1237 136,137,138,139,140,141,142,143,
1238 144,145,146,147,148,149,150,151,
1239 152,153,154,155,156,157,158,159,
1240 160,161,162,163,164,165,166,167,
1241 168,169,170,171,172,173,174,175,
1242 176,177,178,179,180,181,182,183,
1243 184,185,186,187,188,189,190,191,
1244 192,193,194,195,196,197,198,199,
1245 200,201,202,203,204,205,206,207,
1246 208,209,210,211,212,213,214,215,
1247 216,217,218,219,220,221,222,223,
1248 224,225,226,227,228,229,230,231,
1249 232,233,234,235,236,237,238,239,
1250 240,241,242,243,244,245,246,247,
1251 248,249,250,251,252,253,254,255,
1252
1253 /* This table contains bit maps for various character classes. Each map is 32
1254 bytes long and the bits run from the least significant end of each byte. The
1255 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1256 graph, print, punct, and cntrl. Other classes are built from combinations. */
1257
1258 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space: 9-13 and 32 */
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262
1263 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit: 0-9, A-F, a-f */
1264 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267
1268 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit: 0-9 */
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper: A-Z */
1274 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277
1278 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower: a-z */
1279 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1280 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282
1283 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word: 0-9, A-Z, '_', a-z */
1284 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1286 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1287
1288 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph: 33-126 */
1289 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1290 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1291 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1292
1293 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print: 32-126 */
1294 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1295 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1296 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1297
1298 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
1299 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1302
1303 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl: 0-31 and 127 */
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1307
1308 /* This table identifies various classes of character by individual bits:
1309 0x01 white space character
1310 0x02 letter
1311 0x04 decimal digit
1312 0x08 hexadecimal digit
1313 0x10 alphanumeric or '_'
1314 0x80 regular expression metacharacter or binary zero
1315 */
1316
1317 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1318 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1321 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1322 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1323 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1324 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1325 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1326 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1327 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1328 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1329 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1330 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1331 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1332 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1349
1350 /* This is a set of tables that came originally from a Windows user. It seems
1351 to be at least an approximation of ISO 8859. In particular, there are
1352 characters greater than 128 that are marked as spaces, letters, etc. */
1353
1354 static const pcre_uint8 tables1[] = { /* 1088 bytes in four sections, sized like those of tables0 */
1355 0,1,2,3,4,5,6,7, /* section 1: 256-byte lower casing table (192-222 except 215 map to 224-254) */
1356 8,9,10,11,12,13,14,15,
1357 16,17,18,19,20,21,22,23,
1358 24,25,26,27,28,29,30,31,
1359 32,33,34,35,36,37,38,39,
1360 40,41,42,43,44,45,46,47,
1361 48,49,50,51,52,53,54,55,
1362 56,57,58,59,60,61,62,63,
1363 64,97,98,99,100,101,102,103,
1364 104,105,106,107,108,109,110,111,
1365 112,113,114,115,116,117,118,119,
1366 120,121,122,91,92,93,94,95,
1367 96,97,98,99,100,101,102,103,
1368 104,105,106,107,108,109,110,111,
1369 112,113,114,115,116,117,118,119,
1370 120,121,122,123,124,125,126,127,
1371 128,129,130,131,132,133,134,135,
1372 136,137,138,139,140,141,142,143,
1373 144,145,146,147,148,149,150,151,
1374 152,153,154,155,156,157,158,159,
1375 160,161,162,163,164,165,166,167,
1376 168,169,170,171,172,173,174,175,
1377 176,177,178,179,180,181,182,183,
1378 184,185,186,187,188,189,190,191,
1379 224,225,226,227,228,229,230,231,
1380 232,233,234,235,236,237,238,239,
1381 240,241,242,243,244,245,246,215,
1382 248,249,250,251,252,253,254,223,
1383 224,225,226,227,228,229,230,231,
1384 232,233,234,235,236,237,238,239,
1385 240,241,242,243,244,245,246,247,
1386 248,249,250,251,252,253,254,255,
1387 0,1,2,3,4,5,6,7, /* section 2: 256-byte case flipping table */
1388 8,9,10,11,12,13,14,15,
1389 16,17,18,19,20,21,22,23,
1390 24,25,26,27,28,29,30,31,
1391 32,33,34,35,36,37,38,39,
1392 40,41,42,43,44,45,46,47,
1393 48,49,50,51,52,53,54,55,
1394 56,57,58,59,60,61,62,63,
1395 64,97,98,99,100,101,102,103,
1396 104,105,106,107,108,109,110,111,
1397 112,113,114,115,116,117,118,119,
1398 120,121,122,91,92,93,94,95,
1399 96,65,66,67,68,69,70,71,
1400 72,73,74,75,76,77,78,79,
1401 80,81,82,83,84,85,86,87,
1402 88,89,90,123,124,125,126,127,
1403 128,129,130,131,132,133,134,135,
1404 136,137,138,139,140,141,142,143,
1405 144,145,146,147,148,149,150,151,
1406 152,153,154,155,156,157,158,159,
1407 160,161,162,163,164,165,166,167,
1408 168,169,170,171,172,173,174,175,
1409 176,177,178,179,180,181,182,183,
1410 184,185,186,187,188,189,190,191,
1411 224,225,226,227,228,229,230,231,
1412 232,233,234,235,236,237,238,239,
1413 240,241,242,243,244,245,246,215,
1414 248,249,250,251,252,253,254,223,
1415 192,193,194,195,196,197,198,199,
1416 200,201,202,203,204,205,206,207,
1417 208,209,210,211,212,213,214,247,
1418 216,217,218,219,220,221,222,255,
1419 0,62,0,0,1,0,0,0, /* section 3: ten 32-byte class bit maps (four rows each), presumably in the same class order as tables0 */
1420 0,0,0,0,0,0,0,0,
1421 32,0,0,0,1,0,0,0,
1422 0,0,0,0,0,0,0,0,
1423 0,0,0,0,0,0,255,3,
1424 126,0,0,0,126,0,0,0,
1425 0,0,0,0,0,0,0,0,
1426 0,0,0,0,0,0,0,0,
1427 0,0,0,0,0,0,255,3,
1428 0,0,0,0,0,0,0,0,
1429 0,0,0,0,0,0,12,2,
1430 0,0,0,0,0,0,0,0,
1431 0,0,0,0,0,0,0,0,
1432 254,255,255,7,0,0,0,0,
1433 0,0,0,0,0,0,0,0,
1434 255,255,127,127,0,0,0,0,
1435 0,0,0,0,0,0,0,0,
1436 0,0,0,0,254,255,255,7,
1437 0,0,0,0,0,4,32,4,
1438 0,0,0,128,255,255,127,255,
1439 0,0,0,0,0,0,255,3,
1440 254,255,255,135,254,255,255,7,
1441 0,0,0,0,0,4,44,6,
1442 255,255,127,255,255,255,127,255,
1443 0,0,0,0,254,255,255,255,
1444 255,255,255,255,255,255,255,127,
1445 0,0,0,0,254,255,255,255,
1446 255,255,255,255,255,255,255,255,
1447 0,2,0,0,255,255,255,255,
1448 255,255,255,255,255,255,255,127,
1449 0,0,0,0,255,255,255,255,
1450 255,255,255,255,255,255,255,255,
1451 0,0,0,0,254,255,0,252,
1452 1,0,0,248,1,0,0,120,
1453 0,0,0,0,254,255,255,255,
1454 0,0,128,0,0,0,128,0,
1455 255,255,255,255,0,0,0,0,
1456 0,0,0,0,0,0,0,128,
1457 255,255,255,255,0,0,0,0,
1458 0,0,0,0,0,0,0,0,
1459 128,0,0,0,0,0,0,0, /* section 4: 256-byte character type table, one flag byte per character */
1460 0,1,1,0,1,1,0,0,
1461 0,0,0,0,0,0,0,0,
1462 0,0,0,0,0,0,0,0,
1463 1,0,0,0,128,0,0,0,
1464 128,128,128,128,0,0,128,0,
1465 28,28,28,28,28,28,28,28,
1466 28,28,0,0,0,0,0,128,
1467 0,26,26,26,26,26,26,18,
1468 18,18,18,18,18,18,18,18,
1469 18,18,18,18,18,18,18,18,
1470 18,18,18,128,128,0,128,16,
1471 0,26,26,26,26,26,26,18,
1472 18,18,18,18,18,18,18,18,
1473 18,18,18,18,18,18,18,18,
1474 18,18,18,128,128,0,0,0,
1475 0,0,0,0,0,1,0,0,
1476 0,0,0,0,0,0,0,0,
1477 0,0,0,0,0,0,0,0,
1478 0,0,0,0,0,0,0,0,
1479 1,0,0,0,0,0,0,0,
1480 0,0,18,0,0,0,0,0,
1481 0,0,20,20,0,18,0,0,
1482 0,20,18,0,0,0,0,0,
1483 18,18,18,18,18,18,18,18,
1484 18,18,18,18,18,18,18,18,
1485 18,18,18,18,18,18,18,0,
1486 18,18,18,18,18,18,18,18,
1487 18,18,18,18,18,18,18,18,
1488 18,18,18,18,18,18,18,18,
1489 18,18,18,18,18,18,18,0,
1490 18,18,18,18,18,18,18,18
1491 };
1492
1493
1494
1495
1496 #ifndef HAVE_STRERROR
1497 /*************************************************
1498 * Provide strerror() for non-ANSI libraries *
1499 *************************************************/
1500
/* Replacement strerror() for old systems (SunOS4 is one example) whose C
library lacks it but does export the sys_nerr / sys_errlist pair. An error
number outside 0 .. sys_nerr-1 yields a fixed "unknown" message. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
1514 #endif /* HAVE_STRERROR */
1515
1516
1517
1518 /*************************************************
1519 * Print newline configuration *
1520 *************************************************/
1521
1522 /*
1523 Arguments:
1524 rc the return code from PCRE_CONFIG_NEWLINE
1525 isc TRUE if called from "-C newline"
1526 Returns: nothing
1527 */
1528
1529 static void
1530 print_newline_config(int rc, BOOL isc)
1531 {
1532 const char *s = NULL;
1533 if (!isc) printf(" Newline sequence is ");
1534 switch(rc)
1535 {
1536 case CHAR_CR: s = "CR"; break;
1537 case CHAR_LF: s = "LF"; break;
1538 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539 case -1: s = "ANY"; break;
1540 case -2: s = "ANYCRLF"; break;
1541
1542 default:
1543 printf("a non-standard value: 0x%04x\n", rc);
1544 return;
1545 }
1546
1547 printf("%s\n", s);
1548 }
1549
1550
1551
1552 /*************************************************
1553 * JIT memory callback *
1554 *************************************************/
1555
1556 static pcre_jit_stack* jit_callback(void *arg)
1557 {
1558 jit_was_used = TRUE;
1559 return (pcre_jit_stack *)arg;
1560 }
1561
1562
1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 /*************************************************
1565 * Convert UTF-8 string to value *
1566 *************************************************/
1567
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1569 and returns the value of the character.
1570
1571 Argument:
1572 utf8bytes a pointer to the byte vector
1573 vptr a pointer to an int to receive the value
1574
1575 Returns: > 0 => the number of bytes consumed
1576 -6 to 0 => malformed UTF-8 character at offset = (-return)
1577 */
1578
1579 static int
1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1581 {
1582 pcre_uint32 c = *utf8bytes++;
1583 pcre_uint32 d = c;
1584 int i, j, s;
1585
1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1587 {
1588 if ((d & 0x80) == 0) break;
1589 d <<= 1;
1590 }
1591
1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1594
1595 /* i now has a value in the range 1-5 */
1596
1597 s = 6*i;
1598 d = (c & utf8_table3[i]) << s;
1599
1600 for (j = 0; j < i; j++)
1601 {
1602 c = *utf8bytes++;
1603 if ((c & 0xc0) != 0x80) return -(j+1);
1604 s -= 6;
1605 d |= (c & 0x3f) << s;
1606 }
1607
1608 /* Check that encoding was the correct unique one */
1609
1610 for (j = 0; j < utf8_table1_size; j++)
1611 if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 if (j != i) return -(i+1);
1613
1614 /* Valid value */
1615
1616 *vptr = d;
1617 return i+1;
1618 }
1619 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1620
1621
1622
1623 #if defined SUPPORT_PCRE8 && !defined NOUTF
1624 /*************************************************
1625 * Convert character value to UTF-8 *
1626 *************************************************/
1627
1628 /* This function takes an integer value in the range 0 - 0x7fffffff
1629 and encodes it as a UTF-8 character in 0 to 6 bytes.
1630
1631 Arguments:
1632 cvalue the character value
1633 utf8bytes pointer to buffer for result - at least 6 bytes long
1634
1635 Returns: number of characters placed in the buffer
1636 */
1637
1638 static int
1639 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1640 {
1641 register int i, j;
1642 if (cvalue > 0x7fffffffu)
1643 return -1;
1644 for (i = 0; i < utf8_table1_size; i++)
1645 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1646 utf8bytes += i;
1647 for (j = i; j > 0; j--)
1648 {
1649 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1650 cvalue >>= 6;
1651 }
1652 *utf8bytes = utf8_table2[i] | cvalue;
1653 return i + 1;
1654 }
1655 #endif
1656
1657
1658 #ifdef SUPPORT_PCRE16
1659 /*************************************************
1660 * Convert a string to 16-bit *
1661 *************************************************/
1662
1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1666 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1667 result is always left in buffer16.
1668
1669 Note that this function does not object to surrogate values. This is
1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1671 for the purpose of testing that they are correctly faulted.
1672
1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 in UTF-8 so that values greater than 255 can be handled.
1675
1676 Arguments:
1677 data TRUE if converting a data line; FALSE for a regex
1678 p points to a byte string
1679 utf true if UTF-8 (to be converted to UTF-16)
1680 len number of bytes in the string (excluding trailing zero)
1681
1682 Returns: number of 16-bit data items used (excluding trailing zero)
1683 OR -1 if a UTF-8 string is malformed
1684 OR -2 if a value > 0x10ffff is encountered
1685 OR -3 if a value > 0xffff is encountered when not in UTF mode
1686 */
1687
1688 static int
1689 to16(int data, pcre_uint8 *p, int utf, int len)
1690 {
1691 pcre_uint16 *pp;
1692
1693 if (buffer16_size < 2*len + 2)
1694 {
1695 if (buffer16 != NULL) free(buffer16);
1696 buffer16_size = 2*len + 2;
1697 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1698 if (buffer16 == NULL)
1699 {
1700 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1701 exit(1);
1702 }
1703 }
1704
1705 pp = buffer16;
1706
1707 if (!utf && !data)
1708 {
1709 while (len-- > 0) *pp++ = *p++;
1710 }
1711
1712 else
1713 {
1714 pcre_uint32 c = 0;
1715 while (len > 0)
1716 {
1717 int chlen = utf82ord(p, &c);
1718 if (chlen <= 0) return -1;
1719 if (c > 0x10ffff) return -2;
1720 p += chlen;
1721 len -= chlen;
1722 if (c < 0x10000) *pp++ = c; else
1723 {
1724 if (!utf) return -3;
1725 c -= 0x10000;
1726 *pp++ = 0xD800 | (c >> 10);
1727 *pp++ = 0xDC00 | (c & 0x3ff);
1728 }
1729 }
1730 }
1731
1732 *pp = 0;
1733 return pp - buffer16;
1734 }
1735 #endif
1736
1737 #ifdef SUPPORT_PCRE32
1738 /*************************************************
1739 * Convert a string to 32-bit *
1740 *************************************************/
1741
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1745 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1746 result is always left in buffer32.
1747
Note that, for data lines, this function does not object to surrogate values.
This is deliberate; it makes it possible to construct UTF-32 strings that are
invalid, for the purpose of testing that they are correctly faulted. In a UTF
pattern (data is FALSE), however, a surrogate is rejected with -3.
1751
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1754
1755 Arguments:
1756 data TRUE if converting a data line; FALSE for a regex
1757 p points to a byte string
1758 utf true if UTF-8 (to be converted to UTF-32)
1759 len number of bytes in the string (excluding trailing zero)
1760
1761 Returns: number of 32-bit data items used (excluding trailing zero)
1762 OR -1 if a UTF-8 string is malformed
1763 OR -2 if a value > 0x10ffff is encountered
1764 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1765 */
1766
1767 static int
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1769 {
1770 pcre_uint32 *pp;
1771
1772 if (buffer32_size < 4*len + 4)
1773 {
1774 if (buffer32 != NULL) free(buffer32);
1775 buffer32_size = 4*len + 4;
1776 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777 if (buffer32 == NULL)
1778 {
1779 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780 exit(1);
1781 }
1782 }
1783
1784 pp = buffer32;
1785
1786 if (!utf && !data)
1787 {
1788 while (len-- > 0) *pp++ = *p++;
1789 }
1790
1791 else
1792 {
1793 pcre_uint32 c = 0;
1794 while (len > 0)
1795 {
1796 int chlen = utf82ord(p, &c);
1797 if (chlen <= 0) return -1;
1798 if (utf)
1799 {
1800 if (c > 0x10ffff) return -2;
1801 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1802 }
1803
1804 p += chlen;
1805 len -= chlen;
1806 *pp++ = c;
1807 }
1808 }
1809
1810 *pp = 0;
1811 return pp - buffer32;
1812 }
1813
1814 /* Check that a 32-bit character string is valid UTF-32.
1815
1816 Arguments:
1817 string points to the string
1818 length length of string, or -1 if the string is zero-terminated
1819
1820 Returns: TRUE if the string is a valid UTF-32 string
1821 FALSE otherwise
1822 */
1823
1824 #ifdef NEVER /* Not used */
1825 #ifdef SUPPORT_UTF
1826 static BOOL
1827 valid_utf32(pcre_uint32 *string, int length)
1828 {
1829 register pcre_uint32 *p;
1830 register pcre_uint32 c;
1831
1832 for (p = string; length-- > 0; p++)
1833 {
1834 c = *p;
1835 if (c > 0x10ffffu) return FALSE; /* Too big */
1836 if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1837 }
1838
1839 return TRUE;
1840 }
1841 #endif /* SUPPORT_UTF */
1842 #endif /* NEVER */
1843 #endif /* SUPPORT_PCRE32 */
1844
1845
1846 /*************************************************
1847 * Read or extend an input line *
1848 *************************************************/
1849
1850 /* Input lines are read into buffer, but both patterns and data lines can be
1851 continued over multiple input lines. In addition, if the buffer fills up, we
1852 want to automatically expand it so as to be able to handle extremely large
1853 lines that are needed for certain stress tests. When the input buffer is
1854 expanded, the other two buffers must also be expanded likewise, and the
1855 contents of pbuffer, which are a copy of the input for callouts, must be
1856 preserved (for when expansion happens for a data line). This is not the most
1857 optimal way of handling this, but hey, this is just a test program!
1858
1859 Arguments:
1860 f the file to read
1861 start where in buffer to start (this *must* be within buffer)
1862 prompt for stdin or readline()
1863
1864 Returns: pointer to the start of new data
1865 could be a copy of start, or could be moved
1866 NULL if no data read and EOF reached
1867 */
1868
1869 static pcre_uint8 *
1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1871 {
1872 pcre_uint8 *here = start;
1873
1874 for (;;)
1875 {
1876 size_t rlen = (size_t)(buffer_size - (here - buffer));
1877
1878 if (rlen > 1000)
1879 {
1880 int dlen;
1881
1882 /* If libreadline or libedit support is required, use readline() to read a
1883 line if the input is a terminal. Note that readline() removes the trailing
1884 newline, so we must put it back again, to be compatible with fgets(). */
1885
1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887 if (isatty(fileno(f)))
1888 {
1889 size_t len;
1890 char *s = readline(prompt);
1891 if (s == NULL) return (here == start)? NULL : start;
1892 len = strlen(s);
1893 if (len > 0) add_history(s);
1894 if (len > rlen - 1) len = rlen - 1;
1895 memcpy(here, s, len);
1896 here[len] = '\n';
1897 here[len+1] = 0;
1898 free(s);
1899 }
1900 else
1901 #endif
1902
1903 /* Read the next line by normal means, prompting if the file is stdin. */
1904
1905 {
1906 if (f == stdin) printf("%s", prompt);
1907 if (fgets((char *)here, rlen, f) == NULL)
1908 return (here == start)? NULL : start;
1909 }
1910
1911 dlen = (int)strlen((char *)here);
1912 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913 here += dlen;
1914 }
1915
1916 else
1917 {
1918 int new_buffer_size = 2*buffer_size;
1919 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1921
1922 if (new_buffer == NULL || new_pbuffer == NULL)
1923 {
1924 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925 exit(1);
1926 }
1927
1928 memcpy(new_buffer, buffer, buffer_size);
1929 memcpy(new_pbuffer, pbuffer, buffer_size);
1930
1931 buffer_size = new_buffer_size;
1932
1933 start = new_buffer + (start - buffer);
1934 here = new_buffer + (here - buffer);
1935
1936 free(buffer);
1937 free(pbuffer);
1938
1939 buffer = new_buffer;
1940 pbuffer = new_pbuffer;
1941 }
1942 }
1943
1944 /* Control never gets here */
1945 }
1946
1947
1948
1949 /*************************************************
1950 * Read number from string *
1951 *************************************************/
1952
1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954 around with conditional compilation, just do the job by hand. It is only used
1955 for unpicking arguments, so just keep it simple.
1956
1957 Arguments:
1958 str string to be converted
1959 endptr where to put the end pointer
1960
Returns: the value, as an int (0 if there are no digits)
1962 */
1963
1964 static int
1965 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1966 {
1967 int result = 0;
1968 while(*str != 0 && isspace(*str)) str++;
1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970 *endptr = str;
1971 return(result);
1972 }
1973
1974
1975
1976 /*************************************************
1977 * Print one character *
1978 *************************************************/
1979
1980 /* Print a single character either literally, or as a hex escape. */
1981
1982 static int pchar(pcre_uint32 c, FILE *f)
1983 {
1984 int n = 0;
1985 if (PRINTOK(c))
1986 {
1987 if (f != NULL) fprintf(f, "%c", c);
1988 return 1;
1989 }
1990
1991 if (c < 0x100)
1992 {
1993 if (use_utf)
1994 {
1995 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1996 return 6;
1997 }
1998 else
1999 {
2000 if (f != NULL) fprintf(f, "\\x%02x", c);
2001 return 4;
2002 }
2003 }
2004
2005 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2006 return n >= 0 ? n : 0;
2007 }
2008
2009
2010
2011 #ifdef SUPPORT_PCRE8
2012 /*************************************************
2013 * Print 8-bit character string *
2014 *************************************************/
2015
2016 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2017 If handed a NULL file, just counts chars without printing. */
2018
2019 static int pchars(pcre_uint8 *p, int length, FILE *f)
2020 {
2021 pcre_uint32 c = 0;
2022 int yield = 0;
2023
2024 if (length < 0)
2025 length = strlen((char *)p);
2026
2027 while (length-- > 0)
2028 {
2029 #if !defined NOUTF
2030 if (use_utf)
2031 {
2032 int rc = utf82ord(p, &c);
2033 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2034 {
2035 length -= rc - 1;
2036 p += rc;
2037 yield += pchar(c, f);
2038 continue;
2039 }
2040 }
2041 #endif
2042 c = *p++;
2043 yield += pchar(c, f);
2044 }
2045
2046 return yield;
2047 }
2048 #endif
2049
2050
2051
2052 #ifdef SUPPORT_PCRE16
2053 /*************************************************
2054 * Find length of 0-terminated 16-bit string *
2055 *************************************************/
2056
2057 static int strlen16(PCRE_SPTR16 p)
2058 {
2059 PCRE_SPTR16 pp = p;
2060 while (*pp != 0) pp++;
2061 return (int)(pp - p);
2062 }
2063 #endif /* SUPPORT_PCRE16 */
2064
2065
2066
2067 #ifdef SUPPORT_PCRE32
2068 /*************************************************
2069 * Find length of 0-terminated 32-bit string *
2070 *************************************************/
2071
2072 static int strlen32(PCRE_SPTR32 p)
2073 {
2074 PCRE_SPTR32 pp = p;
2075 while (*pp != 0) pp++;
2076 return (int)(pp - p);
2077 }
2078 #endif /* SUPPORT_PCRE32 */
2079
2080
2081
2082 #ifdef SUPPORT_PCRE16
2083 /*************************************************
2084 * Print 16-bit character string *
2085 *************************************************/
2086
2087 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2088 If handed a NULL file, just counts chars without printing. */
2089
2090 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2091 {
2092 int yield = 0;
2093
2094 if (length < 0)
2095 length = strlen16(p);
2096
2097 while (length-- > 0)
2098 {
2099 pcre_uint32 c = *p++ & 0xffff;
2100 #if !defined NOUTF
2101 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2102 {
2103 int d = *p & 0xffff;
2104 if (d >= 0xDC00 && d <= 0xDFFF)
2105 {
2106 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2107 length--;
2108 p++;
2109 }
2110 }
2111 #endif
2112 yield += pchar(c, f);
2113 }
2114
2115 return yield;
2116 }
2117 #endif /* SUPPORT_PCRE16 */
2118
2119
2120
2121 #ifdef SUPPORT_PCRE32
2122 /*************************************************
2123 * Print 32-bit character string *
2124 *************************************************/
2125
2126 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2127 If handed a NULL file, just counts chars without printing. */
2128
2129 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2130 {
2131 int yield = 0;
2132
2133 (void)(utf); /* Avoid compiler warning */
2134
2135 if (length < 0)
2136 length = strlen32(p);
2137
2138 while (length-- > 0)
2139 {
2140 pcre_uint32 c = *p++;
2141 yield += pchar(c, f);
2142 }
2143
2144 return yield;
2145 }
2146 #endif /* SUPPORT_PCRE32 */
2147
2148
2149
2150 #ifdef SUPPORT_PCRE8
2151 /*************************************************
2152 * Read a capture name (8-bit) and check it *
2153 *************************************************/
2154
2155 static pcre_uint8 *
2156 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2157 {
2158 pcre_uint8 *npp = *pp;
2159 while (isalnum(*p)) *npp++ = *p++;
2160 *npp++ = 0;
2161 *npp = 0;
2162 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2163 {
2164 fprintf(outfile, "no parentheses with name \"");
2165 PCHARSV(*pp, 0, -1, outfile);
2166 fprintf(outfile, "\"\n");
2167 }
2168
2169 *pp = npp;
2170 return p;
2171 }
2172 #endif /* SUPPORT_PCRE8 */
2173
2174
2175
2176 #ifdef SUPPORT_PCRE16
2177 /*************************************************
2178 * Read a capture name (16-bit) and check it *
2179 *************************************************/
2180
2181 /* Note that the text being read is 8-bit. */
2182
2183 static pcre_uint8 *
2184 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2185 {
2186 pcre_uint16 *npp = *pp;
2187 while (isalnum(*p)) *npp++ = *p++;
2188 *npp++ = 0;
2189 *npp = 0;
2190 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2191 {
2192 fprintf(outfile, "no parentheses with name \"");
2193 PCHARSV(*pp, 0, -1, outfile);
2194 fprintf(outfile, "\"\n");
2195 }
2196 *pp = npp;
2197 return p;
2198 }
2199 #endif /* SUPPORT_PCRE16 */
2200
2201
2202
2203 #ifdef SUPPORT_PCRE32
2204 /*************************************************
2205 * Read a capture name (32-bit) and check it *
2206 *************************************************/
2207
2208 /* Note that the text being read is 8-bit. */
2209
2210 static pcre_uint8 *
2211 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2212 {
2213 pcre_uint32 *npp = *pp;
2214 while (isalnum(*p)) *npp++ = *p++;
2215 *npp++ = 0;
2216 *npp = 0;
2217 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2218 {
2219 fprintf(outfile, "no parentheses with name \"");
2220 PCHARSV(*pp, 0, -1, outfile);
2221 fprintf(outfile, "\"\n");
2222 }
2223 *pp = npp;
2224 return p;
2225 }
2226 #endif /* SUPPORT_PCRE32 */
2227
2228
2229
2230 /*************************************************
2231 * Stack guard function *
2232 *************************************************/
2233
2234 /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2235 return when a count overflows. */
2236
2237 static int stack_guard(void)
2238 {
2239 return stack_guard_return;
2240 }
2241
2242 /*************************************************
2243 * Callout function *
2244 *************************************************/
2245
2246 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2247 the match. Yield zero unless more callouts than the fail count, or the callout
2248 data is not zero. */
2249
2250 static int callout(pcre_callout_block *cb)
2251 {
2252 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2253 int i, current_position, pre_start, post_start, subject_length;
2254
2255 if (callout_extra)
2256 {
2257 fprintf(f, "Callout %d: last capture = %d\n",
2258 cb->callout_number, cb->capture_last);
2259
2260 if (cb->offset_vector != NULL)
2261 {
2262 for (i = 0; i < cb->capture_top * 2; i += 2)
2263 {
2264 if (cb->offset_vector[i] < 0)
2265 fprintf(f, "%2d: <unset>\n", i/2);
2266 else
2267 {
2268 fprintf(f, "%2d: ", i/2);
2269 PCHARSV(cb->subject, cb->offset_vector[i],
2270 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2271 fprintf(f, "\n");
2272 }
2273 }
2274 }
2275 }
2276
2277 /* Re-print the subject in canonical form, the first time or if giving full
2278 datails. On subsequent calls in the same match, we use pchars just to find the
2279 printed lengths of the substrings. */
2280
2281 if (f != NULL) fprintf(f, "--->");
2282
2283 /* If a lookbehind is involved, the current position may be earlier than the
2284 match start. If so, use the match start instead. */
2285
2286 current_position = (cb->current_position >= cb->start_match)?
2287 cb->current_position : cb->start_match;
2288
2289 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2290 PCHARS(post_start, cb->subject, cb->start_match,
2291 current_position - cb->start_match, f);
2292
2293 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2294
2295 PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
2296
2297 if (f != NULL) fprintf(f, "\n");
2298
2299 /* Always print appropriate indicators, with callout number if not already
2300 shown. For automatic callouts, show the pattern offset. */
2301
2302 if (cb->callout_number == 255)
2303 {
2304 fprintf(outfile, "%+3d ", cb->pattern_position);
2305 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2306 }
2307 else
2308 {
2309 if (callout_extra) fprintf(outfile, " ");
2310 else fprintf(outfile, "%3d ", cb->callout_number);
2311 }
2312
2313 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2314 fprintf(outfile, "^");
2315
2316 if (post_start > 0)
2317 {
2318 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2319 fprintf(outfile, "^");
2320 }
2321
2322 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2323 fprintf(outfile, " ");
2324
2325 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2326 pbuffer + cb->pattern_position);
2327
2328 fprintf(outfile, "\n");
2329 first_callout = 0;
2330
2331 if (cb->mark != last_callout_mark)
2332 {
2333 if (cb->mark == NULL)
2334 fprintf(outfile, "Latest Mark: <unset>\n");
2335 else
2336 {
2337 fprintf(outfile, "Latest Mark: ");
2338 PCHARSV(cb->mark, 0, -1, outfile);
2339 putc('\n', outfile);
2340 }
2341 last_callout_mark = cb->mark;
2342 }
2343
2344 if (cb->callout_data != NULL)
2345 {
2346 int callout_data = *((int *)(cb->callout_data));
2347 if (callout_data != 0)
2348 {
2349 fprintf(outfile, "Callout data = %d\n", callout_data);
2350 return callout_data;
2351 }
2352 }
2353
2354 return (cb->callout_number != callout_fail_id)? 0 :
2355 (++callout_count >= callout_fail_count)? 1 : 0;
2356 }
2357
2358
2359 /*************************************************
2360 * Local malloc functions *
2361 *************************************************/
2362
2363 /* Alternative malloc function, to test functionality and save the size of a
2364 compiled re, which is the first store request that pcre_compile() makes. The
2365 show_malloc variable is set only during matching. */
2366
2367 static void *new_malloc(size_t size)
2368 {
2369 void *block = malloc(size);
2370 if (show_malloc)
2371 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2372 return block;
2373 }
2374
2375 static void new_free(void *block)
2376 {
2377 if (show_malloc)
2378 fprintf(outfile, "free %p\n", block);
2379 free(block);
2380 }
2381
2382 /* For recursion malloc/free, to test stacking calls */
2383
2384 static void *stack_malloc(size_t size)
2385 {
2386 void *block = malloc(size);
2387 if (show_malloc)
2388 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2389 return block;
2390 }
2391
2392 static void stack_free(void *block)
2393 {
2394 if (show_malloc)
2395 fprintf(outfile, "stack_free %p\n", block);
2396 free(block);
2397 }
2398
2399
2400 /*************************************************
2401 * Call pcre_fullinfo() *
2402 *************************************************/
2403
2404 /* Get one piece of information from the pcre_fullinfo() function. When only
2405 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2406 value, but the code is defensive.
2407
2408 Arguments:
2409 re compiled regex
2410 study study data
2411 option PCRE_INFO_xxx option
2412 ptr where to put the data
2413
2414 Returns: 0 when OK, < 0 on error
2415 */
2416
2417 static int
2418 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2419 {
2420 int rc;
2421
2422 if (pcre_mode == PCRE32_MODE)
2423 #ifdef SUPPORT_PCRE32
2424 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2425 #else
2426 rc = PCRE_ERROR_BADMODE;
2427 #endif
2428 else if (pcre_mode == PCRE16_MODE)
2429 #ifdef SUPPORT_PCRE16
2430 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2431 #else
2432 rc = PCRE_ERROR_BADMODE;
2433 #endif
2434 else
2435 #ifdef SUPPORT_PCRE8
2436 rc = pcre_fullinfo(re, study, option, ptr);
2437 #else
2438 rc = PCRE_ERROR_BADMODE;
2439 #endif
2440
2441 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2442 {
2443 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2444 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2445 if (rc == PCRE_ERROR_BADMODE)
2446 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2447 "%d-bit mode\n", 8 * CHAR_SIZE,
2448 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2449 }
2450
2451 return rc;
2452 }
2453
2454
2455
2456 /*************************************************
2457 * Swap byte functions *
2458 *************************************************/
2459
2460 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2461 value, respectively.
2462
2463 Arguments:
2464 value any number
2465
2466 Returns: the byte swapped value
2467 */
2468
2469 static pcre_uint32
2470 swap_uint32(pcre_uint32 value)
2471 {
2472 return ((value & 0x000000ff) << 24) |
2473 ((value & 0x0000ff00) << 8) |
2474 ((value & 0x00ff0000) >> 8) |
2475 (value >> 24);
2476 }
2477
2478 static pcre_uint16
2479 swap_uint16(pcre_uint16 value)
2480 {
2481 return (value >> 8) | (value << 8);
2482 }
2483
2484
2485
2486 /*************************************************
2487 * Flip bytes in a compiled pattern *
2488 *************************************************/
2489
2490 /* This function is called if the 'F' option was present on a pattern that is
2491 to be written to a file. We flip the bytes of all the integer fields in the
2492 regex data block and the study block. In 16-bit mode this also flips relevant
2493 bytes in the pattern itself. This is to make it possible to test PCRE's
2494 ability to reload byte-flipped patterns, e.g. those compiled on a different
2495 architecture. */
2496
2497 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2498 static void
2499 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2500 {
2501 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2502 #ifdef SUPPORT_PCRE16
2503 int op;
2504 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2505 int length = re->name_count * re->name_entry_size;
2506 #ifdef SUPPORT_UTF
2507 BOOL utf = (re->options & PCRE_UTF16) != 0;
2508 BOOL utf16_char = FALSE;
2509 #endif /* SUPPORT_UTF */
2510 #endif /* SUPPORT_PCRE16 */
2511
2512 /* Always flip the bytes in the main data block and study blocks. */
2513
2514 re->magic_number = REVERSED_MAGIC_NUMBER;
2515 re->size = swap_uint32(re->size);
2516 re->options = swap_uint32(re->options);
2517 re->flags = swap_uint32(re->flags);
2518 re->limit_match = swap_uint32(re->limit_match);
2519 re->limit_recursion = swap_uint32(re->limit_recursion);
2520 re->first_char = swap_uint16(re->first_char);
2521 re->req_char = swap_uint16(re->req_char);
2522 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2523 re->top_bracket = swap_uint16(re->top_bracket);
2524 re->top_backref = swap_uint16(re->top_backref);
2525 re->name_table_offset = swap_uint16(re->name_table_offset);
2526 re->name_entry_size = swap_uint16(re->name_entry_size);
2527 re->name_count = swap_uint16(re->name_count);
2528 re->ref_count = swap_uint16(re->ref_count);
2529
2530 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2531 {
2532 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2533 rsd->size = swap_uint32(rsd->size);
2534 rsd->flags = swap_uint32(rsd->flags);
2535 rsd->minlength = swap_uint32(rsd->minlength);
2536 }
2537
2538 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2539 in the name table, if present, and then in the pattern itself. */
2540
2541 #ifdef SUPPORT_PCRE16
2542 if (pcre_mode != PCRE16_MODE) return;
2543
2544 while(TRUE)
2545 {
2546 /* Swap previous characters. */
2547 while (length-- > 0)
2548 {
2549 *ptr = swap_uint16(*ptr);
2550 ptr++;
2551 }
2552 #ifdef SUPPORT_UTF
2553 if (utf16_char)
2554 {
2555 if ((ptr[-1] & 0xfc00) == 0xd800)
2556 {
2557 /* We know that there is only one extra character in UTF-16. */
2558 *ptr = swap_uint16(*ptr);
2559 ptr++;
2560 }
2561 }
2562 utf16_char = FALSE;
2563 #endif /* SUPPORT_UTF */
2564
2565 /* Get next opcode. */
2566
2567 length = 0;
2568 op = *ptr;
2569 *ptr++ = swap_uint16(op);
2570
2571 switch (op)
2572 {
2573 case OP_END:
2574 return;
2575
2576 #ifdef SUPPORT_UTF
2577 case OP_CHAR:
2578 case OP_CHARI:
2579 case OP_NOT:
2580 case OP_NOTI:
2581 case OP_STAR:
2582 case OP_MINSTAR:
2583 case OP_PLUS:
2584 case OP_MINPLUS:
2585 case OP_QUERY:
2586 case OP_MINQUERY:
2587 case OP_UPTO:
2588 case OP_MINUPTO:
2589 case OP_EXACT:
2590 case OP_POSSTAR:
2591 case OP_POSPLUS:
2592 case OP_POSQUERY:
2593 case OP_POSUPTO:
2594 case OP_STARI:
2595 case OP_MINSTARI:
2596 case OP_PLUSI:
2597 case OP_MINPLUSI:
2598 case OP_QUERYI:
2599 case OP_MINQUERYI:
2600 case OP_UPTOI:
2601 case OP_MINUPTOI:
2602 case OP_EXACTI:
2603 case OP_POSSTARI:
2604 case OP_POSPLUSI:
2605 case OP_POSQUERYI:
2606 case OP_POSUPTOI:
2607 case OP_NOTSTAR:
2608 case OP_NOTMINSTAR:
2609 case OP_NOTPLUS:
2610 case OP_NOTMINPLUS:
2611 case OP_NOTQUERY:
2612 case OP_NOTMINQUERY:
2613 case OP_NOTUPTO:
2614 case OP_NOTMINUPTO:
2615 case OP_NOTEXACT:
2616 case OP_NOTPOSSTAR:
2617 case OP_NOTPOSPLUS:
2618 case OP_NOTPOSQUERY:
2619 case OP_NOTPOSUPTO:
2620 case OP_NOTSTARI:
2621 case OP_NOTMINSTARI:
2622 case OP_NOTPLUSI:
2623 case OP_NOTMINPLUSI:
2624 case OP_NOTQUERYI:
2625 case OP_NOTMINQUERYI:
2626 case OP_NOTUPTOI:
2627 case OP_NOTMINUPTOI:
2628 case OP_NOTEXACTI:
2629 case OP_NOTPOSSTARI:
2630 case OP_NOTPOSPLUSI:
2631 case OP_NOTPOSQUERYI:
2632 case OP_NOTPOSUPTOI:
2633 if (utf) utf16_char = TRUE;
2634 #endif
2635 /* Fall through. */
2636
2637 default:
2638 length = OP_lengths16[op] - 1;
2639 break;
2640
2641 case OP_CLASS:
2642 case OP_NCLASS:
2643 /* Skip the character bit map. */
2644 ptr += 32/sizeof(pcre_uint16);
2645 length = 0;
2646 break;
2647
2648 case OP_XCLASS:
2649 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2650 if (LINK_SIZE > 1)
2651 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2652 - (1 + LINK_SIZE + 1));
2653 else
2654 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2655
2656 /* Reverse the size of the XCLASS instance. */
2657 *ptr = swap_uint16(*ptr);
2658 ptr++;
2659 if (LINK_SIZE > 1)
2660 {
2661 *ptr = swap_uint16(*ptr);
2662 ptr++;
2663 }
2664
2665 op = *ptr;
2666 *ptr = swap_uint16(op);
2667 ptr++;
2668 if ((op & XCL_MAP) != 0)
2669 {
2670 /* Skip the character bit map. */
2671 ptr += 32/sizeof(pcre_uint16);
2672 length -= 32/sizeof(pcre_uint16);
2673 }
2674 break;
2675 }
2676 }
2677 /* Control should never reach here in 16 bit mode. */
2678 #endif /* SUPPORT_PCRE16 */
2679 }
2680 #endif /* SUPPORT_PCRE[8|16] */
2681
2682
2683
2684 #if defined SUPPORT_PCRE32
2685 static void
2686 regexflip_32(pcre *ere, pcre_extra *extra)
2687 {
2688 real_pcre32 *re = (real_pcre32 *)ere;
2689 int op;
2690 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2691 int length = re->name_count * re->name_entry_size;
2692
2693 /* Always flip the bytes in the main data block and study blocks. */
2694
2695 re->magic_number = REVERSED_MAGIC_NUMBER;
2696 re->size = swap_uint32(re->size);
2697 re->options = swap_uint32(re->options);
2698 re->flags = swap_uint32(re->flags);
2699 re->limit_match = swap_uint32(re->limit_match);
2700 re->limit_recursion = swap_uint32(re->limit_recursion);
2701 re->first_char = swap_uint32(re->first_char);
2702 re->req_char = swap_uint32(re->req_char);
2703 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2704 re->top_bracket = swap_uint16(re->top_bracket);
2705 re->top_backref = swap_uint16(re->top_backref);
2706 re->name_table_offset = swap_uint16(re->name_table_offset);
2707 re->name_entry_size = swap_uint16(re->name_entry_size);
2708 re->name_count = swap_uint16(re->name_count);
2709 re->ref_count = swap_uint16(re->ref_count);
2710
2711 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2712 {
2713 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2714 rsd->size = swap_uint32(rsd->size);
2715 rsd->flags = swap_uint32(rsd->flags);
2716 rsd->minlength = swap_uint32(rsd->minlength);
2717 }
2718
2719 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2720 the pattern itself. */
2721
2722 while(TRUE)
2723 {
2724 /* Swap previous characters. */
2725 while (length-- > 0)
2726 {
2727 *ptr = swap_uint32(*ptr);
2728 ptr++;
2729 }
2730
2731 /* Get next opcode. */
2732
2733 length = 0;
2734 op = *ptr;
2735 *ptr++ = swap_uint32(op);
2736
2737 switch (op)
2738 {
2739 case OP_END:
2740 return;
2741
2742 default:
2743 length = OP_lengths32[op] - 1;
2744 break;
2745
2746 case OP_CLASS:
2747 case OP_NCLASS:
2748 /* Skip the character bit map. */
2749 ptr += 32/sizeof(pcre_uint32);
2750 length = 0;
2751 break;
2752
2753 case OP_XCLASS:
2754 /* LINK_SIZE can only be 1 in 32-bit mode. */
2755 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2756
2757 /* Reverse the size of the XCLASS instance. */
2758 *ptr = swap_uint32(*ptr);
2759 ptr++;
2760
2761 op = *ptr;
2762 *ptr = swap_uint32(op);
2763 ptr++;
2764 if ((op & XCL_MAP) != 0)
2765 {
2766 /* Skip the character bit map. */
2767 ptr += 32/sizeof(pcre_uint32);
2768 length -= 32/sizeof(pcre_uint32);
2769 }
2770 break;
2771 }
2772 }
2773 /* Control should never reach here in 32 bit mode. */
2774 }
2775
2776 #endif /* SUPPORT_PCRE32 */
2777
2778
2779
2780 static void
2781 regexflip(pcre *ere, pcre_extra *extra)
2782 {
2783 #if defined SUPPORT_PCRE32
2784 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2785 regexflip_32(ere, extra);
2786 #endif
2787 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2788 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2789 regexflip8_or_16(ere, extra);
2790 #endif
2791 }
2792
2793
2794
2795 /*************************************************
2796 * Check match or recursion limit *
2797 *************************************************/
2798
2799 static int
2800 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2801 int start_offset, int options, int *use_offsets, int use_size_offsets,
2802 int flag, unsigned long int *limit, int errnumber, const char *msg)
2803 {
2804 int count;
2805 int min = 0;
2806 int mid = 64;
2807 int max = -1;
2808
2809 extra->flags |= flag;
2810
2811 for (;;)
2812 {
2813 *limit = mid;
2814
2815 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2816 use_offsets, use_size_offsets);
2817
2818 if (count == errnumber)
2819 {
2820 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2821 min = mid;
2822 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2823 }
2824
2825 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2826 count == PCRE_ERROR_PARTIAL)
2827 {
2828 if (mid == min + 1)
2829 {
2830 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2831 break;
2832 }
2833 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2834 max = mid;
2835 mid = (min + mid)/2;
2836 }
2837 else break; /* Some other error */
2838 }
2839
2840 extra->flags &= ~flag;
2841 return count;
2842 }
2843
2844
2845
2846 /*************************************************
2847 * Case-independent strncmp() function *
2848 *************************************************/
2849
2850 /*
2851 Arguments:
2852 s first string
2853 t second string
2854 n number of characters to compare
2855
2856 Returns: < 0, = 0, or > 0, according to the comparison
2857 */
2858
2859 static int
2860 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2861 {
2862 while (n--)
2863 {
2864 int c = tolower(*s++) - tolower(*t++);
2865 if (c) return c;
2866 }
2867 return 0;
2868 }
2869
2870
2871
2872 /*************************************************
2873 * Check multicharacter option *
2874 *************************************************/
2875
2876 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2877 a message and return 0 if there is no match.
2878
2879 Arguments:
2880 p points after the leading '<'
2881 f file for error message
2882 nl TRUE to check only for newline settings
2883 stype "modifier" or "escape sequence"
2884
2885 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2886 */
2887
2888 static int
2889 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2890 {
2891 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2892 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2893 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2894 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2895 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2896 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2897 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2898
2899 if (!nl)
2900 {
2901 if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2902 }
2903
2904 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2905 return 0;
2906 }
2907
2908
2909
2910 /*************************************************
2911 * Usage function *
2912 *************************************************/
2913
/* Write a summary of the command line syntax and options to stdout. The list
is kept in step with the option scanning in main(): options that depend on
how pcretest was built (-8, -16, -32, -dfa, -p) are shown only when the
corresponding support is compiled in, and every argument that -C accepts is
listed, including "ebcdic" and "ebcdic-nl".

Arguments:    none
Returns:      nothing
*/

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option and exit\n");
printf(" with its value if numeric (else 0). The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" bsr \\R type [ANYCRLF, ANY]\n");
printf(" ebcdic EBCDIC code support enabled [0, 1]\n");
printf(" ebcdic-nl EBCDIC code for LF as a hex value (else 0)\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
printf(" -TM same as -tm, but show total time at the end\n");
}
2972
2973
2974
2975 /*************************************************
2976 * Main Program *
2977 *************************************************/
2978
2979 /* Read lines from named file or stdin and write to named file or stdout; lines
2980 consist of a regular expression, in delimiters and optionally followed by
2981 options, followed by a set of test data, terminated by an empty line. */
2982
2983 int main(int argc, char **argv)
2984 {
2985 FILE *infile = stdin;
2986 const char *version;
2987 int options = 0;
2988 int study_options = 0;
2989 int default_find_match_limit = FALSE;
2990 pcre_uint32 default_options = 0;
2991 int op = 1;
2992 int timeit = 0;
2993 int timeitm = 0;
2994 int showtotaltimes = 0;
2995 int showinfo = 0;
2996 int showstore = 0;
2997 int force_study = -1;
2998 int force_study_options = 0;
2999 int quiet = 0;
3000 int size_offsets = 45;
3001 int size_offsets_max;
3002 int *offsets = NULL;
3003 int debug = 0;
3004 int done = 0;
3005 int all_use_dfa = 0;
3006 int verify_jit = 0;
3007 int yield = 0;
3008 int stack_size;
3009 pcre_uint8 *dbuffer = NULL;
3010 pcre_uint8 lockout[24] = { 0 };
3011 size_t dbuffer_size = 1u << 14;
3012 clock_t total_compile_time = 0;
3013 clock_t total_study_time = 0;
3014 clock_t total_match_time = 0;
3015
3016 #if !defined NOPOSIX
3017 int posix = 0;
3018 #endif
3019 #if !defined NODFA
3020 int *dfa_workspace = NULL;
3021 #endif
3022
3023 pcre_jit_stack *jit_stack = NULL;
3024
3025 /* These vectors store, end-to-end, a list of zero-terminated captured
3026 substring names, each list itself being terminated by an empty name. Assume
3027 that 1024 is plenty long enough for the few names we'll be testing. It is
3028 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3029 for the actual memory, to ensure alignment. */
3030
3031 pcre_uint32 copynames[1024];
3032 pcre_uint32 getnames[1024];
3033
3034 #ifdef SUPPORT_PCRE32
3035 pcre_uint32 *cn32ptr;
3036 pcre_uint32 *gn32ptr;
3037 #endif
3038
3039 #ifdef SUPPORT_PCRE16
3040 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3041 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3042 pcre_uint16 *cn16ptr;
3043 pcre_uint16 *gn16ptr;
3044 #endif
3045
3046 #ifdef SUPPORT_PCRE8
3047 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3048 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3049 pcre_uint8 *cn8ptr;
3050 pcre_uint8 *gn8ptr;
3051 #endif
3052
3053 /* Get buffers from malloc() so that valgrind will check their misuse when
3054 debugging. They grow automatically when very long lines are read. The 16-
3055 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3056
3057 buffer = (pcre_uint8 *)malloc(buffer_size);
3058 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3059
3060 /* The outfile variable is static so that new_malloc can use it. */
3061
3062 outfile = stdout;
3063
3064 /* The following _setmode() stuff is some Windows magic that tells its runtime
3065 library to translate CRLF into a single LF character. At least, that's what
3066 I've been told: never having used Windows I take this all on trust. Originally
3067 it set 0x8000, but then I was advised that _O_BINARY was better. */
3068
3069 #if defined(_WIN32) || defined(WIN32)
3070 _setmode( _fileno( stdout ), _O_BINARY );
3071 #endif
3072
3073 /* Get the version number: both pcre_version() and pcre16_version() give the
3074 same answer. We just need to ensure that we call one that is available. */
3075
3076 #if defined SUPPORT_PCRE8
3077 version = pcre_version();
3078 #elif defined SUPPORT_PCRE16
3079 version = pcre16_version();
3080 #elif defined SUPPORT_PCRE32
3081 version = pcre32_version();
3082 #endif
3083
3084 /* Scan options */
3085
3086 while (argc > 1 && argv[op][0] == '-')
3087 {
3088 pcre_uint8 *endptr;
3089 char *arg = argv[op];
3090
3091 if (strcmp(arg, "-m") == 0) showstore = 1;
3092 else if (strcmp(arg, "-s") == 0) force_study = 0;
3093
3094 else if (strncmp(arg, "-s+", 3) == 0)
3095 {
3096 arg += 3;
3097 if (*arg == '+') { arg++; verify_jit = TRUE; }
3098 force_study = 1;
3099 if (*arg == 0)
3100 force_study_options = jit_study_bits[6];
3101 else if (*arg >= '1' && *arg <= '7')
3102 force_study_options = jit_study_bits[*arg - '1'];
3103 else goto BAD_ARG;
3104 }
3105 else if (strcmp(arg, "-8") == 0)
3106 {
3107 #ifdef SUPPORT_PCRE8
3108 pcre_mode = PCRE8_MODE;
3109 #else
3110 printf("** This version of PCRE was built without 8-bit support\n");
3111 exit(1);
3112 #endif
3113 }
3114 else if (strcmp(arg, "-16") == 0)
3115 {
3116 #ifdef SUPPORT_PCRE16
3117 pcre_mode = PCRE16_MODE;
3118 #else
3119 printf("** This version of PCRE was built without 16-bit support\n");
3120 exit(1);
3121 #endif
3122 }
3123 else if (strcmp(arg, "-32") == 0)
3124 {
3125 #ifdef SUPPORT_PCRE32
3126 pcre_mode = PCRE32_MODE;
3127 #else
3128 printf("** This version of PCRE was built without 32-bit support\n");
3129 exit(1);
3130 #endif
3131 }
3132 else if (strcmp(arg, "-q") == 0) quiet = 1;
3133 else if (strcmp(arg, "-b") == 0) debug = 1;
3134 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3135 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3136 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3137 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3138 #if !defined NODFA
3139 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3140 #endif
3141 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3142 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3143 *endptr == 0))
3144 {
3145 op++;
3146 argc--;
3147 }
3148 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3149 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3150 {
3151 int temp;
3152 int both = arg[2] == 0;
3153 showtotaltimes = arg[1] == 'T';
3154 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3155 *endptr == 0))
3156 {
3157 timeitm = temp;
3158 op++;
3159 argc--;
3160 }
3161 else timeitm = LOOPREPEAT;
3162 if (both) timeit = timeitm;
3163 }
3164 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3165 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3166 *endptr == 0))
3167 {
3168 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3169 printf("PCRE: -S not supported on this OS\n");
3170 exit(1);
3171 #else
3172 int rc;
3173 struct rlimit rlim;
3174 getrlimit(RLIMIT_STACK, &rlim);
3175 rlim.rlim_cur = stack_size * 1024 * 1024;
3176 rc = setrlimit(RLIMIT_STACK, &rlim);
3177 if (rc != 0)
3178 {
3179 printf("PCRE: setrlimit() failed with error %d\n", rc);
3180 exit(1);
3181 }
3182 op++;
3183 argc--;
3184 #endif
3185 }
3186 #if !defined NOPOSIX
3187 else if (strcmp(arg, "-p") == 0) posix = 1;
3188 #endif
3189 else if (strcmp(arg, "-C") == 0)
3190 {
3191 int rc;
3192 unsigned long int lrc;
3193
3194 if (argc > 2)
3195 {
3196 if (strcmp(argv[op + 1], "linksize") == 0)
3197 {
3198 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3199 printf("%d\n", rc);
3200 yield = rc;
3201
3202 #ifdef __VMS
3203 vms_setsymbol("LINKSIZE",0,yield );
3204 #endif
3205 }
3206 else if (strcmp(argv[op + 1], "pcre8") == 0)
3207 {
3208 #ifdef SUPPORT_PCRE8
3209 printf("1\n");
3210 yield = 1;
3211 #else
3212 printf("0\n");
3213 yield = 0;
3214 #endif
3215 #ifdef __VMS
3216 vms_setsymbol("PCRE8",0,yield );
3217 #endif
3218 }
3219 else if (strcmp(argv[op + 1], "pcre16") == 0)
3220 {
3221 #ifdef SUPPORT_PCRE16
3222 printf("1\n");
3223 yield = 1;
3224 #else
3225 printf("0\n");
3226 yield = 0;
3227 #endif
3228 #ifdef __VMS
3229 vms_setsymbol("PCRE16",0,yield );
3230 #endif
3231 }
3232 else if (strcmp(argv[op + 1], "pcre32") == 0)
3233 {
3234 #ifdef SUPPORT_PCRE32
3235 printf("1\n");
3236 yield = 1;
3237 #else
3238 printf("0\n");
3239 yield = 0;
3240 #endif
3241 #ifdef __VMS
3242 vms_setsymbol("PCRE32",0,yield );
3243 #endif
3244 }
3245 else if (strcmp(argv[op + 1], "utf") == 0)
3246 {
3247 #ifdef SUPPORT_PCRE8
3248 if (pcre_mode == PCRE8_MODE)
3249 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3250 #endif
3251 #ifdef SUPPORT_PCRE16
3252 if (pcre_mode == PCRE16_MODE)
3253 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3254 #endif
3255 #ifdef SUPPORT_PCRE32
3256 if (pcre_mode == PCRE32_MODE)
3257 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3258 #endif
3259 printf("%d\n", rc);
3260 yield = rc;
3261 #ifdef __VMS
3262 vms_setsymbol("UTF",0,yield );
3263 #endif
3264 }
3265 else if (strcmp(argv[op + 1], "ucp") == 0)
3266 {
3267 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3268 printf("%d\n", rc);
3269 yield = rc;
3270 }
3271 else if (strcmp(argv[op + 1], "jit") == 0)
3272 {
3273 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3274 printf("%d\n", rc);
3275 yield = rc;
3276 }
3277 else if (strcmp(argv[op + 1], "newline") == 0)
3278 {
3279 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3280 print_newline_config(rc, TRUE);
3281 }
3282 else if (strcmp(argv[op + 1], "bsr") == 0)
3283 {
3284 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3285 printf("%s\n", rc? "ANYCRLF" : "ANY");
3286 }
3287 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3288 {
3289 #ifdef EBCDIC
3290 printf("1\n");
3291 yield = 1;
3292 #else
3293 printf("0\n");
3294 #endif
3295 }
3296 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3297 {
3298 #ifdef EBCDIC
3299 printf("0x%02x\n", CHAR_LF);
3300 #else
3301 printf("0\n");
3302 #endif
3303 }
3304 else
3305 {
3306 printf("Unknown -C option: %s\n", argv[op + 1]);
3307 }
3308 goto EXIT;
3309 }
3310
3311 /* No argument for -C: output all configuration information. */
3312
3313 printf("PCRE version %s\n", version);
3314 printf("Compiled with\n");
3315
3316 #ifdef EBCDIC
3317 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3318 #endif
3319
3320 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3321 are set, either both UTFs are supported or both are not supported. */
3322
3323 #ifdef SUPPORT_PCRE8
3324 printf(" 8-bit support\n");
3325 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3326 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3327 #endif
3328 #ifdef SUPPORT_PCRE16
3329 printf(" 16-bit support\n");
3330 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3331 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3332 #endif
3333 #ifdef SUPPORT_PCRE32
3334 printf(" 32-bit support\n");
3335 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3336 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3337 #endif
3338
3339 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3340 printf(" %sUnicode properties support\n", rc? "" : "No ");
3341 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3342 if (rc)
3343 {
3344 const char *arch;
3345 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3346 printf(" Just-in-time compiler support: %s\n", arch);
3347 }
3348 else
3349 printf(" No just-in-time compiler support\n");
3350 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3351 print_newline_config(rc, FALSE);
3352 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3353 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3354 "all Unicode newlines");
3355 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3356 printf(" Internal link size = %d\n", rc);
3357 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3358 printf(" POSIX malloc threshold = %d\n", rc);
3359 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3360 printf(" Parentheses nest limit = %ld\n", lrc);
3361 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3362 printf(" Default match limit = %ld\n", lrc);
3363 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3364 printf(" Default recursion depth limit = %ld\n", lrc);
3365 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3366 printf(" Match recursion uses %s", rc? "stack" : "heap");
3367 if (showstore)
3368 {
3369 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3370 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3371 }
3372 printf("\n");
3373 goto EXIT;
3374 }
3375 else if (strcmp(arg, "-help") == 0 ||
3376 strcmp(arg, "--help") == 0)
3377 {
3378 usage();
3379 goto EXIT;
3380 }
3381 else
3382 {
3383 BAD_ARG:
3384 printf("** Unknown or malformed option %s\n", arg);
3385 usage();
3386 yield = 1;
3387 goto EXIT;
3388 }
3389 op++;
3390 argc--;
3391 }
3392
3393 /* Get the store for the offsets vector, and remember what it was */
3394
3395 size_offsets_max = size_offsets;
3396 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3397 if (offsets == NULL)
3398 {
3399 printf("** Failed to get %d bytes of memory for offsets vector\n",
3400 (int)(size_offsets_max * sizeof(int)));
3401 yield = 1;
3402 goto EXIT;
3403 }
3404
3405 /* Sort out the input and output files */
3406
3407 if (argc > 1)
3408 {
3409 infile = fopen(argv[op], INPUT_MODE);
3410 if (infile == NULL)
3411 {
3412 printf("** Failed to open %s\n", argv[op]);
3413 yield = 1;
3414 goto EXIT;
3415 }
3416 }
3417
3418 if (argc > 2)
3419 {
3420 outfile = fopen(argv[op+1], OUTPUT_MODE);
3421 if (outfile == NULL)
3422 {
3423 printf("** Failed to open %s\n", argv[op+1]);
3424 yield = 1;
3425 goto EXIT;
3426 }
3427 }
3428
3429 /* Set alternative malloc function */
3430
3431 #ifdef SUPPORT_PCRE8
3432 pcre_malloc = new_malloc;
3433 pcre_free = new_free;
3434 pcre_stack_malloc = stack_malloc;
3435 pcre_stack_free = stack_free;
3436 #endif
3437
3438 #ifdef SUPPORT_PCRE16
3439 pcre16_malloc = new_malloc;
3440 pcre16_free = new_free;
3441 pcre16_stack_malloc = stack_malloc;
3442 pcre16_stack_free = stack_free;
3443 #endif
3444
3445 #ifdef SUPPORT_PCRE32
3446 pcre32_malloc = new_malloc;
3447 pcre32_free = new_free;
3448 pcre32_stack_malloc = stack_malloc;
3449 pcre32_stack_free = stack_free;
3450 #endif
3451
3452 /* Heading line unless quiet */
3453
3454 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3455
3456 /* Main loop */
3457
3458 while (!done)
3459 {
3460 pcre *re = NULL;
3461 pcre_extra *extra = NULL;
3462
3463 #if !defined NOPOSIX /* There are still compilers that require no indent */
3464 regex_t preg = { NULL, 0, 0} ;
3465 int do_posix = 0;
3466 #endif
3467
3468 const char *error;
3469 pcre_uint8 *markptr;
3470 pcre_uint8 *p, *pp, *ppp;
3471 pcre_uint8 *to_file = NULL;
3472 const pcre_uint8 *tables = NULL;
3473 unsigned long int get_options;
3474 unsigned long int true_size, true_study_size = 0;
3475 size_t size;
3476 int do_allcaps = 0;
3477 int do_mark = 0;
3478 int do_study = 0;
3479 int no_force_study = 0;
3480 int do_debug = debug;
3481 int do_G = 0;
3482 int do_g = 0;
3483 int do_showinfo = showinfo;
3484 int do_showrest = 0;
3485 int do_showcaprest = 0;
3486 int do_flip = 0;
3487 int erroroffset, len, delimiter, poffset;
3488
3489 #if !defined NODFA
3490 int dfa_matched = 0;
3491 #endif
3492
3493 use_utf = 0;
3494 debug_lengths = 1;
3495 SET_PCRE_STACK_GUARD(NULL);
3496
3497 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3498 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3499 fflush(outfile);
3500
3501 p = buffer;
3502 while (isspace(*p)) p++;
3503 if (*p == 0) continue;
3504
3505 /* Handle option lock-out setting */
3506
3507 if (*p == '<' && p[1] == ' ')
3508 {
3509 p += 2;
3510 while (isspace(*p)) p++;
3511 if (strncmp((char *)p, "forbid ", 7) == 0)
3512 {
3513 p += 7;
3514 while (isspace(*p)) p++;
3515 pp = lockout;
3516 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3517 *pp++ = *p++;
3518 *pp = 0;
3519 }
3520 else
3521 {
3522 printf("** Unrecognized special command '%s'\n", p);
3523 yield = 1;
3524 goto EXIT;
3525 }
3526 continue;
3527 }
3528
3529 /* See if the pattern is to be loaded pre-compiled from a file. */
3530
3531 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3532 {
3533 pcre_uint32 magic;
3534 pcre_uint8 sbuf[8];
3535 FILE *f;
3536
3537 p++;
3538 if (*p == '!')
3539 {
3540 do_debug = TRUE;
3541 do_showinfo = TRUE;
3542 p++;
3543 }
3544
3545 pp = p + (int)strlen((char *)p);
3546 while (isspace(pp[-1])) pp--;
3547 *pp = 0;
3548
3549 f = fopen((char *)p, "rb");
3550 if (f == NULL)
3551 {
3552 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3553 continue;
3554 }
3555 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3556
3557 true_size =
3558 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3559 true_study_size =
3560 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3561
3562 re = (pcre *)new_malloc(true_size);
3563 if (re == NULL)
3564 {
3565 printf("** Failed to get %d bytes of memory for pcre object\n",
3566 (int)true_size);
3567 yield = 1;
3568 goto EXIT;
3569 }
3570 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3571
3572 magic = REAL_PCRE_MAGIC(re);
3573 if (magic != MAGIC_NUMBER)
3574 {
3575 if (swap_uint32(magic) == MAGIC_NUMBER)
3576 {
3577 do_flip = 1;
3578 }
3579 else
3580 {
3581 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3582 new_free(re);
3583 fclose(f);
3584 continue;
3585 }
3586 }
3587
3588 /* We hide the byte-invert info for little and big endian tests. */
3589 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3590 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3591
3592 /* Now see if there is any following study data. */
3593
3594 if (true_study_size != 0)
3595 {
3596 pcre_study_data *psd;
3597
3598 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3599 extra->flags = PCRE_EXTRA_STUDY_DATA;
3600
3601 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3602 extra->study_data = psd;
3603
3604 if (fread(psd, 1, true_study_size, f) != true_study_size)
3605 {
3606 FAIL_READ:
3607 fprintf(outfile, "Failed to read data from %s\n", p);
3608 if (extra != NULL)
3609 {
3610 PCRE_FREE_STUDY(extra);
3611 }
3612 new_free(re);
3613 fclose(f);
3614 continue;
3615 }
3616 fprintf(outfile, "Study data loaded from %s\n", p);
3617 do_study = 1; /* To get the data output if requested */
3618 }
3619 else fprintf(outfile, "No study data\n");
3620
3621 /* Flip the necessary bytes. */
3622 if (do_flip)
3623 {
3624 int rc;
3625 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3626 if (rc == PCRE_ERROR_BADMODE)
3627 {
3628 pcre_uint32 flags_in_host_byte_order;
3629 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3630 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3631 else
3632 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3633 /* Simulate the result of the function call below. */
3634 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3635 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3636 PCRE_INFO_OPTIONS);
3637 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3638 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3639 new_free(re);
3640 fclose(f);
3641 continue;
3642 }
3643 }
3644
3645 /* Need to know if UTF-8 for printing data strings. */
3646
3647 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3648 {
3649 new_free(re);
3650 fclose(f);
3651 continue;
3652 }
3653 use_utf = (get_options & PCRE_UTF8) != 0;
3654
3655 fclose(f);
3656 goto SHOW_INFO;
3657 }
3658
3659 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3660 the pattern; if it isn't complete, read more. */
3661
3662 delimiter = *p++;
3663
3664 if (isalnum(delimiter) || delimiter == '\\')
3665 {
3666 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3667 goto SKIP_DATA;
3668 }
3669
3670 pp = p;
3671 poffset = (int)(p - buffer);
3672
3673 for(;;)
3674 {
3675 while (*pp != 0)
3676 {
3677 if (*pp == '\\' && pp[1] != 0) pp++;
3678 else if (*pp == delimiter) break;
3679 pp++;
3680 }
3681 if (*pp != 0) break;
3682 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3683 {
3684 fprintf(outfile, "** Unexpected EOF\n");
3685 done = 1;
3686 goto CONTINUE;
3687 }
3688 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3689 }
3690
3691 /* The buffer may have moved while being extended; reset the start of data
3692 pointer to the correct relative point in the buffer. */
3693
3694 p = buffer + poffset;
3695
3696 /* If the first character after the delimiter is backslash, make
3697 the pattern end with backslash. This is purely to provide a way
3698 of testing for the error message when a pattern ends with backslash. */
3699
3700 if (pp[1] == '\\') *pp++ = '\\';
3701
3702 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3703 for callouts. */
3704
3705 *pp++ = 0;
3706 strcpy((char *)pbuffer, (char *)p);
3707
3708 /* Look for modifiers and options after the final delimiter. */
3709
3710 options = default_options;
3711 study_options = force_study_options;
3712 log_store = showstore; /* default from command line */
3713
3714 while (*pp != 0)
3715 {
3716 /* Check to see whether this modifier has been locked out for this file.
3717 This is complicated for the multi-character options that begin with '<'.
3718 If there is no '>' in the lockout string, all multi-character modifiers are
3719 locked out. */
3720
3721 if (strchr((char *)lockout, *pp) != NULL)
3722 {
3723 if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3724 {
3725 int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3726 if (x == 0) goto SKIP_DATA;
3727
3728 for (ppp = lockout; *ppp != 0; ppp++)
3729 {
3730 if (*ppp == '<')
3731 {
3732 int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3733 if (y == 0)
3734 {
3735 printf("** Error in modifier forbid data - giving up.\n");
3736 yield = 1;
3737 goto EXIT;
3738 }
3739 if (x == y)
3740 {
3741 ppp = pp;
3742 while (*ppp != '>') ppp++;
3743 printf("** The %.*s modifier is locked out - giving up.\n",
3744 (int)(ppp - pp + 1), pp);
3745 yield = 1;
3746 goto EXIT;
3747 }
3748 }
3749 }
3750 }
3751
3752 /* The single-character modifiers are straightforward. */
3753
3754 else
3755 {
3756 printf("** The /%c modifier is locked out - giving up.\n", *pp);
3757 yield = 1;
3758 goto EXIT;
3759 }
3760 }
3761
3762 /* The modifier is not locked out; handle it. */
3763
3764 switch (*pp++)
3765 {
3766 case 'f': options |= PCRE_FIRSTLINE; break;
3767 case 'g': do_g = 1; break;
3768 case 'i': options |= PCRE_CASELESS; break;
3769 case 'm': options |= PCRE_MULTILINE; break;
3770 case 's': options |= PCRE_DOTALL; break;
3771 case 'x': options |= PCRE_EXTENDED; break;
3772
3773 case '+':
3774 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3775 break;
3776
3777 case '=': do_allcaps = 1; break;
3778 case 'A': options |= PCRE_ANCHORED; break;
3779 case 'B': do_debug = 1; break;
3780 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3781 case 'D': do_debug = do_showinfo = 1; break;
3782 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3783 case 'F': do_flip = 1; break;
3784 case 'G': do_G = 1; break;
3785 case 'I': do_showinfo = 1; break;
3786 case 'J': options |= PCRE_DUPNAMES; break;
3787 case 'K': do_mark = 1; break;
3788 case 'M': log_store = 1; break;
3789 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3790 case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3791
3792 #if !defined NOPOSIX
3793 case 'P': do_posix = 1; break;
3794 #endif
3795
3796 case 'Q':
3797 switch (*pp)
3798 {
3799 case '0':
3800 case '1':
3801 stack_guard_return = *pp++ - '0';
3802 break;
3803
3804 default:
3805 fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3806 goto SKIP_DATA;
3807 }
3808 SET_PCRE_STACK_GUARD(stack_guard);
3809 break;
3810
3811 case 'S':
3812 do_study = 1;
3813 for (;;)
3814 {
3815 switch (*pp++)
3816 {
3817 case 'S':
3818 do_study = 0;
3819 no_force_study = 1;
3820 break;
3821
3822 case '!':
3823 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3824 break;
3825
3826 case '+':
3827 if (*pp == '+')
3828 {
3829 verify_jit = TRUE;
3830 pp++;
3831 }
3832 if (*pp >= '1' && *pp <= '7')
3833 study_options |= jit_study_bits[*pp++ - '1'];
3834 else
3835 study_options |= jit_study_bits[6];
3836 break;
3837
3838 case '-':
3839 study_options &= ~PCRE_STUDY_ALLJIT;
3840 break;
3841
3842 default:
3843 pp--;
3844 goto ENDLOOP;
3845 }
3846 }
3847 ENDLOOP:
3848 break;
3849
3850 case 'U': options |= PCRE_UNGREEDY; break;
3851 case 'W': options |= PCRE_UCP; break;
3852 case 'X': options |= PCRE_EXTRA; break;
3853 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3854 case 'Z': debug_lengths = 0; break;
3855 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3856 case '9': options |= PCRE_NEVER_UTF; break;
3857 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3858
3859 case 'T':
3860 switch (*pp++)
3861 {
3862 case '0': tables = tables0; break;
3863 case '1': tables = tables1; break;
3864
3865 case '\r':
3866 case '\n':
3867 case ' ':
3868 case 0:
3869 fprintf(outfile, "** Missing table number after /T\n");
3870 goto SKIP_DATA;
3871
3872 default:
3873 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3874 goto SKIP_DATA;
3875 }
3876 break;
3877
3878 case 'L':
3879 ppp = pp;
3880 /* The '\r' test here is so that it works on Windows. */
3881 /* The '0' test is just in case this is an unterminated line. */
3882 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3883 *ppp = 0;
3884 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3885 {
3886 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3887 goto SKIP_DATA;
3888 }
3889 locale_set = 1;
3890 tables = PCRE_MAKETABLES;
3891 pp = ppp;
3892 break;
3893
3894 case '>':
3895 to_file = pp;
3896 while (*pp != 0) pp++;
3897 while (isspace(pp[-1])) pp--;
3898 *pp = 0;
3899 break;
3900
3901 case '<':
3902 {
3903 int x = check_mc_option(pp, outfile, FALSE, "modifier");
3904 if (x == 0) goto SKIP_DATA;
3905 options |= x;
3906 while (*pp++ != '>');
3907 }
3908 break;
3909
3910 case '\r': /* So that it works in Windows */
3911 case '\n':
3912 case ' ':
3913 break;
3914
3915 default:
3916 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3917 goto SKIP_DATA;
3918 }
3919 }
3920
3921 /* Handle compiling via the POSIX interface, which doesn't support the
3922 timing, showing, or debugging options, nor the ability to pass over
3923 local character tables. Neither does it have 16-bit support. */
3924
3925 #if !defined NOPOSIX
3926 if (posix || do_posix)
3927 {
3928 int rc;
3929 int cflags = 0;
3930
3931 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3932 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3933 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3934 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3935 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3936 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3937 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3938
3939 rc = regcomp(&preg, (char *)p, cflags);
3940
3941 /* Compilation failed; go back for another re, skipping to blank line
3942 if non-interactive. */
3943
3944 if (rc != 0)
3945 {
3946 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3947 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3948 goto SKIP_DATA;
3949 }
3950 }
3951
3952 /* Handle compiling via the native interface */
3953
3954 else
3955 #endif /* !defined NOPOSIX */
3956
3957 {
3958 /* In 16- or 32-bit mode, convert the input. */
3959
3960 #ifdef SUPPORT_PCRE16
3961 if (pcre_mode == PCRE16_MODE)
3962 {
3963 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3964 {
3965 case -1:
3966 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3967 "converted to UTF-16\n");
3968 goto SKIP_DATA;
3969
3970 case -2:
3971 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3972 "cannot be converted to UTF-16\n");
3973 goto SKIP_DATA;
3974
3975 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3976 fprintf(outfile, "**Failed: character value greater than 0xffff "
3977 "cannot be converted to 16-bit in non-UTF mode\n");
3978 goto SKIP_DATA;
3979
3980 default:
3981 break;
3982 }
3983 p = (pcre_uint8 *)buffer16;
3984 }
3985 #endif
3986
3987 #ifdef SUPPORT_PCRE32
3988 if (pcre_mode == PCRE32_MODE)
3989 {
3990 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3991 {
3992 case -1:
3993 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3994 "converted to UTF-32\n");
3995 goto SKIP_DATA;
3996
3997 case -2:
3998 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3999 "cannot be converted to UTF-32\n");
4000 goto SKIP_DATA;
4001
4002 case -3:
4003 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4004 goto SKIP_DATA;
4005
4006 default:
4007 break;
4008 }
4009 p = (pcre_uint8 *)buffer32;
4010 }
4011 #endif
4012
4013 /* Compile many times when timing */
4014
4015 if (timeit > 0)
4016 {
4017 register int i;
4018 clock_t time_taken;
4019 clock_t start_time = clock();
4020 for (i = 0; i < timeit; i++)
4021 {
4022 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4023 if (re != NULL) free(re);
4024 }
4025 total_compile_time += (time_taken = clock() - start_time);
4026 fprintf(outfile, "Compile time %.4f milliseconds\n",
4027 (((double)time_taken * 1000.0) / (double)timeit) /
4028 (double)CLOCKS_PER_SEC);
4029 }
4030
4031 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4032
4033 /* Compilation failed; go back for another re, skipping to blank line
4034 if non-interactive. */
4035
4036 if (re == NULL)
4037 {
4038 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4039 SKIP_DATA:
4040 if (infile != stdin)
4041 {
4042 for (;;)
4043 {
4044 if (extend_inputline(infile, buffer, NULL) == NULL)
4045 {
4046 done = 1;
4047 goto CONTINUE;
4048 }
4049 len = (int)strlen((char *)buffer);
4050 while (len > 0 && isspace(buffer[len-1])) len--;
4051 if (len == 0) break;
4052 }
4053 fprintf(outfile, "\n");
4054 }
4055 goto CONTINUE;
4056 }
4057
4058 /* Compilation succeeded. It is now possible to set the UTF-8 option from
4059 within the regex; check for this so that we know how to process the data
4060 lines. */
4061
4062 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4063 goto SKIP_DATA;
4064 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4065
4066 /* Extract the size for possible writing before possibly flipping it,
4067 and remember the store that was got. */
4068
4069 true_size = REAL_PCRE_SIZE(re);
4070
4071 /* Output code size information if requested */
4072
4073 if (log_store)
4074 {
4075 int name_count, name_entry_size, real_pcre_size;
4076
4077 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4078 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4079 real_pcre_size = 0;
4080 #ifdef SUPPORT_PCRE8
4081 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4082 real_pcre_size = sizeof(real_pcre);
4083 #endif
4084 #ifdef SUPPORT_PCRE16
4085 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4086 real_pcre_size = sizeof(real_pcre16);
4087 #endif
4088 #ifdef SUPPORT_PCRE32
4089 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4090 real_pcre_size = sizeof(real_pcre32);
4091 #endif
4092 new_info(re, NULL, PCRE_INFO_SIZE, &size);
4093 fprintf(outfile, "Memory allocation (code space): %d\n",
4094 (int)(size - real_pcre_size - name_count * name_entry_size));
4095 }
4096
4097 /* If -s or /S was present, study the regex to generate additional info to
4098 help with the matching, unless the pattern has the SS option, which
4099 suppresses the effect of /S (used for a few test patterns where studying is
4100 never sensible). */
4101
4102 if (do_study || (force_study >= 0 && !no_force_study))
4103 {
4104 if (timeit > 0)
4105 {
4106 register int i;
4107 clock_t time_taken;
4108 clock_t start_time = clock();
4109 for (i = 0; i < timeit; i++)
4110 {
4111 PCRE_STUDY(extra, re, study_options, &error);
4112 }
4113 total_study_time = (time_taken = clock() - start_time);
4114 if (extra != NULL)
4115 {
4116 PCRE_FREE_STUDY(extra);
4117 }
4118 fprintf(outfile, " Study time %.4f milliseconds\n",
4119 (((double)time_taken * 1000.0) / (double)timeit) /
4120 (double)CLOCKS_PER_SEC);
4121 }
4122 PCRE_STUDY(extra, re, study_options, &error);
4123 if (error != NULL)
4124 fprintf(outfile, "Failed to study: %s\n", error);
4125 else if (extra != NULL)
4126 {
4127 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4128 if (log_store)
4129 {
4130 size_t jitsize;
4131 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4132 jitsize != 0)
4133 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4134 }
4135 }
4136 }
4137
4138 /* If /K was present, we set up for handling MARK data. */
4139
4140 if (do_mark)
4141 {
4142 if (extra == NULL)
4143 {
4144 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4145 extra->flags = 0;
4146 }
4147 extra->mark = &markptr;
4148 extra->flags |= PCRE_EXTRA_MARK;
4149 }
4150
4151 /* Extract and display information from the compiled data if required. */
4152
4153 SHOW_INFO:
4154
4155 if (do_debug)
4156 {
4157 fprintf(outfile, "------------------------------------------------------------------\n");
4158 PCRE_PRINTINT(re, outfile, debug_lengths);
4159 }
4160
4161 /* We already have the options in get_options (see above) */
4162
4163 if (do_showinfo)
4164 {
4165 unsigned long int all_options;
4166 pcre_uint32 first_char, need_char;
4167 pcre_uint32 match_limit, recursion_limit;
4168 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4169 hascrorlf, maxlookbehind, match_empty;
4170 int nameentrysize, namecount;
4171 const pcre_uint8 *nametable;
4172
4173 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4174 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4175 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4176 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4177 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4178 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4179 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4180 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4181 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4182 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4183 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4184 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4185 new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4186 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4187 != 0)
4188 goto SKIP_DATA;
4189
4190 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4191
4192 if (backrefmax > 0)
4193 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4194
4195 if (maxlookbehind > 0)
4196 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4197
4198 if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4199 fprintf(outfile, "Match limit = %u\n", match_limit);
4200
4201 if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4202 fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4203
4204 if (namecount > 0)
4205 {
4206 fprintf(outfile, "Named capturing subpatterns:\n");
4207 while (namecount-- > 0)
4208 {
4209 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4210 int length = (int)STRLEN(nametable + imm2_size);
4211 fprintf(outfile, " ");
4212 PCHARSV(nametable, imm2_size, length, outfile);
4213 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4214 #ifdef SUPPORT_PCRE32
4215 if (pcre_mode == PCRE32_MODE)
4216 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4217 #endif
4218 #ifdef SUPPORT_PCRE16
4219 if (pcre_mode == PCRE16_MODE)
4220 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4221 #endif
4222 #ifdef SUPPORT_PCRE8
4223 if (pcre_mode == PCRE8_MODE)
4224 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4225 #endif
4226 nametable += nameentrysize * CHAR_SIZE;
4227 }
4228 }
4229
4230 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4231 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4232 if (match_empty) fprintf(outfile, "May match empty string\n");
4233
4234 all_options = REAL_PCRE_OPTIONS(re);
4235 if (do_flip) all_options = swap_uint32(all_options);
4236
4237 if (get_options == 0) fprintf(outfile, "No options\n");
4238 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4239 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4240 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4241 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4242 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4243 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4244 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4245 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4246 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4247 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4248 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4249 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4250 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4251 ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
4252 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4253 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4254 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4255 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4256 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4257 ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4258
4259 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4260
4261 switch (get_options & PCRE_NEWLINE_BITS)
4262 {
4263 case PCRE_NEWLINE_CR:
4264 fprintf(outfile, "Forced newline sequence: CR\n");
4265 break;
4266
4267 case PCRE_NEWLINE_LF:
4268 fprintf(outfile, "Forced newline sequence: LF\n");
4269 break;
4270
4271 case PCRE_NEWLINE_CRLF:
4272 fprintf(outfile, "Forced newline sequence: CRLF\n");
4273 break;
4274
4275 case PCRE_NEWLINE_ANYCRLF:
4276 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4277 break;
4278
4279 case PCRE_NEWLINE_ANY:
4280 fprintf(outfile, "Forced newline sequence: ANY\n");
4281 break;
4282
4283 default:
4284 break;
4285 }
4286
4287 if (first_char_set == 2)
4288 {
4289 fprintf(outfile, "First char at start or follows newline\n");
4290 }
4291 else if (first_char_set == 1)
4292 {
4293 const char *caseless =
4294 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4295 "" : " (caseless)";
4296
4297 if (PRINTOK(first_char))
4298 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4299 else
4300 {
4301 fprintf(outfile, "First char = ");
4302 pchar(first_char, outfile);
4303 fprintf(outfile, "%s\n", caseless);
4304 }
4305 }
4306 else
4307 {
4308 fprintf(outfile, "No first char\n");
4309 }
4310
4311 if (need_char_set == 0)
4312 {
4313 fprintf(outfile, "No need char\n");
4314 }
4315 else
4316 {
4317 const char *caseless =
4318 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4319 "" : " (caseless)";
4320
4321 if (PRINTOK(need_char))
4322 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4323 else
4324 {
4325 fprintf(outfile, "Need char = ");
4326 pchar(need_char, outfile);
4327 fprintf(outfile, "%s\n", caseless);
4328 }
4329 }
4330
4331 /* Don't output study size; at present it is in any case a fixed
4332 value, but it varies, depending on the computer architecture, and
4333 so messes up the test suite. (And with the /F option, it might be
4334 flipped.) If study was forced by an external -s, don't show this
4335 information unless -i or -d was also present. This means that, except
4336 when auto-callouts are involved, the output from runs with and without
4337 -s should be identical. */
4338
4339 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4340 {
4341 if (extra == NULL)
4342 fprintf(outfile, "Study returned NULL\n");
4343 else
4344 {
4345 pcre_uint8 *start_bits = NULL;
4346 int minlength;
4347
4348 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4349 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4350
4351 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4352 {
4353 if (start_bits == NULL)
4354 fprintf(outfile, "No starting char list\n");
4355 else
4356 {
4357 int i;
4358 int c = 24;
4359 fprintf(outfile, "Starting chars: ");
4360 for (i = 0; i < 256; i++)
4361 {
4362 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4363 {
4364 if (c > 75)
4365 {
4366 fprintf(outfile, "\n ");
4367 c = 2;
4368 }
4369 if (PRINTOK(i) && i != ' ')
4370 {
4371 fprintf(outfile, "%c ", i);
4372 c += 2;
4373 }
4374 else
4375 {
4376 fprintf(outfile, "\\x%02x ", i);
4377 c += 5;
4378 }
4379 }
4380 }
4381 fprintf(outfile, "\n");
4382 }
4383 }
4384 }
4385
4386 /* Show this only if the JIT was set by /S, not by -s. */
4387
4388 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4389 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4390 {
4391 int jit;
4392 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4393 {
4394 if (jit)
4395 fprintf(outfile, "JIT study was successful\n");
4396 else
4397 #ifdef SUPPORT_JIT
4398 fprintf(outfile, "JIT study was not successful\n");
4399 #else
4400 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4401 #endif
4402 }
4403 }
4404 }
4405 }
4406
4407 /* If the '>' option was present, we write out the regex to a file, and
4408 that is all. The first 8 bytes of the file are the regex length and then
4409 the study length, in big-endian order. */
4410
4411 if (to_file != NULL)
4412 {
4413 FILE *f = fopen((char *)to_file, "wb");
4414 if (f == NULL)
4415 {
4416 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4417 }
4418 else
4419 {
4420 pcre_uint8 sbuf[8];
4421
4422 if (do_flip) regexflip(re, extra);
4423 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4424 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4425 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4426 sbuf[3] = (pcre_uint8)((true_size) & 255);
4427 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4428 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4429 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4430 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4431
4432 if (fwrite(sbuf, 1, 8, f) < 8 ||
4433 fwrite(re, 1, true_size, f) < true_size)
4434 {
4435 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4436 }
4437 else
4438 {
4439 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4440
4441 /* If there is study data, write it. */
4442
4443 if (extra != NULL)
4444 {
4445 if (fwrite(extra->study_data, 1, true_study_size, f) <
4446 true_study_size)
4447 {
4448 fprintf(outfile, "Write error on %s: %s\n", to_file,
4449 strerror(errno));
4450 }
4451 else fprintf(outfile, "Study data written to %s\n", to_file);
4452 }
4453 }
4454 fclose(f);
4455 }
4456
4457 new_free(re);
4458 if (extra != NULL)
4459 {
4460 PCRE_FREE_STUDY(extra);
4461 }
4462 if (locale_set)
4463 {
4464 new_free((void *)tables);
4465 setlocale(LC_CTYPE, "C");
4466 locale_set = 0;
4467 }
4468 continue; /* With next regex */
4469 }
4470 } /* End of non-POSIX compile */
4471
4472 /* Read data lines and test them */
4473
4474 for (;;)
4475 {
4476 #ifdef SUPPORT_PCRE8
4477 pcre_uint8 *q8;
4478 #endif
4479 #ifdef SUPPORT_PCRE16
4480 pcre_uint16 *q16;
4481 #endif
4482 #ifdef SUPPORT_PCRE32
4483 pcre_uint32 *q32;
4484 #endif
4485 pcre_uint8 *bptr;
4486 int *use_offsets = offsets;
4487 int use_size_offsets = size_offsets;
4488 int callout_data = 0;
4489 int callout_data_set = 0;
4490 int count;
4491 pcre_uint32 c;
4492 int copystrings = 0;
4493 int find_match_limit = default_find_match_limit;
4494 int getstrings = 0;
4495 int getlist = 0;
4496 int gmatched = 0;
4497 int start_offset = 0;
4498 int start_offset_sign = 1;
4499 int g_notempty = 0;
4500 int use_dfa = 0;
4501
4502 *copynames = 0;
4503 *getnames = 0;
4504
4505 #ifdef SUPPORT_PCRE32
4506 cn32ptr = copynames;
4507 gn32ptr = getnames;
4508 #endif
4509 #ifdef SUPPORT_PCRE16
4510 cn16ptr = copynames16;
4511 gn16ptr = getnames16;
4512 #endif
4513 #ifdef SUPPORT_PCRE8
4514 cn8ptr = copynames8;
4515 gn8ptr = getnames8;
4516 #endif
4517
4518 SET_PCRE_CALLOUT(callout);
4519 first_callout = 1;
4520 last_callout_mark = NULL;
4521 callout_extra = 0;
4522 callout_count = 0;
4523 callout_fail_count = 999999;
4524 callout_fail_id = -1;
4525 show_malloc = 0;
4526 options = 0;
4527
4528 if (extra != NULL) extra->flags &=
4529 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4530
4531 len = 0;
4532 for (;;)
4533 {
4534 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4535 {
4536 if (len > 0) /* Reached EOF without hitting a newline */
4537 {
4538 fprintf(outfile, "\n");
4539 break;
4540 }
4541 done = 1;
4542 goto CONTINUE;
4543 }
4544 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4545 len = (int)strlen((char *)buffer);
4546 if (buffer[len-1] == '\n') break;
4547 }
4548
4549 while (len > 0 && isspace(buffer[len-1])) len--;
4550 buffer[len] = 0;
4551 if (len == 0) break;
4552
4553 p = buffer;
4554 while (isspace(*p)) p++;
4555
4556 #ifndef NOUTF
4557 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4558 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4559
4560 if (use_utf)
4561 {
4562 pcre_uint8 *q;
4563 pcre_uint32 cc;
4564 int n = 1;
4565
4566 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4567 if (n <= 0)
4568 {
4569 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4570 goto NEXT_DATA;
4571 }
4572 }
4573 #endif
4574
4575 #ifdef SUPPORT_VALGRIND
4576 /* Mark the dbuffer as addressable but undefined again. */
4577
4578 if (dbuffer != NULL)
4579 {
4580 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4581 }
4582 #endif
4583
4584 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4585 the number of pcre_uchar units that will be needed. */
4586
4587 while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4588 {
4589 dbuffer_size *= 2;
4590 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4591 if (dbuffer == NULL)
4592 {
4593 fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4594 exit(1);
4595 }
4596 }
4597
4598 #ifdef SUPPORT_PCRE8
4599 q8 = (pcre_uint8 *) dbuffer;
4600 #endif
4601 #ifdef SUPPORT_PCRE16
4602 q16 = (pcre_uint16 *) dbuffer;
4603 #endif
4604 #ifdef SUPPORT_PCRE32
4605 q32 = (pcre_uint32 *) dbuffer;
4606 #endif
4607
4608 while ((c = *p++) != 0)
4609 {
4610 int i = 0;
4611 int n = 0;
4612
4613 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4614 In non-UTF mode, allow the value of the byte to fall through to later,
4615 where values greater than 127 are turned into UTF-8 when running in
4616 16-bit or 32-bit mode. */
4617
4618 if (c != '\\')
4619 {
4620 #ifndef NOUTF
4621 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4622 #endif
4623 }
4624
4625 /* Handle backslash escapes */
4626
4627 else switch ((c = *p++))
4628 {
4629 case 'a': c = CHAR_BEL; break;
4630 case 'b': c = '\b'; break;
4631 case 'e': c = CHAR_ESC; break;
4632 case 'f': c = '\f'; break;
4633 case 'n': c = '\n'; break;
4634 case 'r': c = '\r'; break;
4635 case 't': c = '\t'; break;
4636 case 'v': c = '\v'; break;
4637
4638 case '0': case '1': case '2': case '3':
4639 case '4': case '5': case '6': case '7':
4640 c -= '0';
4641 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4642 c = c * 8 + *p++ - '0';
4643 break;
4644
4645 case 'o':
4646 if (*p == '{')
4647 {
4648 pcre_uint8 *pt = p;
4649 c = 0;
4650 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
4651 {
4652 if (++i == 12)
4653 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
4654 "using only the first twelve.\n");
4655 else c = c * 8 + *pt - '0';
4656 }
4657 if (*pt == '}') p = pt + 1;
4658 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
4659 }
4660 break;
4661
4662 case 'x':
4663 if (*p == '{')
4664 {
4665 pcre_uint8 *pt = p;
4666 c = 0;
4667
4668 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4669 when isxdigit() is a macro that refers to its argument more than
4670 once. This is banned by the C Standard, but apparently happens in at
4671 least one MacOS environment. */
4672
4673 for (pt++; isxdigit(*pt); pt++)
4674 {
4675 if (++i == 9)
4676 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4677 "using only the first eight.\n");
4678 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4679 }
4680 if (*pt == '}')
4681 {
4682 p = pt + 1;
4683 break;
4684 }
4685 /* Not correct form for \x{...}; fall through */
4686 }
4687
4688 /* \x without {} always defines just one byte in 8-bit mode. This
4689 allows UTF-8 characters to be constructed byte by byte, and also allows
4690 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4691 Otherwise, pass it down to later code so that it can be turned into
4692 UTF-8 when running in 16/32-bit mode. */
4693
4694 c = 0;
4695 while (i++ < 2 && isxdigit(*p))
4696 {
4697 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4698 p++;
4699 }
4700 #if !defined NOUTF && defined SUPPORT_PCRE8
4701 if (use_utf && (pcre_mode == PCRE8_MODE))
4702 {
4703 *q8++ = c;
4704 continue;
4705 }
4706 #endif
4707 break;
4708
4709 case 0: /* \ followed by EOF allows for an empty line */
4710 p--;
4711 continue;
4712
4713 case '>':
4714 if (*p == '-')
4715 {
4716 start_offset_sign = -1;
4717 p++;
4718 }
4719 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4720 start_offset *= start_offset_sign;
4721 continue;
4722
4723 case 'A': /* Option setting */
4724 options |= PCRE_ANCHORED;
4725 continue;
4726
4727 case 'B':
4728 options |= PCRE_NOTBOL;
4729 continue;
4730
4731 case 'C':
4732 if (isdigit(*p)) /* Set copy string */
4733 {
4734 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4735 copystrings |= 1 << n;
4736 }
4737 else if (isalnum(*p))
4738 {
4739 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4740 }
4741 else if (*p == '+')
4742 {
4743 callout_extra = 1;
4744 p++;
4745 }
4746 else if (*p == '-')
4747 {
4748 SET_PCRE_CALLOUT(NULL);
4749 p++;
4750 }
4751 else if (*p == '!')
4752 {
4753 callout_fail_id = 0;
4754 p++;
4755 while(isdigit(*p))
4756 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4757 callout_fail_count = 0;
4758 if (*p == '!')
4759 {
4760 p++;
4761 while(isdigit(*p))
4762 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4763 }
4764 }
4765 else if (*p == '*')
4766 {
4767 int sign = 1;
4768 callout_data = 0;
4769 if (*(++p) == '-') { sign = -1; p++; }
4770 while(isdigit(*p))
4771 callout_data = callout_data * 10 + *p++ - '0';
4772 callout_data *= sign;
4773 callout_data_set = 1;
4774 }
4775 continue;
4776
4777 #if !defined NODFA
4778 case 'D':
4779 #if !defined NOPOSIX
4780 if (posix || do_posix)
4781 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4782 else
4783 #endif
4784 use_dfa = 1;
4785 continue;
4786 #endif
4787
4788 #if !defined NODFA
4789 case 'F':
4790 options |= PCRE_DFA_SHORTEST;
4791 continue;
4792 #endif
4793
4794 case 'G':
4795 if (isdigit(*p))
4796 {
4797 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4798 getstrings |= 1 << n;
4799 }
4800 else if (isalnum(*p))
4801 {
4802 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4803 }
4804 continue;
4805
4806 case 'J':
4807 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4808 if (extra != NULL
4809 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4810 && extra->executable_jit != NULL)
4811 {
4812 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4813 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4814 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4815 }
4816 continue;
4817
4818 case 'L':
4819 getlist = 1;
4820 continue;
4821
4822 case 'M':
4823 find_match_limit = 1;
4824 continue;
4825
4826 case 'N':
4827 if ((options & PCRE_NOTEMPTY) != 0)
4828 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4829 else
4830 options |= PCRE_NOTEMPTY;
4831 continue;
4832
4833 case 'O':
4834 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4835 if (n > size_offsets_max)
4836 {
4837 size_offsets_max = n;
4838 free(offsets);
4839 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4840 if (offsets == NULL)
4841 {
4842 printf("** Failed to get %d bytes of memory for offsets vector\n",
4843 (int)(size_offsets_max * sizeof(int)));
4844 yield = 1;
4845 goto EXIT;
4846 }
4847 }
4848 use_size_offsets = n;
4849 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4850 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4851 continue;
4852
4853 case 'P':
4854 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4855 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4856 continue;
4857
4858 case 'Q':
4859 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4860 if (extra == NULL)
4861 {
4862 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4863 extra->flags = 0;
4864 }
4865 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4866 extra->match_limit_recursion = n;
4867 continue;
4868
4869 case 'q':
4870 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4871 if (extra == NULL)
4872 {
4873 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4874 extra->flags = 0;
4875 }
4876 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4877 extra->match_limit = n;
4878 continue;
4879
4880 #if !defined NODFA
4881 case 'R':
4882 options |= PCRE_DFA_RESTART;
4883 continue;
4884 #endif
4885
4886 case 'S':
4887 show_malloc = 1;
4888 continue;
4889
4890 case 'Y':
4891 options |= PCRE_NO_START_OPTIMIZE;
4892 continue;
4893
4894 case 'Z':
4895 options |= PCRE_NOTEOL;
4896 continue;
4897
4898 case '?':
4899 options |= PCRE_NO_UTF8_CHECK;
4900 continue;
4901
4902 case '<':
4903 {
4904 int x = check_mc_option(p, outfile, TRUE, "escape sequence");
4905 if (x == 0) goto NEXT_DATA;
4906 options |= x;
4907 while (*p++ != '>');
4908 }
4909 continue;
4910 }
4911
4912 /* We now have a character value in c that may be greater than 255.
4913 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4914 than 127 in UTF mode must have come from \x{...} or octal constructs
4915 because values from \x.. get this far only in non-UTF mode. */
4916
4917 #ifdef SUPPORT_PCRE8
4918 if (pcre_mode == PCRE8_MODE)
4919 {
4920 #ifndef NOUTF
4921 if (use_utf)
4922 {
4923 if (c > 0x7fffffff)
4924 {
4925 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4926 "and so cannot be converted to UTF-8\n", c);
4927 goto NEXT_DATA;
4928 }
4929 q8 += ord2utf8(c, q8);
4930 }
4931 else
4932 #endif
4933 {
4934 if (c > 0xffu)
4935 {
4936 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4937 "and UTF-8 mode is not enabled.\n", c);
4938 fprintf(outfile, "** Truncation will probably give the wrong "
4939 "result.\n");
4940 }
4941 *q8++ = c;
4942 }
4943 }
4944 #endif
4945 #ifdef SUPPORT_PCRE16
4946 if (pcre_mode == PCRE16_MODE)
4947 {
4948 #ifndef NOUTF
4949 if (use_utf)
4950 {
4951 if (c > 0x10ffffu)
4952 {
4953 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4954 "0x10ffff and so cannot be converted to UTF-16\n", c);
4955 goto NEXT_DATA;
4956 }
4957 else if (c >= 0x10000u)
4958 {
4959 c-= 0x10000u;
4960 *q16++ = 0xD800 | (c >> 10);
4961 *q16++ = 0xDC00 | (c & 0x3ff);
4962 }
4963 else
4964 *q16++ = c;
4965 }
4966 else
4967 #endif
4968 {
4969 if (c > 0xffffu)
4970 {
4971 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4972 "and UTF-16 mode is not enabled.\n", c);
4973 fprintf(outfile, "** Truncation will probably give the wrong "
4974 "result.\n");
4975 }
4976
4977 *q16++ = c;
4978 }
4979 }
4980 #endif
4981 #ifdef SUPPORT_PCRE32
4982 if (pcre_mode == PCRE32_MODE)
4983 {
4984 *q32++ = c;
4985 }
4986 #endif
4987
4988 }
4989
4990 /* Reached end of subject string */
4991
4992 #ifdef SUPPORT_PCRE8
4993 if (pcre_mode == PCRE8_MODE)
4994 {
4995 *q8 = 0;
4996 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4997 }
4998 #endif
4999 #ifdef SUPPORT_PCRE16
5000 if (pcre_mode == PCRE16_MODE)
5001 {
5002 *q16 = 0;
5003 len = (int)(q16 - (pcre_uint16 *)dbuffer);
5004 }
5005 #endif
5006 #ifdef SUPPORT_PCRE32
5007 if (pcre_mode == PCRE32_MODE)
5008 {
5009 *q32 = 0;
5010 len = (int)(q32 - (pcre_uint32 *)dbuffer);
5011 }
5012 #endif
5013
5014 /* If we're compiling with explicit valgrind support, mark the data from after
5015 its end to the end of the buffer as unaddressable, so that a read over the end
5016 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
5017 If we're not building with valgrind support, move the data to the end of the
5018 buffer so that a read past the end is at least likely to cause a crash.
5019 If we are using the POSIX interface, we must include the terminating zero. */
5020
5021 bptr = dbuffer;
5022
5023 #if !defined NOPOSIX
5024 if (posix || do_posix)
5025 {
5026 #ifdef SUPPORT_VALGRIND
5027 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
5028 #else
5029 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5030 bptr += dbuffer_size - len - 1;
5031 #endif
5032 }
5033 else
5034 #endif
5035 {
5036 #ifdef SUPPORT_VALGRIND
5037 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
5038 #else
5039 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5040 #endif
5041 }
5042
5043 if ((all_use_dfa || use_dfa) && find_match_limit)
5044 {
5045 printf("** Match limit not relevant for DFA matching: ignored\n");
5046 find_match_limit = 0;
5047 }
5048
5049 /* Handle matching via the POSIX interface, which does not
5050 support timing or playing with the match limit or callout data. */
5051
5052 #if !defined NOPOSIX
5053 if (posix || do_posix)
5054 {
5055 int rc;
5056 int eflags = 0;
5057 regmatch_t *pmatch = NULL;
5058 if (use_size_offsets > 0)
5059 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5060 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5061 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5062 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5063
5064 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5065
5066 if (rc != 0)
5067 {
5068 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5069 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5070 }
5071 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5072 {
5073 fprintf(outfile, "Matched with REG_NOSUB\n");
5074 }
5075 else
5076 {
5077 size_t i;
5078 for (i = 0; i < (size_t)use_size_offsets; i++)
5079 {
5080 if (pmatch[i].rm_so >= 0)
5081 {
5082 fprintf(outfile, "%2d: ", (int)i);
5083 PCHARSV(dbuffer, pmatch[i].rm_so,
5084 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5085 fprintf(outfile, "\n");
5086 if (do_showcaprest || (i == 0 && do_showrest))
5087 {
5088 fprintf(outfile, "%2d+ ", (int)i);
5089 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5090 outfile);
5091 fprintf(outfile, "\n");
5092 }
5093 }
5094 }
5095 }
5096 free(pmatch);
5097 goto NEXT_DATA;
5098 }
5099
5100 #endif /* !defined NOPOSIX */
5101
5102 /* Handle matching via the native interface - repeats for /g and /G */
5103
5104 /* Ensure that there is a JIT callback if we want to verify that JIT was
5105 actually used. If jit_stack == NULL, no stack has yet been assigned. */
5106
5107 if (verify_jit && jit_stack == NULL && extra != NULL)
5108 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5109
5110 for (;; gmatched++) /* Loop for /g or /G */
5111 {
5112 markptr = NULL;
5113 jit_was_used = FALSE;
5114
5115 if (timeitm > 0)
5116 {
5117 register int i;
5118 clock_t time_taken;
5119 clock_t start_time = clock();
5120
5121 #if !defined NODFA
5122 if (all_use_dfa || use_dfa)
5123 {
5124 if ((options & PCRE_DFA_RESTART) != 0)
5125 {
5126 fprintf(outfile, "Timing DFA restarts is not supported\n");
5127 break;
5128 }
5129 if (dfa_workspace == NULL)
5130 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5131 for (i = 0; i < timeitm; i++)
5132 {
5133 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5134 (options | g_notempty), use_offsets, use_size_offsets,
5135 dfa_workspace, DFA_WS_DIMENSION);
5136 }
5137 }
5138 else
5139 #endif
5140
5141 for (i = 0; i < timeitm; i++)
5142 {
5143 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5144 (options | g_notempty), use_offsets, use_size_offsets);
5145 }
5146 total_match_time += (time_taken = clock() - start_time);
5147 fprintf(outfile, "Execute time %.4f milliseconds\n",
5148 (((double)time_taken * 1000.0) / (double)timeitm) /
5149 (double)CLOCKS_PER_SEC);
5150 }
5151
5152 /* If find_match_limit is set, we want to do repeated matches with
5153 varying limits in order to find the minimum value for the match limit and
5154 for the recursion limit. The match limits are relevant only to the normal
5155 running of pcre_exec(), so disable the JIT optimization. This makes it
5156 possible to run the same set of tests with and without JIT externally
5157 requested. */
5158
5159 if (find_match_limit)
5160 {
5161 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5162 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5163 extra->flags = 0;
5164
5165 (void)check_match_limit(re, extra, bptr, len, start_offset,
5166 options|g_notempty, use_offsets, use_size_offsets,
5167 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5168 PCRE_ERROR_MATCHLIMIT, "match()");
5169
5170 count = check_match_limit(re, extra, bptr, len, start_offset,
5171 options|g_notempty, use_offsets, use_size_offsets,
5172 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5173 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5174 }
5175
5176 /* If callout_data is set, use the interface with additional data */
5177
5178 else if (callout_data_set)
5179 {
5180 if (extra == NULL)
5181 {
5182 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5183 extra->flags = 0;
5184 }
5185 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5186 extra->callout_data = &callout_data;
5187 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5188 options | g_notempty, use_offsets, use_size_offsets);
5189 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5190 }
5191
5192 /* The normal case is just to do the match once, with the default
5193 value of match_limit. */
5194
5195 #if !defined NODFA
5196 else if (all_use_dfa || use_dfa)
5197 {
5198 if (dfa_workspace == NULL)
5199 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5200 if (dfa_matched++ == 0)
5201 dfa_workspace[0] = -1; /* To catch bad restart */
5202 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5203 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5204 DFA_WS_DIMENSION);
5205 if (count == 0)
5206 {
5207 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5208 count = use_size_offsets/2;
5209 }
5210 }
5211 #endif
5212
5213 else
5214 {
5215 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5216 options | g_notempty, use_offsets, use_size_offsets);
5217 if (count == 0)
5218 {
5219 fprintf(outfile, "Matched, but too many substrings\n");
5220 /* 2 is a special case; match can be returned */
5221 count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5222 }
5223 }
5224
5225 /* Matched */
5226
5227 if (count >= 0)
5228 {
5229 int i, maxcount;
5230 void *cnptr, *gnptr;
5231
5232 #if !defined NODFA
5233 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5234 #endif
5235 /* 2 is a special case; match can be returned */
5236 maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5237
5238 /* This is a check against a lunatic return value. */
5239
5240 if (count > maxcount)
5241 {
5242 fprintf(outfile,
5243 "** PCRE error: returned count %d is too big for offset size %d\n",
5244 count, use_size_offsets);
5245 count = use_size_offsets/3;
5246 if (do_g || do_G)
5247 {
5248 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5249 do_g = do_G = FALSE; /* Break g/G loop */
5250 }
5251 }
5252
5253 /* do_allcaps requests showing of all captures in the pattern, to check
5254 unset ones at the end. */
5255
5256 if (do_allcaps)
5257 {
5258 if (all_use_dfa || use_dfa)
5259 {
5260 fprintf(outfile, "** Show all captures ignored after DFA matching\n");
5261 }
5262 else
5263 {
5264 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5265 goto SKIP_DATA;
5266 count++; /* Allow for full match */
5267 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5268 }
5269 }
5270
5271 /* Output the captured substrings. Note that, for the matched string,
5272 the use of \K in an assertion can make the start later than the end. */
5273
5274 for (i = 0; i < count * 2; i += 2)
5275 {
5276 if (use_offsets[i] < 0)
5277 {
5278 if (use_offsets[i] != -1)
5279 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5280 use_offsets[i], i);
5281 if (use_offsets[i+1] != -1)
5282 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5283 use_offsets[i+1], i+1);
5284 fprintf(outfile, "%2d: <unset>\n", i/2);
5285 }
5286 else
5287 {
5288 int start = use_offsets[i];
5289 int end = use_offsets[i+1];
5290
5291 if (start > end)
5292 {
5293 start = use_offsets[i+1];
5294 end = use_offsets[i];
5295 fprintf(outfile, "Start of matched string is beyond its end - "
5296 "displaying from end to start.\n");
5297 }
5298
5299 fprintf(outfile, "%2d: ", i/2);
5300 PCHARSV(bptr, start, end - start, outfile);
5301 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5302 fprintf(outfile, "\n");
5303
5304 /* Note: don't use the start/end variables here because we want to
5305 show the text from what is reported as the end. */
5306
5307 if (do_showcaprest || (i == 0 && do_showrest))
5308 {
5309 fprintf(outfile, "%2d+ ", i/2);
5310 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5311 outfile);
5312 fprintf(outfile, "\n");
5313 }
5314 }
5315 }
5316
5317 if (markptr != NULL)
5318 {
5319 fprintf(outfile, "MK: ");
5320 PCHARSV(markptr, 0, -1, outfile);
5321 fprintf(outfile, "\n");
5322 }
5323
5324 for (i = 0; i < 32; i++)
5325 {
5326 if ((copystrings & (1 << i)) != 0)
5327 {
5328 int rc;
5329 char copybuffer[256];
5330 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5331 copybuffer, sizeof(copybuffer));
5332 if (rc < 0)
5333 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5334 else
5335 {
5336 fprintf(outfile, "%2dC ", i);
5337 PCHARSV(copybuffer, 0, rc, outfile);
5338 fprintf(outfile, " (%d)\n", rc);
5339 }
5340 }
5341 }
5342
5343 cnptr = copynames;
5344 for (;;)
5345 {
5346 int rc;
5347 char copybuffer[256];
5348
5349 #ifdef SUPPORT_PCRE32
5350 if (pcre_mode == PCRE32_MODE)
5351 {
5352 if (*(pcre_uint32 *)cnptr == 0) break;
5353 }
5354 #endif
5355 #ifdef SUPPORT_PCRE16
5356 if (pcre_mode == PCRE16_MODE)
5357 {
5358 if (*(pcre_uint16 *)cnptr == 0) break;
5359 }
5360 #endif
5361 #ifdef SUPPORT_PCRE8
5362 if (pcre_mode == PCRE8_MODE)
5363 {
5364 if (*(pcre_uint8 *)cnptr == 0) break;
5365 }
5366 #endif
5367
5368 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5369 cnptr, copybuffer, sizeof(copybuffer));
5370
5371 if (rc < 0)
5372 {
5373 fprintf(outfile, "copy substring ");
5374 PCHARSV(cnptr, 0, -1, outfile);
5375 fprintf(outfile, " failed %d\n", rc);
5376 }
5377 else
5378 {
5379 fprintf(outfile, " C ");
5380 PCHARSV(copybuffer, 0, rc, outfile);
5381 fprintf(outfile, " (%d) ", rc);
5382 PCHARSV(cnptr, 0, -1, outfile);
5383 putc('\n', outfile);
5384 }
5385
5386 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5387 }
5388
5389 for (i = 0; i < 32; i++)
5390 {
5391 if ((getstrings & (1 << i)) != 0)
5392 {
5393 int rc;
5394 const char *substring;
5395 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5396 if (rc < 0)
5397 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5398 else
5399 {
5400 fprintf(outfile, "%2dG ", i);
5401 PCHARSV(substring, 0, rc, outfile);
5402 fprintf(outfile, " (%d)\n", rc);
5403 PCRE_FREE_SUBSTRING(substring);
5404 }
5405 }
5406 }
5407
5408 gnptr = getnames;
5409 for (;;)
5410 {
5411 int rc;
5412 const char *substring;
5413
5414 #ifdef SUPPORT_PCRE32
5415 if (pcre_mode == PCRE32_MODE)
5416 {
5417 if (*(pcre_uint32 *)gnptr == 0) break;
5418 }
5419 #endif
5420 #ifdef SUPPORT_PCRE16
5421 if (pcre_mode == PCRE16_MODE)
5422 {
5423 if (*(pcre_uint16 *)gnptr == 0) break;
5424 }
5425 #endif
5426 #ifdef SUPPORT_PCRE8
5427 if (pcre_mode == PCRE8_MODE)
5428 {
5429 if (*(pcre_uint8 *)gnptr == 0) break;
5430 }
5431 #endif
5432
5433 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5434 gnptr, &substring);
5435 if (rc < 0)
5436 {
5437 fprintf(outfile, "get substring ");
5438 PCHARSV(gnptr, 0, -1, outfile);
5439 fprintf(outfile, " failed %d\n", rc);
5440 }
5441 else
5442 {
5443 fprintf(outfile, " G ");
5444 PCHARSV(substring, 0, rc, outfile);
5445 fprintf(outfile, " (%d) ", rc);
5446 PCHARSV(gnptr, 0, -1, outfile);
5447 PCRE_FREE_SUBSTRING(substring);
5448 putc('\n', outfile);
5449 }
5450
5451 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5452 }
5453
5454 if (getlist)
5455 {
5456 int rc;
5457 const char **stringlist;
5458 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5459 if (rc < 0)
5460 fprintf(outfile, "get substring list failed %d\n", rc);
5461 else
5462 {
5463 for (i = 0; i < count; i++)
5464 {
5465 fprintf(outfile, "%2dL ", i);
5466 PCHARSV(stringlist[i], 0, -1, outfile);
5467 putc('\n', outfile);
5468 }
5469 if (stringlist[i] != NULL)
5470 fprintf(outfile, "string list not terminated by NULL\n");
5471 PCRE_FREE_SUBSTRING_LIST(stringlist);
5472 }
5473 }
5474 }
5475
5476 /* There was a partial match. If the bumpalong point is not the same as
5477 the first inspected character, show the offset explicitly. */
5478
5479 else if (count == PCRE_ERROR_PARTIAL)
5480 {
5481 fprintf(outfile, "Partial match");
5482 if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5483 fprintf(outfile, " at offset %d", use_offsets[2]);
5484 if (markptr != NULL)
5485 {
5486 fprintf(outfile, ", mark=");
5487 PCHARSV(markptr, 0, -1, outfile);
5488 }
5489 if (use_size_offsets > 1)
5490 {
5491 fprintf(outfile, ": ");
5492 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5493 outfile);
5494 }
5495 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5496 fprintf(outfile, "\n");
5497 break; /* Out of the /g loop */
5498 }
5499
5500 /* Failed to match. If this is a /g or /G loop and we previously set
5501 g_notempty after a null match, this is not necessarily the end. We want
5502 to advance the start offset, and continue. We won't be at the end of the
5503 string - that was checked before setting g_notempty.
5504
5505 Complication arises in the case when the newline convention is "any",
5506 "crlf", or "anycrlf". If the previous match was at the end of a line
5507 terminated by CRLF, an advance of one character just passes the \r,
5508 whereas we should prefer the longer newline sequence, as does the code in
5509 pcre_exec(). Fudge the offset value to achieve this. We check for a
5510 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5511 find the default.
5512
5513 Otherwise, in the case of UTF-8 matching, the advance must be one
5514 character, not one byte. */
5515
5516 else
5517 {
5518 if (g_notempty != 0)
5519 {
5520 int onechar = 1;
5521 unsigned int obits = REAL_PCRE_OPTIONS(re);
5522 use_offsets[0] = start_offset;
5523 if ((obits & PCRE_NEWLINE_BITS) == 0)
5524 {
5525 int d;
5526 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5527 /* Note that these values are always the ASCII ones, even in
5528 EBCDIC environments. CR = 13, NL = 10. */
5529 obits = (d == 13)? PCRE_NEWLINE_CR :
5530 (d == 10)? PCRE_NEWLINE_LF :
5531 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5532 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5533 (d == -1)? PCRE_NEWLINE_ANY : 0;
5534 }
5535 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5536 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5537 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5538 &&
5539 start_offset < len - 1 && (
5540 #ifdef SUPPORT_PCRE8
5541 (pcre_mode == PCRE8_MODE &&
5542 bptr[start_offset] == '\r' &&
5543 bptr[start_offset + 1] == '\n') ||
5544 #endif
5545 #ifdef SUPPORT_PCRE16
5546 (pcre_mode == PCRE16_MODE &&
5547 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5548 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5549 #endif
5550 #ifdef SUPPORT_PCRE32
5551 (pcre_mode == PCRE32_MODE &&
5552 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5553 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5554 #endif
5555 0))
5556 onechar++;
5557 else if (use_utf)
5558 {
5559 while (start_offset + onechar < len)
5560 {
5561 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5562 onechar++;
5563 }
5564 }
5565 use_offsets[1] = start_offset + onechar;
5566 }
5567 else
5568 {
5569 switch(count)
5570 {
5571 case PCRE_ERROR_NOMATCH:
5572 if (gmatched == 0)
5573 {
5574 if (markptr == NULL)
5575 {
5576 fprintf(outfile, "No match");
5577 }
5578 else
5579 {
5580 fprintf(outfile, "No match, mark = ");
5581 PCHARSV(markptr, 0, -1, outfile);
5582 }
5583 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5584 putc('\n', outfile);
5585 }
5586 break;
5587
5588 case PCRE_ERROR_BADUTF8:
5589 case PCRE_ERROR_SHORTUTF8:
5590 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5591 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5592 8 * CHAR_SIZE);
5593 if (use_size_offsets >= 2)
5594 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5595 use_offsets[1]);
5596 fprintf(outfile, "\n");
5597 break;
5598
5599 case PCRE_ERROR_BADUTF8_OFFSET:
5600 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5601 8 * CHAR_SIZE);
5602 break;
5603
5604 default:
5605 if (count < 0 &&
5606 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5607 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5608 else
5609 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5610 break;
5611 }
5612
5613 break; /* Out of the /g loop */
5614 }
5615 }
5616
5617 /* If not /g or /G we are done */
5618
5619 if (!do_g && !do_G) break;
5620
5621 if (use_offsets == NULL)
5622 {
5623 fprintf(outfile, "Cannot do global matching without an ovector\n");
5624 break;
5625 }
5626
5627 if (use_size_offsets < 2)
5628 {
5629 fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
5630 break;
5631 }
5632
5633 /* If we have matched an empty string, first check to see if we are at
5634 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5635 Perl's /g option does. This turns out to be rather cunning. First we set
5636 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5637 same point. If this fails (picked up above) we advance to the next
5638 character. */
5639
5640 g_notempty = 0;
5641
5642 if (use_offsets[0] == use_offsets[1])
5643 {
5644 if (use_offsets[0] == len) break;
5645 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5646 }
5647
5648 /* For /g, update the start offset, leaving the rest alone. There is a
5649 tricky case when \K is used in a positive lookbehind assertion. This can
5650 cause the end of the match to be less than or equal to the start offset.
5651 In this case we restart at one past the start offset. This may return the
5652 same match if the original start offset was bumped along during the
5653 match, but eventually the new start offset will hit the actual start
5654 offset. (In PCRE2 the true start offset is available, and this can be
5655 done better. It is not worth doing more than making sure we do not loop
5656 at this stage in the life of PCRE1.) */
5657
5658 if (do_g)
5659 {
5660 if (g_notempty == 0 && use_offsets[1] <= start_offset)
5661 {
5662 if (start_offset >= len) break; /* End of subject */
5663 start_offset++;
5664 if (use_utf)
5665 {
5666 while (start_offset < len)
5667 {
5668 if ((bptr[start_offset] & 0xc0) != 0x80) break;
5669 start_offset++;
5670 }
5671 }
5672 }
5673 else start_offset = use_offsets[1];
5674 }
5675
5676 /* For /G, update the pointer and length */
5677
5678 else
5679 {
5680 bptr += use_offsets[1] * CHAR_SIZE;
5681 len -= use_offsets[1];
5682 }
5683 } /* End of loop for /g and /G */
5684
5685 NEXT_DATA: continue;
5686 } /* End of loop for data lines */
5687
5688 CONTINUE:
5689
5690 #if !defined NOPOSIX
5691 if ((posix || do_posix) && preg.re_pcre != 0) regfree(&preg);
5692 #endif
5693
5694 if (re != NULL) new_free(re);
5695 if (extra != NULL)
5696 {
5697 PCRE_FREE_STUDY(extra);
5698 }
5699 if (locale_set)
5700 {
5701 new_free((void *)tables);
5702 setlocale(LC_CTYPE, "C");
5703 locale_set = 0;
5704 }
5705 if (jit_stack != NULL)
5706 {
5707 PCRE_JIT_STACK_FREE(jit_stack);
5708 jit_stack = NULL;
5709 }
5710 }
5711
5712 if (infile == stdin) fprintf(outfile, "\n");
5713
5714 if (showtotaltimes)
5715 {
5716 fprintf(outfile, "--------------------------------------\n");
5717 if (timeit > 0)
5718 {
5719 fprintf(outfile, "Total compile time %.4f milliseconds\n",
5720 (((double)total_compile_time * 1000.0) / (double)timeit) /
5721 (double)CLOCKS_PER_SEC);
5722 fprintf(outfile, "Total study time %.4f milliseconds\n",
5723 (((double)total_study_time * 1000.0) / (double)timeit) /
5724 (double)CLOCKS_PER_SEC);
5725 }
5726 fprintf(outfile, "Total execute time %.4f milliseconds\n",
5727 (((double)total_match_time * 1000.0) / (double)timeitm) /
5728 (double)CLOCKS_PER_SEC);
5729 }
5730
5731 EXIT:
5732
5733 if (infile != NULL && infile != stdin) fclose(infile);
5734 if (outfile != NULL && outfile != stdout) fclose(outfile);
5735
5736 free(buffer);
5737 free(dbuffer);
5738 free(pbuffer);
5739 free(offsets);
5740
5741 #ifdef SUPPORT_PCRE16
5742 if (buffer16 != NULL) free(buffer16);
5743 #endif
5744 #ifdef SUPPORT_PCRE32
5745 if (buffer32 != NULL) free(buffer32);
5746 #endif
5747
5748 #if !defined NODFA
5749 if (dfa_workspace != NULL)
5750 free(dfa_workspace);
5751 #endif
5752
5753 #if defined(__VMS)
5754 yield = SS$_NORMAL; /* Return values via DCL symbols */
5755 #endif
5756
5757 return yield;
5758 }
5759
5760 /* End of pcretest.c */
5761

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5