/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1221 - (show annotations)
Sun Nov 11 20:27:03 2012 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 163902 byte(s)
Error occurred while calculating annotation data.
File tidies, preparing for 8.32-RC1.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136 #include "pcre_internal.h"
137
138 /* The pcre_printint() function, which prints the internal form of a compiled
139 regex, is held in a separate file so that (a) it can be compiled in either
140 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
141 when that is compiled in debug mode. */
142
143 #ifdef SUPPORT_PCRE8
144 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
145 #endif
146 #ifdef SUPPORT_PCRE16
147 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
148 #endif
149 #ifdef SUPPORT_PCRE32
150 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152
153 /* We need access to some of the data tables that PCRE uses. So as not to have
154 to keep two copies, we include the source files here, changing the names of the
155 external symbols to prevent clashes. */
156
157 #define PCRE_INCLUDED
158
159 #include "pcre_tables.c"
160 #include "pcre_ucd.c"
161
162 /* The definition of the macro PRINTABLE, which determines whether to print an
163 output character as-is or as a hex value when showing compiled patterns, is
164 the same as in the printint.src file. We use it here in cases when the locale
165 has not been explicitly changed, so as to get consistent output from systems
166 that differ in their output from isprint() even in the "C" locale. */
167
168 #ifdef EBCDIC
169 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
170 #else
171 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
172 #endif
173
174 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
175
176 /* Posix support is disabled in 16 or 32 bit only mode. */
177 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
178 #define NOPOSIX
179 #endif
180
181 /* It is possible to compile this test program without including support for
182 testing the POSIX interface, though this is not available via the standard
183 Makefile. */
184
185 #if !defined NOPOSIX
186 #include "pcreposix.h"
187 #endif
188
189 /* It is also possible, originally for the benefit of a version that was
190 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
191 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
192 automatically cut out the UTF support if PCRE is built without it. */
193
194 #ifndef SUPPORT_UTF
195 #ifndef NOUTF
196 #define NOUTF
197 #endif
198 #endif
199
200 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
201 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
202 only from one place and is handled differently). I couldn't dream up any way of
203 using a single macro to do this in a generic way, because of the many different
204 argument requirements. We know that at least one of SUPPORT_PCRE8,
205 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
206 individual mode; then use these in the definitions of generic macros.
207
208 **** Special note about the PCHARSxxx macros: the address of the string to be
209 printed is always given as two arguments: a base address followed by an offset.
210 The base address is cast to the correct data size for 8, 16, or 32 bit data; the
211 offset is in units of this size. If the string were given as base+offset in one
212 argument, the casting might be incorrectly applied. */
213
214 #ifdef SUPPORT_PCRE8
215
216 #define PCHARS8(lv, p, offset, len, f) \
217 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
218
219 #define PCHARSV8(p, offset, len, f) \
220 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
221
222 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
223 p = read_capture_name8(p, cn8, re)
224
225 #define STRLEN8(p) ((int)strlen((char *)p))
226
227 #define SET_PCRE_CALLOUT8(callout) \
228 pcre_callout = callout
229
230 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
231 pcre_assign_jit_stack(extra, callback, userdata)
232
233 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
234 re = pcre_compile((char *)pat, options, error, erroffset, tables)
235
236 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
237 namesptr, cbuffer, size) \
238 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
239 (char *)namesptr, cbuffer, size)
240
241 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
242 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
243
244 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
245 offsets, size_offsets, workspace, size_workspace) \
246 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
247 offsets, size_offsets, workspace, size_workspace)
248
249 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
250 offsets, size_offsets) \
251 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
252 offsets, size_offsets)
253
254 #define PCRE_FREE_STUDY8(extra) \
255 pcre_free_study(extra)
256
257 #define PCRE_FREE_SUBSTRING8(substring) \
258 pcre_free_substring(substring)
259
260 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
261 pcre_free_substring_list(listptr)
262
263 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
264 getnamesptr, subsptr) \
265 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
266 (char *)getnamesptr, subsptr)
267 /* Assigns to n the result of pcre_get_stringnumber() for the name ptr in pattern re; re is a macro argument, not a captured variable. */
268 #define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
269 n = pcre_get_stringnumber(re, (char *)(ptr))
270
271 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
272 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
273
274 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
275 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
276
277 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
278 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
279
280 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
281 pcre_printint(re, outfile, debug_lengths)
282
283 #define PCRE_STUDY8(extra, re, options, error) \
284 extra = pcre_study(re, options, error)
285
286 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
287 pcre_jit_stack_alloc(startsize, maxsize)
288
289 #define PCRE_JIT_STACK_FREE8(stack) \
290 pcre_jit_stack_free(stack)
291
292 #define pcre8_maketables pcre_maketables
293
294 #endif /* SUPPORT_PCRE8 */
295
296 /* -----------------------------------------------------------*/
297
298 #ifdef SUPPORT_PCRE16
299
300 #define PCHARS16(lv, p, offset, len, f) \
301 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302
303 #define PCHARSV16(p, offset, len, f) \
304 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305
306 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
307 p = read_capture_name16(p, cn16, re)
308
309 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310
311 #define SET_PCRE_CALLOUT16(callout) \
312 pcre16_callout = (int (*)(pcre16_callout_block *))callout
313
314 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315 pcre16_assign_jit_stack((pcre16_extra *)extra, \
316 (pcre16_jit_callback)callback, userdata)
317
318 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320 tables)
321
322 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323 namesptr, cbuffer, size) \
324 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
326
327 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329 (PCRE_UCHAR16 *)cbuffer, size/2)
330
331 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332 offsets, size_offsets, workspace, size_workspace) \
333 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335 workspace, size_workspace)
336
337 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338 offsets, size_offsets) \
339 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340 len, start_offset, options, offsets, size_offsets)
341
342 #define PCRE_FREE_STUDY16(extra) \
343 pcre16_free_study((pcre16_extra *)extra)
344
345 #define PCRE_FREE_SUBSTRING16(substring) \
346 pcre16_free_substring((PCRE_SPTR16)substring)
347
348 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350
351 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352 getnamesptr, subsptr) \
353 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355 /* Assigns to n the result of pcre16_get_stringnumber() for the name ptr in pattern re; re is a macro argument, cast as in the other 16-bit macros. */
356 #define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
357 n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(ptr))
358
359 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361 (PCRE_SPTR16 *)(void*)subsptr)
362
363 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365 (PCRE_SPTR16 **)(void*)listptr)
366
367 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369 tables)
370
371 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372 pcre16_printint(re, outfile, debug_lengths)
373
374 #define PCRE_STUDY16(extra, re, options, error) \
375 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376
377 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379
380 #define PCRE_JIT_STACK_FREE16(stack) \
381 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382
383 #endif /* SUPPORT_PCRE16 */
384
385 /* -----------------------------------------------------------*/
386
387 #ifdef SUPPORT_PCRE32
388
389 #define PCHARS32(lv, p, offset, len, f) \
390 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
391
392 #define PCHARSV32(p, offset, len, f) \
393 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
394
395 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396 p = read_capture_name32(p, cn32, re)
397
398 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
399
400 #define SET_PCRE_CALLOUT32(callout) \
401 pcre32_callout = (int (*)(pcre32_callout_block *))callout
402
403 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
404 pcre32_assign_jit_stack((pcre32_extra *)extra, \
405 (pcre32_jit_callback)callback, userdata)
406
407 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
408 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
409 tables)
410 /* size is treated as a byte count (8-bit passes size, 16-bit passes size/2), so the length in 32-bit code units is size/4. */
411 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
412 namesptr, cbuffer, size) \
413 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
414 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
415 /* size is treated as a byte count (8-bit passes size, 16-bit passes size/2), so the length in 32-bit code units is size/4. */
416 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
417 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
418 (PCRE_UCHAR32 *)cbuffer, (size)/4)
419
420 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
421 offsets, size_offsets, workspace, size_workspace) \
422 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
423 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
424 workspace, size_workspace)
425
426 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427 offsets, size_offsets) \
428 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
429 len, start_offset, options, offsets, size_offsets)
430
431 #define PCRE_FREE_STUDY32(extra) \
432 pcre32_free_study((pcre32_extra *)extra)
433
434 #define PCRE_FREE_SUBSTRING32(substring) \
435 pcre32_free_substring((PCRE_SPTR32)substring)
436
437 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
438 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
439
440 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
441 getnamesptr, subsptr) \
442 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
443 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
444 /* Assigns to n the result of pcre32_get_stringnumber() for the name ptr in pattern re; re is a macro argument, cast as in the other 32-bit macros. */
445 #define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
446 n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(ptr))
447
448 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
449 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
450 (PCRE_SPTR32 *)(void*)subsptr)
451
452 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
453 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
454 (PCRE_SPTR32 **)(void*)listptr)
455
456 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
457 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
458 tables)
459
460 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
461 pcre32_printint(re, outfile, debug_lengths)
462
463 #define PCRE_STUDY32(extra, re, options, error) \
464 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
465
466 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
467 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
468
469 #define PCRE_JIT_STACK_FREE32(stack) \
470 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
471
472 #endif /* SUPPORT_PCRE32 */
473
474
475 /* ----- More than one mode is supported; a runtime test is needed, except for
476 pcre_config(), and the JIT stack functions, when it doesn't matter which
477 available version is called. ----- */
478
479 enum { /* Values compared against pcre_mode; the order matters because CHAR_SIZE is (1 << pcre_mode). */
480 PCRE8_MODE, /* 0 -> CHAR_SIZE 1 */
481 PCRE16_MODE, /* 1 -> CHAR_SIZE 2 */
482 PCRE32_MODE /* 2 -> CHAR_SIZE 4 */
483 };
484
485 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486 defined (SUPPORT_PCRE32)) >= 2
487
488 #define CHAR_SIZE (1 << pcre_mode)
489
490 /* There doesn't seem to be an easy way of writing these macros that can cope
491 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492 cases separately. */
493
494 /* ----- All three modes supported ----- */
495
496 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
497
498 #define PCHARS(lv, p, offset, len, f) \
499 if (pcre_mode == PCRE32_MODE) \
500 PCHARS32(lv, p, offset, len, f); \
501 else if (pcre_mode == PCRE16_MODE) \
502 PCHARS16(lv, p, offset, len, f); \
503 else \
504 PCHARS8(lv, p, offset, len, f)
505
506 #define PCHARSV(p, offset, len, f) \
507 if (pcre_mode == PCRE32_MODE) \
508 PCHARSV32(p, offset, len, f); \
509 else if (pcre_mode == PCRE16_MODE) \
510 PCHARSV16(p, offset, len, f); \
511 else \
512 PCHARSV8(p, offset, len, f)
513
514 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
515 if (pcre_mode == PCRE32_MODE) \
516 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
517 else if (pcre_mode == PCRE16_MODE) \
518 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
519 else \
520 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
521
522 #define SET_PCRE_CALLOUT(callout) \
523 if (pcre_mode == PCRE32_MODE) \
524 SET_PCRE_CALLOUT32(callout); \
525 else if (pcre_mode == PCRE16_MODE) \
526 SET_PCRE_CALLOUT16(callout); \
527 else \
528 SET_PCRE_CALLOUT8(callout)
529
530 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
531
532 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
533 if (pcre_mode == PCRE32_MODE) \
534 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
535 else if (pcre_mode == PCRE16_MODE) \
536 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
537 else \
538 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
539
540 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
541 if (pcre_mode == PCRE32_MODE) \
542 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
543 else if (pcre_mode == PCRE16_MODE) \
544 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
545 else \
546 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
547
548 #define PCRE_CONFIG pcre_config
549
550 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
551 namesptr, cbuffer, size) \
552 if (pcre_mode == PCRE32_MODE) \
553 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size); \
555 else if (pcre_mode == PCRE16_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else \
559 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size)
561
562 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
563 if (pcre_mode == PCRE32_MODE) \
564 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
565 else if (pcre_mode == PCRE16_MODE) \
566 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
567 else \
568 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
569
570 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
571 offsets, size_offsets, workspace, size_workspace) \
572 if (pcre_mode == PCRE32_MODE) \
573 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace); \
575 else if (pcre_mode == PCRE16_MODE) \
576 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else \
579 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace)
581
582 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets) \
584 if (pcre_mode == PCRE32_MODE) \
585 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets); \
587 else if (pcre_mode == PCRE16_MODE) \
588 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else \
591 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets)
593
594 #define PCRE_FREE_STUDY(extra) \
595 if (pcre_mode == PCRE32_MODE) \
596 PCRE_FREE_STUDY32(extra); \
597 else if (pcre_mode == PCRE16_MODE) \
598 PCRE_FREE_STUDY16(extra); \
599 else \
600 PCRE_FREE_STUDY8(extra)
601
602 #define PCRE_FREE_SUBSTRING(substring) \
603 if (pcre_mode == PCRE32_MODE) \
604 PCRE_FREE_SUBSTRING32(substring); \
605 else if (pcre_mode == PCRE16_MODE) \
606 PCRE_FREE_SUBSTRING16(substring); \
607 else \
608 PCRE_FREE_SUBSTRING8(substring)
609
610 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
611 if (pcre_mode == PCRE32_MODE) \
612 PCRE_FREE_SUBSTRING_LIST32(listptr); \
613 else if (pcre_mode == PCRE16_MODE) \
614 PCRE_FREE_SUBSTRING_LIST16(listptr); \
615 else \
616 PCRE_FREE_SUBSTRING_LIST8(listptr)
617
618 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
619 getnamesptr, subsptr) \
620 if (pcre_mode == PCRE32_MODE) \
621 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr); \
623 else if (pcre_mode == PCRE16_MODE) \
624 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else \
627 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr)
629
630 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
631 if (pcre_mode == PCRE32_MODE) \
632 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
633 else if (pcre_mode == PCRE16_MODE) \
634 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
635 else \
636 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
637
638 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
639 if (pcre_mode == PCRE32_MODE) \
640 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
641 else if (pcre_mode == PCRE16_MODE) \
642 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
643 else \
644 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
645
646 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
647 if (pcre_mode == PCRE32_MODE) \
648 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
649 else if (pcre_mode == PCRE16_MODE) \
650 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
651 else \
652 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
653
654 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
655 (pcre_mode == PCRE32_MODE ? \
656 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
657 : pcre_mode == PCRE16_MODE ? \
658 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
659 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
660
661 #define PCRE_JIT_STACK_FREE(stack) \
662 if (pcre_mode == PCRE32_MODE) \
663 PCRE_JIT_STACK_FREE32(stack); \
664 else if (pcre_mode == PCRE16_MODE) \
665 PCRE_JIT_STACK_FREE16(stack); \
666 else \
667 PCRE_JIT_STACK_FREE8(stack)
668
669 #define PCRE_MAKETABLES \
670 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
671
672 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
673 if (pcre_mode == PCRE32_MODE) \
674 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
675 else if (pcre_mode == PCRE16_MODE) \
676 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
677 else \
678 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
679
680 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
681 if (pcre_mode == PCRE32_MODE) \
682 PCRE_PRINTINT32(re, outfile, debug_lengths); \
683 else if (pcre_mode == PCRE16_MODE) \
684 PCRE_PRINTINT16(re, outfile, debug_lengths); \
685 else \
686 PCRE_PRINTINT8(re, outfile, debug_lengths)
687
688 #define PCRE_STUDY(extra, re, options, error) \
689 if (pcre_mode == PCRE32_MODE) \
690 PCRE_STUDY32(extra, re, options, error); \
691 else if (pcre_mode == PCRE16_MODE) \
692 PCRE_STUDY16(extra, re, options, error); \
693 else \
694 PCRE_STUDY8(extra, re, options, error)
695
696
697 /* ----- Two out of three modes are supported ----- */
698
699 #else
700
701 /* We can use some macro trickery to make a single set of definitions work in
702 the three different cases. */
703
704 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
705
706 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707 #define BITONE 32
708 #define BITTWO 16
709
710 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
711
712 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713 #define BITONE 32
714 #define BITTWO 8
715
716 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
717
718 #else
719 #define BITONE 16
720 #define BITTWO 8
721 #endif
722
723 #define glue(a,b) a##b
724 #define G(a,b) glue(a,b)
725
726
727 /* ----- Common macros for two-mode cases ----- */
728
729 #define PCHARS(lv, p, offset, len, f) \
730 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731 G(PCHARS,BITONE)(lv, p, offset, len, f); \
732 else \
733 G(PCHARS,BITTWO)(lv, p, offset, len, f)
734
735 #define PCHARSV(p, offset, len, f) \
736 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737 G(PCHARSV,BITONE)(p, offset, len, f); \
738 else \
739 G(PCHARSV,BITTWO)(p, offset, len, f)
740
741 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744 else \
745 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
746
747 #define SET_PCRE_CALLOUT(callout) \
748 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749 G(SET_PCRE_CALLOUT,BITONE)(callout); \
750 else \
751 G(SET_PCRE_CALLOUT,BITTWO)(callout)
752
753 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
755
756 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759 else \
760 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
761
/* Two-mode dispatch macros. Each function-like macro below compares pcre_mode
with the BITONE mode constant and expands to the BITONE variant of the call on
a match, otherwise to the BITTWO variant. They expand to an unbraced if/else
statement, so they are usable only as statements and each use must supply its
own terminating semicolon. PCRE_CONFIG is the exception: it performs no mode
test and always expands to the BITONE config function name. */
762 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765 else \
766 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
767
768 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
769
770 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771 namesptr, cbuffer, size) \
772 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774 namesptr, cbuffer, size); \
775 else \
776 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777 namesptr, cbuffer, size)
778
779 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782 else \
783 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
784
785 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786 offsets, size_offsets, workspace, size_workspace) \
787 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789 offsets, size_offsets, workspace, size_workspace); \
790 else \
791 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792 offsets, size_offsets, workspace, size_workspace)
793
794 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
795 offsets, size_offsets) \
796 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
797 G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
798 offsets, size_offsets); \
799 else \
800 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
801 offsets, size_offsets)
802
803 #define PCRE_FREE_STUDY(extra) \
804 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
805 G(PCRE_FREE_STUDY,BITONE)(extra); \
806 else \
807 G(PCRE_FREE_STUDY,BITTWO)(extra)
808
809 #define PCRE_FREE_SUBSTRING(substring) \
810 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811 G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
812 else \
813 G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
814
815 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
816 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817 G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
818 else \
819 G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
820
821 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
822 getnamesptr, subsptr) \
823 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
824 G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
825 getnamesptr, subsptr); \
826 else \
827 G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
828 getnamesptr, subsptr)
829
830 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
831 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
832 G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
833 else \
834 G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
835
836 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
837 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838 G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
839 else \
840 G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
841
842 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
843 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844 G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
845 else \
846 G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
847
/* Expression-form two-mode dispatch: evaluates to the result of the BITONE
variant when pcre_mode equals the BITONE mode constant, otherwise to the result
of the BITTWO variant. The whole conditional expression is parenthesized so
that the macro acts as a single operand when it appears inside a larger
expression (an unparenthesized ?: would capture neighbouring operators). */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
     : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
852
/* Statement-form two-mode dispatch: calls the BITONE variant when pcre_mode
equals the BITONE mode constant, otherwise the BITTWO variant. Expands to an
unbraced if/else, so the caller supplies the terminating semicolon. */
853 #define PCRE_JIT_STACK_FREE(stack) \
854 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
855 G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
856 else \
857 G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
858
/* Expression-form two-mode dispatch: calls the BITONE or the BITTWO
maketables function according to pcre_mode and evaluates to its result. The
conditional expression is fully parenthesized so that it remains a single
operand wherever the macro is used. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
862
/* Statement-form two-mode dispatch macros: each tests pcre_mode against the
BITONE mode constant and invokes the BITONE variant on a match, otherwise the
BITTWO variant. They expand to an unbraced if/else, so each use must be a
statement followed by a semicolon. */
863 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
864 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
865 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
866 else \
867 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
868
869 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
870 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871 G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
872 else \
873 G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
874
875 #define PCRE_STUDY(extra, re, options, error) \
876 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877 G(PCRE_STUDY,BITONE)(extra, re, options, error); \
878 else \
879 G(PCRE_STUDY,BITTWO)(extra, re, options, error)
880
881 #endif /* Two out of three modes */
882
883 /* ----- End of cases where more than one mode is supported ----- */
884
885
886 /* ----- Only 8-bit mode is supported ----- */
887
888 #elif defined SUPPORT_PCRE8
889 #define CHAR_SIZE 1
890 #define PCHARS PCHARS8
891 #define PCHARSV PCHARSV8
892 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
893 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
894 #define STRLEN STRLEN8
895 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
896 #define PCRE_COMPILE PCRE_COMPILE8
897 #define PCRE_CONFIG pcre_config
898 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
899 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
900 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
901 #define PCRE_EXEC PCRE_EXEC8
902 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
903 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
904 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
905 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
906 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
907 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
908 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
909 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
910 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
911 #define PCRE_MAKETABLES pcre_maketables()
912 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
913 #define PCRE_PRINTINT PCRE_PRINTINT8
914 #define PCRE_STUDY PCRE_STUDY8
915
916 /* ----- Only 16-bit mode is supported ----- */
917
918 #elif defined SUPPORT_PCRE16
919 #define CHAR_SIZE 2
920 #define PCHARS PCHARS16
921 #define PCHARSV PCHARSV16
922 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
923 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
924 #define STRLEN STRLEN16
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
926 #define PCRE_COMPILE PCRE_COMPILE16
927 #define PCRE_CONFIG pcre16_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
931 #define PCRE_EXEC PCRE_EXEC16
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
941 #define PCRE_MAKETABLES pcre16_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
943 #define PCRE_PRINTINT PCRE_PRINTINT16
944 #define PCRE_STUDY PCRE_STUDY16
945
946 /* ----- Only 32-bit mode is supported ----- */
947
948 #elif defined SUPPORT_PCRE32
949 #define CHAR_SIZE 4
950 #define PCHARS PCHARS32
951 #define PCHARSV PCHARSV32
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
954 #define STRLEN STRLEN32
955 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
956 #define PCRE_COMPILE PCRE_COMPILE32
957 #define PCRE_CONFIG pcre32_config
958 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
959 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
960 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
961 #define PCRE_EXEC PCRE_EXEC32
962 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
963 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
964 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
965 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
966 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
967 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
968 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
969 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
970 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
971 #define PCRE_MAKETABLES pcre32_maketables()
972 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
973 #define PCRE_PRINTINT PCRE_PRINTINT32
974 #define PCRE_STUDY PCRE_STUDY32
975
976 #endif
977
978 /* ----- End of mode-specific function call macros ----- */
979
980
981 /* Other parameters */
982
/* Fallback for environments where CLOCKS_PER_SEC is not already defined: use
CLK_TCK when that is defined, otherwise fall back to the value 100. */
983 #ifndef CLOCKS_PER_SEC
984 #ifdef CLK_TCK
985 #define CLOCKS_PER_SEC CLK_TCK
986 #else
987 #define CLOCKS_PER_SEC 100
988 #endif
989 #endif
990
/* Only defined when DFA support has not been compiled out with NODFA.
NOTE(review): presumably the dimension of the DFA workspace vector - confirm at
the point of use. */
991 #if !defined NODFA
992 #define DFA_WS_DIMENSION 1000
993 #endif
994
995 /* This is the default loop count for timing. */
996
997 #define LOOPREPEAT 500000
998
999 /* Static variables */
1000
1001 static FILE *outfile;
1002 static int log_store = 0;
1003 static int callout_count;
1004 static int callout_extra;
1005 static int callout_fail_count;
1006 static int callout_fail_id;
1007 static int debug_lengths;
1008 static int first_callout;
1009 static int jit_was_used; /* set to TRUE by jit_callback() */
1010 static int locale_set = 0;
1011 static int show_malloc;
1012 static int use_utf;
1013 static size_t gotten_store;
1014 static size_t first_gotten_store = 0;
1015 static const unsigned char *last_callout_mark = NULL;
1016
1017 /* The buffers grow automatically if very long input lines are encountered. */
1018
1019 static int buffer_size = 50000; /* initial value; NOTE(review): presumably a byte count - confirm where the buffers are allocated */
1020 static pcre_uint8 *buffer = NULL;
1021 static pcre_uint8 *pbuffer = NULL;
1022
1023 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1024
1025 #ifdef COMPILE_PCRE16
1026 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1027 #endif
1028
1029 #ifdef COMPILE_PCRE32
1030 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1031 #endif
1032
1033 /* We need buffers for building 16/32-bit strings, and the tables of operator
1034 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035 pattern for saving/reloading testing. Luckily, the data for these tables is
1036 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038 LINK_SIZE is also used later in this program. */
1039
1040 #ifdef SUPPORT_PCRE16
1041 #undef IMM2_SIZE
1042 #define IMM2_SIZE 1
1043
/* Rescale LINK_SIZE: 2 becomes 1, 3 or 4 becomes 2; any other value is
rejected at compile time. */
1044 #if LINK_SIZE == 2
1045 #undef LINK_SIZE
1046 #define LINK_SIZE 1
1047 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1048 #undef LINK_SIZE
1049 #define LINK_SIZE 2
1050 #else
1051 #error LINK_SIZE must be either 2, 3, or 4
1052 #endif
1053
1054 static int buffer16_size = 0; /* allocated size of buffer16 in bytes (it is the value passed to malloc() in to16()) */
1055 static pcre_uint16 *buffer16 = NULL; /* allocated and regrown on demand by to16(), which leaves its result here */
1056 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS }; /* built with the IMM2_SIZE/LINK_SIZE values set just above */
1057 #endif /* SUPPORT_PCRE16 */
1058
/* For 32-bit support both IMM2_SIZE and LINK_SIZE are forced to 1. Because
this section comes after the 16-bit one, these are the values that remain in
force for the rest of the file when 32-bit support is compiled in. */
1059 #ifdef SUPPORT_PCRE32
1060 #undef IMM2_SIZE
1061 #define IMM2_SIZE 1
1062 #undef LINK_SIZE
1063 #define LINK_SIZE 1
1064
1065 static int buffer32_size = 0; /* allocated size of buffer32 in bytes (it is the value passed to malloc() in to32()) */
1066 static pcre_uint32 *buffer32 = NULL; /* allocated and regrown on demand by to32(), which leaves its result here */
1067 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS }; /* built with the IMM2_SIZE/LINK_SIZE values set just above */
1068 #endif /* SUPPORT_PCRE32 */
1069
1070 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1071 support, it can be changed by an option. If there is no 8-bit support, there
1072 must be 16- or 32-bit support, so default to 16-bit mode if it is available, and to 32-bit mode otherwise. */
1073
1074 #if defined SUPPORT_PCRE8
1075 static int pcre_mode = PCRE8_MODE;
1076 #elif defined SUPPORT_PCRE16
1077 static int pcre_mode = PCRE16_MODE;
1078 #elif defined SUPPORT_PCRE32
1079 static int pcre_mode = PCRE32_MODE;
1080 #endif
1081
1082 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1083
1084 static int jit_study_bits[] =
1085 {
1086 PCRE_STUDY_JIT_COMPILE,
1087 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1088 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1089 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1090 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1091 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1092 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1093 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1094 };
1095
1096 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1097 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1098
1099 /* Textual explanations for runtime error codes */
1100
/* NOTE(review): the layout (entry 0 = no error, entry 1 = NOMATCH, ...)
suggests that this table is indexed by the negated PCRE error code - confirm at
the lookup site. NULL entries mark codes that, per the comments on them, are
either handled specially or never returned by the matching functions. There are
33 entries (indexes 0 to 32). */
1101 static const char *errtexts[] = {
1102 NULL, /* 0 is no error */
1103 NULL, /* NOMATCH is handled specially */
1104 "NULL argument passed",
1105 "bad option value",
1106 "magic number missing",
1107 "unknown opcode - pattern overwritten?",
1108 "no more memory",
1109 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1110 "match limit exceeded",
1111 "callout error code",
1112 NULL, /* BADUTF8/16 is handled specially */
1113 NULL, /* BADUTF8/16 offset is handled specially */
1114 NULL, /* PARTIAL is handled specially */
1115 "not used - internal error",
1116 "internal error - pattern overwritten?",
1117 "bad count value",
1118 "item unsupported for DFA matching",
1119 "backreference condition or recursion test not supported for DFA matching",
1120 "match limit not supported for DFA matching",
1121 "workspace size exceeded in DFA matching",
1122 "too much recursion for DFA matching",
1123 "recursion limit exceeded",
1124 "not used - internal error",
1125 "invalid combination of newline options",
1126 "bad offset value",
1127 NULL, /* SHORTUTF8/16 is handled specially */
1128 "nested recursion at the same subject position",
1129 "JIT stack limit reached",
1130 "pattern compiled in wrong mode: 8-bit/16-bit error",
1131 "pattern compiled with other endianness",
1132 "invalid data in workspace for DFA restart",
1133 "bad JIT option",
1134 "bad length"
1135 };
1136
1137
1138 /*************************************************
1139 * Alternate character tables *
1140 *************************************************/
1141
1142 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1143 using the default tables of the library. However, the T option can be used to
1144 select alternate sets of tables, for different kinds of testing. Note also that
1145 the L (locale) option also adjusts the tables. */
1146
1147 /* This is the set of tables distributed as default with PCRE. It recognizes
1148 only ASCII characters. */
1149
1150 static const pcre_uint8 tables0[] = {
1151
1152 /* This table is a lower casing table. */
1153
1154 0, 1, 2, 3, 4, 5, 6, 7,
1155 8, 9, 10, 11, 12, 13, 14, 15,
1156 16, 17, 18, 19, 20, 21, 22, 23,
1157 24, 25, 26, 27, 28, 29, 30, 31,
1158 32, 33, 34, 35, 36, 37, 38, 39,
1159 40, 41, 42, 43, 44, 45, 46, 47,
1160 48, 49, 50, 51, 52, 53, 54, 55,
1161 56, 57, 58, 59, 60, 61, 62, 63,
1162 64, 97, 98, 99,100,101,102,103,
1163 104,105,106,107,108,109,110,111,
1164 112,113,114,115,116,117,118,119,
1165 120,121,122, 91, 92, 93, 94, 95,
1166 96, 97, 98, 99,100,101,102,103,
1167 104,105,106,107,108,109,110,111,
1168 112,113,114,115,116,117,118,119,
1169 120,121,122,123,124,125,126,127,
1170 128,129,130,131,132,133,134,135,
1171 136,137,138,139,140,141,142,143,
1172 144,145,146,147,148,149,150,151,
1173 152,153,154,155,156,157,158,159,
1174 160,161,162,163,164,165,166,167,
1175 168,169,170,171,172,173,174,175,
1176 176,177,178,179,180,181,182,183,
1177 184,185,186,187,188,189,190,191,
1178 192,193,194,195,196,197,198,199,
1179 200,201,202,203,204,205,206,207,
1180 208,209,210,211,212,213,214,215,
1181 216,217,218,219,220,221,222,223,
1182 224,225,226,227,228,229,230,231,
1183 232,233,234,235,236,237,238,239,
1184 240,241,242,243,244,245,246,247,
1185 248,249,250,251,252,253,254,255,
1186
1187 /* This table is a case flipping table. */
1188
1189 0, 1, 2, 3, 4, 5, 6, 7,
1190 8, 9, 10, 11, 12, 13, 14, 15,
1191 16, 17, 18, 19, 20, 21, 22, 23,
1192 24, 25, 26, 27, 28, 29, 30, 31,
1193 32, 33, 34, 35, 36, 37, 38, 39,
1194 40, 41, 42, 43, 44, 45, 46, 47,
1195 48, 49, 50, 51, 52, 53, 54, 55,
1196 56, 57, 58, 59, 60, 61, 62, 63,
1197 64, 97, 98, 99,100,101,102,103,
1198 104,105,106,107,108,109,110,111,
1199 112,113,114,115,116,117,118,119,
1200 120,121,122, 91, 92, 93, 94, 95,
1201 96, 65, 66, 67, 68, 69, 70, 71,
1202 72, 73, 74, 75, 76, 77, 78, 79,
1203 80, 81, 82, 83, 84, 85, 86, 87,
1204 88, 89, 90,123,124,125,126,127,
1205 128,129,130,131,132,133,134,135,
1206 136,137,138,139,140,141,142,143,
1207 144,145,146,147,148,149,150,151,
1208 152,153,154,155,156,157,158,159,
1209 160,161,162,163,164,165,166,167,
1210 168,169,170,171,172,173,174,175,
1211 176,177,178,179,180,181,182,183,
1212 184,185,186,187,188,189,190,191,
1213 192,193,194,195,196,197,198,199,
1214 200,201,202,203,204,205,206,207,
1215 208,209,210,211,212,213,214,215,
1216 216,217,218,219,220,221,222,223,
1217 224,225,226,227,228,229,230,231,
1218 232,233,234,235,236,237,238,239,
1219 240,241,242,243,244,245,246,247,
1220 248,249,250,251,252,253,254,255,
1221
1222 /* This table contains bit maps for various character classes. Each map is 32
1223 bytes long and the bits run from the least significant end of each byte. The
1224 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1225 graph, print, punct, and cntrl. Other classes are built from combinations. */
1226
1227 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space: characters 9-13 and 32 */
1228 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1229 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1230 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1231
1232 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit: 0-9, A-F, a-f */
1233 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1234 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1236
1237 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit: 0-9 */
1238 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1239 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1240 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1241
1242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper: A-Z */
1243 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1244 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1246
1247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower: a-z */
1248 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1249 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1251
1252 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word: 0-9, A-Z, '_', a-z */
1253 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1255 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1256
1257 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph: characters 33-126 */
1258 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261
1262 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print: characters 32-126 */
1263 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1264 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266
1267 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
1268 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271
1272 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl: characters 0-31 and 127 */
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1274 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276
1277 /* This table identifies various classes of character by individual bits:
1278 0x01 white space character
1279 0x02 letter
1280 0x04 decimal digit
1281 0x08 hexadecimal digit
1282 0x10 alphanumeric or '_'
1283 0x80 regular expression metacharacter or binary zero
1284 */
1285
1286 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1287 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1288 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1289 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1290 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1291 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1292 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1293 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1294 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1295 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1296 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1297 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1298 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1299 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1300 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1301 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1311 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1316 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1318
1319 /* This is a set of tables that came originally from a Windows user. It seems to
1320 be at least an approximation of ISO 8859. In particular, there are characters
1321 greater than 128 that are marked as spaces, letters, etc. */
1322
1323 static const pcre_uint8 tables1[] = { /* 1088 bytes; the section sizes match tables0 (256 + 256 + 10*32 + 256), so the sections are labelled below by position */
1324 0,1,2,3,4,5,6,7, /* lower casing table (256 bytes) */
1325 8,9,10,11,12,13,14,15,
1326 16,17,18,19,20,21,22,23,
1327 24,25,26,27,28,29,30,31,
1328 32,33,34,35,36,37,38,39,
1329 40,41,42,43,44,45,46,47,
1330 48,49,50,51,52,53,54,55,
1331 56,57,58,59,60,61,62,63,
1332 64,97,98,99,100,101,102,103,
1333 104,105,106,107,108,109,110,111,
1334 112,113,114,115,116,117,118,119,
1335 120,121,122,91,92,93,94,95,
1336 96,97,98,99,100,101,102,103,
1337 104,105,106,107,108,109,110,111,
1338 112,113,114,115,116,117,118,119,
1339 120,121,122,123,124,125,126,127,
1340 128,129,130,131,132,133,134,135,
1341 136,137,138,139,140,141,142,143,
1342 144,145,146,147,148,149,150,151,
1343 152,153,154,155,156,157,158,159,
1344 160,161,162,163,164,165,166,167,
1345 168,169,170,171,172,173,174,175,
1346 176,177,178,179,180,181,182,183,
1347 184,185,186,187,188,189,190,191,
1348 224,225,226,227,228,229,230,231, /* 192-222 map to 224-254, except 215 which maps to itself */
1349 232,233,234,235,236,237,238,239,
1350 240,241,242,243,244,245,246,215,
1351 248,249,250,251,252,253,254,223,
1352 224,225,226,227,228,229,230,231,
1353 232,233,234,235,236,237,238,239,
1354 240,241,242,243,244,245,246,247,
1355 248,249,250,251,252,253,254,255,
1356 0,1,2,3,4,5,6,7, /* case flipping table (256 bytes) */
1357 8,9,10,11,12,13,14,15,
1358 16,17,18,19,20,21,22,23,
1359 24,25,26,27,28,29,30,31,
1360 32,33,34,35,36,37,38,39,
1361 40,41,42,43,44,45,46,47,
1362 48,49,50,51,52,53,54,55,
1363 56,57,58,59,60,61,62,63,
1364 64,97,98,99,100,101,102,103,
1365 104,105,106,107,108,109,110,111,
1366 112,113,114,115,116,117,118,119,
1367 120,121,122,91,92,93,94,95,
1368 96,65,66,67,68,69,70,71,
1369 72,73,74,75,76,77,78,79,
1370 80,81,82,83,84,85,86,87,
1371 88,89,90,123,124,125,126,127,
1372 128,129,130,131,132,133,134,135,
1373 136,137,138,139,140,141,142,143,
1374 144,145,146,147,148,149,150,151,
1375 152,153,154,155,156,157,158,159,
1376 160,161,162,163,164,165,166,167,
1377 168,169,170,171,172,173,174,175,
1378 176,177,178,179,180,181,182,183,
1379 184,185,186,187,188,189,190,191,
1380 224,225,226,227,228,229,230,231,
1381 232,233,234,235,236,237,238,239,
1382 240,241,242,243,244,245,246,215,
1383 248,249,250,251,252,253,254,223,
1384 192,193,194,195,196,197,198,199,
1385 200,201,202,203,204,205,206,207,
1386 208,209,210,211,212,213,214,247,
1387 216,217,218,219,220,221,222,255,
1388 0,62,0,0,1,0,0,0, /* class bit maps, ten maps of 32 bytes each; this one: space */
1389 0,0,0,0,0,0,0,0,
1390 32,0,0,0,1,0,0,0,
1391 0,0,0,0,0,0,0,0,
1392 0,0,0,0,0,0,255,3, /* xdigit */
1393 126,0,0,0,126,0,0,0,
1394 0,0,0,0,0,0,0,0,
1395 0,0,0,0,0,0,0,0,
1396 0,0,0,0,0,0,255,3, /* digit */
1397 0,0,0,0,0,0,0,0,
1398 0,0,0,0,0,0,12,2,
1399 0,0,0,0,0,0,0,0,
1400 0,0,0,0,0,0,0,0, /* upper */
1401 254,255,255,7,0,0,0,0,
1402 0,0,0,0,0,0,0,0,
1403 255,255,127,127,0,0,0,0,
1404 0,0,0,0,0,0,0,0, /* lower */
1405 0,0,0,0,254,255,255,7,
1406 0,0,0,0,0,4,32,4,
1407 0,0,0,128,255,255,127,255,
1408 0,0,0,0,0,0,255,3, /* word */
1409 254,255,255,135,254,255,255,7,
1410 0,0,0,0,0,4,44,6,
1411 255,255,127,255,255,255,127,255,
1412 0,0,0,0,254,255,255,255, /* graph */
1413 255,255,255,255,255,255,255,127,
1414 0,0,0,0,254,255,255,255,
1415 255,255,255,255,255,255,255,255,
1416 0,2,0,0,255,255,255,255, /* print */
1417 255,255,255,255,255,255,255,127,
1418 0,0,0,0,255,255,255,255,
1419 255,255,255,255,255,255,255,255,
1420 0,0,0,0,254,255,0,252, /* punct */
1421 1,0,0,248,1,0,0,120,
1422 0,0,0,0,254,255,255,255,
1423 0,0,128,0,0,0,128,0,
1424 255,255,255,255,0,0,0,0, /* cntrl */
1425 0,0,0,0,0,0,0,128,
1426 255,255,255,255,0,0,0,0,
1427 0,0,0,0,0,0,0,0,
1428 128,0,0,0,0,0,0,0, /* character type flags table (256 bytes), same bit meanings as documented for tables0 */
1429 0,1,1,0,1,1,0,0,
1430 0,0,0,0,0,0,0,0,
1431 0,0,0,0,0,0,0,0,
1432 1,0,0,0,128,0,0,0,
1433 128,128,128,128,0,0,128,0,
1434 28,28,28,28,28,28,28,28,
1435 28,28,0,0,0,0,0,128,
1436 0,26,26,26,26,26,26,18,
1437 18,18,18,18,18,18,18,18,
1438 18,18,18,18,18,18,18,18,
1439 18,18,18,128,128,0,128,16,
1440 0,26,26,26,26,26,26,18,
1441 18,18,18,18,18,18,18,18,
1442 18,18,18,18,18,18,18,18,
1443 18,18,18,128,128,0,0,0,
1444 0,0,0,0,0,1,0,0,
1445 0,0,0,0,0,0,0,0,
1446 0,0,0,0,0,0,0,0,
1447 0,0,0,0,0,0,0,0,
1448 1,0,0,0,0,0,0,0,
1449 0,0,18,0,0,0,0,0,
1450 0,0,20,20,0,18,0,0,
1451 0,20,18,0,0,0,0,0,
1452 18,18,18,18,18,18,18,18,
1453 18,18,18,18,18,18,18,18,
1454 18,18,18,18,18,18,18,0,
1455 18,18,18,18,18,18,18,18,
1456 18,18,18,18,18,18,18,18,
1457 18,18,18,18,18,18,18,18,
1458 18,18,18,18,18,18,18,0,
1459 18,18,18,18,18,18,18,18
1460 };
1461
1462
1463
1464
1465 #ifndef HAVE_STRERROR
1466 /*************************************************
1467 * Provide strerror() for non-ANSI libraries *
1468 *************************************************/
1469
1470 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1471 in their libraries, but can provide the same facility by this simple
1472 alternative function. */
1473
/* Error-message table and its entry count, supplied by the system library on
the platforms this fallback is meant for. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): return the sys_errlist entry for n, or a fixed
"unknown" text when n lies outside 0 .. sys_nerr-1. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1483 #endif /* HAVE_STRERROR */
1484
1485
1486
1487 /*************************************************
1488 * Print newline configuration *
1489 *************************************************/
1490
1491 /*
1492 Arguments:
1493 rc the return code from PCRE_CONFIG_NEWLINE
1494 isc TRUE if called from "-C newline"
1495 Returns: nothing
1496 */
1497
1498 static void
1499 print_newline_config(int rc, BOOL isc)
1500 {
1501 const char *s = NULL;
1502 if (!isc) printf(" Newline sequence is ");
1503 switch(rc)
1504 {
1505 case CHAR_CR: s = "CR"; break;
1506 case CHAR_LF: s = "LF"; break;
1507 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1508 case -1: s = "ANY"; break;
1509 case -2: s = "ANYCRLF"; break;
1510
1511 default:
1512 printf("a non-standard value: 0x%04x\n", rc);
1513 return;
1514 }
1515
1516 printf("%s\n", s);
1517 }
1518
1519
1520
1521 /*************************************************
1522 * JIT memory callback *
1523 *************************************************/
1524
1525 static pcre_jit_stack* jit_callback(void *arg)
1526 {
1527 jit_was_used = TRUE;
1528 return (pcre_jit_stack *)arg;
1529 }
1530
1531
1532 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1533 /*************************************************
1534 * Convert UTF-8 string to value *
1535 *************************************************/
1536
1537 /* This function takes one or more bytes that represent a UTF-8 character,
1538 and returns the value of the character.
1539
1540 Arguments:
1541 utf8bytes a pointer to the byte vector
1542 vptr a pointer to a pcre_uint32 to receive the value
1543
1544 Returns: > 0 => the number of bytes consumed
1545 -6 to 0 => malformed UTF-8 character at offset = (-return)
1546 */
1547
1548 static int
1549 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1550 {
1551 pcre_uint32 c = *utf8bytes++;
1552 pcre_uint32 d = c;
1553 int i, j, s;
1554
1555 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1556 {
1557 if ((d & 0x80) == 0) break;
1558 d <<= 1;
1559 }
1560
1561 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1562 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1563
1564 /* i now has a value in the range 1-5 */
1565
1566 s = 6*i;
1567 d = (c & utf8_table3[i]) << s;
1568
1569 for (j = 0; j < i; j++)
1570 {
1571 c = *utf8bytes++;
1572 if ((c & 0xc0) != 0x80) return -(j+1);
1573 s -= 6;
1574 d |= (c & 0x3f) << s;
1575 }
1576
1577 /* Check that encoding was the correct unique one */
1578
1579 for (j = 0; j < utf8_table1_size; j++)
1580 if (d <= (pcre_uint32)utf8_table1[j]) break;
1581 if (j != i) return -(i+1);
1582
1583 /* Valid value */
1584
1585 *vptr = d;
1586 return i+1;
1587 }
1588 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1589
1590
1591
1592 #if defined SUPPORT_PCRE8 && !defined NOUTF
1593 /*************************************************
1594 * Convert character value to UTF-8 *
1595 *************************************************/
1596
1597 /* This function takes an integer value in the range 0 - 0x7fffffff
1598 and encodes it as a UTF-8 character in 1 to 6 bytes.
1599
1600 Arguments:
1601 cvalue the character value
1602 utf8bytes pointer to buffer for result - at least 6 bytes long
1603
1604 Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1605 */
1606
1607 static int
1608 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1609 {
1610 register int i, j;
1611 if (cvalue > 0x7fffffffu)
1612 return -1;
1613 for (i = 0; i < utf8_table1_size; i++)
1614 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1615 utf8bytes += i;
1616 for (j = i; j > 0; j--)
1617 {
1618 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1619 cvalue >>= 6;
1620 }
1621 *utf8bytes = utf8_table2[i] | cvalue;
1622 return i + 1;
1623 }
1624 #endif
1625
1626
1627 #ifdef SUPPORT_PCRE16
1628 /*************************************************
1629 * Convert a string to 16-bit *
1630 *************************************************/
1631
1632 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1633 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1634 double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1635 in UTF-16, while higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1636 result is always left in buffer16.
1637
1638 Note that this function does not object to surrogate values. This is
1639 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1640 for the purpose of testing that they are correctly faulted.
1641
1642 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1643 in UTF-8 so that values greater than 255 can be handled.
1644
1645 Arguments:
1646 data TRUE if converting a data line; FALSE for a regex
1647 p points to a byte string
1648 utf true if UTF-8 (to be converted to UTF-16)
1649 len number of bytes in the string (excluding trailing zero)
1650
1651 Returns: number of 16-bit data items used (excluding trailing zero)
1652 OR -1 if a UTF-8 string is malformed
1653 OR -2 if a value > 0x10ffff is encountered
1654 OR -3 if a value > 0xffff is encountered when not in UTF mode
1655 */
1656
1657 static int
1658 to16(int data, pcre_uint8 *p, int utf, int len)
1659 {
1660 pcre_uint16 *pp;
1661
1662 if (buffer16_size < 2*len + 2)
1663 {
1664 if (buffer16 != NULL) free(buffer16);
1665 buffer16_size = 2*len + 2;
1666 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1667 if (buffer16 == NULL)
1668 {
1669 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1670 exit(1);
1671 }
1672 }
1673
1674 pp = buffer16;
1675
1676 if (!utf && !data)
1677 {
1678 while (len-- > 0) *pp++ = *p++;
1679 }
1680
1681 else
1682 {
1683 pcre_uint32 c = 0;
1684 while (len > 0)
1685 {
1686 int chlen = utf82ord(p, &c);
1687 if (chlen <= 0) return -1;
1688 if (c > 0x10ffff) return -2;
1689 p += chlen;
1690 len -= chlen;
1691 if (c < 0x10000) *pp++ = c; else
1692 {
1693 if (!utf) return -3;
1694 c -= 0x10000;
1695 *pp++ = 0xD800 | (c >> 10);
1696 *pp++ = 0xDC00 | (c & 0x3ff);
1697 }
1698 }
1699 }
1700
1701 *pp = 0;
1702 return pp - buffer16;
1703 }
1704 #endif
1705
1706 #ifdef SUPPORT_PCRE32
1707 /*************************************************
1708 * Convert a string to 32-bit *
1709 *************************************************/
1710
1711 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1712 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1713 times, because every character uses at least 1 byte in UTF-8 and exactly 4
1714 bytes in UTF-32, whatever its value. The
1715 result is always left in buffer32.
1716
1717 Note that, for data lines, this function does not object to surrogate values. This is
1718 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1719 for the purpose of testing that they are correctly faulted. In UTF mode, a surrogate in a pattern is rejected (-3).
1720
1721 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1722 in UTF-8 so that values greater than 255 can be handled.
1723
1724 Arguments:
1725 data TRUE if converting a data line; FALSE for a regex
1726 p points to a byte string
1727 utf true if UTF-8 (to be converted to UTF-32)
1728 len number of bytes in the string (excluding trailing zero)
1729
1730 Returns: number of 32-bit data items used (excluding trailing zero)
1731 OR -1 if a UTF-8 string is malformed
1732 OR -2 if a value > 0x10ffff is encountered
1733 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1734 */
1735
1736 static int
1737 to32(int data, pcre_uint8 *p, int utf, int len)
1738 {
1739 pcre_uint32 *pp;
1740
1741 if (buffer32_size < 4*len + 4)
1742 {
1743 if (buffer32 != NULL) free(buffer32);
1744 buffer32_size = 4*len + 4;
1745 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1746 if (buffer32 == NULL)
1747 {
1748 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1749 exit(1);
1750 }
1751 }
1752
1753 pp = buffer32;
1754
1755 if (!utf && !data)
1756 {
1757 while (len-- > 0) *pp++ = *p++;
1758 }
1759
1760 else
1761 {
1762 pcre_uint32 c = 0;
1763 while (len > 0)
1764 {
1765 int chlen = utf82ord(p, &c);
1766 if (chlen <= 0) return -1;
1767 if (utf)
1768 {
1769 if (c > 0x10ffff) return -2;
1770 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1771 }
1772
1773 p += chlen;
1774 len -= chlen;
1775 *pp++ = c;
1776 }
1777 }
1778
1779 *pp = 0;
1780 return pp - buffer32;
1781 }
1782
1783 /* Check that a 32-bit character string is valid UTF-32.
1784
1785 Arguments:
1786 string points to the string
1787 length length of string, or -1 if the string is zero-terminated
1788
1789 Returns: TRUE if the string is a valid UTF-32 string
1790 FALSE otherwise
1791 */
1792
1793 #ifdef NEVER
1794
1795 #ifdef SUPPORT_UTF
1796 static BOOL
1797 valid_utf32(pcre_uint32 *string, int length)
1798 {
1799 register pcre_uint32 *p;
1800 register pcre_uint32 c;
1801
1802 for (p = string; length-- > 0; p++)
1803 {
1804 c = *p;
1805
1806 if (c > 0x10ffffu)
1807 return FALSE;
1808
1809 /* A surrogate */
1810 if ((c & 0xfffff800u) == 0xd800u)
1811 return FALSE;
1812
1813 /* Non-character */
1814 if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1815 return FALSE;
1816 }
1817
1818 return TRUE;
1819 }
1820 #endif /* SUPPORT_UTF */
1821
1822 #endif /* NEVER */
1823
1824
1825 #endif
1826
1827 /*************************************************
1828 * Read or extend an input line *
1829 *************************************************/
1830
1831 /* Input lines are read into buffer, but both patterns and data lines can be
1832 continued over multiple input lines. In addition, if the buffer fills up, we
1833 want to automatically expand it so as to be able to handle extremely large
1834 lines that are needed for certain stress tests. When the input buffer is
1835 expanded, the other two buffers must also be expanded likewise, and the
1836 contents of pbuffer, which are a copy of the input for callouts, must be
1837 preserved (for when expansion happens for a data line). This is not the most
1838 optimal way of handling this, but hey, this is just a test program!
1839
1840 Arguments:
1841 f the file to read
1842 start where in buffer to start (this *must* be within buffer)
1843 prompt for stdin or readline()
1844
1845 Returns: pointer to the start of new data
1846 could be a copy of start, or could be moved
1847 NULL if no data read and EOF reached
1848 */
1849
1850 static pcre_uint8 *
1851 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1852 {
1853 pcre_uint8 *here = start;
1854
1855 for (;;)
1856 {
1857 size_t rlen = (size_t)(buffer_size - (here - buffer));
1858
1859 if (rlen > 1000)
1860 {
1861 int dlen;
1862
1863 /* If libreadline or libedit support is required, use readline() to read a
1864 line if the input is a terminal. Note that readline() removes the trailing
1865 newline, so we must put it back again, to be compatible with fgets(). */
1866
1867 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1868 if (isatty(fileno(f)))
1869 {
1870 size_t len;
1871 char *s = readline(prompt);
1872 if (s == NULL) return (here == start)? NULL : start;
1873 len = strlen(s);
1874 if (len > 0) add_history(s);
1875 if (len > rlen - 1) len = rlen - 1;
1876 memcpy(here, s, len);
1877 here[len] = '\n';
1878 here[len+1] = 0;
1879 free(s);
1880 }
1881 else
1882 #endif
1883
1884 /* Read the next line by normal means, prompting if the file is stdin. */
1885
1886 {
1887 if (f == stdin) printf("%s", prompt);
1888 if (fgets((char *)here, rlen, f) == NULL)
1889 return (here == start)? NULL : start;
1890 }
1891
1892 dlen = (int)strlen((char *)here);
1893 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1894 here += dlen;
1895 }
1896
1897 else
1898 {
1899 int new_buffer_size = 2*buffer_size;
1900 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1901 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1902
1903 if (new_buffer == NULL || new_pbuffer == NULL)
1904 {
1905 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1906 exit(1);
1907 }
1908
1909 memcpy(new_buffer, buffer, buffer_size);
1910 memcpy(new_pbuffer, pbuffer, buffer_size);
1911
1912 buffer_size = new_buffer_size;
1913
1914 start = new_buffer + (start - buffer);
1915 here = new_buffer + (here - buffer);
1916
1917 free(buffer);
1918 free(pbuffer);
1919
1920 buffer = new_buffer;
1921 pbuffer = new_pbuffer;
1922 }
1923 }
1924
1925 return NULL; /* Control never gets here */
1926 }
1927
1928
1929
1930 /*************************************************
1931 * Read number from string *
1932 *************************************************/
1933
1934 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1935 around with conditional compilation, just do the job by hand. It is only used
1936 for unpicking arguments, so just keep it simple.
1937
1938 Arguments:
1939 str string to be converted
1940 endptr where to put the end pointer
1941
1942 Returns: the converted value, as an int
1943 */
1944
1945 static int
1946 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1947 {
1948 int result = 0;
1949 while(*str != 0 && isspace(*str)) str++;
1950 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1951 *endptr = str;
1952 return(result);
1953 }
1954
1955
1956
1957 /*************************************************
1958 * Print one character *
1959 *************************************************/
1960
1961 /* Print a single character either literally, or as a hex escape. */
1962
1963 static int pchar(pcre_uint32 c, FILE *f)
1964 {
1965 int n = 0;
1966 if (PRINTOK(c))
1967 {
1968 if (f != NULL) fprintf(f, "%c", c);
1969 return 1;
1970 }
1971
1972 if (c < 0x100)
1973 {
1974 if (use_utf)
1975 {
1976 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1977 return 6;
1978 }
1979 else
1980 {
1981 if (f != NULL) fprintf(f, "\\x%02x", c);
1982 return 4;
1983 }
1984 }
1985
1986 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1987 return n >= 0 ? n : 0;
1988 }
1989
1990
1991
1992 #ifdef SUPPORT_PCRE8
1993 /*************************************************
1994 * Print 8-bit character string *
1995 *************************************************/
1996
1997 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1998 If handed a NULL file, just counts chars without printing. */
1999
2000 static int pchars(pcre_uint8 *p, int length, FILE *f)
2001 {
2002 pcre_uint32 c = 0;
2003 int yield = 0;
2004
2005 if (length < 0)
2006 length = strlen((char *)p);
2007
2008 while (length-- > 0)
2009 {
2010 #if !defined NOUTF
2011 if (use_utf)
2012 {
2013 int rc = utf82ord(p, &c);
2014 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2015 {
2016 length -= rc - 1;
2017 p += rc;
2018 yield += pchar(c, f);
2019 continue;
2020 }
2021 }
2022 #endif
2023 c = *p++;
2024 yield += pchar(c, f);
2025 }
2026
2027 return yield;
2028 }
2029 #endif
2030
2031
2032
2033 #ifdef SUPPORT_PCRE16
2034 /*************************************************
2035 * Find length of 0-terminated 16-bit string *
2036 *************************************************/
2037
2038 static int strlen16(PCRE_SPTR16 p)
2039 {
2040 int len = 0;
2041 while (*p++ != 0) len++;
2042 return len;
2043 }
2044 #endif /* SUPPORT_PCRE16 */
2045
2046
2047
2048 #ifdef SUPPORT_PCRE32
2049 /*************************************************
2050 * Find length of 0-terminated 32-bit string *
2051 *************************************************/
2052
2053 static int strlen32(PCRE_SPTR32 p)
2054 {
2055 int len = 0;
2056 while (*p++ != 0) len++;
2057 return len;
2058 }
2059 #endif /* SUPPORT_PCRE32 */
2060
2061
2062
2063 #ifdef SUPPORT_PCRE16
2064 /*************************************************
2065 * Print 16-bit character string *
2066 *************************************************/
2067
2068 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2069 If handed a NULL file, just counts chars without printing. */
2070
2071 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2072 {
2073 int yield = 0;
2074
2075 if (length < 0)
2076 length = strlen16(p);
2077
2078 while (length-- > 0)
2079 {
2080 pcre_uint32 c = *p++ & 0xffff;
2081 #if !defined NOUTF
2082 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2083 {
2084 int d = *p & 0xffff;
2085 if (d >= 0xDC00 && d < 0xDFFF)
2086 {
2087 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2088 length--;
2089 p++;
2090 }
2091 }
2092 #endif
2093 yield += pchar(c, f);
2094 }
2095
2096 return yield;
2097 }
2098 #endif /* SUPPORT_PCRE16 */
2099
2100
2101
2102 #ifdef SUPPORT_PCRE32
2103 /*************************************************
2104 * Print 32-bit character string *
2105 *************************************************/
2106
2107 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2108 If handed a NULL file, just counts chars without printing. */
2109
2110 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2111 {
2112 int yield = 0;
2113
2114 (void)(utf); /* Avoid compiler warning */
2115
2116 if (length < 0)
2117 length = strlen32(p);
2118
2119 while (length-- > 0)
2120 {
2121 pcre_uint32 c = *p++;
2122 yield += pchar(c, f);
2123 }
2124
2125 return yield;
2126 }
2127 #endif /* SUPPORT_PCRE32 */
2128
2129
2130
2131 #ifdef SUPPORT_PCRE8
2132 /*************************************************
2133 * Read a capture name (8-bit) and check it *
2134 *************************************************/
2135
2136 static pcre_uint8 *
2137 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2138 {
2139 pcre_uint8 *npp = *pp;
2140 while (isalnum(*p)) *npp++ = *p++;
2141 *npp++ = 0;
2142 *npp = 0;
2143 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2144 {
2145 fprintf(outfile, "no parentheses with name \"");
2146 PCHARSV(*pp, 0, -1, outfile);
2147 fprintf(outfile, "\"\n");
2148 }
2149
2150 *pp = npp;
2151 return p;
2152 }
2153 #endif /* SUPPORT_PCRE8 */
2154
2155
2156
2157 #ifdef SUPPORT_PCRE16
2158 /*************************************************
2159 * Read a capture name (16-bit) and check it *
2160 *************************************************/
2161
2162 /* Note that the text being read is 8-bit. */
2163
2164 static pcre_uint8 *
2165 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2166 {
2167 pcre_uint16 *npp = *pp;
2168 while (isalnum(*p)) *npp++ = *p++;
2169 *npp++ = 0;
2170 *npp = 0;
2171 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2172 {
2173 fprintf(outfile, "no parentheses with name \"");
2174 PCHARSV(*pp, 0, -1, outfile);
2175 fprintf(outfile, "\"\n");
2176 }
2177 *pp = npp;
2178 return p;
2179 }
2180 #endif /* SUPPORT_PCRE16 */
2181
2182
2183
2184 #ifdef SUPPORT_PCRE32
2185 /*************************************************
2186 * Read a capture name (32-bit) and check it *
2187 *************************************************/
2188
2189 /* Note that the text being read is 8-bit. */
2190
2191 static pcre_uint8 *
2192 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2193 {
2194 pcre_uint32 *npp = *pp;
2195 while (isalnum(*p)) *npp++ = *p++;
2196 *npp++ = 0;
2197 *npp = 0;
2198 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2199 {
2200 fprintf(outfile, "no parentheses with name \"");
2201 PCHARSV(*pp, 0, -1, outfile);
2202 fprintf(outfile, "\"\n");
2203 }
2204 *pp = npp;
2205 return p;
2206 }
2207 #endif /* SUPPORT_PCRE32 */
2208
2209
2210
2211 /*************************************************
2212 * Callout function *
2213 *************************************************/
2214
2215 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2216 the match. Yield zero unless more callouts than the fail count, or the callout
2217 data is not zero. */
2218
2219 static int callout(pcre_callout_block *cb)
2220 {
2221 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2222 int i, pre_start, post_start, subject_length;
2223
2224 if (callout_extra)
2225 {
2226 fprintf(f, "Callout %d: last capture = %d\n",
2227 cb->callout_number, cb->capture_last);
2228
2229 for (i = 0; i < cb->capture_top * 2; i += 2)
2230 {
2231 if (cb->offset_vector[i] < 0)
2232 fprintf(f, "%2d: <unset>\n", i/2);
2233 else
2234 {
2235 fprintf(f, "%2d: ", i/2);
2236 PCHARSV(cb->subject, cb->offset_vector[i],
2237 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2238 fprintf(f, "\n");
2239 }
2240 }
2241 }
2242
2243 /* Re-print the subject in canonical form, the first time or if giving full
2244 datails. On subsequent calls in the same match, we use pchars just to find the
2245 printed lengths of the substrings. */
2246
2247 if (f != NULL) fprintf(f, "--->");
2248
2249 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2250 PCHARS(post_start, cb->subject, cb->start_match,
2251 cb->current_position - cb->start_match, f);
2252
2253 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2254
2255 PCHARSV(cb->subject, cb->current_position,
2256 cb->subject_length - cb->current_position, f);
2257
2258 if (f != NULL) fprintf(f, "\n");
2259
2260 /* Always print appropriate indicators, with callout number if not already
2261 shown. For automatic callouts, show the pattern offset. */
2262
2263 if (cb->callout_number == 255)
2264 {
2265 fprintf(outfile, "%+3d ", cb->pattern_position);
2266 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2267 }
2268 else
2269 {
2270 if (callout_extra) fprintf(outfile, " ");
2271 else fprintf(outfile, "%3d ", cb->callout_number);
2272 }
2273
2274 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2275 fprintf(outfile, "^");
2276
2277 if (post_start > 0)
2278 {
2279 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2280 fprintf(outfile, "^");
2281 }
2282
2283 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2284 fprintf(outfile, " ");
2285
2286 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2287 pbuffer + cb->pattern_position);
2288
2289 fprintf(outfile, "\n");
2290 first_callout = 0;
2291
2292 if (cb->mark != last_callout_mark)
2293 {
2294 if (cb->mark == NULL)
2295 fprintf(outfile, "Latest Mark: <unset>\n");
2296 else
2297 {
2298 fprintf(outfile, "Latest Mark: ");
2299 PCHARSV(cb->mark, 0, -1, outfile);
2300 putc('\n', outfile);
2301 }
2302 last_callout_mark = cb->mark;
2303 }
2304
2305 if (cb->callout_data != NULL)
2306 {
2307 int callout_data = *((int *)(cb->callout_data));
2308 if (callout_data != 0)
2309 {
2310 fprintf(outfile, "Callout data = %d\n", callout_data);
2311 return callout_data;
2312 }
2313 }
2314
2315 return (cb->callout_number != callout_fail_id)? 0 :
2316 (++callout_count >= callout_fail_count)? 1 : 0;
2317 }
2318
2319
2320 /*************************************************
2321 * Local malloc functions *
2322 *************************************************/
2323
2324 /* Alternative malloc function, to test functionality and save the size of a
2325 compiled re, which is the first store request that pcre_compile() makes. The
2326 show_malloc variable is set only during matching. */
2327
2328 static void *new_malloc(size_t size)
2329 {
2330 void *block = malloc(size);
2331 gotten_store = size;
2332 if (first_gotten_store == 0) first_gotten_store = size;
2333 if (show_malloc)
2334 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2335 return block;
2336 }
2337
2338 static void new_free(void *block)
2339 {
2340 if (show_malloc)
2341 fprintf(outfile, "free %p\n", block);
2342 free(block);
2343 }
2344
2345 /* For recursion malloc/free, to test stacking calls */
2346
2347 static void *stack_malloc(size_t size)
2348 {
2349 void *block = malloc(size);
2350 if (show_malloc)
2351 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2352 return block;
2353 }
2354
2355 static void stack_free(void *block)
2356 {
2357 if (show_malloc)
2358 fprintf(outfile, "stack_free %p\n", block);
2359 free(block);
2360 }
2361
2362
2363 /*************************************************
2364 * Call pcre_fullinfo() *
2365 *************************************************/
2366
2367 /* Get one piece of information from the pcre_fullinfo() function. When only
2368 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2369 value, but the code is defensive.
2370
2371 Arguments:
2372 re compiled regex
2373 study study data
2374 option PCRE_INFO_xxx option
2375 ptr where to put the data
2376
2377 Returns: 0 when OK, < 0 on error
2378 */
2379
2380 static int
2381 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2382 {
2383 int rc;
2384
2385 if (pcre_mode == PCRE32_MODE)
2386 #ifdef SUPPORT_PCRE32
2387 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2388 #else
2389 rc = PCRE_ERROR_BADMODE;
2390 #endif
2391 else if (pcre_mode == PCRE16_MODE)
2392 #ifdef SUPPORT_PCRE16
2393 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2394 #else
2395 rc = PCRE_ERROR_BADMODE;
2396 #endif
2397 else
2398 #ifdef SUPPORT_PCRE8
2399 rc = pcre_fullinfo(re, study, option, ptr);
2400 #else
2401 rc = PCRE_ERROR_BADMODE;
2402 #endif
2403
2404 if (rc < 0)
2405 {
2406 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2407 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2408 if (rc == PCRE_ERROR_BADMODE)
2409 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2410 "%d-bit mode\n", 8 * CHAR_SIZE,
2411 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2412 }
2413
2414 return rc;
2415 }
2416
2417
2418
2419 /*************************************************
2420 * Swap byte functions *
2421 *************************************************/
2422
2423 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2424 value, respectively.
2425
2426 Arguments:
2427 value any number
2428
2429 Returns: the byte swapped value
2430 */
2431
2432 static pcre_uint32
2433 swap_uint32(pcre_uint32 value)
2434 {
2435 return ((value & 0x000000ff) << 24) |
2436 ((value & 0x0000ff00) << 8) |
2437 ((value & 0x00ff0000) >> 8) |
2438 (value >> 24);
2439 }
2440
2441 static pcre_uint16
2442 swap_uint16(pcre_uint16 value)
2443 {
2444 return (value >> 8) | (value << 8);
2445 }
2446
2447
2448
2449 /*************************************************
2450 * Flip bytes in a compiled pattern *
2451 *************************************************/
2452
2453 /* This function is called if the 'F' option was present on a pattern that is
2454 to be written to a file. We flip the bytes of all the integer fields in the
2455 regex data block and the study block. In 16-bit mode this also flips relevant
2456 bytes in the pattern itself. This is to make it possible to test PCRE's
2457 ability to reload byte-flipped patterns, e.g. those compiled on a different
2458 architecture. */
2459
2460 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2461 static void
2462 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2463 {
2464 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2465 #ifdef SUPPORT_PCRE16
2466 int op;
2467 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2468 int length = re->name_count * re->name_entry_size;
2469 #ifdef SUPPORT_UTF
2470 BOOL utf = (re->options & PCRE_UTF16) != 0;
2471 BOOL utf16_char = FALSE;
2472 #endif /* SUPPORT_UTF */
2473 #endif /* SUPPORT_PCRE16 */
2474
2475 /* Always flip the bytes in the main data block and study blocks. */
2476
2477 re->magic_number = REVERSED_MAGIC_NUMBER;
2478 re->size = swap_uint32(re->size);
2479 re->options = swap_uint32(re->options);
2480 re->flags = swap_uint16(re->flags);
2481 re->top_bracket = swap_uint16(re->top_bracket);
2482 re->top_backref = swap_uint16(re->top_backref);
2483 re->first_char = swap_uint16(re->first_char);
2484 re->req_char = swap_uint16(re->req_char);
2485 re->name_table_offset = swap_uint16(re->name_table_offset);
2486 re->name_entry_size = swap_uint16(re->name_entry_size);
2487 re->name_count = swap_uint16(re->name_count);
2488
2489 if (extra != NULL)
2490 {
2491 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2492 rsd->size = swap_uint32(rsd->size);
2493 rsd->flags = swap_uint32(rsd->flags);
2494 rsd->minlength = swap_uint32(rsd->minlength);
2495 }
2496
2497 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2498 in the name table, if present, and then in the pattern itself. */
2499
2500 #ifdef SUPPORT_PCRE16
2501 if (pcre_mode != PCRE16_MODE) return;
2502
2503 while(TRUE)
2504 {
2505 /* Swap previous characters. */
2506 while (length-- > 0)
2507 {
2508 *ptr = swap_uint16(*ptr);
2509 ptr++;
2510 }
2511 #ifdef SUPPORT_UTF
2512 if (utf16_char)
2513 {
2514 if ((ptr[-1] & 0xfc00) == 0xd800)
2515 {
2516 /* We know that there is only one extra character in UTF-16. */
2517 *ptr = swap_uint16(*ptr);
2518 ptr++;
2519 }
2520 }
2521 utf16_char = FALSE;
2522 #endif /* SUPPORT_UTF */
2523
2524 /* Get next opcode. */
2525
2526 length = 0;
2527 op = *ptr;
2528 *ptr++ = swap_uint16(op);
2529
2530 switch (op)
2531 {
2532 case OP_END:
2533 return;
2534
2535 #ifdef SUPPORT_UTF
2536 case OP_CHAR:
2537 case OP_CHARI:
2538 case OP_NOT:
2539 case OP_NOTI:
2540 case OP_STAR:
2541 case OP_MINSTAR:
2542 case OP_PLUS:
2543 case OP_MINPLUS:
2544 case OP_QUERY:
2545 case OP_MINQUERY:
2546 case OP_UPTO:
2547 case OP_MINUPTO:
2548 case OP_EXACT:
2549 case OP_POSSTAR:
2550 case OP_POSPLUS:
2551 case OP_POSQUERY:
2552 case OP_POSUPTO:
2553 case OP_STARI:
2554 case OP_MINSTARI:
2555 case OP_PLUSI:
2556 case OP_MINPLUSI:
2557 case OP_QUERYI:
2558 case OP_MINQUERYI:
2559 case OP_UPTOI:
2560 case OP_MINUPTOI:
2561 case OP_EXACTI:
2562 case OP_POSSTARI:
2563 case OP_POSPLUSI:
2564 case OP_POSQUERYI:
2565 case OP_POSUPTOI:
2566 case OP_NOTSTAR:
2567 case OP_NOTMINSTAR:
2568 case OP_NOTPLUS:
2569 case OP_NOTMINPLUS:
2570 case OP_NOTQUERY:
2571 case OP_NOTMINQUERY:
2572 case OP_NOTUPTO:
2573 case OP_NOTMINUPTO:
2574 case OP_NOTEXACT:
2575 case OP_NOTPOSSTAR:
2576 case OP_NOTPOSPLUS:
2577 case OP_NOTPOSQUERY:
2578 case OP_NOTPOSUPTO:
2579 case OP_NOTSTARI:
2580 case OP_NOTMINSTARI:
2581 case OP_NOTPLUSI:
2582 case OP_NOTMINPLUSI:
2583 case OP_NOTQUERYI:
2584 case OP_NOTMINQUERYI:
2585 case OP_NOTUPTOI:
2586 case OP_NOTMINUPTOI:
2587 case OP_NOTEXACTI:
2588 case OP_NOTPOSSTARI:
2589 case OP_NOTPOSPLUSI:
2590 case OP_NOTPOSQUERYI:
2591 case OP_NOTPOSUPTOI:
2592 if (utf) utf16_char = TRUE;
2593 #endif
2594 /* Fall through. */
2595
2596 default:
2597 length = OP_lengths16[op] - 1;
2598 break;
2599
2600 case OP_CLASS:
2601 case OP_NCLASS:
2602 /* Skip the character bit map. */
2603 ptr += 32/sizeof(pcre_uint16);
2604 length = 0;
2605 break;
2606
2607 case OP_XCLASS:
2608 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2609 if (LINK_SIZE > 1)
2610 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2611 - (1 + LINK_SIZE + 1));
2612 else
2613 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2614
2615 /* Reverse the size of the XCLASS instance. */
2616 *ptr = swap_uint16(*ptr);
2617 ptr++;
2618 if (LINK_SIZE > 1)
2619 {
2620 *ptr = swap_uint16(*ptr);
2621 ptr++;
2622 }
2623
2624 op = *ptr;
2625 *ptr = swap_uint16(op);
2626 ptr++;
2627 if ((op & XCL_MAP) != 0)
2628 {
2629 /* Skip the character bit map. */
2630 ptr += 32/sizeof(pcre_uint16);
2631 length -= 32/sizeof(pcre_uint16);
2632 }
2633 break;
2634 }
2635 }
2636 /* Control should never reach here in 16 bit mode. */
2637 #endif /* SUPPORT_PCRE16 */
2638 }
2639 #endif /* SUPPORT_PCRE[8|16] */
2640
2641
2642
2643 #if defined SUPPORT_PCRE32
2644 static void
2645 regexflip_32(pcre *ere, pcre_extra *extra)
2646 {
2647 real_pcre32 *re = (real_pcre32 *)ere;
2648 int op;
2649 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2650 int length = re->name_count * re->name_entry_size;
2651
2652 /* Always flip the bytes in the main data block and study blocks. */
2653
2654 re->magic_number = REVERSED_MAGIC_NUMBER;
2655 re->size = swap_uint32(re->size);
2656 re->options = swap_uint32(re->options);
2657 re->flags = swap_uint16(re->flags);
2658 re->top_bracket = swap_uint16(re->top_bracket);
2659 re->top_backref = swap_uint16(re->top_backref);
2660 re->first_char = swap_uint32(re->first_char);
2661 re->req_char = swap_uint32(re->req_char);
2662 re->name_table_offset = swap_uint16(re->name_table_offset);
2663 re->name_entry_size = swap_uint16(re->name_entry_size);
2664 re->name_count = swap_uint16(re->name_count);
2665
2666 if (extra != NULL)
2667 {
2668 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2669 rsd->size = swap_uint32(rsd->size);
2670 rsd->flags = swap_uint32(rsd->flags);
2671 rsd->minlength = swap_uint32(rsd->minlength);
2672 }
2673
2674 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2675 the pattern itself. */
2676
2677 while(TRUE)
2678 {
2679 /* Swap previous characters. */
2680 while (length-- > 0)
2681 {
2682 *ptr = swap_uint32(*ptr);
2683 ptr++;
2684 }
2685
2686 /* Get next opcode. */
2687
2688 length = 0;
2689 op = *ptr;
2690 *ptr++ = swap_uint32(op);
2691
2692 switch (op)
2693 {
2694 case OP_END:
2695 return;
2696
2697 default:
2698 length = OP_lengths32[op] - 1;
2699 break;
2700
2701 case OP_CLASS:
2702 case OP_NCLASS:
2703 /* Skip the character bit map. */
2704 ptr += 32/sizeof(pcre_uint32);
2705 length = 0;
2706 break;
2707
2708 case OP_XCLASS:
2709 /* LINK_SIZE can only be 1 in 32-bit mode. */
2710 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2711
2712 /* Reverse the size of the XCLASS instance. */
2713 *ptr = swap_uint32(*ptr);
2714 ptr++;
2715
2716 op = *ptr;
2717 *ptr = swap_uint32(op);
2718 ptr++;
2719 if ((op & XCL_MAP) != 0)
2720 {
2721 /* Skip the character bit map. */
2722 ptr += 32/sizeof(pcre_uint32);
2723 length -= 32/sizeof(pcre_uint32);
2724 }
2725 break;
2726 }
2727 }
2728 /* Control should never reach here in 32 bit mode. */
2729 }
2730
2731 #endif /* SUPPORT_PCRE32 */
2732
2733
2734
2735 static void
2736 regexflip(pcre *ere, pcre_extra *extra)
2737 {
2738 #if defined SUPPORT_PCRE32
2739 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2740 regexflip_32(ere, extra);
2741 #endif
2742 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2743 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2744 regexflip8_or_16(ere, extra);
2745 #endif
2746 }
2747
2748
2749
2750 /*************************************************
2751 * Check match or recursion limit *
2752 *************************************************/
2753
2754 static int
2755 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2756 int start_offset, int options, int *use_offsets, int use_size_offsets,
2757 int flag, unsigned long int *limit, int errnumber, const char *msg)
2758 {
2759 int count;
2760 int min = 0;
2761 int mid = 64;
2762 int max = -1;
2763
2764 extra->flags |= flag;
2765
2766 for (;;)
2767 {
2768 *limit = mid;
2769
2770 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2771 use_offsets, use_size_offsets);
2772
2773 if (count == errnumber)
2774 {
2775 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2776 min = mid;
2777 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2778 }
2779
2780 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2781 count == PCRE_ERROR_PARTIAL)
2782 {
2783 if (mid == min + 1)
2784 {
2785 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2786 break;
2787 }
2788 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2789 max = mid;
2790 mid = (min + mid)/2;
2791 }
2792 else break; /* Some other error */
2793 }
2794
2795 extra->flags &= ~flag;
2796 return count;
2797 }
2798
2799
2800
2801 /*************************************************
2802 * Case-independent strncmp() function *
2803 *************************************************/
2804
2805 /*
2806 Arguments:
2807 s first string
2808 t second string
2809 n number of characters to compare
2810
2811 Returns: < 0, = 0, or > 0, according to the comparison
2812 */
2813
2814 static int
2815 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2816 {
2817 while (n--)
2818 {
2819 int c = tolower(*s++) - tolower(*t++);
2820 if (c) return c;
2821 }
2822 return 0;
2823 }
2824
2825
2826
2827 /*************************************************
2828 * Check newline indicator *
2829 *************************************************/
2830
2831 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2832 a message and return 0 if there is no match.
2833
2834 Arguments:
2835 p points after the leading '<'
2836 f file for error message
2837
2838 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2839 */
2840
2841 static int
2842 check_newline(pcre_uint8 *p, FILE *f)
2843 {
2844 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2845 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2846 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2847 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2848 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2849 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2850 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2851 fprintf(f, "Unknown newline type at: <%s\n", p);
2852 return 0;
2853 }
2854
2855
2856
2857 /*************************************************
2858 * Usage function *
2859 *************************************************/
2860
static void
usage(void)
{
  /* Writes the command-line help to stdout. Lines for the 16-bit and 32-bit
  libraries, DFA matching and the POSIX interface appear only when the
  corresponding support is compiled in. */

  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
  fputs("Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
  fputs(" -32 use the 32-bit library\n", stdout);
#endif
  fputs(" -b show compiled code\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -C arg show a specific compile-time option\n", stdout);
  fputs(" and exit with its value. The arg can be:\n", stdout);
  fputs(" linksize internal link size [2, 3, 4]\n", stdout);
  fputs(" pcre8 8 bit library support enabled [0, 1]\n", stdout);
  fputs(" pcre16 16 bit library support enabled [0, 1]\n", stdout);
  fputs(" pcre32 32 bit library support enabled [0, 1]\n", stdout);
  fputs(" utf Unicode Transformation Format supported [0, 1]\n", stdout);
  fputs(" ucp Unicode Properties supported [0, 1]\n", stdout);
  fputs(" jit Just-in-time compiler supported [0, 1]\n", stdout);
  fputs(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s force each pattern to be studied at basic level\n", stdout);
  fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
  fputs(" -s++ ditto, verifying when JIT was actually used\n", stdout);
  fputs(" -s+n force each pattern to be studied, using JIT if available,\n", stdout);
  fputs(" where 1 <= n <= 7 selects JIT options\n", stdout);
  fputs(" -s++n ditto, verifying when JIT was actually used\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2915
2916
2917
2918 /*************************************************
2919 * Main Program *
2920 *************************************************/
2921
2922 /* Read lines from named file or stdin and write to named file or stdout; lines
2923 consist of a regular expression, in delimiters and optionally followed by
2924 options, followed by a set of test data, terminated by an empty line. */
2925
2926 int main(int argc, char **argv)
2927 {
2928 FILE *infile = stdin;
2929 const char *version;
2930 int options = 0;
2931 int study_options = 0;
2932 int default_find_match_limit = FALSE;
2933 int op = 1;
2934 int timeit = 0;
2935 int timeitm = 0;
2936 int showinfo = 0;
2937 int showstore = 0;
2938 int force_study = -1;
2939 int force_study_options = 0;
2940 int quiet = 0;
2941 int size_offsets = 45;
2942 int size_offsets_max;
2943 int *offsets = NULL;
2944 int debug = 0;
2945 int done = 0;
2946 int all_use_dfa = 0;
2947 int verify_jit = 0;
2948 int yield = 0;
2949 int stack_size;
2950 pcre_uint8 *dbuffer = NULL;
2951 size_t dbuffer_size = 1u << 14;
2952
2953 #if !defined NOPOSIX
2954 int posix = 0;
2955 #endif
2956 #if !defined NODFA
2957 int *dfa_workspace = NULL;
2958 #endif
2959
2960 pcre_jit_stack *jit_stack = NULL;
2961
2962 /* These vectors store, end-to-end, a list of zero-terminated captured
2963 substring names, each list itself being terminated by an empty name. Assume
2964 that 1024 is plenty long enough for the few names we'll be testing. It is
2965 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2966 for the actual memory, to ensure alignment. */
2967
2968 pcre_uint32 copynames[1024];
2969 pcre_uint32 getnames[1024];
2970
2971 #ifdef SUPPORT_PCRE32
2972 pcre_uint32 *cn32ptr;
2973 pcre_uint32 *gn32ptr;
2974 #endif
2975
2976 #ifdef SUPPORT_PCRE16
2977 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2978 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2979 pcre_uint16 *cn16ptr;
2980 pcre_uint16 *gn16ptr;
2981 #endif
2982
2983 #ifdef SUPPORT_PCRE8
2984 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2985 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2986 pcre_uint8 *cn8ptr;
2987 pcre_uint8 *gn8ptr;
2988 #endif
2989
2990 /* Get buffers from malloc() so that valgrind will check their misuse when
2991 debugging. They grow automatically when very long lines are read. The 16-
2992 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2993
2994 buffer = (pcre_uint8 *)malloc(buffer_size);
2995 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2996
2997 /* The outfile variable is static so that new_malloc can use it. */
2998
2999 outfile = stdout;
3000
3001 /* The following _setmode() stuff is some Windows magic that tells its runtime
3002 library to translate CRLF into a single LF character. At least, that's what
3003 I've been told: never having used Windows I take this all on trust. Originally
3004 it set 0x8000, but then I was advised that _O_BINARY was better. */
3005
3006 #if defined(_WIN32) || defined(WIN32)
3007 _setmode( _fileno( stdout ), _O_BINARY );
3008 #endif
3009
3010 /* Get the version number: both pcre_version() and pcre16_version() give the
3011 same answer. We just need to ensure that we call one that is available. */
3012
3013 #if defined SUPPORT_PCRE8
3014 version = pcre_version();
3015 #elif defined SUPPORT_PCRE16
3016 version = pcre16_version();
3017 #elif defined SUPPORT_PCRE32
3018 version = pcre32_version();
3019 #endif
3020
3021 /* Scan options */
3022
3023 while (argc > 1 && argv[op][0] == '-')
3024 {
3025 pcre_uint8 *endptr;
3026 char *arg = argv[op];
3027
3028 if (strcmp(arg, "-m") == 0) showstore = 1;
3029 else if (strcmp(arg, "-s") == 0) force_study = 0;
3030
3031 else if (strncmp(arg, "-s+", 3) == 0)
3032 {
3033 arg += 3;
3034 if (*arg == '+') { arg++; verify_jit = TRUE; }
3035 force_study = 1;
3036 if (*arg == 0)
3037 force_study_options = jit_study_bits[6];
3038 else if (*arg >= '1' && *arg <= '7')
3039 force_study_options = jit_study_bits[*arg - '1'];
3040 else goto BAD_ARG;
3041 }
3042 else if (strcmp(arg, "-8") == 0)
3043 {
3044 #ifdef SUPPORT_PCRE8
3045 pcre_mode = PCRE8_MODE;
3046 #else
3047 printf("** This version of PCRE was built without 8-bit support\n");
3048 exit(1);
3049 #endif
3050 }
3051 else if (strcmp(arg, "-16") == 0)
3052 {
3053 #ifdef SUPPORT_PCRE16
3054 pcre_mode = PCRE16_MODE;
3055 #else
3056 printf("** This version of PCRE was built without 16-bit support\n");
3057 exit(1);
3058 #endif
3059 }
3060 else if (strcmp(arg, "-32") == 0)
3061 {
3062 #ifdef SUPPORT_PCRE32
3063 pcre_mode = PCRE32_MODE;
3064 #else
3065 printf("** This version of PCRE was built without 32-bit support\n");
3066 exit(1);
3067 #endif
3068 }
3069 else if (strcmp(arg, "-q") == 0) quiet = 1;
3070 else if (strcmp(arg, "-b") == 0) debug = 1;
3071 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3072 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3073 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3074 #if !defined NODFA
3075 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3076 #endif
3077 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3078 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3079 *endptr == 0))
3080 {
3081 op++;
3082 argc--;
3083 }
3084 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3085 {
3086 int both = arg[2] == 0;
3087 int temp;
3088 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3089 *endptr == 0))
3090 {
3091 timeitm = temp;
3092 op++;
3093 argc--;
3094 }
3095 else timeitm = LOOPREPEAT;
3096 if (both) timeit = timeitm;
3097 }
3098 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3099 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3100 *endptr == 0))
3101 {
3102 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3103 printf("PCRE: -S not supported on this OS\n");
3104 exit(1);
3105 #else
3106 int rc;
3107 struct rlimit rlim;
3108 getrlimit(RLIMIT_STACK, &rlim);
3109 rlim.rlim_cur = stack_size * 1024 * 1024;
3110 rc = setrlimit(RLIMIT_STACK, &rlim);
3111 if (rc != 0)
3112 {
3113 printf("PCRE: setrlimit() failed with error %d\n", rc);
3114 exit(1);
3115 }
3116 op++;
3117 argc--;
3118 #endif
3119 }
3120 #if !defined NOPOSIX
3121 else if (strcmp(arg, "-p") == 0) posix = 1;
3122 #endif
3123 else if (strcmp(arg, "-C") == 0)
3124 {
3125 int rc;
3126 unsigned long int lrc;
3127
3128 if (argc > 2)
3129 {
3130 if (strcmp(argv[op + 1], "linksize") == 0)
3131 {
3132 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3133 printf("%d\n", rc);
3134 yield = rc;
3135 }
3136 else if (strcmp(argv[op + 1], "pcre8") == 0)
3137 {
3138 #ifdef SUPPORT_PCRE8
3139 printf("1\n");
3140 yield = 1;
3141 #else
3142 printf("0\n");
3143 yield = 0;
3144 #endif
3145 goto EXIT;
3146 }
3147 else if (strcmp(argv[op + 1], "pcre16") == 0)
3148 {
3149 #ifdef SUPPORT_PCRE16
3150 printf("1\n");
3151 yield = 1;
3152 #else
3153 printf("0\n");
3154 yield = 0;
3155 #endif
3156 goto EXIT;
3157 }
3158 else if (strcmp(argv[op + 1], "pcre32") == 0)
3159 {
3160 #ifdef SUPPORT_PCRE32
3161 printf("1\n");
3162 yield = 1;
3163 #else
3164 printf("0\n");
3165 yield = 0;
3166 #endif
3167 goto EXIT;
3168 }
3169 if (strcmp(argv[op + 1], "utf") == 0)
3170 {
3171 #ifdef SUPPORT_PCRE8
3172 if (pcre_mode == PCRE8_MODE)
3173 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3174 #endif
3175 #ifdef SUPPORT_PCRE16
3176 if (pcre_mode == PCRE16_MODE)
3177 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3178 #endif
3179 #ifdef SUPPORT_PCRE32
3180 if (pcre_mode == PCRE32_MODE)
3181 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3182 #endif
3183 printf("%d\n", rc);
3184 yield = rc;
3185 goto EXIT;
3186 }
3187 else if (strcmp(argv[op + 1], "ucp") == 0)
3188 {
3189 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3190 printf("%d\n", rc);
3191 yield = rc;
3192 }
3193 else if (strcmp(argv[op + 1], "jit") == 0)
3194 {
3195 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3196 printf("%d\n", rc);
3197 yield = rc;
3198 }
3199 else if (strcmp(argv[op + 1], "newline") == 0)
3200 {
3201 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3202 print_newline_config(rc, TRUE);
3203 }
3204 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3205 {
3206 #ifdef EBCDIC
3207 printf("1\n");
3208 yield = 1;
3209 #else
3210 printf("0\n");
3211 #endif
3212 }
3213 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3214 {
3215 #ifdef EBCDIC
3216 printf("0x%02x\n", CHAR_LF);
3217 #else
3218 printf("0\n");
3219 #endif
3220 }
3221 else
3222 {
3223 printf("Unknown -C option: %s\n", argv[op + 1]);
3224 }
3225 goto EXIT;
3226 }
3227
3228 /* No argument for -C: output all configuration information. */
3229
3230 printf("PCRE version %s\n", version);
3231 printf("Compiled with\n");
3232
3233 #ifdef EBCDIC
3234 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3235 #endif
3236
3237 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3238 are set, either both UTFs are supported or both are not supported. */
3239
3240 #ifdef SUPPORT_PCRE8
3241 printf(" 8-bit support\n");
3242 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3243 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3244 #endif
3245 #ifdef SUPPORT_PCRE16
3246 printf(" 16-bit support\n");
3247 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3248 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3249 #endif
3250 #ifdef SUPPORT_PCRE32
3251 printf(" 32-bit support\n");
3252 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3253 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3254 #endif
3255
3256 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3257 printf(" %sUnicode properties support\n", rc? "" : "No ");
3258 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3259 if (rc)
3260 {
3261 const char *arch;
3262 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3263 printf(" Just-in-time compiler support: %s\n", arch);
3264 }
3265 else
3266 printf(" No just-in-time compiler support\n");
3267 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3268 print_newline_config(rc, FALSE);
3269 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3270 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3271 "all Unicode newlines");
3272 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3273 printf(" Internal link size = %d\n", rc);
3274 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3275 printf(" POSIX malloc threshold = %d\n", rc);
3276 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3277 printf(" Default match limit = %ld\n", lrc);
3278 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3279 printf(" Default recursion depth limit = %ld\n", lrc);
3280 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3281 printf(" Match recursion uses %s", rc? "stack" : "heap");
3282 if (showstore)
3283 {
3284 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3285 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3286 }
3287 printf("\n");
3288 goto EXIT;
3289 }
3290 else if (strcmp(arg, "-help") == 0 ||
3291 strcmp(arg, "--help") == 0)
3292 {
3293 usage();
3294 goto EXIT;
3295 }
3296 else
3297 {
3298 BAD_ARG:
3299 printf("** Unknown or malformed option %s\n", arg);
3300 usage();
3301 yield = 1;
3302 goto EXIT;
3303 }
3304 op++;
3305 argc--;
3306 }
3307
3308 /* Get the store for the offsets vector, and remember what it was */
3309
3310 size_offsets_max = size_offsets;
3311 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3312 if (offsets == NULL)
3313 {
3314 printf("** Failed to get %d bytes of memory for offsets vector\n",
3315 (int)(size_offsets_max * sizeof(int)));
3316 yield = 1;
3317 goto EXIT;
3318 }
3319
3320 /* Sort out the input and output files */
3321
3322 if (argc > 1)
3323 {
3324 infile = fopen(argv[op], INPUT_MODE);
3325 if (infile == NULL)
3326 {
3327 printf("** Failed to open %s\n", argv[op]);
3328 yield = 1;
3329 goto EXIT;
3330 }
3331 }
3332
3333 if (argc > 2)
3334 {
3335 outfile = fopen(argv[op+1], OUTPUT_MODE);
3336 if (outfile == NULL)
3337 {
3338 printf("** Failed to open %s\n", argv[op+1]);
3339 yield = 1;
3340 goto EXIT;
3341 }
3342 }
3343
3344 /* Set alternative malloc function */
3345
3346 #ifdef SUPPORT_PCRE8
3347 pcre_malloc = new_malloc;
3348 pcre_free = new_free;
3349 pcre_stack_malloc = stack_malloc;
3350 pcre_stack_free = stack_free;
3351 #endif
3352
3353 #ifdef SUPPORT_PCRE16
3354 pcre16_malloc = new_malloc;
3355 pcre16_free = new_free;
3356 pcre16_stack_malloc = stack_malloc;
3357 pcre16_stack_free = stack_free;
3358 #endif
3359
3360 #ifdef SUPPORT_PCRE32
3361 pcre32_malloc = new_malloc;
3362 pcre32_free = new_free;
3363 pcre32_stack_malloc = stack_malloc;
3364 pcre32_stack_free = stack_free;
3365 #endif
3366
3367 /* Heading line unless quiet, then prompt for first regex if stdin */
3368
3369 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3370
3371 /* Main loop */
3372
3373 while (!done)
3374 {
3375 pcre *re = NULL;
3376 pcre_extra *extra = NULL;
3377
3378 #if !defined NOPOSIX /* There are still compilers that require no indent */
3379 regex_t preg;
3380 int do_posix = 0;
3381 #endif
3382
3383 const char *error;
3384 pcre_uint8 *markptr;
3385 pcre_uint8 *p, *pp, *ppp;
3386 pcre_uint8 *to_file = NULL;
3387 const pcre_uint8 *tables = NULL;
3388 unsigned long int get_options;
3389 unsigned long int true_size, true_study_size = 0;
3390 size_t size, regex_gotten_store;
3391 int do_allcaps = 0;
3392 int do_mark = 0;
3393 int do_study = 0;
3394 int no_force_study = 0;
3395 int do_debug = debug;
3396 int do_G = 0;
3397 int do_g = 0;
3398 int do_showinfo = showinfo;
3399 int do_showrest = 0;
3400 int do_showcaprest = 0;
3401 int do_flip = 0;
3402 int erroroffset, len, delimiter, poffset;
3403
3404 #if !defined NODFA
3405 int dfa_matched = 0;
3406 #endif
3407
3408 use_utf = 0;
3409 debug_lengths = 1;
3410
3411 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3412 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3413 fflush(outfile);
3414
3415 p = buffer;
3416 while (isspace(*p)) p++;
3417 if (*p == 0) continue;
3418
3419 /* See if the pattern is to be loaded pre-compiled from a file. */
3420
3421 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3422 {
3423 pcre_uint32 magic;
3424 pcre_uint8 sbuf[8];
3425 FILE *f;
3426
3427 p++;
3428 if (*p == '!')
3429 {
3430 do_debug = TRUE;
3431 do_showinfo = TRUE;
3432 p++;
3433 }
3434
3435 pp = p + (int)strlen((char *)p);
3436 while (isspace(pp[-1])) pp--;
3437 *pp = 0;
3438
3439 f = fopen((char *)p, "rb");
3440 if (f == NULL)
3441 {
3442 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3443 continue;
3444 }
3445
3446 first_gotten_store = 0;
3447 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3448
3449 true_size =
3450 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3451 true_study_size =
3452 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3453
3454 re = (pcre *)new_malloc(true_size);
3455 if (re == NULL)
3456 {
3457 printf("** Failed to get %d bytes of memory for pcre object\n",
3458 (int)true_size);
3459 yield = 1;
3460 goto EXIT;
3461 }
3462 regex_gotten_store = first_gotten_store;
3463
3464 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3465
3466 magic = REAL_PCRE_MAGIC(re);
3467 if (magic != MAGIC_NUMBER)
3468 {
3469 if (swap_uint32(magic) == MAGIC_NUMBER)
3470 {
3471 do_flip = 1;
3472 }
3473 else
3474 {
3475 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3476 new_free(re);
3477 fclose(f);
3478 continue;
3479 }
3480 }
3481
3482 /* We hide the byte-invert info for little and big endian tests. */
3483 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3484 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3485
3486 /* Now see if there is any following study data. */
3487
3488 if (true_study_size != 0)
3489 {
3490 pcre_study_data *psd;
3491
3492 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3493 extra->flags = PCRE_EXTRA_STUDY_DATA;
3494
3495 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3496 extra->study_data = psd;
3497
3498 if (fread(psd, 1, true_study_size, f) != true_study_size)
3499 {
3500 FAIL_READ:
3501 fprintf(outfile, "Failed to read data from %s\n", p);
3502 if (extra != NULL)
3503 {
3504 PCRE_FREE_STUDY(extra);
3505 }
3506 new_free(re);
3507 fclose(f);
3508 continue;
3509 }
3510 fprintf(outfile, "Study data loaded from %s\n", p);
3511 do_study = 1; /* To get the data output if requested */
3512 }
3513 else fprintf(outfile, "No study data\n");
3514
3515 /* Flip the necessary bytes. */
3516 if (do_flip)
3517 {
3518 int rc;
3519 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3520 if (rc == PCRE_ERROR_BADMODE)
3521 {
3522 pcre_uint16 flags_in_host_byte_order;
3523 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3524 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3525 else
3526 flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3527 /* Simulate the result of the function call below. */
3528 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3529 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3530 PCRE_INFO_OPTIONS);
3531 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3532 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3533 new_free(re);
3534 fclose(f);
3535 continue;
3536 }
3537 }
3538
3539 /* Need to know if UTF-8 for printing data strings. */
3540
3541 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3542 {
3543 new_free(re);
3544 fclose(f);
3545 continue;
3546 }
3547 use_utf = (get_options & PCRE_UTF8) != 0;
3548
3549 fclose(f);
3550 goto SHOW_INFO;
3551 }
3552
3553 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3554 the pattern; if it isn't complete, read more. */
3555
3556 delimiter = *p++;
3557
3558 if (isalnum(delimiter) || delimiter == '\\')
3559 {
3560 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3561 goto SKIP_DATA;
3562 }
3563
3564 pp = p;
3565 poffset = (int)(p - buffer);
3566
3567 for(;;)
3568 {
3569 while (*pp != 0)
3570 {
3571 if (*pp == '\\' && pp[1] != 0) pp++;
3572 else if (*pp == delimiter) break;
3573 pp++;
3574 }
3575 if (*pp != 0) break;
3576 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3577 {
3578 fprintf(outfile, "** Unexpected EOF\n");
3579 done = 1;
3580 goto CONTINUE;
3581 }
3582 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3583 }
3584
3585 /* The buffer may have moved while being extended; reset the start of data
3586 pointer to the correct relative point in the buffer. */
3587
3588 p = buffer + poffset;
3589
3590 /* If the first character after the delimiter is backslash, make
3591 the pattern end with backslash. This is purely to provide a way
3592 of testing for the error message when a pattern ends with backslash. */
3593
3594 if (pp[1] == '\\') *pp++ = '\\';
3595
3596 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3597 for callouts. */
3598
3599 *pp++ = 0;
3600 strcpy((char *)pbuffer, (char *)p);
3601
3602 /* Look for options after final delimiter */
3603
3604 options = 0;
3605 study_options = force_study_options;
3606 log_store = showstore; /* default from command line */
3607
3608 while (*pp != 0)
3609 {
3610 switch (*pp++)
3611 {
3612 case 'f': options |= PCRE_FIRSTLINE; break;
3613 case 'g': do_g = 1; break;
3614 case 'i': options |= PCRE_CASELESS; break;
3615 case 'm': options |= PCRE_MULTILINE; break;
3616 case 's': options |= PCRE_DOTALL; break;
3617 case 'x': options |= PCRE_EXTENDED; break;
3618
3619 case '+':
3620 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3621 break;
3622
3623 case '=': do_allcaps = 1; break;
3624 case 'A': options |= PCRE_ANCHORED; break;
3625 case 'B': do_debug = 1; break;
3626 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3627 case 'D': do_debug = do_showinfo = 1; break;
3628 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3629 case 'F': do_flip = 1; break;
3630 case 'G': do_G = 1; break;
3631 case 'I': do_showinfo = 1; break;
3632 case 'J': options |= PCRE_DUPNAMES; break;
3633 case 'K': do_mark = 1; break;
3634 case 'M': log_store = 1; break;
3635 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3636
3637 #if !defined NOPOSIX
3638 case 'P': do_posix = 1; break;
3639 #endif
3640
3641 case 'S':
3642 do_study = 1;
3643 for (;;)
3644 {
3645 switch (*pp++)
3646 {
3647 case 'S':
3648 do_study = 0;
3649 no_force_study = 1;
3650 break;
3651
3652 case '!':
3653 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3654 break;
3655
3656 case '+':
3657 if (*pp == '+')
3658 {
3659 verify_jit = TRUE;
3660 pp++;
3661 }
3662 if (*pp >= '1' && *pp <= '7')
3663 study_options |= jit_study_bits[*pp++ - '1'];
3664 else
3665 study_options |= jit_study_bits[6];
3666 break;
3667
3668 case '-':
3669 study_options &= ~PCRE_STUDY_ALLJIT;
3670 break;
3671
3672 default:
3673 pp--;
3674 goto ENDLOOP;
3675 }
3676 }
3677 ENDLOOP:
3678 break;
3679
3680 case 'U': options |= PCRE_UNGREEDY; break;
3681 case 'W': options |= PCRE_UCP; break;
3682 case 'X': options |= PCRE_EXTRA; break;
3683 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3684 case 'Z': debug_lengths = 0; break;
3685 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3686 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3687
3688 case 'T':
3689 switch (*pp++)
3690 {
3691 case '0': tables = tables0; break;
3692 case '1': tables = tables1; break;
3693
3694 case '\r':
3695 case '\n':
3696 case ' ':
3697 case 0:
3698 fprintf(outfile, "** Missing table number after /T\n");
3699 goto SKIP_DATA;
3700
3701 default:
3702 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3703 goto SKIP_DATA;
3704 }
3705 break;
3706
3707 case 'L':
3708 ppp = pp;
3709 /* The '\r' test here is so that it works on Windows. */
3710 /* The '0' test is just in case this is an unterminated line. */
3711 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3712 *ppp = 0;
3713 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3714 {
3715 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3716 goto SKIP_DATA;
3717 }
3718 locale_set = 1;
3719 tables = PCRE_MAKETABLES;
3720 pp = ppp;
3721 break;
3722
3723 case '>':
3724 to_file = pp;
3725 while (*pp != 0) pp++;
3726 while (isspace(pp[-1])) pp--;
3727 *pp = 0;
3728 break;
3729
3730 case '<':
3731 {
3732 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3733 {
3734 options |= PCRE_JAVASCRIPT_COMPAT;
3735 pp += 3;
3736 }
3737 else
3738 {
3739 int x = check_newline(pp, outfile);
3740 if (x == 0) goto SKIP_DATA;
3741 options |= x;
3742 while (*pp++ != '>');
3743 }
3744 }
3745 break;
3746
3747 case '\r': /* So that it works in Windows */
3748 case '\n':
3749 case ' ':
3750 break;
3751
3752 default:
3753 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3754 goto SKIP_DATA;
3755 }
3756 }
3757
3758 /* Handle compiling via the POSIX interface, which doesn't support the
3759 timing, showing, or debugging options, nor the ability to pass over
3760 local character tables. Neither does it have 16-bit support. */
3761
3762 #if !defined NOPOSIX
3763 if (posix || do_posix)
3764 {
3765 int rc;
3766 int cflags = 0;
3767
3768 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3769 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3770 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3771 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3772 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3773 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3774 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3775
3776 first_gotten_store = 0;
3777 rc = regcomp(&preg, (char *)p, cflags);
3778
3779 /* Compilation failed; go back for another re, skipping to blank line
3780 if non-interactive. */
3781
3782 if (rc != 0)
3783 {
3784 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3785 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3786 goto SKIP_DATA;
3787 }
3788 }
3789
3790 /* Handle compiling via the native interface */
3791
3792 else
3793 #endif /* !defined NOPOSIX */
3794
3795 {
3796 /* In 16- or 32-bit mode, convert the input. */
3797
3798 #ifdef SUPPORT_PCRE16
3799 if (pcre_mode == PCRE16_MODE)
3800 {
3801 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3802 {
3803 case -1:
3804 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3805 "converted to UTF-16\n");
3806 goto SKIP_DATA;
3807
3808 case -2:
3809 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3810 "cannot be converted to UTF-16\n");
3811 goto SKIP_DATA;
3812
3813 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3814 fprintf(outfile, "**Failed: character value greater than 0xffff "
3815 "cannot be converted to 16-bit in non-UTF mode\n");
3816 goto SKIP_DATA;
3817
3818 default:
3819 break;
3820 }
3821 p = (pcre_uint8 *)buffer16;
3822 }
3823 #endif
3824
3825 #ifdef SUPPORT_PCRE32
3826 if (pcre_mode == PCRE32_MODE)
3827 {
3828 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3829 {
3830 case -1:
3831 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3832 "converted to UTF-32\n");
3833 goto SKIP_DATA;
3834
3835 case -2:
3836 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3837 "cannot be converted to UTF-32\n");
3838 goto SKIP_DATA;
3839
3840 case -3:
3841 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3842 goto SKIP_DATA;
3843
3844 default:
3845 break;
3846 }
3847 p = (pcre_uint8 *)buffer32;
3848 }
3849 #endif
3850
3851 /* Compile many times when timing */
3852
3853 if (timeit > 0)
3854 {
3855 register int i;
3856 clock_t time_taken;
3857 clock_t start_time = clock();
3858 for (i = 0; i < timeit; i++)
3859 {
3860 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3861 if (re != NULL) free(re);
3862 }
3863 time_taken = clock() - start_time;
3864 fprintf(outfile, "Compile time %.4f milliseconds\n",
3865 (((double)time_taken * 1000.0) / (double)timeit) /
3866 (double)CLOCKS_PER_SEC);
3867 }
3868
3869 first_gotten_store = 0;
3870 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3871
3872 /* Compilation failed; go back for another re, skipping to blank line
3873 if non-interactive. */
3874
3875 if (re == NULL)
3876 {
3877 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3878 SKIP_DATA:
3879 if (infile != stdin)
3880 {
3881 for (;;)
3882 {
3883 if (extend_inputline(infile, buffer, NULL) == NULL)
3884 {
3885 done = 1;
3886 goto CONTINUE;
3887 }
3888 len = (int)strlen((char *)buffer);
3889 while (len > 0 && isspace(buffer[len-1])) len--;
3890 if (len == 0) break;
3891 }
3892 fprintf(outfile, "\n");
3893 }
3894 goto CONTINUE;
3895 }
3896
3897 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3898 within the regex; check for this so that we know how to process the data
3899 lines. */
3900
3901 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3902 goto SKIP_DATA;
3903 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3904
3905 /* Extract the size for possible writing before possibly flipping it,
3906 and remember the store that was got. */
3907
3908 true_size = REAL_PCRE_SIZE(re);
3909 regex_gotten_store = first_gotten_store;
3910
3911 /* Output code size information if requested */
3912
3913 if (log_store)
3914 {
3915 int name_count, name_entry_size, real_pcre_size;
3916
3917 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3918 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3919 real_pcre_size = 0;
3920 #ifdef SUPPORT_PCRE8
3921 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3922 real_pcre_size = sizeof(real_pcre);
3923 #endif
3924 #ifdef SUPPORT_PCRE16
3925 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3926 real_pcre_size = sizeof(real_pcre16);
3927 #endif
3928 #ifdef SUPPORT_PCRE32
3929 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3930 real_pcre_size = sizeof(real_pcre32);
3931 #endif
3932 fprintf(outfile, "Memory allocation (code space): %d\n",
3933 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3934 }
3935
3936 /* If -s or /S was present, study the regex to generate additional info to
3937 help with the matching, unless the pattern has the SS option, which
3938 suppresses the effect of /S (used for a few test patterns where studying is
3939 never sensible). */
3940
3941 if (do_study || (force_study >= 0 && !no_force_study))
3942 {
3943 if (timeit > 0)
3944 {
3945 register int i;
3946 clock_t time_taken;
3947 clock_t start_time = clock();
3948 for (i = 0; i < timeit; i++)
3949 {
3950 PCRE_STUDY(extra, re, study_options, &error);
3951 }
3952 time_taken = clock() - start_time;
3953 if (extra != NULL)
3954 {
3955 PCRE_FREE_STUDY(extra);
3956 }
3957 fprintf(outfile, " Study time %.4f milliseconds\n",
3958 (((double)time_taken * 1000.0) / (double)timeit) /
3959 (double)CLOCKS_PER_SEC);
3960 }
3961 PCRE_STUDY(extra, re, study_options, &error);
3962 if (error != NULL)
3963 fprintf(outfile, "Failed to study: %s\n", error);
3964 else if (extra != NULL)
3965 {
3966 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3967 if (log_store)
3968 {
3969 size_t jitsize;
3970 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3971 jitsize != 0)
3972 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3973 }
3974 }
3975 }
3976
3977 /* If /K was present, we set up for handling MARK data. */
3978
3979 if (do_mark)
3980 {
3981 if (extra == NULL)
3982 {
3983 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3984 extra->flags = 0;
3985 }
3986 extra->mark = &markptr;
3987 extra->flags |= PCRE_EXTRA_MARK;
3988 }
3989
3990 /* Extract and display information from the compiled data if required. */
3991
3992 SHOW_INFO:
3993
3994 if (do_debug)
3995 {
3996 fprintf(outfile, "------------------------------------------------------------------\n");
3997 PCRE_PRINTINT(re, outfile, debug_lengths);
3998 }
3999
4000 /* We already have the options in get_options (see above) */
4001
4002 if (do_showinfo)
4003 {
4004 unsigned long int all_options;
4005 pcre_uint32 first_char, need_char;
4006 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4007 hascrorlf, maxlookbehind;
4008 int nameentrysize, namecount;
4009 const pcre_uint8 *nametable;
4010
4011 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4012 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4013 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4014 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4015 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4016 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4017 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4018 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4019 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4020 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4021 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4022 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4023 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4024 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4025 != 0)
4026 goto SKIP_DATA;
4027
4028 if (size != regex_gotten_store) fprintf(outfile,
4029 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4030 (int)size, (int)regex_gotten_store);
4031
4032 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4033 if (backrefmax > 0)
4034 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4035
4036 if (namecount > 0)
4037 {
4038 fprintf(outfile, "Named capturing subpatterns:\n");
4039 while (namecount-- > 0)
4040 {
4041 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4042 int length = (int)STRLEN(nametable + imm2_size);
4043 fprintf(outfile, " ");
4044 PCHARSV(nametable, imm2_size, length, outfile);
4045 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4046 #ifdef SUPPORT_PCRE32
4047 if (pcre_mode == PCRE32_MODE)
4048 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4049 #endif
4050 #ifdef SUPPORT_PCRE16
4051 if (pcre_mode == PCRE16_MODE)
4052 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4053 #endif
4054 #ifdef SUPPORT_PCRE8
4055 if (pcre_mode == PCRE8_MODE)
4056 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4057 #endif
4058 nametable += nameentrysize * CHAR_SIZE;
4059 }
4060 }
4061
4062 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4063 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4064
4065 all_options = REAL_PCRE_OPTIONS(re);
4066 if (do_flip) all_options = swap_uint32(all_options);
4067
4068 if (get_options == 0) fprintf(outfile, "No options\n");
4069 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4070 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4071 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4072 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4073 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4074 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4075 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4076 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4077 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4078 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4079 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4080 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4081 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4082 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4083 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4084 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4085 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4086 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4087
4088 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4089
4090 switch (get_options & PCRE_NEWLINE_BITS)
4091 {
4092 case PCRE_NEWLINE_CR:
4093 fprintf(outfile, "Forced newline sequence: CR\n");
4094 break;
4095
4096 case PCRE_NEWLINE_LF:
4097 fprintf(outfile, "Forced newline sequence: LF\n");
4098 break;
4099
4100 case PCRE_NEWLINE_CRLF:
4101 fprintf(outfile, "Forced newline sequence: CRLF\n");
4102 break;
4103
4104 case PCRE_NEWLINE_ANYCRLF:
4105 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4106 break;
4107
4108 case PCRE_NEWLINE_ANY:
4109 fprintf(outfile, "Forced newline sequence: ANY\n");
4110 break;
4111
4112 default:
4113 break;
4114 }
4115
4116 if (first_char_set == 2)
4117 {
4118 fprintf(outfile, "First char at start or follows newline\n");
4119 }
4120 else if (first_char_set == 1)
4121 {
4122 const char *caseless =
4123 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4124 "" : " (caseless)";
4125
4126 if (PRINTOK(first_char))
4127 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4128 else
4129 {
4130 fprintf(outfile, "First char = ");
4131 pchar(first_char, outfile);
4132 fprintf(outfile, "%s\n", caseless);
4133 }
4134 }
4135 else
4136 {
4137 fprintf(outfile, "No first char\n");
4138 }
4139
4140 if (need_char_set == 0)
4141 {
4142 fprintf(outfile, "No need char\n");
4143 }
4144 else
4145 {
4146 const char *caseless =
4147 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4148 "" : " (caseless)";
4149
4150 if (PRINTOK(need_char))
4151 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4152 else
4153 {
4154 fprintf(outfile, "Need char = ");
4155 pchar(need_char, outfile);
4156 fprintf(outfile, "%s\n", caseless);
4157 }
4158 }
4159
4160 if (maxlookbehind > 0)
4161 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4162
4163 /* Don't output study size; at present it is in any case a fixed
4164 value, but it varies, depending on the computer architecture, and
4165 so messes up the test suite. (And with the /F option, it might be
4166 flipped.) If study was forced by an external -s, don't show this
4167 information unless -i or -d was also present. This means that, except
4168 when auto-callouts are involved, the output from runs with and without
4169 -s should be identical. */
4170
4171 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4172 {
4173 if (extra == NULL)
4174 fprintf(outfile, "Study returned NULL\n");
4175 else
4176 {
4177 pcre_uint8 *start_bits = NULL;
4178 int minlength;
4179
4180 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4181 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4182
4183 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4184 {
4185 if (start_bits == NULL)
4186 fprintf(outfile, "No set of starting bytes\n");
4187 else
4188 {
4189 int i;
4190 int c = 24;
4191 fprintf(outfile, "Starting byte set: ");
4192 for (i = 0; i < 256; i++)
4193 {
4194 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4195 {
4196 if (c > 75)
4197 {
4198 fprintf(outfile, "\n ");
4199 c = 2;
4200 }
4201 if (PRINTOK(i) && i != ' ')
4202 {
4203 fprintf(outfile, "%c ", i);
4204 c += 2;
4205 }
4206 else
4207 {
4208 fprintf(outfile, "\\x%02x ", i);
4209 c += 5;
4210 }
4211 }
4212 }
4213 fprintf(outfile, "\n");
4214 }
4215 }
4216 }
4217
4218 /* Show this only if the JIT was set by /S, not by -s. */
4219
4220 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4221 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4222 {
4223 int jit;
4224 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4225 {
4226 if (jit)
4227 fprintf(outfile, "JIT study was successful\n");
4228 else
4229 #ifdef SUPPORT_JIT
4230 fprintf(outfile, "JIT study was not successful\n");
4231 #else
4232 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4233 #endif
4234 }
4235 }
4236 }
4237 }
4238
4239 /* If the '>' option was present, we write out the regex to a file, and
4240 that is all. The first 8 bytes of the file are the regex length and then
4241 the study length, in big-endian order. */
4242
4243 if (to_file != NULL)
4244 {
4245 FILE *f = fopen((char *)to_file, "wb");
4246 if (f == NULL)
4247 {
4248 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4249 }
4250 else
4251 {
4252 pcre_uint8 sbuf[8];
4253
4254 if (do_flip) regexflip(re, extra);
4255 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4256 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4257 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4258 sbuf[3] = (pcre_uint8)((true_size) & 255);
4259 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4260 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4261 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4262 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4263
4264 if (fwrite(sbuf, 1, 8, f) < 8 ||
4265 fwrite(re, 1, true_size, f) < true_size)
4266 {
4267 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4268 }
4269 else
4270 {
4271 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4272
4273 /* If there is study data, write it. */
4274
4275 if (extra != NULL)
4276 {
4277 if (fwrite(extra->study_data, 1, true_study_size, f) <
4278 true_study_size)
4279 {
4280 fprintf(outfile, "Write error on %s: %s\n", to_file,
4281 strerror(errno));
4282 }
4283 else fprintf(outfile, "Study data written to %s\n", to_file);
4284 }
4285 }
4286 fclose(f);
4287 }
4288
4289 new_free(re);
4290 if (extra != NULL)
4291 {
4292 PCRE_FREE_STUDY(extra);
4293 }
4294 if (locale_set)
4295 {
4296 new_free((void *)tables);
4297 setlocale(LC_CTYPE, "C");
4298 locale_set = 0;
4299 }
4300 continue; /* With next regex */
4301 }
4302 } /* End of non-POSIX compile */
4303
4304 /* Read data lines and test them */
4305
4306 for (;;)
4307 {
4308 #ifdef SUPPORT_PCRE8
4309 pcre_uint8 *q8;
4310 #endif
4311 #ifdef SUPPORT_PCRE16
4312 pcre_uint16 *q16;
4313 #endif
4314 #ifdef SUPPORT_PCRE32
4315 pcre_uint32 *q32;
4316 #endif
4317 pcre_uint8 *bptr;
4318 int *use_offsets = offsets;
4319 int use_size_offsets = size_offsets;
4320 int callout_data = 0;
4321 int callout_data_set = 0;
4322 int count;
4323 pcre_uint32 c;
4324 int copystrings = 0;
4325 int find_match_limit = default_find_match_limit;
4326 int getstrings = 0;
4327 int getlist = 0;
4328 int gmatched = 0;
4329 int start_offset = 0;
4330 int start_offset_sign = 1;
4331 int g_notempty = 0;
4332 int use_dfa = 0;
4333
4334 *copynames = 0;
4335 *getnames = 0;
4336
4337 #ifdef SUPPORT_PCRE32
4338 cn32ptr = copynames;
4339 gn32ptr = getnames;
4340 #endif
4341 #ifdef SUPPORT_PCRE16
4342 cn16ptr = copynames16;
4343 gn16ptr = getnames16;
4344 #endif
4345 #ifdef SUPPORT_PCRE8
4346 cn8ptr = copynames8;
4347 gn8ptr = getnames8;
4348 #endif
4349
4350 SET_PCRE_CALLOUT(callout);
4351 first_callout = 1;
4352 last_callout_mark = NULL;
4353 callout_extra = 0;
4354 callout_count = 0;
4355 callout_fail_count = 999999;
4356 callout_fail_id = -1;
4357 show_malloc = 0;
4358 options = 0;
4359
4360 if (extra != NULL) extra->flags &=
4361 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4362
4363 len = 0;
4364 for (;;)
4365 {
4366 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4367 {
4368 if (len > 0) /* Reached EOF without hitting a newline */
4369 {
4370 fprintf(outfile, "\n");
4371 break;
4372 }
4373 done = 1;
4374 goto CONTINUE;
4375 }
4376 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4377 len = (int)strlen((char *)buffer);
4378 if (buffer[len-1] == '\n') break;
4379 }
4380
4381 while (len > 0 && isspace(buffer[len-1])) len--;
4382 buffer[len] = 0;
4383 if (len == 0) break;
4384
4385 p = buffer;
4386 while (isspace(*p)) p++;
4387
4388 #ifndef NOUTF
4389 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4390 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4391 if (use_utf)
4392 {
4393 pcre_uint8 *q;
4394 pcre_uint32 cc;
4395 int n = 1;
4396
4397 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4398 if (n <= 0)
4399 {
4400 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4401 goto NEXT_DATA;
4402 }
4403 }
4404 #endif
4405
4406 #ifdef SUPPORT_VALGRIND
4407 /* Mark the dbuffer as addressable but undefined again. */
4408 if (dbuffer != NULL)
4409 {
4410 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4411 }
4412 #endif
4413
4414 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4415 the number of pcre_uchar units that will be needed. */
4416 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4417 {
4418 dbuffer_size *= 2;
4419 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4420 if (dbuffer == NULL)
4421 {
4422 fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size);
4423 exit(1);
4424 }
4425 }
4426
4427 #ifdef SUPPORT_PCRE8
4428 q8 = (pcre_uint8 *) dbuffer;
4429 #endif
4430 #ifdef SUPPORT_PCRE16
4431 q16 = (pcre_uint16 *) dbuffer;
4432 #endif
4433 #ifdef SUPPORT_PCRE32
4434 q32 = (pcre_uint32 *) dbuffer;
4435 #endif
4436
4437 while ((c = *p++) != 0)
4438 {
4439 int i = 0;
4440 int n = 0;
4441
4442 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4443 In non-UTF mode, allow the value of the byte to fall through to later,
4444 where values greater than 127 are turned into UTF-8 when running in
4445 16-bit or 32-bit mode. */
4446
4447 if (c != '\\')
4448 {
4449 #ifndef NOUTF
4450 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4451 #endif
4452 }
4453
4454 /* Handle backslash escapes */
4455
4456 else switch ((c = *p++))
4457 {
4458 case 'a': c = 7; break;
4459 case 'b': c = '\b'; break;
4460 case 'e': c = 27; break;
4461 case 'f': c = '\f'; break;
4462 case 'n': c = '\n'; break;
4463 case 'r': c = '\r'; break;
4464 case 't': c = '\t'; break;
4465 case 'v': c = '\v'; break;
4466
4467 case '0': case '1': case '2': case '3':
4468 case '4': case '5': case '6': case '7':
4469 c -= '0';
4470 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4471 c = c * 8 + *p++ - '0';
4472 break;
4473
4474 case 'x':
4475 if (*p == '{')
4476 {
4477 pcre_uint8 *pt = p;
4478 c = 0;
4479
4480 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4481 when isxdigit() is a macro that refers to its argument more than
4482 once. This is banned by the C Standard, but apparently happens in at
4483 least one MacOS environment. */
4484
4485 for (pt++; isxdigit(*pt); pt++)
4486 {
4487 if (++i == 9)
4488 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4489 "using only the first eight.\n");
4490 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4491 }
4492 if (*pt == '}')
4493 {
4494 p = pt + 1;
4495 break;
4496 }
4497 /* Not correct form for \x{...}; fall through */
4498 }
4499
4500 /* \x without {} always defines just one byte in 8-bit mode. This
4501 allows UTF-8 characters to be constructed byte by byte, and also allows
4502 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4503 Otherwise, pass it down to later code so that it can be turned into
4504 UTF-8 when running in 16/32-bit mode. */
4505
4506 c = 0;
4507 while (i++ < 2 && isxdigit(*p))
4508 {
4509 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4510 p++;
4511 }
4512 #if !defined NOUTF && defined SUPPORT_PCRE8
4513 if (use_utf && (pcre_mode == PCRE8_MODE))
4514 {
4515 *q8++ = c;
4516 continue;
4517 }
4518 #endif
4519 break;
4520
4521 case 0: /* \ followed by EOF allows for an empty line */
4522 p--;
4523 continue;
4524
4525 case '>':
4526 if (*p == '-')
4527 {
4528 start_offset_sign = -1;
4529 p++;
4530 }
4531 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4532 start_offset *= start_offset_sign;
4533 continue;
4534
4535 case 'A': /* Option setting */
4536 options |= PCRE_ANCHORED;
4537 continue;
4538
4539 case 'B':
4540 options |= PCRE_NOTBOL;
4541 continue;
4542
4543 case 'C':
4544 if (isdigit(*p)) /* Set copy string */
4545 {
4546 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4547 copystrings |= 1 << n;
4548 }
4549 else if (isalnum(*p))
4550 {
4551 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4552 }
4553 else if (*p == '+')
4554 {
4555 callout_extra = 1;
4556 p++;
4557 }
4558 else if (*p == '-')
4559 {
4560 SET_PCRE_CALLOUT(NULL);
4561 p++;
4562 }
4563 else if (*p == '!')
4564 {
4565 callout_fail_id = 0;
4566 p++;
4567 while(isdigit(*p))
4568 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4569 callout_fail_count = 0;
4570 if (*p == '!')
4571 {
4572 p++;
4573 while(isdigit(*p))
4574 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4575 }
4576 }
4577 else if (*p == '*')
4578 {
4579 int sign = 1;
4580 callout_data = 0;
4581 if (*(++p) == '-') { sign = -1; p++; }
4582 while(isdigit(*p))
4583 callout_data = callout_data * 10 + *p++ - '0';
4584 callout_data *= sign;
4585 callout_data_set = 1;
4586 }
4587 continue;
4588
4589 #if !defined NODFA
4590 case 'D':
4591 #if !defined NOPOSIX
4592 if (posix || do_posix)
4593 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4594 else
4595 #endif
4596 use_dfa = 1;
4597 continue;
4598 #endif
4599
4600 #if !defined NODFA
4601 case 'F':
4602 options |= PCRE_DFA_SHORTEST;
4603 continue;
4604 #endif
4605
4606 case 'G':
4607 if (isdigit(*p))
4608 {
4609 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4610 getstrings |= 1 << n;
4611 }
4612 else if (isalnum(*p))
4613 {
4614 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4615 }
4616 continue;
4617
4618 case 'J':
4619 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4620 if (extra != NULL
4621 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4622 && extra->executable_jit != NULL)
4623 {
4624 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4625 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4626 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4627 }
4628 continue;
4629
4630 case 'L':
4631 getlist = 1;
4632 continue;
4633
4634 case 'M':
4635 find_match_limit = 1;
4636 continue;
4637
4638 case 'N':
4639 if ((options & PCRE_NOTEMPTY) != 0)
4640 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4641 else
4642 options |= PCRE_NOTEMPTY;
4643 continue;
4644
4645 case 'O':
4646 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4647 if (n > size_offsets_max)
4648 {
4649 size_offsets_max = n;
4650 free(offsets);
4651 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4652 if (offsets == NULL)
4653 {
4654 printf("** Failed to get %d bytes of memory for offsets vector\n",
4655 (int)(size_offsets_max * sizeof(int)));
4656 yield = 1;
4657 goto EXIT;
4658 }
4659 }
4660 use_size_offsets = n;
4661 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4662 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4663 continue;
4664
4665 case 'P':
4666 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4667 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4668 continue;
4669
4670 case 'Q':
4671 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4672 if (extra == NULL)
4673 {
4674 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4675 extra->flags = 0;
4676 }
4677 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4678 extra->match_limit_recursion = n;
4679 continue;
4680
4681 case 'q':
4682 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4683 if (extra == NULL)
4684 {
4685 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4686 extra->flags = 0;
4687 }
4688 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4689 extra->match_limit = n;
4690 continue;
4691
4692 #if !defined NODFA
4693 case 'R':
4694 options |= PCRE_DFA_RESTART;
4695 continue;
4696 #endif
4697
4698 case 'S':
4699 show_malloc = 1;
4700 continue;
4701
4702 case 'Y':
4703 options |= PCRE_NO_START_OPTIMIZE;
4704 continue;
4705
4706 case 'Z':
4707 options |= PCRE_NOTEOL;
4708 continue;
4709
4710 case '?':
4711 options |= PCRE_NO_UTF8_CHECK;
4712 continue;
4713
4714 case '<':
4715 {
4716 int x = check_newline(p, outfile);
4717 if (x == 0) goto NEXT_DATA;
4718 options |= x;
4719 while (*p++ != '>');
4720 }
4721 continue;
4722 }
4723
4724 /* We now have a character value in c that may be greater than 255.
4725 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4726 than 127 in UTF mode must have come from \x{...} or octal constructs
4727 because values from \x.. get this far only in non-UTF mode. */
4728
4729 #ifdef SUPPORT_PCRE8
4730 if (pcre_mode == PCRE8_MODE)
4731 {
4732 #ifndef NOUTF
4733 if (use_utf)
4734 {
4735 if (c > 0x7fffffff)
4736 {
4737 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4738 "and so cannot be converted to UTF-8\n", c);
4739 goto NEXT_DATA;
4740 }
4741 q8 += ord2utf8(c, q8);
4742 }
4743 else
4744 #endif
4745 {
4746 if (c > 0xffu)
4747 {
4748 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4749 "and UTF-8 mode is not enabled.\n", c);
4750 fprintf(outfile, "** Truncation will probably give the wrong "
4751 "result.\n");
4752 }
4753 *q8++ = c;
4754 }
4755 }
4756 #endif
4757 #ifdef SUPPORT_PCRE16
4758 if (pcre_mode == PCRE16_MODE)
4759 {
4760 #ifndef NOUTF
4761 if (use_utf)
4762 {
4763 if (c > 0x10ffffu)
4764 {
4765 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4766 "0x10ffff and so cannot be converted to UTF-16\n", c);
4767 goto NEXT_DATA;
4768 }
4769 else if (c >= 0x10000u)
4770 {
4771 c-= 0x10000u;
4772 *q16++ = 0xD800 | (c >> 10);
4773 *q16++ = 0xDC00 | (c & 0x3ff);
4774 }
4775 else
4776 *q16++ = c;
4777 }
4778 else
4779 #endif
4780 {
4781 if (c > 0xffffu)
4782 {
4783 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4784 "and UTF-16 mode is not enabled.\n", c);
4785 fprintf(outfile, "** Truncation will probably give the wrong "
4786 "result.\n");
4787 }
4788
4789 *q16++ = c;
4790 }
4791 }
4792 #endif
4793 #ifdef SUPPORT_PCRE32
4794 if (pcre_mode == PCRE32_MODE)
4795 {
4796 *q32++ = c;
4797 }
4798 #endif
4799
4800 }
4801
4802 /* Reached end of subject string */
4803
4804 #ifdef SUPPORT_PCRE8
4805 if (pcre_mode == PCRE8_MODE)
4806 {
4807 *q8 = 0;
4808 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4809 }
4810 #endif
4811 #ifdef SUPPORT_PCRE16
4812 if (pcre_mode == PCRE16_MODE)
4813 {
4814 *q16 = 0;
4815 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4816 }
4817 #endif
4818 #ifdef SUPPORT_PCRE32
4819 if (pcre_mode == PCRE32_MODE)
4820 {
4821 *q32 = 0;
4822 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4823 }
4824 #endif
4825
4826 /* If we're compiling with explicit valgrind support, mark the data from after
4827 its end to the end of the buffer as unaddressable, so that a read over the end
4828 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4829 If we're not building with valgrind support, move the data to the end of the
4830 buffer so that a read over the end might at least cause a crash.
4831 If we are using the POSIX interface, we must include the terminating zero. */
4832
4833 bptr = dbuffer;
4834
4835 #if !defined NOPOSIX
4836 if (posix || do_posix)
4837 {
4838 #ifdef SUPPORT_VALGRIND
4839 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4840 #else
4841 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4842 bptr += dbuffer_size - len - 1;
4843 #endif
4844 }
4845 else
4846 #endif
4847 {
4848 #ifdef SUPPORT_VALGRIND
4849 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4850 #else
4851 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4852 #endif
4853 }
4854
4855 if ((all_use_dfa || use_dfa) && find_match_limit)
4856 {
4857 printf("**Match limit not relevant for DFA matching: ignored\n");
4858 find_match_limit = 0;
4859 }
4860
4861 /* Handle matching via the POSIX interface, which does not
4862 support timing or playing with the match limit or callout data. */
4863
4864 #if !defined NOPOSIX
4865 if (posix || do_posix)
4866 {
4867 int rc;
4868 int eflags = 0;
4869 regmatch_t *pmatch = NULL;
4870 if (use_size_offsets > 0)
4871 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4872 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4873 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4874 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4875
4876 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4877
4878 if (rc != 0)
4879 {
4880 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4881 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4882 }
4883 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4884 {
4885 fprintf(outfile, "Matched with REG_NOSUB\n");
4886 }
4887 else
4888 {
4889 size_t i;
4890 for (i = 0; i < (size_t)use_size_offsets; i++)
4891 {
4892 if (pmatch[i].rm_so >= 0)
4893 {
4894 fprintf(outfile, "%2d: ", (int)i);
4895 PCHARSV(dbuffer, pmatch[i].rm_so,
4896 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4897 fprintf(outfile, "\n");
4898 if (do_showcaprest || (i == 0 && do_showrest))
4899 {
4900 fprintf(outfile, "%2d+ ", (int)i);
4901 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4902 outfile);
4903 fprintf(outfile, "\n");
4904 }
4905 }
4906 }
4907 }
4908 free(pmatch);
4909 goto NEXT_DATA;
4910 }
4911
4912 #endif /* !defined NOPOSIX */
4913
4914 /* Handle matching via the native interface - repeats for /g and /G */
4915
4916 /* Ensure that there is a JIT callback if we want to verify that JIT was
4917 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4918
4919 if (verify_jit && jit_stack == NULL && extra != NULL)
4920 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4921
4922 for (;; gmatched++) /* Loop for /g or /G */
4923 {
4924 markptr = NULL;
4925 jit_was_used = FALSE;
4926
4927 if (timeitm > 0)
4928 {
4929 register int i;
4930 clock_t time_taken;
4931 clock_t start_time = clock();
4932
4933 #if !defined NODFA
4934 if (all_use_dfa || use_dfa)
4935 {
4936 if ((options & PCRE_DFA_RESTART) != 0)
4937 {
4938 fprintf(outfile, "Timing DFA restarts is not supported\n");
4939 break;
4940 }
4941 if (dfa_workspace == NULL)
4942 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4943 for (i = 0; i < timeitm; i++)
4944 {
4945 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4946 (options | g_notempty), use_offsets, use_size_offsets,
4947 dfa_workspace, DFA_WS_DIMENSION);
4948 }
4949 }
4950 else
4951 #endif
4952
4953 for (i = 0; i < timeitm; i++)
4954 {
4955 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4956 (options | g_notempty), use_offsets, use_size_offsets);
4957 }
4958 time_taken = clock() - start_time;
4959 fprintf(outfile, "Execute time %.4f milliseconds\n",
4960 (((double)time_taken * 1000.0) / (double)timeitm) /
4961 (double)CLOCKS_PER_SEC);
4962 }
4963
4964 /* If find_match_limit is set, we want to do repeated matches with
4965 varying limits in order to find the minimum value for the match limit and
4966 for the recursion limit. The match limits are relevant only to the normal
4967 running of pcre_exec(), so disable the JIT optimization. This makes it
4968 possible to run the same set of tests with and without JIT externally
4969 requested. */
4970
4971 if (find_match_limit)
4972 {
4973 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4974 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4975 extra->flags = 0;
4976
4977 (void)check_match_limit(re, extra, bptr, len, start_offset,
4978 options|g_notempty, use_offsets, use_size_offsets,
4979 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4980 PCRE_ERROR_MATCHLIMIT, "match()");
4981
4982 count = check_match_limit(re, extra, bptr, len, start_offset,
4983 options|g_notempty, use_offsets, use_size_offsets,
4984 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4985 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4986 }
4987
4988 /* If callout_data is set, use the interface with additional data */
4989
4990 else if (callout_data_set)
4991 {
4992 if (extra == NULL)
4993 {
4994 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4995 extra->flags = 0;
4996 }
4997 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4998 extra->callout_data = &callout_data;
4999 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5000 options | g_notempty, use_offsets, use_size_offsets);
5001 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5002 }
5003
5004 /* The normal case is just to do the match once, with the default
5005 value of match_limit. */
5006
5007 #if !defined NODFA
5008 else if (all_use_dfa || use_dfa)
5009 {
5010 if (dfa_workspace == NULL)
5011 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5012 if (dfa_matched++ == 0)
5013 dfa_workspace[0] = -1; /* To catch bad restart */
5014 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5015 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5016 DFA_WS_DIMENSION);
5017 if (count == 0)
5018 {
5019 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5020 count = use_size_offsets/2;
5021 }
5022 }
5023 #endif
5024
5025 else
5026 {
5027 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5028 options | g_notempty, use_offsets, use_size_offsets);
5029 if (count == 0)
5030 {
5031 fprintf(outfile, "Matched, but too many substrings\n");
5032 count = use_size_offsets/3;
5033 }
5034 }
5035
5036 /* Matched */
5037
5038 if (count >= 0)
5039 {
5040 int i, maxcount;
5041 void *cnptr, *gnptr;
5042
5043 #if !defined NODFA
5044 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5045 #endif
5046 maxcount = use_size_offsets/3;
5047
5048 /* This is a check against a lunatic return value. */
5049
5050 if (count > maxcount)
5051 {
5052 fprintf(outfile,
5053 "** PCRE error: returned count %d is too big for offset size %d\n",
5054 count, use_size_offsets);
5055 count = use_size_offsets/3;
5056 if (do_g || do_G)
5057 {
5058 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5059 do_g = do_G = FALSE; /* Break g/G loop */
5060 }
5061 }
5062
5063 /* do_allcaps requests showing of all captures in the pattern, to check
5064 unset ones at the end. */
5065
5066 if (do_allcaps)
5067 {
5068 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5069 goto SKIP_DATA;
5070 count++; /* Allow for full match */
5071 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5072 }
5073
5074 /* Output the captured substrings */
5075
5076 for (i = 0; i < count * 2; i += 2)
5077 {
5078 if (use_offsets[i] < 0)
5079 {
5080 if (use_offsets[i] != -1)
5081 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5082 use_offsets[i], i);
5083 if (use_offsets[i+1] != -1)
5084 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5085 use_offsets[i+1], i+1);
5086 fprintf(outfile, "%2d: <unset>\n", i/2);
5087 }
5088 else
5089 {
5090 fprintf(outfile, "%2d: ", i/2);
5091 PCHARSV(bptr, use_offsets[i],
5092 use_offsets[i+1] - use_offsets[i], outfile);
5093 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5094 fprintf(outfile, "\n");
5095 if (do_showcaprest || (i == 0 && do_showrest))
5096 {
5097 fprintf(outfile, "%2d+ ", i/2);
5098 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5099 outfile);
5100 fprintf(outfile, "\n");
5101 }
5102 }
5103 }
5104
5105 if (markptr != NULL)
5106 {
5107 fprintf(outfile, "MK: ");
5108 PCHARSV(markptr, 0, -1, outfile);
5109 fprintf(outfile, "\n");
5110 }
5111
5112 for (i = 0; i < 32; i++)
5113 {
5114 if ((copystrings & (1 << i)) != 0)
5115 {
5116 int rc;
5117 char copybuffer[256];
5118 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5119 copybuffer, sizeof(copybuffer));
5120 if (rc < 0)
5121 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5122 else
5123 {
5124 fprintf(outfile, "%2dC ", i);
5125 PCHARSV(copybuffer, 0, rc, outfile);
5126 fprintf(outfile, " (%d)\n", rc);
5127 }
5128 }
5129 }
5130
5131 cnptr = copynames;
5132 for (;;)
5133 {
5134 int rc;
5135 char copybuffer[256];
5136
5137 #ifdef SUPPORT_PCRE32
5138 if (pcre_mode == PCRE32_MODE)
5139 {
5140 if (*(pcre_uint32 *)cnptr == 0) break;
5141 }
5142 #endif
5143 #ifdef SUPPORT_PCRE16
5144 if (pcre_mode == PCRE16_MODE)
5145 {
5146 if (*(pcre_uint16 *)cnptr == 0) break;
5147 }
5148 #endif
5149 #ifdef SUPPORT_PCRE8
5150 if (pcre_mode == PCRE8_MODE)
5151 {
5152 if (*(pcre_uint8 *)cnptr == 0) break;
5153 }
5154 #endif
5155
5156 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5157 cnptr, copybuffer, sizeof(copybuffer));
5158
5159 if (rc < 0)
5160 {
5161 fprintf(outfile, "copy substring ");
5162 PCHARSV(cnptr, 0, -1, outfile);
5163 fprintf(outfile, " failed %d\n", rc);
5164 }
5165 else
5166 {
5167 fprintf(outfile, " C ");
5168 PCHARSV(copybuffer, 0, rc, outfile);
5169 fprintf(outfile, " (%d) ", rc);
5170 PCHARSV(cnptr, 0, -1, outfile);
5171 putc('\n', outfile);
5172 }
5173
5174 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5175 }
5176
5177 for (i = 0; i < 32; i++)
5178 {
5179 if ((getstrings & (1 << i)) != 0)
5180 {
5181 int rc;
5182 const char *substring;
5183 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5184 if (rc < 0)
5185 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5186 else
5187 {
5188 fprintf(outfile, "%2dG ", i);
5189 PCHARSV(substring, 0, rc, outfile);
5190 fprintf(outfile, " (%d)\n", rc);
5191 PCRE_FREE_SUBSTRING(substring);
5192 }
5193 }
5194 }
5195
5196 gnptr = getnames;
5197 for (;;)
5198 {
5199 int rc;
5200 const char *substring;
5201
5202 #ifdef SUPPORT_PCRE32
5203 if (pcre_mode == PCRE32_MODE)
5204 {
5205 if (*(pcre_uint32 *)gnptr == 0) break;
5206 }
5207 #endif
5208 #ifdef SUPPORT_PCRE16
5209 if (pcre_mode == PCRE16_MODE)
5210 {
5211 if (*(pcre_uint16 *)gnptr == 0) break;
5212 }
5213 #endif
5214 #ifdef SUPPORT_PCRE8
5215 if (pcre_mode == PCRE8_MODE)
5216 {
5217 if (*(pcre_uint8 *)gnptr == 0) break;
5218 }
5219 #endif
5220
5221 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5222 gnptr, &substring);
5223 if (rc < 0)
5224 {
5225 fprintf(outfile, "get substring ");
5226 PCHARSV(gnptr, 0, -1, outfile);
5227 fprintf(outfile, " failed %d\n", rc);
5228 }
5229 else
5230 {
5231 fprintf(outfile, " G ");
5232 PCHARSV(substring, 0, rc, outfile);
5233 fprintf(outfile, " (%d) ", rc);
5234 PCHARSV(gnptr, 0, -1, outfile);
5235 PCRE_FREE_SUBSTRING(substring);
5236 putc('\n', outfile);
5237 }
5238
5239 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5240 }
5241
5242 if (getlist)
5243 {
5244 int rc;
5245 const char **stringlist;
5246 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5247 if (rc < 0)
5248 fprintf(outfile, "get substring list failed %d\n", rc);
5249 else
5250 {
5251 for (i = 0; i < count; i++)
5252 {
5253 fprintf(outfile, "%2dL ", i);
5254 PCHARSV(stringlist[i], 0, -1, outfile);
5255 putc('\n', outfile);
5256 }
5257 if (stringlist[i] != NULL)
5258 fprintf(outfile, "string list not terminated by NULL\n");
5259 PCRE_FREE_SUBSTRING_LIST(stringlist);
5260 }
5261 }
5262 }
5263
5264 /* There was a partial match */
5265
5266 else if (count == PCRE_ERROR_PARTIAL)
5267 {
5268 if (markptr == NULL) fprintf(outfile, "Partial match");
5269 else
5270 {
5271 fprintf(outfile, "Partial match, mark=");
5272 PCHARSV(markptr, 0, -1, outfile);
5273 }
5274 if (use_size_offsets > 1)
5275 {
5276 fprintf(outfile, ": ");
5277 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5278 outfile);
5279 }
5280 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5281 fprintf(outfile, "\n");
5282 break; /* Out of the /g loop */
5283 }
5284
5285 /* Failed to match. If this is a /g or /G loop and we previously set
5286 g_notempty after a null match, this is not necessarily the end. We want
5287 to advance the start offset, and continue. We won't be at the end of the
5288 string - that was checked before setting g_notempty.
5289
5290 Complication arises in the case when the newline convention is "any",
5291 "crlf", or "anycrlf". If the previous match was at the end of a line
5292 terminated by CRLF, an advance of one character just passes the \r,
5293 whereas we should prefer the longer newline sequence, as does the code in
5294 pcre_exec(). Fudge the offset value to achieve this. We check for a
5295 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5296 find the default.
5297
5298 Otherwise, in the case of UTF-8 matching, the advance must be one
5299 character, not one byte. */
5300
5301 else
5302 {
5303 if (g_notempty != 0)
5304 {
5305 int onechar = 1;
5306 unsigned int obits = REAL_PCRE_OPTIONS(re);
5307 use_offsets[0] = start_offset;
5308 if ((obits & PCRE_NEWLINE_BITS) == 0)
5309 {
5310 int d;
5311 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5312 /* Note that these values are always the ASCII ones, even in
5313 EBCDIC environments. CR = 13, NL = 10. */
5314 obits = (d == 13)? PCRE_NEWLINE_CR :
5315 (d == 10)? PCRE_NEWLINE_LF :
5316 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5317 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5318 (d == -1)? PCRE_NEWLINE_ANY : 0;
5319 }
5320 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5321 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5322 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5323 &&
5324 start_offset < len - 1 && (
5325 #ifdef SUPPORT_PCRE8
5326 (pcre_mode == PCRE8_MODE &&
5327 bptr[start_offset] == '\r' &&
5328 bptr[start_offset + 1] == '\n') ||
5329 #endif
5330 #ifdef SUPPORT_PCRE16
5331 (pcre_mode == PCRE16_MODE &&
5332 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5333 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5334 #endif
5335 #ifdef SUPPORT_PCRE32
5336 (pcre_mode == PCRE32_MODE &&
5337 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5338 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5339 #endif
5340 0))
5341 onechar++;
5342 else if (use_utf)
5343 {
5344 while (start_offset + onechar < len)
5345 {
5346 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5347 onechar++;
5348 }
5349 }
5350 use_offsets[1] = start_offset + onechar;
5351 }
5352 else
5353 {
5354 switch(count)
5355 {
5356 case PCRE_ERROR_NOMATCH:
5357 if (gmatched == 0)
5358 {
5359 if (markptr == NULL)
5360 {
5361 fprintf(outfile, "No match");
5362 }
5363 else
5364 {
5365 fprintf(outfile, "No match, mark = ");
5366 PCHARSV(markptr, 0, -1, outfile);
5367 }
5368 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5369 putc('\n', outfile);
5370 }
5371 break;
5372
5373 case PCRE_ERROR_BADUTF8:
5374 case PCRE_ERROR_SHORTUTF8:
5375 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5376 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5377 8 * CHAR_SIZE);
5378 if (use_size_offsets >= 2)
5379 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5380 use_offsets[1]);
5381 fprintf(outfile, "\n");
5382 break;
5383
5384 case PCRE_ERROR_BADUTF8_OFFSET:
5385 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5386 8 * CHAR_SIZE);
5387 break;
5388
5389 default:
5390 if (count < 0 &&
5391 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5392 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5393 else
5394 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5395 break;
5396 }
5397
5398 break; /* Out of the /g loop */
5399 }
5400 }
5401
5402 /* If not /g or /G we are done */
5403
5404 if (!do_g && !do_G) break;
5405
5406 /* If we have matched an empty string, first check to see if we are at
5407 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5408 Perl's /g option does. This turns out to be rather cunning. First we set
5409 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5410 same point. If this fails (picked up above) we advance to the next
5411 character. */
5412
5413 g_notempty = 0;
5414
5415 if (use_offsets[0] == use_offsets[1])
5416 {
5417 if (use_offsets[0] == len) break;
5418 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5419 }
5420
5421 /* For /g, update the start offset, leaving the rest alone */
5422
5423 if (do_g) start_offset = use_offsets[1];
5424
5425 /* For /G, update the pointer and length */
5426
5427 else
5428 {
5429 bptr += use_offsets[1] * CHAR_SIZE;
5430 len -= use_offsets[1];
5431 }
5432 } /* End of loop for /g and /G */
5433
5434 NEXT_DATA: continue;
5435 } /* End of loop for data lines */
5436
5437 CONTINUE:
5438
5439 #if !defined NOPOSIX
5440 if (posix || do_posix) regfree(&preg);
5441 #endif
5442
5443 if (re != NULL) new_free(re);
5444 if (extra != NULL)
5445 {
5446 PCRE_FREE_STUDY(extra);
5447 }
5448 if (locale_set)
5449 {
5450 new_free((void *)tables);
5451 setlocale(LC_CTYPE, "C");
5452 locale_set = 0;
5453 }
5454 if (jit_stack != NULL)
5455 {
5456 PCRE_JIT_STACK_FREE(jit_stack);
5457 jit_stack = NULL;
5458 }
5459 }
5460
5461 if (infile == stdin) fprintf(outfile, "\n");
5462
5463 EXIT:
5464
5465 if (infile != NULL && infile != stdin) fclose(infile);
5466 if (outfile != NULL && outfile != stdout) fclose(outfile);
5467
5468 free(buffer);
5469 free(dbuffer);
5470 free(pbuffer);
5471 free(offsets);
5472
5473 #ifdef SUPPORT_PCRE16
5474 if (buffer16 != NULL) free(buffer16);
5475 #endif
5476 #ifdef SUPPORT_PCRE32
5477 if (buffer32 != NULL) free(buffer32);
5478 #endif
5479
5480 #if !defined NODFA
5481 if (dfa_workspace != NULL)
5482 free(dfa_workspace);
5483 #endif
5484
5485 return yield;
5486 }
5487
5488 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5