/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1140 - (show annotations)
Fri Oct 19 13:41:32 2012 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 163899 byte(s)
I found a neater way of defining the macros in the case where 2 out of 3 modes 
are compiled.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of the 8-bit, 16-bit and 32-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If the 8-bit and 16-bit modes are enabled, for example, pcre_compile.c is
43 compiled twice (the second time with COMPILE_PCRE16 defined). By contrast,
44 pcretest.c is compiled only once. Therefore, it must not make use of any of
45 the macros from pcre_internal.h that depend on COMPILE_PCRE8, COMPILE_PCRE16 or
46 COMPILE_PCRE32. It does, however, make use of SUPPORT_PCRE8, SUPPORT_PCRE16 and
47 SUPPORT_PCRE32 to ensure that it calls only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
#if defined(EBCDIC)
/* EBCDIC build: values 64..254 are shown as-is. */
#define PRINTABLE(c) (!((c) < 64 || (c) >= 255))
#else
/* Otherwise: values 32..126 are shown as-is. */
#define PRINTABLE(c) (!((c) < 32 || (c) >= 127))
#endif
187
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215 for all the pcre[16|32]_xxx functions (except the fullinfo function, which is
216 called only from one place and is handled differently). I couldn't dream up any
217 way of using a single macro to do this in a generic way, because of the many
218 different argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220 individual mode; then use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229
230 #define PCHARS8(lv, p, offset, len, f) \
231 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232
233 #define PCHARSV8(p, offset, len, f) \
234 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238
239 #define STRLEN8(p) ((int)strlen((char *)p))
240
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281
/* 8-bit wrapper for pcre_get_stringnumber(): looks up the name at ptr in the
compiled pattern given as the second argument and stores the result in n. The
pattern is taken from the macro argument, not from whatever variable called
"re" happens to be in scope where the macro is used. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #define pcre8_maketables pcre_maketables
307
308 #endif /* SUPPORT_PCRE8 */
309
310 /* -----------------------------------------------------------*/
311
312 #ifdef SUPPORT_PCRE16
313
314 #define PCHARS16(lv, p, offset, len, f) \
315 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
316
317 #define PCHARSV16(p, offset, len, f) \
318 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
319
320 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
321 p = read_capture_name16(p, cn16, re)
322
323 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
324
325 #define SET_PCRE_CALLOUT16(callout) \
326 pcre16_callout = (int (*)(pcre16_callout_block *))callout
327
328 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
329 pcre16_assign_jit_stack((pcre16_extra *)extra, \
330 (pcre16_jit_callback)callback, userdata)
331
332 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
333 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
334 tables)
335
336 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337 namesptr, cbuffer, size) \
338 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
340
341 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
342 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
343 (PCRE_UCHAR16 *)cbuffer, size/2)
344
345 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
346 offsets, size_offsets, workspace, size_workspace) \
347 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
348 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
349 workspace, size_workspace)
350
351 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
352 offsets, size_offsets) \
353 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
354 len, start_offset, options, offsets, size_offsets)
355
356 #define PCRE_FREE_STUDY16(extra) \
357 pcre16_free_study((pcre16_extra *)extra)
358
359 #define PCRE_FREE_SUBSTRING16(substring) \
360 pcre16_free_substring((PCRE_SPTR16)substring)
361
362 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
363 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
364
365 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
366 getnamesptr, subsptr) \
367 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
368 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
369
/* 16-bit wrapper for pcre16_get_stringnumber(): looks up the name at ptr in
the compiled pattern given as the second argument and stores the result in n.
The pattern is taken from the macro argument (cast to pcre16 *, as in the other
16-bit wrappers), not from a variable called "re" at the point of use. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
372
373 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
374 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
375 (PCRE_SPTR16 *)(void*)subsptr)
376
377 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
378 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
379 (PCRE_SPTR16 **)(void*)listptr)
380
381 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
382 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
383 tables)
384
385 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
386 pcre16_printint(re, outfile, debug_lengths)
387
388 #define PCRE_STUDY16(extra, re, options, error) \
389 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
390
391 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
392 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
393
394 #define PCRE_JIT_STACK_FREE16(stack) \
395 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
396
397 #endif /* SUPPORT_PCRE16 */
398
399 /* -----------------------------------------------------------*/
400
401 #ifdef SUPPORT_PCRE32
402
403 #define PCHARS32(lv, p, offset, len, f) \
404 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
405
406 #define PCHARSV32(p, offset, len, f) \
407 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
408
409 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
410 p = read_capture_name32(p, cn32, re)
411
412 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
413
414 #define SET_PCRE_CALLOUT32(callout) \
415 pcre32_callout = (int (*)(pcre32_callout_block *))callout
416
417 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
418 pcre32_assign_jit_stack((pcre32_extra *)extra, \
419 (pcre32_jit_callback)callback, userdata)
420
421 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
422 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
423 tables)
424
/* 32-bit wrapper for pcre32_copy_named_substring(); the result is stored in
rc. cbuffer is cast to PCRE_UCHAR32 *, so the byte count given in size is
converted to a count of 32-bit units before being passed on. Dividing by 2, as
the 16-bit wrapper does for its 16-bit units, would tell the library that
cbuffer holds twice as many 32-bit units as it really does. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, \
    (int)((size) / sizeof(PCRE_UCHAR32)))
429
/* 32-bit wrapper for pcre32_copy_substring(); the result is stored in rc.
cbuffer is cast to PCRE_UCHAR32 *, so the byte count given in size is converted
to a count of 32-bit units before being passed on. Dividing by 2, as the 16-bit
wrapper does for its 16-bit units, would tell the library that cbuffer holds
twice as many 32-bit units as it really does. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (int)((size) / sizeof(PCRE_UCHAR32)))
433
434 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
435 offsets, size_offsets, workspace, size_workspace) \
436 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
437 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
438 workspace, size_workspace)
439
440 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
441 offsets, size_offsets) \
442 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
443 len, start_offset, options, offsets, size_offsets)
444
445 #define PCRE_FREE_STUDY32(extra) \
446 pcre32_free_study((pcre32_extra *)extra)
447
448 #define PCRE_FREE_SUBSTRING32(substring) \
449 pcre32_free_substring((PCRE_SPTR32)substring)
450
451 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
452 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
453
454 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
455 getnamesptr, subsptr) \
456 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
457 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
458
/* 32-bit wrapper for pcre32_get_stringnumber(): looks up the name at ptr in
the compiled pattern given as the second argument and stores the result in n.
The pattern is taken from the macro argument (cast to pcre32 *, as in the other
32-bit wrappers), not from a variable called "re" at the point of use. */

#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
461
462 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
463 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
464 (PCRE_SPTR32 *)(void*)subsptr)
465
466 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
467 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
468 (PCRE_SPTR32 **)(void*)listptr)
469
470 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
471 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
472 tables)
473
474 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
475 pcre32_printint(re, outfile, debug_lengths)
476
477 #define PCRE_STUDY32(extra, re, options, error) \
478 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
479
480 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
481 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
482
483 #define PCRE_JIT_STACK_FREE32(stack) \
484 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
485
486 #endif /* SUPPORT_PCRE32 */
487
488
489 /* ----- More than one mode is supported; a runtime test is needed, except for
490 pcre_config(), and the JIT stack functions, when it doesn't matter which
491 available version is called. ----- */
492
/* Identifiers for the three library widths. The numeric values matter:
CHAR_SIZE is computed as (1 << pcre_mode), so they are spelled out here. */

enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
498
499 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
500 defined (SUPPORT_PCRE32)) >= 2
501
502 #define CHAR_SIZE (1 << pcre_mode)
503
504 /* There doesn't seem to be an easy way of writing these macros that can cope
505 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
506 cases separately. */
507
508 /* ----- All three modes supported ----- */
509
510 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
511
512 #define PCHARS(lv, p, offset, len, f) \
513 if (pcre_mode == PCRE32_MODE) \
514 PCHARS32(lv, p, offset, len, f); \
515 else if (pcre_mode == PCRE16_MODE) \
516 PCHARS16(lv, p, offset, len, f); \
517 else \
518 PCHARS8(lv, p, offset, len, f)
519
520 #define PCHARSV(p, offset, len, f) \
521 if (pcre_mode == PCRE32_MODE) \
522 PCHARSV32(p, offset, len, f); \
523 else if (pcre_mode == PCRE16_MODE) \
524 PCHARSV16(p, offset, len, f); \
525 else \
526 PCHARSV8(p, offset, len, f)
527
528 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
529 if (pcre_mode == PCRE32_MODE) \
530 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
531 else if (pcre_mode == PCRE16_MODE) \
532 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
533 else \
534 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
535
536 #define SET_PCRE_CALLOUT(callout) \
537 if (pcre_mode == PCRE32_MODE) \
538 SET_PCRE_CALLOUT32(callout); \
539 else if (pcre_mode == PCRE16_MODE) \
540 SET_PCRE_CALLOUT16(callout); \
541 else \
542 SET_PCRE_CALLOUT8(callout)
543
/* Length of the string at p, as reported by whichever of the STRLEN32,
STRLEN16 or STRLEN8 macros matches the current value of pcre_mode. */

#define STRLEN(p) \
  ((pcre_mode == PCRE32_MODE)? STRLEN32(p) : \
   (pcre_mode == PCRE16_MODE)? STRLEN16(p) : \
   STRLEN8(p))
545
546 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
547 if (pcre_mode == PCRE32_MODE) \
548 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
549 else if (pcre_mode == PCRE16_MODE) \
550 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
551 else \
552 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
553
554 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
555 if (pcre_mode == PCRE32_MODE) \
556 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
557 else if (pcre_mode == PCRE16_MODE) \
558 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
559 else \
560 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
561
562 #define PCRE_CONFIG pcre_config
563
564 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
565 namesptr, cbuffer, size) \
566 if (pcre_mode == PCRE32_MODE) \
567 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
568 namesptr, cbuffer, size); \
569 else if (pcre_mode == PCRE16_MODE) \
570 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
571 namesptr, cbuffer, size); \
572 else \
573 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
574 namesptr, cbuffer, size)
575
576 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
577 if (pcre_mode == PCRE32_MODE) \
578 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
579 else if (pcre_mode == PCRE16_MODE) \
580 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
581 else \
582 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
583
584 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
585 offsets, size_offsets, workspace, size_workspace) \
586 if (pcre_mode == PCRE32_MODE) \
587 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
588 offsets, size_offsets, workspace, size_workspace); \
589 else if (pcre_mode == PCRE16_MODE) \
590 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
591 offsets, size_offsets, workspace, size_workspace); \
592 else \
593 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
594 offsets, size_offsets, workspace, size_workspace)
595
596 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
597 offsets, size_offsets) \
598 if (pcre_mode == PCRE32_MODE) \
599 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
600 offsets, size_offsets); \
601 else if (pcre_mode == PCRE16_MODE) \
602 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
603 offsets, size_offsets); \
604 else \
605 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
606 offsets, size_offsets)
607
608 #define PCRE_FREE_STUDY(extra) \
609 if (pcre_mode == PCRE32_MODE) \
610 PCRE_FREE_STUDY32(extra); \
611 else if (pcre_mode == PCRE16_MODE) \
612 PCRE_FREE_STUDY16(extra); \
613 else \
614 PCRE_FREE_STUDY8(extra)
615
616 #define PCRE_FREE_SUBSTRING(substring) \
617 if (pcre_mode == PCRE32_MODE) \
618 PCRE_FREE_SUBSTRING32(substring); \
619 else if (pcre_mode == PCRE16_MODE) \
620 PCRE_FREE_SUBSTRING16(substring); \
621 else \
622 PCRE_FREE_SUBSTRING8(substring)
623
624 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
625 if (pcre_mode == PCRE32_MODE) \
626 PCRE_FREE_SUBSTRING_LIST32(listptr); \
627 else if (pcre_mode == PCRE16_MODE) \
628 PCRE_FREE_SUBSTRING_LIST16(listptr); \
629 else \
630 PCRE_FREE_SUBSTRING_LIST8(listptr)
631
632 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
633 getnamesptr, subsptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
636 getnamesptr, subsptr); \
637 else if (pcre_mode == PCRE16_MODE) \
638 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
639 getnamesptr, subsptr); \
640 else \
641 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
642 getnamesptr, subsptr)
643
644 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
645 if (pcre_mode == PCRE32_MODE) \
646 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
647 else if (pcre_mode == PCRE16_MODE) \
648 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
649 else \
650 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
651
652 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
653 if (pcre_mode == PCRE32_MODE) \
654 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
655 else if (pcre_mode == PCRE16_MODE) \
656 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
657 else \
658 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
659
660 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
661 if (pcre_mode == PCRE32_MODE) \
662 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
663 else if (pcre_mode == PCRE16_MODE) \
664 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
665 else \
666 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
667
668 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
669 (pcre_mode == PCRE32_MODE ? \
670 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
671 : pcre_mode == PCRE16_MODE ? \
672 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
673 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
674
675 #define PCRE_JIT_STACK_FREE(stack) \
676 if (pcre_mode == PCRE32_MODE) \
677 PCRE_JIT_STACK_FREE32(stack); \
678 else if (pcre_mode == PCRE16_MODE) \
679 PCRE_JIT_STACK_FREE16(stack); \
680 else \
681 PCRE_JIT_STACK_FREE8(stack)
682
/* Calls the maketables function of whichever library pcre_mode selects and
yields its result. */

#define PCRE_MAKETABLES \
  ((pcre_mode == PCRE32_MODE)? pcre32_maketables() : \
   (pcre_mode == PCRE16_MODE)? pcre16_maketables() : \
   pcre_maketables())
685
686 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
687 if (pcre_mode == PCRE32_MODE) \
688 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
689 else if (pcre_mode == PCRE16_MODE) \
690 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
691 else \
692 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
693
694 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
695 if (pcre_mode == PCRE32_MODE) \
696 PCRE_PRINTINT32(re, outfile, debug_lengths); \
697 else if (pcre_mode == PCRE16_MODE) \
698 PCRE_PRINTINT16(re, outfile, debug_lengths); \
699 else \
700 PCRE_PRINTINT8(re, outfile, debug_lengths)
701
702 #define PCRE_STUDY(extra, re, options, error) \
703 if (pcre_mode == PCRE32_MODE) \
704 PCRE_STUDY32(extra, re, options, error); \
705 else if (pcre_mode == PCRE16_MODE) \
706 PCRE_STUDY16(extra, re, options, error); \
707 else \
708 PCRE_STUDY8(extra, re, options, error)
709
710
711 /* ----- Two out of three modes are supported ----- */
712
713 #else
714
715 /* We can use some macro trickery to make a single set of definitions work in
716 the three different cases. */
717
718 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
719
720 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
721 #define BITONE 32
722 #define BITTWO 16
723
724 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
725
726 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
727 #define BITONE 32
728 #define BITTWO 8
729
730 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
731
732 #else
733 #define BITONE 16
734 #define BITTWO 8
735 #endif
736
/* Token pasting in two steps. G() is the one to use: its arguments are macro-
expanded first (so BITONE and BITTWO are replaced by their values) and only
then handed to glue(), which joins the two results with ##. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
739
740
741 /* ----- Common macros for two-mode cases ----- */
742
743 #define PCHARS(lv, p, offset, len, f) \
744 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
745 G(PCHARS,BITONE)(lv, p, offset, len, f); \
746 else \
747 G(PCHARS,BITTWO)(lv, p, offset, len, f)
748
749 #define PCHARSV(p, offset, len, f) \
750 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
751 G(PCHARSV,BITONE)(p, offset, len, f); \
752 else \
753 G(PCHARSV,BITTWO)(p, offset, len, f)
754
755 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
756 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
757 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
758 else \
759 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
760
/* Two-mode dispatch macros. BITONE and BITTWO are pasted into names by G() to
select the two compiled-in code unit widths. Each macro below tests pcre_mode
at run time and forwards to the BITONE or the BITTWO variant.

The statement-like macros expand to a bare if/else whose final call has no
terminating semicolon, so each must be used as a complete statement followed by
a semicolon. The value-yielding macros (STRLEN, PCRE_JIT_STACK_ALLOC and
PCRE_MAKETABLES) are fully parenthesized so that they remain a single operand
when embedded in a larger expression. PCRE_CONFIG is not dispatched: it always
names the BITONE configuration function. */

#define SET_PCRE_CALLOUT(callout) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(SET_PCRE_CALLOUT,BITONE)(callout); \
  else \
    G(SET_PCRE_CALLOUT,BITTWO)(callout)

#define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
  G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
  else \
    G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
  else \
    G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)

#define PCRE_CONFIG G(G(pcre,BITONE),_config)

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_STUDY,BITONE)(extra); \
  else \
    G(PCRE_FREE_STUDY,BITTWO)(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
  else \
    G(PCRE_FREE_SUBSTRING,BITTWO)(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
  else \
    G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
  else \
    G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
  else \
    G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)

/* Parenthesized as a whole: without the outer parentheses, any operator
written next to the macro call would bind to only one arm of the ?: . */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
  else \
    G(PCRE_JIT_STACK_FREE,BITTWO)(stack)

/* Parenthesized as a whole for the same reason as PCRE_JIT_STACK_ALLOC. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
  else \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
  else \
    G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_STUDY,BITONE)(extra, re, options, error); \
  else \
    G(PCRE_STUDY,BITTWO)(extra, re, options, error)
894
895 #endif /* Two out of three modes */
896
897 /* ----- End of cases where more than one mode is supported ----- */
898
899
900 /* ----- Only 8-bit mode is supported: each generic macro is a direct alias of its 8-bit form, with no run-time test of pcre_mode ----- */
901
902 #elif defined SUPPORT_PCRE8
903 #define CHAR_SIZE 1
904 #define PCHARS PCHARS8
905 #define PCHARSV PCHARSV8
906 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
907 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
908 #define STRLEN STRLEN8
909 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
910 #define PCRE_COMPILE PCRE_COMPILE8
911 #define PCRE_CONFIG pcre_config
912 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
913 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
914 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
915 #define PCRE_EXEC PCRE_EXEC8
916 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
917 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
918 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
919 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
920 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
921 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
922 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
923 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
924 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
925 #define PCRE_MAKETABLES pcre_maketables()
926 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
927 #define PCRE_PRINTINT PCRE_PRINTINT8
928 #define PCRE_STUDY PCRE_STUDY8
929
930 /* ----- Only 16-bit mode is supported: each generic macro is a direct alias of its 16-bit form, with no run-time test of pcre_mode ----- */
931
932 #elif defined SUPPORT_PCRE16
933 #define CHAR_SIZE 2
934 #define PCHARS PCHARS16
935 #define PCHARSV PCHARSV16
936 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
937 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
938 #define STRLEN STRLEN16
939 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
940 #define PCRE_COMPILE PCRE_COMPILE16
941 #define PCRE_CONFIG pcre16_config
942 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
943 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
944 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
945 #define PCRE_EXEC PCRE_EXEC16
946 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
947 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
948 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
949 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
950 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
951 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
952 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
953 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
954 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
955 #define PCRE_MAKETABLES pcre16_maketables()
956 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
957 #define PCRE_PRINTINT PCRE_PRINTINT16
958 #define PCRE_STUDY PCRE_STUDY16
959
960 /* ----- Only 32-bit mode is supported: each generic macro is a direct alias of its 32-bit form, with no run-time test of pcre_mode ----- */
961
962 #elif defined SUPPORT_PCRE32
963 #define CHAR_SIZE 4
964 #define PCHARS PCHARS32
965 #define PCHARSV PCHARSV32
966 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
967 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
968 #define STRLEN STRLEN32
969 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
970 #define PCRE_COMPILE PCRE_COMPILE32
971 #define PCRE_CONFIG pcre32_config
972 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
973 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
974 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
975 #define PCRE_EXEC PCRE_EXEC32
976 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
977 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
978 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
979 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
980 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
981 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
982 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
983 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
984 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
985 #define PCRE_MAKETABLES pcre32_maketables()
986 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
987 #define PCRE_PRINTINT PCRE_PRINTINT32
988 #define PCRE_STUDY PCRE_STUDY32
989
990 #endif
991
992 /* ----- End of mode-specific function call macros ----- */
993
994
995 /* Other parameters */
996
/* Make sure CLOCKS_PER_SEC exists: use CLK_TCK when that is what the C library
supplies, otherwise fall back to 100. */
997 #ifndef CLOCKS_PER_SEC
998 #ifdef CLK_TCK
999 #define CLOCKS_PER_SEC CLK_TCK
1000 #else
1001 #define CLOCKS_PER_SEC 100
1002 #endif
1003 #endif
1004
/* Defined only when DFA support has not been compiled out with NODFA.
NOTE(review): presumably the dimension of the DFA workspace vector - confirm at
the point of use. */
1005 #if !defined NODFA
1006 #define DFA_WS_DIMENSION 1000
1007 #endif
1008
1009 /* This is the default loop count for timing. */
1010
1011 #define LOOPREPEAT 500000
1012
1013 /* Static variables */
1014
1015 static FILE *outfile;
1016 static int log_store = 0;
1017 static int callout_count;
1018 static int callout_extra;
1019 static int callout_fail_count;
1020 static int callout_fail_id;
1021 static int debug_lengths;
1022 static int first_callout;
1023 static int jit_was_used; /* set to TRUE by jit_callback() */
1024 static int locale_set = 0;
1025 static int show_malloc;
1026 static int use_utf;
1027 static size_t gotten_store;
1028 static size_t first_gotten_store = 0;
1029 static const unsigned char *last_callout_mark = NULL;
1030
1031 /* The buffers grow automatically if very long input lines are encountered. */
1032
1033 static int buffer_size = 50000;
1034 static pcre_uint8 *buffer = NULL;
1035 static pcre_uint8 *pbuffer = NULL;
1036
1037 /* Another buffer is needed for translation to 16/32-bit character strings
1038 (buffer16 and buffer32 below). It will be obtained and extended as required. */
1039
1040 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
1041
1042 /* We need the table of operator lengths that is used for 16/32-bit compiling,
1043 in order to swap bytes in a pattern for saving/reloading testing. Luckily, the
1044 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
1045 appropriately for the 16/32-bit world. Just as a safety check, make sure that
1046 COMPILE_PCRE[16|32] is *not* set. */
1047
1048 #ifdef COMPILE_PCRE16
1049 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1050 #endif
1051
1052 #ifdef COMPILE_PCRE32
1053 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1054 #endif
1055
/* Rescale LINK_SIZE: a value of 2 becomes 1, and 3 or 4 becomes 2. Any other
value is rejected at compile time. */
1056 #if LINK_SIZE == 2
1057 #undef LINK_SIZE
1058 #define LINK_SIZE 1
1059 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1060 #undef LINK_SIZE
1061 #define LINK_SIZE 2
1062 #else
1063 #error LINK_SIZE must be either 2, 3, or 4
1064 #endif
1065
1066 #undef IMM2_SIZE
1067 #define IMM2_SIZE 1
1068
1069 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
1070
1071
/* The operator-length tables below are built from OP_LENGTHS, so they pick up
whatever LINK_SIZE and IMM2_SIZE are in force at this point. */
1072 #ifdef SUPPORT_PCRE16
1073 static int buffer16_size = 0;
1074 static pcre_uint16 *buffer16 = NULL;
1075 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1076 #endif /* SUPPORT_PCRE16 */
1077
1078 #ifdef SUPPORT_PCRE32
1079 static int buffer32_size = 0;
1080 static pcre_uint32 *buffer32 = NULL;
1081 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1082 #endif /* SUPPORT_PCRE32 */
1083
1084 /* If we have 8-bit support, default to it; if there is also 16-or 32-bit
1085 support, it can be changed by an option. If there is no 8-bit support, there
1086 must be 16-or 32-bit support, so default to 16-bit if present, else 32-bit. */
1087
1088 #if defined SUPPORT_PCRE8
1089 static int pcre_mode = PCRE8_MODE;
1090 #elif defined SUPPORT_PCRE16
1091 static int pcre_mode = PCRE16_MODE;
1092 #elif defined SUPPORT_PCRE32
1093 static int pcre_mode = PCRE32_MODE;
1094 #endif
1095
1096 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. The table has
seven entries, one per digit, listed in ascending order of n.
NOTE(review): presumably indexed by (n - '1') - confirm at the point of use. */
1097
1098 static int jit_study_bits[] =
1099 {
1100 PCRE_STUDY_JIT_COMPILE,
1101 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1102 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1103 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1104 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1105 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1106 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1107 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1108 };
1109
/* The bitwise OR of all three JIT study options. */
1110 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1111 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1112
1113 /* Textual explanations for runtime error codes. A NULL entry marks a code that
has no text in this table, for the reason given beside it.
NOTE(review): presumably indexed by the negated PCRE error code - confirm at
the point of use. */
1114
1115 static const char *errtexts[] = {
1116 NULL, /* 0 is no error */
1117 NULL, /* NOMATCH is handled specially */
1118 "NULL argument passed",
1119 "bad option value",
1120 "magic number missing",
1121 "unknown opcode - pattern overwritten?",
1122 "no more memory",
1123 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1124 "match limit exceeded",
1125 "callout error code",
1126 NULL, /* BADUTF8/16 is handled specially */
1127 NULL, /* BADUTF8/16 offset is handled specially */
1128 NULL, /* PARTIAL is handled specially */
1129 "not used - internal error",
1130 "internal error - pattern overwritten?",
1131 "bad count value",
1132 "item unsupported for DFA matching",
1133 "backreference condition or recursion test not supported for DFA matching",
1134 "match limit not supported for DFA matching",
1135 "workspace size exceeded in DFA matching",
1136 "too much recursion for DFA matching",
1137 "recursion limit exceeded",
1138 "not used - internal error",
1139 "invalid combination of newline options",
1140 "bad offset value",
1141 NULL, /* SHORTUTF8/16 is handled specially */
1142 "nested recursion at the same subject position",
1143 "JIT stack limit reached",
1144 "pattern compiled in wrong mode: 8-bit/16-bit error",
1145 "pattern compiled with other endianness",
1146 "invalid data in workspace for DFA restart"
1147 };
1148
1149
1150 /*************************************************
1151 * Alternate character tables *
1152 *************************************************/
1153
1154 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1155 using the default tables of the library. However, the T option can be used to
1156 select alternate sets of tables, for different kinds of testing. Note also that
1157 the L (locale) option also adjusts the tables. */
1158
1159 /* This is the set of tables distributed as default with PCRE. It recognizes
1160 only ASCII characters. */
1161
1162 static const pcre_uint8 tables0[] = {
1163
1164 /* This table is a lower casing table. It has 256 entries, one per byte value. */
1165
1166 0, 1, 2, 3, 4, 5, 6, 7,
1167 8, 9, 10, 11, 12, 13, 14, 15,
1168 16, 17, 18, 19, 20, 21, 22, 23,
1169 24, 25, 26, 27, 28, 29, 30, 31,
1170 32, 33, 34, 35, 36, 37, 38, 39,
1171 40, 41, 42, 43, 44, 45, 46, 47,
1172 48, 49, 50, 51, 52, 53, 54, 55,
1173 56, 57, 58, 59, 60, 61, 62, 63,
1174 64, 97, 98, 99,100,101,102,103,
1175 104,105,106,107,108,109,110,111,
1176 112,113,114,115,116,117,118,119,
1177 120,121,122, 91, 92, 93, 94, 95,
1178 96, 97, 98, 99,100,101,102,103,
1179 104,105,106,107,108,109,110,111,
1180 112,113,114,115,116,117,118,119,
1181 120,121,122,123,124,125,126,127,
1182 128,129,130,131,132,133,134,135,
1183 136,137,138,139,140,141,142,143,
1184 144,145,146,147,148,149,150,151,
1185 152,153,154,155,156,157,158,159,
1186 160,161,162,163,164,165,166,167,
1187 168,169,170,171,172,173,174,175,
1188 176,177,178,179,180,181,182,183,
1189 184,185,186,187,188,189,190,191,
1190 192,193,194,195,196,197,198,199,
1191 200,201,202,203,204,205,206,207,
1192 208,209,210,211,212,213,214,215,
1193 216,217,218,219,220,221,222,223,
1194 224,225,226,227,228,229,230,231,
1195 232,233,234,235,236,237,238,239,
1196 240,241,242,243,244,245,246,247,
1197 248,249,250,251,252,253,254,255,
1198
1199 /* This table is a case flipping table. It has 256 entries, one per byte value. */
1200
1201 0, 1, 2, 3, 4, 5, 6, 7,
1202 8, 9, 10, 11, 12, 13, 14, 15,
1203 16, 17, 18, 19, 20, 21, 22, 23,
1204 24, 25, 26, 27, 28, 29, 30, 31,
1205 32, 33, 34, 35, 36, 37, 38, 39,
1206 40, 41, 42, 43, 44, 45, 46, 47,
1207 48, 49, 50, 51, 52, 53, 54, 55,
1208 56, 57, 58, 59, 60, 61, 62, 63,
1209 64, 97, 98, 99,100,101,102,103,
1210 104,105,106,107,108,109,110,111,
1211 112,113,114,115,116,117,118,119,
1212 120,121,122, 91, 92, 93, 94, 95,
1213 96, 65, 66, 67, 68, 69, 70, 71,
1214 72, 73, 74, 75, 76, 77, 78, 79,
1215 80, 81, 82, 83, 84, 85, 86, 87,
1216 88, 89, 90,123,124,125,126,127,
1217 128,129,130,131,132,133,134,135,
1218 136,137,138,139,140,141,142,143,
1219 144,145,146,147,148,149,150,151,
1220 152,153,154,155,156,157,158,159,
1221 160,161,162,163,164,165,166,167,
1222 168,169,170,171,172,173,174,175,
1223 176,177,178,179,180,181,182,183,
1224 184,185,186,187,188,189,190,191,
1225 192,193,194,195,196,197,198,199,
1226 200,201,202,203,204,205,206,207,
1227 208,209,210,211,212,213,214,215,
1228 216,217,218,219,220,221,222,223,
1229 224,225,226,227,228,229,230,231,
1230 232,233,234,235,236,237,238,239,
1231 240,241,242,243,244,245,246,247,
1232 248,249,250,251,252,253,254,255,
1233
1234 /* This table contains bit maps for various character classes. Each map is 32
1235 bytes long and the bits run from the least significant end of each byte. The
1236 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1237 graph, print, punct, and cntrl. Other classes are built from combinations. The
maps appear below in that order, one four-line group per class. */
1238
1239 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
1240 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1241 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1243
1244 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
1245 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1246 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1248
1249 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
1250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1251 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1252 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1253
1254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
1255 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1256 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1257 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1258
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
1260 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1263
1264 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
1265 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1266 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1268
1269 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
1270 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1271 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1273
1274 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
1275 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1276 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1278
1279 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
1280 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1281 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1283
1284 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
1285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1286 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1287 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1288
1289 /* This table identifies various classes of character by individual bits:
1290 0x01 white space character
1291 0x02 letter
1292 0x04 decimal digit
1293 0x08 hexadecimal digit
1294 0x10 alphanumeric or '_'
1295 0x80 regular expression metacharacter or binary zero
1296 */
1297
1298 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1299 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1302 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1303 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1304 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1305 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1306 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1307 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1308 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1309 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1310 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1311 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1312 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1313 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1316 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1318 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1321 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1325 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1326 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1327 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1330
1331 /* This is a set of tables that came originally from a Windows user. It seems to
1332 be at least an approximation of ISO 8859. In particular, there are characters
1333 greater than 128 that are marked as spaces, letters, etc. */
1334
1335 static const pcre_uint8 tables1[] = {
/* NOTE(review): the section labels below are inferred from the section sizes
(256 + 256 + 320 + 256 bytes) and from the data itself, which follow the same
layout as tables0 - confirm against the code that builds character tables. */
1336 0,1,2,3,4,5,6,7, /* lower casing table: 256 entries start here */
1337 8,9,10,11,12,13,14,15,
1338 16,17,18,19,20,21,22,23,
1339 24,25,26,27,28,29,30,31,
1340 32,33,34,35,36,37,38,39,
1341 40,41,42,43,44,45,46,47,
1342 48,49,50,51,52,53,54,55,
1343 56,57,58,59,60,61,62,63,
1344 64,97,98,99,100,101,102,103,
1345 104,105,106,107,108,109,110,111,
1346 112,113,114,115,116,117,118,119,
1347 120,121,122,91,92,93,94,95,
1348 96,97,98,99,100,101,102,103,
1349 104,105,106,107,108,109,110,111,
1350 112,113,114,115,116,117,118,119,
1351 120,121,122,123,124,125,126,127,
1352 128,129,130,131,132,133,134,135,
1353 136,137,138,139,140,141,142,143,
1354 144,145,146,147,148,149,150,151,
1355 152,153,154,155,156,157,158,159,
1356 160,161,162,163,164,165,166,167,
1357 168,169,170,171,172,173,174,175,
1358 176,177,178,179,180,181,182,183,
1359 184,185,186,187,188,189,190,191,
1360 224,225,226,227,228,229,230,231,
1361 232,233,234,235,236,237,238,239,
1362 240,241,242,243,244,245,246,215,
1363 248,249,250,251,252,253,254,223,
1364 224,225,226,227,228,229,230,231,
1365 232,233,234,235,236,237,238,239,
1366 240,241,242,243,244,245,246,247,
1367 248,249,250,251,252,253,254,255,
1368 0,1,2,3,4,5,6,7, /* case flipping table: 256 entries start here */
1369 8,9,10,11,12,13,14,15,
1370 16,17,18,19,20,21,22,23,
1371 24,25,26,27,28,29,30,31,
1372 32,33,34,35,36,37,38,39,
1373 40,41,42,43,44,45,46,47,
1374 48,49,50,51,52,53,54,55,
1375 56,57,58,59,60,61,62,63,
1376 64,97,98,99,100,101,102,103,
1377 104,105,106,107,108,109,110,111,
1378 112,113,114,115,116,117,118,119,
1379 120,121,122,91,92,93,94,95,
1380 96,65,66,67,68,69,70,71,
1381 72,73,74,75,76,77,78,79,
1382 80,81,82,83,84,85,86,87,
1383 88,89,90,123,124,125,126,127,
1384 128,129,130,131,132,133,134,135,
1385 136,137,138,139,140,141,142,143,
1386 144,145,146,147,148,149,150,151,
1387 152,153,154,155,156,157,158,159,
1388 160,161,162,163,164,165,166,167,
1389 168,169,170,171,172,173,174,175,
1390 176,177,178,179,180,181,182,183,
1391 184,185,186,187,188,189,190,191,
1392 224,225,226,227,228,229,230,231,
1393 232,233,234,235,236,237,238,239,
1394 240,241,242,243,244,245,246,215,
1395 248,249,250,251,252,253,254,223,
1396 192,193,194,195,196,197,198,199,
1397 200,201,202,203,204,205,206,207,
1398 208,209,210,211,212,213,214,247,
1399 216,217,218,219,220,221,222,255,
1400 0,62,0,0,1,0,0,0, /* class bit maps, 32 bytes each, start here: space */
1401 0,0,0,0,0,0,0,0,
1402 32,0,0,0,1,0,0,0,
1403 0,0,0,0,0,0,0,0,
1404 0,0,0,0,0,0,255,3, /* xdigit */
1405 126,0,0,0,126,0,0,0,
1406 0,0,0,0,0,0,0,0,
1407 0,0,0,0,0,0,0,0,
1408 0,0,0,0,0,0,255,3, /* digit */
1409 0,0,0,0,0,0,0,0,
1410 0,0,0,0,0,0,12,2,
1411 0,0,0,0,0,0,0,0,
1412 0,0,0,0,0,0,0,0, /* upper */
1413 254,255,255,7,0,0,0,0,
1414 0,0,0,0,0,0,0,0,
1415 255,255,127,127,0,0,0,0,
1416 0,0,0,0,0,0,0,0, /* lower */
1417 0,0,0,0,254,255,255,7,
1418 0,0,0,0,0,4,32,4,
1419 0,0,0,128,255,255,127,255,
1420 0,0,0,0,0,0,255,3, /* word */
1421 254,255,255,135,254,255,255,7,
1422 0,0,0,0,0,4,44,6,
1423 255,255,127,255,255,255,127,255,
1424 0,0,0,0,254,255,255,255, /* graph */
1425 255,255,255,255,255,255,255,127,
1426 0,0,0,0,254,255,255,255,
1427 255,255,255,255,255,255,255,255,
1428 0,2,0,0,255,255,255,255, /* print */
1429 255,255,255,255,255,255,255,127,
1430 0,0,0,0,255,255,255,255,
1431 255,255,255,255,255,255,255,255,
1432 0,0,0,0,254,255,0,252, /* punct */
1433 1,0,0,248,1,0,0,120,
1434 0,0,0,0,254,255,255,255,
1435 0,0,128,0,0,0,128,0,
1436 255,255,255,255,0,0,0,0, /* cntrl */
1437 0,0,0,0,0,0,0,128,
1438 255,255,255,255,0,0,0,0,
1439 0,0,0,0,0,0,0,0,
1440 128,0,0,0,0,0,0,0, /* character type flags: 256 entries start here */
1441 0,1,1,0,1,1,0,0,
1442 0,0,0,0,0,0,0,0,
1443 0,0,0,0,0,0,0,0,
1444 1,0,0,0,128,0,0,0,
1445 128,128,128,128,0,0,128,0,
1446 28,28,28,28,28,28,28,28,
1447 28,28,0,0,0,0,0,128,
1448 0,26,26,26,26,26,26,18,
1449 18,18,18,18,18,18,18,18,
1450 18,18,18,18,18,18,18,18,
1451 18,18,18,128,128,0,128,16,
1452 0,26,26,26,26,26,26,18,
1453 18,18,18,18,18,18,18,18,
1454 18,18,18,18,18,18,18,18,
1455 18,18,18,128,128,0,0,0,
1456 0,0,0,0,0,1,0,0,
1457 0,0,0,0,0,0,0,0,
1458 0,0,0,0,0,0,0,0,
1459 0,0,0,0,0,0,0,0,
1460 1,0,0,0,0,0,0,0,
1461 0,0,18,0,0,0,0,0,
1462 0,0,20,20,0,18,0,0,
1463 0,20,18,0,0,0,0,0,
1464 18,18,18,18,18,18,18,18,
1465 18,18,18,18,18,18,18,18,
1466 18,18,18,18,18,18,18,0,
1467 18,18,18,18,18,18,18,18,
1468 18,18,18,18,18,18,18,18,
1469 18,18,18,18,18,18,18,18,
1470 18,18,18,18,18,18,18,0,
1471 18,18,18,18,18,18,18,18
1472 };
1473
1474
1475
1476
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror() on old systems (SunOS4 is one example) whose
libraries lack it but do export the sys_nerr count and the sys_errlist[]
message table.

Argument:   n   an error number
Returns:    the message from sys_errlist[] when 0 <= n < sys_nerr, otherwise
            a fixed "unknown" message
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1496
1497
1498
1499 /*************************************************
1500 * Print newline configuration *
1501 *************************************************/
1502
1503 /*
1504 Arguments:
1505 rc the return code from PCRE_CONFIG_NEWLINE
1506 isc TRUE if called from "-C newline"
1507 Returns: nothing
1508 */
1509
1510 static void
1511 print_newline_config(int rc, BOOL isc)
1512 {
1513 const char *s = NULL;
1514 if (!isc) printf(" Newline sequence is ");
1515 switch(rc)
1516 {
1517 case CHAR_CR: s = "CR"; break;
1518 case CHAR_LF: s = "LF"; break;
1519 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1520 case -1: s = "ANY"; break;
1521 case -2: s = "ANYCRLF"; break;
1522
1523 default:
1524 printf("a non-standard value: 0x%04x\n", rc);
1525 return;
1526 }
1527
1528 printf("%s\n", s);
1529 }
1530
1531
1532
1533 /*************************************************
1534 * JIT memory callback *
1535 *************************************************/
1536
1537 static pcre_jit_stack* jit_callback(void *arg)
1538 {
1539 jit_was_used = TRUE;
1540 return (pcre_jit_stack *)arg;
1541 }
1542
1543
1544 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1545 /*************************************************
1546 * Convert UTF-8 string to value *
1547 *************************************************/
1548
1549 /* This function takes one or more bytes that represents a UTF-8 character,
1550 and returns the value of the character.
1551
1552 Argument:
1553 utf8bytes a pointer to the byte vector
1554 vptr a pointer to an int to receive the value
1555
1556 Returns: > 0 => the number of bytes consumed
1557 -6 to 0 => malformed UTF-8 character at offset = (-return)
1558 */
1559
1560 static int
1561 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1562 {
1563 pcre_uint32 c = *utf8bytes++;
1564 pcre_uint32 d = c;
1565 int i, j, s;
1566
1567 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1568 {
1569 if ((d & 0x80) == 0) break;
1570 d <<= 1;
1571 }
1572
1573 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1574 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1575
1576 /* i now has a value in the range 1-5 */
1577
1578 s = 6*i;
1579 d = (c & utf8_table3[i]) << s;
1580
1581 for (j = 0; j < i; j++)
1582 {
1583 c = *utf8bytes++;
1584 if ((c & 0xc0) != 0x80) return -(j+1);
1585 s -= 6;
1586 d |= (c & 0x3f) << s;
1587 }
1588
1589 /* Check that encoding was the correct unique one */
1590
1591 for (j = 0; j < utf8_table1_size; j++)
1592 if (d <= (pcre_uint32)utf8_table1[j]) break;
1593 if (j != i) return -(i+1);
1594
1595 /* Valid value */
1596
1597 *vptr = d;
1598 return i+1;
1599 }
1600 #endif /* NOUTF || SUPPORT_PCRE16 */
1601
1602
1603
#if defined SUPPORT_PCRE8 && !defined NOUTF
/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* Encodes a value in the range 0 - 0x7fffffff as a UTF-8 character of 1 to 6
bytes. Larger values are rejected and nothing is written.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             OR -1 if cvalue is greater than 0x7fffffff
*/

static int
ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
{
int extra = 0;           /* number of continuation bytes required */
int k;

if (cvalue > 0x7fffffffu) return -1;

while (extra < utf8_table1_size && cvalue > (pcre_uint32)utf8_table1[extra])
  extra++;

/* Fill in the continuation bytes from the last one backwards, six bits at a
time. Whatever remains goes into the lead byte with its length marker. */

for (k = extra; k >= 1; k--)
  {
  utf8bytes[k] = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }

utf8bytes[0] = utf8_table2[extra] | cvalue;
return extra + 1;
}
#endif
1637
1638
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
whenever it holds fewer than 2*len + 2 bytes. That is always enough. In
non-UTF mode the 16-bit string needs exactly double the 8-bit size. For a
UTF-8 string the UTF-16 form needs no more than double, because values up to
0xffff take 1 to 3 bytes in UTF-8 and 2 in UTF-16, while higher values take 4
bytes in both.

Surrogate values are deliberately not faulted here, so that invalid UTF-16
strings can be constructed for testing that they are correctly diagnosed.

A pattern is either plain ASCII or UTF-8; a data line is always UTF-8, so that
values greater than 255 can be handled. Bytes are therefore copied unchanged
only for a non-UTF pattern, and are decoded as UTF-8 in every other case.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  pcre_uint32 c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which exists only in UTF-16 mode */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* Non-UTF pattern: widen each byte to one 16-bit unit */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1717
1718 #ifdef SUPPORT_PCRE32
1719 /*************************************************
1720 * Convert a string to 32-bit *
1721 *************************************************/
1722
1723 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1724 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1725 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1726 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1727 result is always left in buffer32.
1728
1729 Note that this function does not object to surrogate values. This is
1730 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1731 for the purpose of testing that they are correctly faulted.
1732
1733 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1734 in UTF-8 so that values greater than 255 can be handled.
1735
1736 Arguments:
1737 data TRUE if converting a data line; FALSE for a regex
1738 p points to a byte string
1739 utf true if UTF-8 (to be converted to UTF-32)
1740 len number of bytes in the string (excluding trailing zero)
1741
1742 Returns: number of 32-bit data items used (excluding trailing zero)
1743 OR -1 if a UTF-8 string is malformed
1744 OR -2 if a value > 0x10ffff is encountered
1745 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1746 */
1747
1748 static int
1749 to32(int data, pcre_uint8 *p, int utf, int len)
1750 {
1751 pcre_uint32 *pp;
1752
1753 if (buffer32_size < 4*len + 4)
1754 {
1755 if (buffer32 != NULL) free(buffer32);
1756 buffer32_size = 4*len + 4;
1757 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1758 if (buffer32 == NULL)
1759 {
1760 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1761 exit(1);
1762 }
1763 }
1764
1765 pp = buffer32;
1766
1767 if (!utf && !data)
1768 {
1769 while (len-- > 0) *pp++ = *p++;
1770 }
1771
1772 else
1773 {
1774 pcre_uint32 c = 0;
1775 while (len > 0)
1776 {
1777 int chlen = utf82ord(p, &c);
1778 if (chlen <= 0) return -1;
1779 if (utf)
1780 {
1781 if (c > 0x10ffff) return -2;
1782 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1783 }
1784
1785 p += chlen;
1786 len -= chlen;
1787 *pp++ = c;
1788 }
1789 }
1790
1791 *pp = 0;
1792 return pp - buffer32;
1793 }
1794
1795 /* Check that a 32-bit character string is valid UTF-32.
1796
1797 Arguments:
1798 string points to the string
1799 length length of string, or -1 if the string is zero-terminated
1800
1801 Returns: TRUE if the string is a valid UTF-32 string
1802 FALSE otherwise
1803 */
1804
1805 #ifdef SUPPORT_UTF
1806 static BOOL
1807 valid_utf32(pcre_uint32 *string, int length)
1808 {
1809 register pcre_uint32 *p;
1810 register pcre_uint32 c;
1811
1812 for (p = string; length-- > 0; p++)
1813 {
1814 c = *p;
1815
1816 if (c > 0x10ffffu)
1817 return FALSE;
1818
1819 /* A surrogate */
1820 if ((c & 0xfffff800u) == 0xd800u)
1821 return FALSE;
1822
1823 /* Non-character */
1824 if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1825 return FALSE;
1826 }
1827
1828 return TRUE;
1829 }
1830 #endif /* SUPPORT_UTF */
1831
1832 #endif
1833
1834 /*************************************************
1835 * Read or extend an input line *
1836 *************************************************/
1837
1838 /* Input lines are read into buffer, but both patterns and data lines can be
1839 continued over multiple input lines. In addition, if the buffer fills up, we
1840 want to automatically expand it so as to be able to handle extremely large
1841 lines that are needed for certain stress tests. When the input buffer is
1842 expanded, pbuffer must be expanded to the same size as well, and the
1843 contents of pbuffer, which are a copy of the input for callouts, must be
1844 preserved (for when expansion happens for a data line). This is not the most
1845 optimal way of handling this, but hey, this is just a test program!
1846
1847 Arguments:
1848 f the file to read
1849 start where in buffer to start (this *must* be within buffer)
1850 prompt for stdin or readline()
1851
1852 Returns: pointer to the start of new data
1853 could be a copy of start, or could be moved
1854 NULL if no data read and EOF reached
1855 */
1856
1857 static pcre_uint8 *
1858 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1859 {
1860 pcre_uint8 *here = start;
1861
1862 for (;;)
1863 {
1864 size_t rlen = (size_t)(buffer_size - (here - buffer));
1865
1866 if (rlen > 1000)
1867 {
1868 int dlen;
1869
1870 /* If libreadline or libedit support is required, use readline() to read a
1871 line if the input is a terminal. Note that readline() removes the trailing
1872 newline, so we must put it back again, to be compatible with fgets(). */
1873
1874 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1875 if (isatty(fileno(f)))
1876 {
1877 size_t len;
1878 char *s = readline(prompt);
1879 if (s == NULL) return (here == start)? NULL : start;
1880 len = strlen(s);
1881 if (len > 0) add_history(s);
1882 if (len > rlen - 1) len = rlen - 1;
1883 memcpy(here, s, len);
1884 here[len] = '\n';
1885 here[len+1] = 0;
1886 free(s);
1887 }
1888 else
1889 #endif
1890
1891 /* Read the next line by normal means, prompting if the file is stdin. */
1892
1893 {
1894 if (f == stdin) printf("%s", prompt);
1895 if (fgets((char *)here, rlen, f) == NULL)
1896 return (here == start)? NULL : start;
1897 }
1898
1899 dlen = (int)strlen((char *)here);
1900 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1901 here += dlen;
1902 }
1903
1904 else
1905 {
1906 int new_buffer_size = 2*buffer_size;
1907 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1908 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1909
1910 if (new_buffer == NULL || new_pbuffer == NULL)
1911 {
1912 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1913 exit(1);
1914 }
1915
1916 memcpy(new_buffer, buffer, buffer_size);
1917 memcpy(new_pbuffer, pbuffer, buffer_size);
1918
1919 buffer_size = new_buffer_size;
1920
1921 start = new_buffer + (start - buffer);
1922 here = new_buffer + (here - buffer);
1923
1924 free(buffer);
1925 free(pbuffer);
1926
1927 buffer = new_buffer;
1928 pbuffer = new_pbuffer;
1929 }
1930 }
1931
1932 return NULL; /* Control never gets here */
1933 }
1934
1935
1936
1937 /*************************************************
1938 * Read number from string *
1939 *************************************************/
1940
1941 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1942 around with conditional compilation, just do the job by hand. It is only used
1943 for unpicking arguments, so just keep it simple.
1944
1945 Arguments:
1946 str string to be converted
1947 endptr where to put the end pointer
1948
1949 Returns: the value, as an int
1950 */
1951
1952 static int
1953 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1954 {
1955 int result = 0;
1956 while(*str != 0 && isspace(*str)) str++;
1957 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1958 *endptr = str;
1959 return(result);
1960 }
1961
1962
1963
1964 /*************************************************
1965 * Print one character *
1966 *************************************************/
1967
1968 /* Print a single character either literally, or as a hex escape. */
1969
1970 static int pchar(pcre_uint32 c, FILE *f)
1971 {
1972 int n = 0;
1973 if (PRINTOK(c))
1974 {
1975 if (f != NULL) fprintf(f, "%c", c);
1976 return 1;
1977 }
1978
1979 if (c < 0x100)
1980 {
1981 if (use_utf)
1982 {
1983 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1984 return 6;
1985 }
1986 else
1987 {
1988 if (f != NULL) fprintf(f, "\\x%02x", c);
1989 return 4;
1990 }
1991 }
1992
1993 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1994 return n >= 0 ? n : 0;
1995 }
1996
1997
1998
1999 #ifdef SUPPORT_PCRE8
2000 /*************************************************
2001 * Print 8-bit character string *
2002 *************************************************/
2003
2004 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2005 If handed a NULL file, just counts chars without printing. */
2006
2007 static int pchars(pcre_uint8 *p, int length, FILE *f)
2008 {
2009 pcre_uint32 c = 0;
2010 int yield = 0;
2011
2012 if (length < 0)
2013 length = strlen((char *)p);
2014
2015 while (length-- > 0)
2016 {
2017 #if !defined NOUTF
2018 if (use_utf)
2019 {
2020 int rc = utf82ord(p, &c);
2021 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2022 {
2023 length -= rc - 1;
2024 p += rc;
2025 yield += pchar(c, f);
2026 continue;
2027 }
2028 }
2029 #endif
2030 c = *p++;
2031 yield += pchar(c, f);
2032 }
2033
2034 return yield;
2035 }
2036 #endif
2037
2038
2039
2040 #ifdef SUPPORT_PCRE16
2041 /*************************************************
2042 * Find length of 0-terminated 16-bit string *
2043 *************************************************/
2044
2045 static int strlen16(PCRE_SPTR16 p)
2046 {
2047 int len = 0;
2048 while (*p++ != 0) len++;
2049 return len;
2050 }
2051 #endif /* SUPPORT_PCRE16 */
2052
2053
2054
2055 #ifdef SUPPORT_PCRE32
2056 /*************************************************
2057 * Find length of 0-terminated 32-bit string *
2058 *************************************************/
2059
2060 static int strlen32(PCRE_SPTR32 p)
2061 {
2062 int len = 0;
2063 while (*p++ != 0) len++;
2064 return len;
2065 }
2066 #endif /* SUPPORT_PCRE32 */
2067
2068
2069
2070 #ifdef SUPPORT_PCRE16
2071 /*************************************************
2072 * Print 16-bit character string *
2073 *************************************************/
2074
2075 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2076 If handed a NULL file, just counts chars without printing. */
2077
2078 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2079 {
2080 int yield = 0;
2081
2082 if (length < 0)
2083 length = strlen16(p);
2084
2085 while (length-- > 0)
2086 {
2087 pcre_uint32 c = *p++ & 0xffff;
2088 #if !defined NOUTF
2089 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2090 {
2091 int d = *p & 0xffff;
2092 if (d >= 0xDC00 && d < 0xDFFF)
2093 {
2094 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2095 length--;
2096 p++;
2097 }
2098 }
2099 #endif
2100 yield += pchar(c, f);
2101 }
2102
2103 return yield;
2104 }
2105 #endif /* SUPPORT_PCRE16 */
2106
2107
2108
2109 #ifdef SUPPORT_PCRE32
2110 /*************************************************
2111 * Print 32-bit character string *
2112 *************************************************/
2113
2114 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2115 If handed a NULL file, just counts chars without printing. */
2116
2117 #define UTF32_MASK (0x1fffffu)
2118
2119 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2120 {
2121 int yield = 0;
2122
2123 if (length < 0)
2124 length = strlen32(p);
2125
2126 while (length-- > 0)
2127 {
2128 pcre_uint32 c = *p++;
2129 if (utf) c &= UTF32_MASK;
2130 yield += pchar(c, f);
2131 }
2132
2133 return yield;
2134 }
2135 #endif /* SUPPORT_PCRE32 */
2136
2137
2138
2139 #ifdef SUPPORT_PCRE8
2140 /*************************************************
2141 * Read a capture name (8-bit) and check it *
2142 *************************************************/
2143
2144 static pcre_uint8 *
2145 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2146 {
2147 pcre_uint8 *npp = *pp;
2148 while (isalnum(*p)) *npp++ = *p++;
2149 *npp++ = 0;
2150 *npp = 0;
2151 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2152 {
2153 fprintf(outfile, "no parentheses with name \"");
2154 PCHARSV(*pp, 0, -1, outfile);
2155 fprintf(outfile, "\"\n");
2156 }
2157
2158 *pp = npp;
2159 return p;
2160 }
2161 #endif /* SUPPORT_PCRE8 */
2162
2163
2164
2165 #ifdef SUPPORT_PCRE16
2166 /*************************************************
2167 * Read a capture name (16-bit) and check it *
2168 *************************************************/
2169
2170 /* Note that the text being read is 8-bit. */
2171
2172 static pcre_uint8 *
2173 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2174 {
2175 pcre_uint16 *npp = *pp;
2176 while (isalnum(*p)) *npp++ = *p++;
2177 *npp++ = 0;
2178 *npp = 0;
2179 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2180 {
2181 fprintf(outfile, "no parentheses with name \"");
2182 PCHARSV(*pp, 0, -1, outfile);
2183 fprintf(outfile, "\"\n");
2184 }
2185 *pp = npp;
2186 return p;
2187 }
2188 #endif /* SUPPORT_PCRE16 */
2189
2190
2191
2192 #ifdef SUPPORT_PCRE32
2193 /*************************************************
2194 * Read a capture name (32-bit) and check it *
2195 *************************************************/
2196
2197 /* Note that the text being read is 8-bit. */
2198
2199 static pcre_uint8 *
2200 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2201 {
2202 pcre_uint32 *npp = *pp;
2203 while (isalnum(*p)) *npp++ = *p++;
2204 *npp++ = 0;
2205 *npp = 0;
2206 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2207 {
2208 fprintf(outfile, "no parentheses with name \"");
2209 PCHARSV(*pp, 0, -1, outfile);
2210 fprintf(outfile, "\"\n");
2211 }
2212 *pp = npp;
2213 return p;
2214 }
2215 #endif /* SUPPORT_PCRE32 */
2216
2217
2218
2219 /*************************************************
2220 * Callout function *
2221 *************************************************/
2222
2223 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2224 the match. Yield zero unless more callouts than the fail count, or the callout
2225 data is not zero. */
2226
2227 static int callout(pcre_callout_block *cb)
2228 {
2229 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2230 int i, pre_start, post_start, subject_length;
2231
2232 if (callout_extra)
2233 {
2234 fprintf(f, "Callout %d: last capture = %d\n",
2235 cb->callout_number, cb->capture_last);
2236
2237 for (i = 0; i < cb->capture_top * 2; i += 2)
2238 {
2239 if (cb->offset_vector[i] < 0)
2240 fprintf(f, "%2d: <unset>\n", i/2);
2241 else
2242 {
2243 fprintf(f, "%2d: ", i/2);
2244 PCHARSV(cb->subject, cb->offset_vector[i],
2245 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2246 fprintf(f, "\n");
2247 }
2248 }
2249 }
2250
2251 /* Re-print the subject in canonical form, the first time or if giving full
2252 datails. On subsequent calls in the same match, we use pchars just to find the
2253 printed lengths of the substrings. */
2254
2255 if (f != NULL) fprintf(f, "--->");
2256
2257 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2258 PCHARS(post_start, cb->subject, cb->start_match,
2259 cb->current_position - cb->start_match, f);
2260
2261 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2262
2263 PCHARSV(cb->subject, cb->current_position,
2264 cb->subject_length - cb->current_position, f);
2265
2266 if (f != NULL) fprintf(f, "\n");
2267
2268 /* Always print appropriate indicators, with callout number if not already
2269 shown. For automatic callouts, show the pattern offset. */
2270
2271 if (cb->callout_number == 255)
2272 {
2273 fprintf(outfile, "%+3d ", cb->pattern_position);
2274 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2275 }
2276 else
2277 {
2278 if (callout_extra) fprintf(outfile, " ");
2279 else fprintf(outfile, "%3d ", cb->callout_number);
2280 }
2281
2282 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2283 fprintf(outfile, "^");
2284
2285 if (post_start > 0)
2286 {
2287 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2288 fprintf(outfile, "^");
2289 }
2290
2291 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2292 fprintf(outfile, " ");
2293
2294 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2295 pbuffer + cb->pattern_position);
2296
2297 fprintf(outfile, "\n");
2298 first_callout = 0;
2299
2300 if (cb->mark != last_callout_mark)
2301 {
2302 if (cb->mark == NULL)
2303 fprintf(outfile, "Latest Mark: <unset>\n");
2304 else
2305 {
2306 fprintf(outfile, "Latest Mark: ");
2307 PCHARSV(cb->mark, 0, -1, outfile);
2308 putc('\n', outfile);
2309 }
2310 last_callout_mark = cb->mark;
2311 }
2312
2313 if (cb->callout_data != NULL)
2314 {
2315 int callout_data = *((int *)(cb->callout_data));
2316 if (callout_data != 0)
2317 {
2318 fprintf(outfile, "Callout data = %d\n", callout_data);
2319 return callout_data;
2320 }
2321 }
2322
2323 return (cb->callout_number != callout_fail_id)? 0 :
2324 (++callout_count >= callout_fail_count)? 1 : 0;
2325 }
2326
2327
2328 /*************************************************
2329 * Local malloc functions *
2330 *************************************************/
2331
2332 /* Alternative malloc function, to test functionality and save the size of a
2333 compiled re, which is the first store request that pcre_compile() makes. The
2334 show_malloc variable is set only during matching. */
2335
2336 static void *new_malloc(size_t size)
2337 {
2338 void *block = malloc(size);
2339 gotten_store = size;
2340 if (first_gotten_store == 0) first_gotten_store = size;
2341 if (show_malloc)
2342 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2343 return block;
2344 }
2345
2346 static void new_free(void *block)
2347 {
2348 if (show_malloc)
2349 fprintf(outfile, "free %p\n", block);
2350 free(block);
2351 }
2352
2353 /* For recursion malloc/free, to test stacking calls */
2354
2355 static void *stack_malloc(size_t size)
2356 {
2357 void *block = malloc(size);
2358 if (show_malloc)
2359 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2360 return block;
2361 }
2362
2363 static void stack_free(void *block)
2364 {
2365 if (show_malloc)
2366 fprintf(outfile, "stack_free %p\n", block);
2367 free(block);
2368 }
2369
2370
2371 /*************************************************
2372 * Call pcre_fullinfo() *
2373 *************************************************/
2374
2375 /* Get one piece of information from the pcre_fullinfo() function. When only
2376 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2377 value, but the code is defensive.
2378
2379 Arguments:
2380 re compiled regex
2381 study study data
2382 option PCRE_INFO_xxx option
2383 ptr where to put the data
2384
2385 Returns: 0 when OK, < 0 on error
2386 */
2387
2388 static int
2389 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2390 {
2391 int rc;
2392
2393 if (pcre_mode == PCRE32_MODE)
2394 #ifdef SUPPORT_PCRE32
2395 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2396 #else
2397 rc = PCRE_ERROR_BADMODE;
2398 #endif
2399 else if (pcre_mode == PCRE16_MODE)
2400 #ifdef SUPPORT_PCRE16
2401 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2402 #else
2403 rc = PCRE_ERROR_BADMODE;
2404 #endif
2405 else
2406 #ifdef SUPPORT_PCRE8
2407 rc = pcre_fullinfo(re, study, option, ptr);
2408 #else
2409 rc = PCRE_ERROR_BADMODE;
2410 #endif
2411
2412 if (rc < 0)
2413 {
2414 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2415 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2416 if (rc == PCRE_ERROR_BADMODE)
2417 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2418 "%d-bit mode\n", 8 * CHAR_SIZE,
2419 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2420 }
2421
2422 return rc;
2423 }
2424
2425
2426
2427 /*************************************************
2428 * Swap byte functions *
2429 *************************************************/
2430
2431 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2432 value, respectively.
2433
2434 Arguments:
2435 value any number
2436
2437 Returns: the byte swapped value
2438 */
2439
2440 static pcre_uint32
2441 swap_uint32(pcre_uint32 value)
2442 {
2443 return ((value & 0x000000ff) << 24) |
2444 ((value & 0x0000ff00) << 8) |
2445 ((value & 0x00ff0000) >> 8) |
2446 (value >> 24);
2447 }
2448
2449 static pcre_uint16
2450 swap_uint16(pcre_uint16 value)
2451 {
2452 return (value >> 8) | (value << 8);
2453 }
2454
2455
2456
2457 /*************************************************
2458 * Flip bytes in a compiled pattern *
2459 *************************************************/
2460
2461 /* This function is called if the 'F' option was present on a pattern that is
2462 to be written to a file. We flip the bytes of all the integer fields in the
2463 regex data block and the study block. In 16-bit mode this also flips relevant
2464 bytes in the pattern itself. This is to make it possible to test PCRE's
2465 ability to reload byte-flipped patterns, e.g. those compiled on a different
2466 architecture. */
2467
2468 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2469 static void
2470 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2471 {
2472 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2473 #ifdef SUPPORT_PCRE16
2474 int op;
2475 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2476 int length = re->name_count * re->name_entry_size;
2477 #ifdef SUPPORT_UTF
2478 BOOL utf = (re->options & PCRE_UTF16) != 0;
2479 BOOL utf16_char = FALSE;
2480 #endif /* SUPPORT_UTF */
2481 #endif /* SUPPORT_PCRE16 */
2482
2483 /* Always flip the bytes in the main data block and study blocks. */
2484
2485 re->magic_number = REVERSED_MAGIC_NUMBER;
2486 re->size = swap_uint32(re->size);
2487 re->options = swap_uint32(re->options);
2488 re->flags = swap_uint16(re->flags);
2489 re->top_bracket = swap_uint16(re->top_bracket);
2490 re->top_backref = swap_uint16(re->top_backref);
2491 re->first_char = swap_uint16(re->first_char);
2492 re->req_char = swap_uint16(re->req_char);
2493 re->name_table_offset = swap_uint16(re->name_table_offset);
2494 re->name_entry_size = swap_uint16(re->name_entry_size);
2495 re->name_count = swap_uint16(re->name_count);
2496
2497 if (extra != NULL)
2498 {
2499 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2500 rsd->size = swap_uint32(rsd->size);
2501 rsd->flags = swap_uint32(rsd->flags);
2502 rsd->minlength = swap_uint32(rsd->minlength);
2503 }
2504
2505 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2506 in the name table, if present, and then in the pattern itself. */
2507
2508 #ifdef SUPPORT_PCRE16
2509 if (pcre_mode != PCRE16_MODE) return;
2510
2511 while(TRUE)
2512 {
2513 /* Swap previous characters. */
2514 while (length-- > 0)
2515 {
2516 *ptr = swap_uint16(*ptr);
2517 ptr++;
2518 }
2519 #ifdef SUPPORT_UTF
2520 if (utf16_char)
2521 {
2522 if ((ptr[-1] & 0xfc00) == 0xd800)
2523 {
2524 /* We know that there is only one extra character in UTF-16. */
2525 *ptr = swap_uint16(*ptr);
2526 ptr++;
2527 }
2528 }
2529 utf16_char = FALSE;
2530 #endif /* SUPPORT_UTF */
2531
2532 /* Get next opcode. */
2533
2534 length = 0;
2535 op = *ptr;
2536 *ptr++ = swap_uint16(op);
2537
2538 switch (op)
2539 {
2540 case OP_END:
2541 return;
2542
2543 #ifdef SUPPORT_UTF
2544 case OP_CHAR:
2545 case OP_CHARI:
2546 case OP_NOT:
2547 case OP_NOTI:
2548 case OP_STAR:
2549 case OP_MINSTAR:
2550 case OP_PLUS:
2551 case OP_MINPLUS:
2552 case OP_QUERY:
2553 case OP_MINQUERY:
2554 case OP_UPTO:
2555 case OP_MINUPTO:
2556 case OP_EXACT:
2557 case OP_POSSTAR:
2558 case OP_POSPLUS:
2559 case OP_POSQUERY:
2560 case OP_POSUPTO:
2561 case OP_STARI:
2562 case OP_MINSTARI:
2563 case OP_PLUSI:
2564 case OP_MINPLUSI:
2565 case OP_QUERYI:
2566 case OP_MINQUERYI:
2567 case OP_UPTOI:
2568 case OP_MINUPTOI:
2569 case OP_EXACTI:
2570 case OP_POSSTARI:
2571 case OP_POSPLUSI:
2572 case OP_POSQUERYI:
2573 case OP_POSUPTOI:
2574 case OP_NOTSTAR:
2575 case OP_NOTMINSTAR:
2576 case OP_NOTPLUS:
2577 case OP_NOTMINPLUS:
2578 case OP_NOTQUERY:
2579 case OP_NOTMINQUERY:
2580 case OP_NOTUPTO:
2581 case OP_NOTMINUPTO:
2582 case OP_NOTEXACT:
2583 case OP_NOTPOSSTAR:
2584 case OP_NOTPOSPLUS:
2585 case OP_NOTPOSQUERY:
2586 case OP_NOTPOSUPTO:
2587 case OP_NOTSTARI:
2588 case OP_NOTMINSTARI:
2589 case OP_NOTPLUSI:
2590 case OP_NOTMINPLUSI:
2591 case OP_NOTQUERYI:
2592 case OP_NOTMINQUERYI:
2593 case OP_NOTUPTOI:
2594 case OP_NOTMINUPTOI:
2595 case OP_NOTEXACTI:
2596 case OP_NOTPOSSTARI:
2597 case OP_NOTPOSPLUSI:
2598 case OP_NOTPOSQUERYI:
2599 case OP_NOTPOSUPTOI:
2600 if (utf) utf16_char = TRUE;
2601 #endif
2602 /* Fall through. */
2603
2604 default:
2605 length = OP_lengths16[op] - 1;
2606 break;
2607
2608 case OP_CLASS:
2609 case OP_NCLASS:
2610 /* Skip the character bit map. */
2611 ptr += 32/sizeof(pcre_uint16);
2612 length = 0;
2613 break;
2614
2615 case OP_XCLASS:
2616 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2617 if (LINK_SIZE > 1)
2618 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2619 - (1 + LINK_SIZE + 1));
2620 else
2621 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2622
2623 /* Reverse the size of the XCLASS instance. */
2624 *ptr = swap_uint16(*ptr);
2625 ptr++;
2626 if (LINK_SIZE > 1)
2627 {
2628 *ptr = swap_uint16(*ptr);
2629 ptr++;
2630 }
2631
2632 op = *ptr;
2633 *ptr = swap_uint16(op);
2634 ptr++;
2635 if ((op & XCL_MAP) != 0)
2636 {
2637 /* Skip the character bit map. */
2638 ptr += 32/sizeof(pcre_uint16);
2639 length -= 32/sizeof(pcre_uint16);
2640 }
2641 break;
2642 }
2643 }
2644 /* Control should never reach here in 16 bit mode. */
2645 #endif /* SUPPORT_PCRE16 */
2646 }
2647 #endif /* SUPPORT_PCRE[8|16] */
2648
2649
2650
2651 #if defined SUPPORT_PCRE32
2652 static void
2653 regexflip_32(pcre *ere, pcre_extra *extra)
2654 {
2655 real_pcre32 *re = (real_pcre32 *)ere;
2656 int op;
2657 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2658 int length = re->name_count * re->name_entry_size;
2659
2660 /* Always flip the bytes in the main data block and study blocks. */
2661
2662 re->magic_number = REVERSED_MAGIC_NUMBER;
2663 re->size = swap_uint32(re->size);
2664 re->options = swap_uint32(re->options);
2665 re->flags = swap_uint16(re->flags);
2666 re->top_bracket = swap_uint16(re->top_bracket);
2667 re->top_backref = swap_uint16(re->top_backref);
2668 re->first_char = swap_uint32(re->first_char);
2669 re->req_char = swap_uint32(re->req_char);
2670 re->name_table_offset = swap_uint16(re->name_table_offset);
2671 re->name_entry_size = swap_uint16(re->name_entry_size);
2672 re->name_count = swap_uint16(re->name_count);
2673
2674 if (extra != NULL)
2675 {
2676 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2677 rsd->size = swap_uint32(rsd->size);
2678 rsd->flags = swap_uint32(rsd->flags);
2679 rsd->minlength = swap_uint32(rsd->minlength);
2680 }
2681
2682 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2683 the pattern itself. */
2684
2685 while(TRUE)
2686 {
2687 /* Swap previous characters. */
2688 while (length-- > 0)
2689 {
2690 *ptr = swap_uint32(*ptr);
2691 ptr++;
2692 }
2693
2694 /* Get next opcode. */
2695
2696 length = 0;
2697 op = *ptr;
2698 *ptr++ = swap_uint32(op);
2699
2700 switch (op)
2701 {
2702 case OP_END:
2703 return;
2704
2705 default:
2706 length = OP_lengths32[op] - 1;
2707 break;
2708
2709 case OP_CLASS:
2710 case OP_NCLASS:
2711 /* Skip the character bit map. */
2712 ptr += 32/sizeof(pcre_uint32);
2713 length = 0;
2714 break;
2715
2716 case OP_XCLASS:
2717 /* LINK_SIZE can only be 1 in 32-bit mode. */
2718 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2719
2720 /* Reverse the size of the XCLASS instance. */
2721 *ptr = swap_uint32(*ptr);
2722 ptr++;
2723
2724 op = *ptr;
2725 *ptr = swap_uint32(op);
2726 ptr++;
2727 if ((op & XCL_MAP) != 0)
2728 {
2729 /* Skip the character bit map. */
2730 ptr += 32/sizeof(pcre_uint32);
2731 length -= 32/sizeof(pcre_uint32);
2732 }
2733 break;
2734 }
2735 }
2736 /* Control should never reach here in 32 bit mode. */
2737 }
2738
2739 #endif /* SUPPORT_PCRE32 */
2740
2741
2742
2743 static void
2744 regexflip(pcre *ere, pcre_extra *extra)
2745 {
2746 #if defined SUPPORT_PCRE32
2747 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2748 regexflip_32(ere, extra);
2749 #endif
2750 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2751 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2752 regexflip8_or_16(ere, extra);
2753 #endif
2754 }
2755
2756
2757
2758 /*************************************************
2759 * Check match or recursion limit *
2760 *************************************************/
2761
2762 static int
2763 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2764 int start_offset, int options, int *use_offsets, int use_size_offsets,
2765 int flag, unsigned long int *limit, int errnumber, const char *msg)
2766 {
2767 int count;
2768 int min = 0;
2769 int mid = 64;
2770 int max = -1;
2771
2772 extra->flags |= flag;
2773
2774 for (;;)
2775 {
2776 *limit = mid;
2777
2778 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2779 use_offsets, use_size_offsets);
2780
2781 if (count == errnumber)
2782 {
2783 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2784 min = mid;
2785 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2786 }
2787
2788 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2789 count == PCRE_ERROR_PARTIAL)
2790 {
2791 if (mid == min + 1)
2792 {
2793 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2794 break;
2795 }
2796 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2797 max = mid;
2798 mid = (min + mid)/2;
2799 }
2800 else break; /* Some other error */
2801 }
2802
2803 extra->flags &= ~flag;
2804 return count;
2805 }
2806
2807
2808
2809 /*************************************************
2810 * Case-independent strncmp() function *
2811 *************************************************/
2812
2813 /*
2814 Arguments:
2815 s first string
2816 t second string
2817 n number of characters to compare
2818
2819 Returns: < 0, = 0, or > 0, according to the comparison
2820 */
2821
2822 static int
2823 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2824 {
2825 while (n--)
2826 {
2827 int c = tolower(*s++) - tolower(*t++);
2828 if (c) return c;
2829 }
2830 return 0;
2831 }
2832
2833
2834
2835 /*************************************************
2836 * Check newline indicator *
2837 *************************************************/
2838
2839 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2840 a message and return 0 if there is no match.
2841
2842 Arguments:
2843 p points after the leading '<'
2844 f file for error message
2845
2846 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2847 */
2848
2849 static int
2850 check_newline(pcre_uint8 *p, FILE *f)
2851 {
2852 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2853 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2854 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2855 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2856 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2857 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2858 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2859 fprintf(f, "Unknown newline type at: <%s\n", p);
2860 return 0;
2861 }
2862
2863
2864
2865 /*************************************************
2866 * Usage function *
2867 *************************************************/
2868
/* Write the usage message to stdout. Lines that describe optional features
are included only when the corresponding macro allows it. */

static void
usage(void)
{
static const char *const text[] =
  {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use the 16-bit library\n",
#endif
#ifdef SUPPORT_PCRE32
  " -32 use the 32-bit library\n",
#endif
  " -b show compiled code\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option\n",
  " and exit with its value. The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " pcre32 32 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -s++ ditto, verifying when JIT was actually used\n",
  " -s+n force each pattern to be studied, using JIT if available,\n",
  " where 1 <= n <= 7 selects JIT options\n",
  " -s++n ditto, verifying when JIT was actually used\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
size_t i;

for (i = 0; i < sizeof(text)/sizeof(text[0]); i++)
  printf("%s", text[i]);
}
2923
2924
2925
2926 /*************************************************
2927 * Main Program *
2928 *************************************************/
2929
2930 /* Read lines from named file or stdin and write to named file or stdout; lines
2931 consist of a regular expression, in delimiters and optionally followed by
2932 options, followed by a set of test data, terminated by an empty line. */
2933
2934 int main(int argc, char **argv)
2935 {
2936 FILE *infile = stdin;
2937 const char *version;
2938 int options = 0;
2939 int study_options = 0;
2940 int default_find_match_limit = FALSE;
2941 int op = 1;
2942 int timeit = 0;
2943 int timeitm = 0;
2944 int showinfo = 0;
2945 int showstore = 0;
2946 int force_study = -1;
2947 int force_study_options = 0;
2948 int quiet = 0;
2949 int size_offsets = 45;
2950 int size_offsets_max;
2951 int *offsets = NULL;
2952 int debug = 0;
2953 int done = 0;
2954 int all_use_dfa = 0;
2955 int verify_jit = 0;
2956 int yield = 0;
2957 #ifdef SUPPORT_PCRE32
2958 int mask_utf32 = 0;
2959 #endif
2960 int stack_size;
2961 pcre_uint8 *dbuffer = NULL;
2962 size_t dbuffer_size = 1u << 14;
2963
2964 #if !defined NOPOSIX
2965 int posix = 0;
2966 #endif
2967 #if !defined NODFA
2968 int *dfa_workspace = NULL;
2969 #endif
2970
2971 pcre_jit_stack *jit_stack = NULL;
2972
2973 /* These vectors store, end-to-end, a list of zero-terminated captured
2974 substring names, each list itself being terminated by an empty name. Assume
2975 that 1024 is plenty long enough for the few names we'll be testing. It is
2976 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2977 for the actual memory, to ensure alignment. */
2978
2979 pcre_uint32 copynames[1024];
2980 pcre_uint32 getnames[1024];
2981
2982 #ifdef SUPPORT_PCRE32
2983 pcre_uint32 *cn32ptr;
2984 pcre_uint32 *gn32ptr;
2985 #endif
2986
2987 #ifdef SUPPORT_PCRE16
2988 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2989 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2990 pcre_uint16 *cn16ptr;
2991 pcre_uint16 *gn16ptr;
2992 #endif
2993
2994 #ifdef SUPPORT_PCRE8
2995 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2996 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2997 pcre_uint8 *cn8ptr;
2998 pcre_uint8 *gn8ptr;
2999 #endif
3000
3001 /* Get buffers from malloc() so that valgrind will check their misuse when
3002 debugging. They grow automatically when very long lines are read. The 16-
3003 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3004
3005 buffer = (pcre_uint8 *)malloc(buffer_size);
3006 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3007
3008 /* The outfile variable is static so that new_malloc can use it. */
3009
3010 outfile = stdout;
3011
3012 /* The following _setmode() stuff is some Windows magic that tells its runtime
3013 library to translate CRLF into a single LF character. At least, that's what
3014 I've been told: never having used Windows I take this all on trust. Originally
3015 it set 0x8000, but then I was advised that _O_BINARY was better. */
3016
3017 #if defined(_WIN32) || defined(WIN32)
3018 _setmode( _fileno( stdout ), _O_BINARY );
3019 #endif
3020
3021 /* Get the version number: pcre_version(), pcre16_version() and pcre32_version()
3022 are expected to give the same answer. We just need to call one that is available. */
3023
3024 #if defined SUPPORT_PCRE8
3025 version = pcre_version();
3026 #elif defined SUPPORT_PCRE16
3027 version = pcre16_version();
3028 #elif defined SUPPORT_PCRE32
3029 version = pcre32_version();
3030 #endif
3031
3032 /* Scan options */
3033
3034 while (argc > 1 && argv[op][0] == '-')
3035 {
3036 pcre_uint8 *endptr;
3037 char *arg = argv[op];
3038
3039 if (strcmp(arg, "-m") == 0) showstore = 1;
3040 else if (strcmp(arg, "-s") == 0) force_study = 0;
3041
3042 else if (strncmp(arg, "-s+", 3) == 0)
3043 {
3044 arg += 3;
3045 if (*arg == '+') { arg++; verify_jit = TRUE; }
3046 force_study = 1;
3047 if (*arg == 0)
3048 force_study_options = jit_study_bits[6];
3049 else if (*arg >= '1' && *arg <= '7')
3050 force_study_options = jit_study_bits[*arg - '1'];
3051 else goto BAD_ARG;
3052 }
3053 else if (strcmp(arg, "-8") == 0)
3054 {
3055 #ifdef SUPPORT_PCRE8
3056 pcre_mode = PCRE8_MODE;
3057 #else
3058 printf("** This version of PCRE was built without 8-bit support\n");
3059 exit(1);
3060 #endif
3061 }
3062 else if (strcmp(arg, "-16") == 0)
3063 {
3064 #ifdef SUPPORT_PCRE16
3065 pcre_mode = PCRE16_MODE;
3066 #else
3067 printf("** This version of PCRE was built without 16-bit support\n");
3068 exit(1);
3069 #endif
3070 }
3071 else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
3072 {
3073 #ifdef SUPPORT_PCRE32
3074 pcre_mode = PCRE32_MODE;
3075 mask_utf32 = (strcmp(arg, "-32+") == 0);
3076 #else
3077 printf("** This version of PCRE was built without 32-bit support\n");
3078 exit(1);
3079 #endif
3080 }
3081 else if (strcmp(arg, "-q") == 0) quiet = 1;
3082 else if (strcmp(arg, "-b") == 0) debug = 1;
3083 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3084 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3085 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3086 #if !defined NODFA
3087 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3088 #endif
3089 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3090 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3091 *endptr == 0))
3092 {
3093 op++;
3094 argc--;
3095 }
3096 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3097 {
3098 int both = arg[2] == 0;
3099 int temp;
3100 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3101 *endptr == 0))
3102 {
3103 timeitm = temp;
3104 op++;
3105 argc--;
3106 }
3107 else timeitm = LOOPREPEAT;
3108 if (both) timeit = timeitm;
3109 }
3110 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3111 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3112 *endptr == 0))
3113 {
3114 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3115 printf("PCRE: -S not supported on this OS\n");
3116 exit(1);
3117 #else
3118 int rc;
3119 struct rlimit rlim;
3120 getrlimit(RLIMIT_STACK, &rlim);
3121 rlim.rlim_cur = stack_size * 1024 * 1024;
3122 rc = setrlimit(RLIMIT_STACK, &rlim);
3123 if (rc != 0)
3124 {
3125 printf("PCRE: setrlimit() failed with error %d\n", rc);
3126 exit(1);
3127 }
3128 op++;
3129 argc--;
3130 #endif
3131 }
3132 #if !defined NOPOSIX
3133 else if (strcmp(arg, "-p") == 0) posix = 1;
3134 #endif
3135 else if (strcmp(arg, "-C") == 0)
3136 {
3137 int rc;
3138 unsigned long int lrc;
3139
3140 if (argc > 2)
3141 {
3142 if (strcmp(argv[op + 1], "linksize") == 0)
3143 {
3144 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3145 printf("%d\n", rc);
3146 yield = rc;
3147 }
3148 else if (strcmp(argv[op + 1], "pcre8") == 0)
3149 {
3150 #ifdef SUPPORT_PCRE8
3151 printf("1\n");
3152 yield = 1;
3153 #else
3154 printf("0\n");
3155 yield = 0;
3156 #endif
3157 }
3158 else if (strcmp(argv[op + 1], "pcre16") == 0)
3159 {
3160 #ifdef SUPPORT_PCRE16
3161 printf("1\n");
3162 yield = 1;
3163 #else
3164 printf("0\n");
3165 yield = 0;
3166 #endif
3167 }
3168 else if (strcmp(argv[op + 1], "pcre32") == 0)
3169 {
3170 #ifdef SUPPORT_PCRE32
3171 printf("1\n");
3172 yield = 1;
3173 #else
3174 printf("0\n");
3175 yield = 0;
3176 #endif
3177 goto EXIT;
3178 }
3179 if (strcmp(argv[op + 1], "utf") == 0)
3180 {
3181 #ifdef SUPPORT_PCRE8
3182 if (pcre_mode == PCRE8_MODE)
3183 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3184 #endif
3185 #ifdef SUPPORT_PCRE16
3186 if (pcre_mode == PCRE16_MODE)
3187 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3188 #endif
3189 #ifdef SUPPORT_PCRE32
3190 if (pcre_mode == PCRE32_MODE)
3191 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3192 #endif
3193 printf("%d\n", rc);
3194 yield = rc;
3195 goto EXIT;
3196 }
3197 else if (strcmp(argv[op + 1], "ucp") == 0)
3198 {
3199 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3200 printf("%d\n", rc);
3201 yield = rc;
3202 }
3203 else if (strcmp(argv[op + 1], "jit") == 0)
3204 {
3205 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3206 printf("%d\n", rc);
3207 yield = rc;
3208 }
3209 else if (strcmp(argv[op + 1], "newline") == 0)
3210 {
3211 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3212 print_newline_config(rc, TRUE);
3213 }
3214 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3215 {
3216 #ifdef EBCDIC
3217 printf("1\n");
3218 yield = 1;
3219 #else
3220 printf("0\n");
3221 #endif
3222 }
3223 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3224 {
3225 #ifdef EBCDIC
3226 printf("0x%02x\n", CHAR_LF);
3227 #else
3228 printf("0\n");
3229 #endif
3230 }
3231 else
3232 {
3233 printf("Unknown -C option: %s\n", argv[op + 1]);
3234 }
3235 goto EXIT;
3236 }
3237
3238 /* No argument for -C: output all configuration information. */
3239
3240 printf("PCRE version %s\n", version);
3241 printf("Compiled with\n");
3242
3243 #ifdef EBCDIC
3244 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3245 #endif
3246
3247 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3248 are set, either both UTFs are supported or both are not supported. */
3249
3250 #ifdef SUPPORT_PCRE8
3251 printf(" 8-bit support\n");
3252 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3253 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3254 #endif
3255 #ifdef SUPPORT_PCRE16
3256 printf(" 16-bit support\n");
3257 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3258 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3259 #endif
3260 #ifdef SUPPORT_PCRE32
3261 printf(" 32-bit support\n");
3262 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3263 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3264 #endif
3265
3266 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3267 printf(" %sUnicode properties support\n", rc? "" : "No ");
3268 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3269 if (rc)
3270 {
3271 const char *arch;
3272 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3273 printf(" Just-in-time compiler support: %s\n", arch);
3274 }
3275 else
3276 printf(" No just-in-time compiler support\n");
3277 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3278 print_newline_config(rc, FALSE);
3279 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3280 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3281 "all Unicode newlines");
3282 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3283 printf(" Internal link size = %d\n", rc);
3284 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3285 printf(" POSIX malloc threshold = %d\n", rc);
3286 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3287 printf(" Default match limit = %ld\n", lrc);
3288 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3289 printf(" Default recursion depth limit = %ld\n", lrc);
3290 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3291 printf(" Match recursion uses %s", rc? "stack" : "heap");
3292 if (showstore)
3293 {
3294 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3295 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3296 }
3297 printf("\n");
3298 goto EXIT;
3299 }
3300 else if (strcmp(arg, "-help") == 0 ||
3301 strcmp(arg, "--help") == 0)
3302 {
3303 usage();
3304 goto EXIT;
3305 }
3306 else
3307 {
3308 BAD_ARG:
3309 printf("** Unknown or malformed option %s\n", arg);
3310 usage();
3311 yield = 1;
3312 goto EXIT;
3313 }
3314 op++;
3315 argc--;
3316 }
3317
3318 /* Get the store for the offsets vector, and remember what it was */
3319
3320 size_offsets_max = size_offsets;
3321 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3322 if (offsets == NULL)
3323 {
3324 printf("** Failed to get %d bytes of memory for offsets vector\n",
3325 (int)(size_offsets_max * sizeof(int)));
3326 yield = 1;
3327 goto EXIT;
3328 }
3329
3330 /* Sort out the input and output files */
3331
3332 if (argc > 1)
3333 {
3334 infile = fopen(argv[op], INPUT_MODE);
3335 if (infile == NULL)
3336 {
3337 printf("** Failed to open %s\n", argv[op]);
3338 yield = 1;
3339 goto EXIT;
3340 }
3341 }
3342
3343 if (argc > 2)
3344 {
3345 outfile = fopen(argv[op+1], OUTPUT_MODE);
3346 if (outfile == NULL)
3347 {
3348 printf("** Failed to open %s\n", argv[op+1]);
3349 yield = 1;
3350 goto EXIT;
3351 }
3352 }
3353
3354 /* Set alternative malloc function */
3355
3356 #ifdef SUPPORT_PCRE8
3357 pcre_malloc = new_malloc;
3358 pcre_free = new_free;
3359 pcre_stack_malloc = stack_malloc;
3360 pcre_stack_free = stack_free;
3361 #endif
3362
3363 #ifdef SUPPORT_PCRE16
3364 pcre16_malloc = new_malloc;
3365 pcre16_free = new_free;
3366 pcre16_stack_malloc = stack_malloc;
3367 pcre16_stack_free = stack_free;
3368 #endif
3369
3370 #ifdef SUPPORT_PCRE32
3371 pcre32_malloc = new_malloc;
3372 pcre32_free = new_free;
3373 pcre32_stack_malloc = stack_malloc;
3374 pcre32_stack_free = stack_free;
3375 #endif
3376
3377 /* Heading line unless quiet, then prompt for first regex if stdin */
3378
3379 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3380
3381 /* Main loop */
3382
3383 while (!done)
3384 {
3385 pcre *re = NULL;
3386 pcre_extra *extra = NULL;
3387
3388 #if !defined NOPOSIX /* There are still compilers that require no indent */
3389 regex_t preg;
3390 int do_posix = 0;
3391 #endif
3392
3393 const char *error;
3394 pcre_uint8 *markptr;
3395 pcre_uint8 *p, *pp, *ppp;
3396 pcre_uint8 *to_file = NULL;
3397 const pcre_uint8 *tables = NULL;
3398 unsigned long int get_options;
3399 unsigned long int true_size, true_study_size = 0;
3400 size_t size, regex_gotten_store;
3401 int do_allcaps = 0;
3402 int do_mark = 0;
3403 int do_study = 0;
3404 int no_force_study = 0;
3405 int do_debug = debug;
3406 int do_G = 0;
3407 int do_g = 0;
3408 int do_showinfo = showinfo;
3409 int do_showrest = 0;
3410 int do_showcaprest = 0;
3411 int do_flip = 0;
3412 int erroroffset, len, delimiter, poffset;
3413
3414 #if !defined NODFA
3415 int dfa_matched = 0;
3416 #endif
3417
3418 use_utf = 0;
3419 debug_lengths = 1;
3420
3421 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3422 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3423 fflush(outfile);
3424
3425 p = buffer;
3426 while (isspace(*p)) p++;
3427 if (*p == 0) continue;
3428
3429 /* See if the pattern is to be loaded pre-compiled from a file. */
3430
3431 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3432 {
3433 pcre_uint32 magic;
3434 pcre_uint8 sbuf[8];
3435 FILE *f;
3436
3437 p++;
3438 if (*p == '!')
3439 {
3440 do_debug = TRUE;
3441 do_showinfo = TRUE;
3442 p++;
3443 }
3444
3445 pp = p + (int)strlen((char *)p);
3446 while (isspace(pp[-1])) pp--;
3447 *pp = 0;
3448
3449 f = fopen((char *)p, "rb");
3450 if (f == NULL)
3451 {
3452 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3453 continue;
3454 }
3455
3456 first_gotten_store = 0;
3457 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3458
3459 true_size =
3460 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3461 true_study_size =
3462 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3463
3464 re = (pcre *)new_malloc(true_size);
3465 if (re == NULL)
3466 {
3467 printf("** Failed to get %d bytes of memory for pcre object\n",
3468 (int)true_size);
3469 yield = 1;
3470 goto EXIT;
3471 }
3472 regex_gotten_store = first_gotten_store;
3473
3474 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3475
3476 magic = REAL_PCRE_MAGIC(re);
3477 if (magic != MAGIC_NUMBER)
3478 {
3479 if (swap_uint32(magic) == MAGIC_NUMBER)
3480 {
3481 do_flip = 1;
3482 }
3483 else
3484 {
3485 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3486 new_free(re);
3487 fclose(f);
3488 continue;
3489 }
3490 }
3491
3492 /* We hide the byte-invert info for little and big endian tests. */
3493 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3494 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3495
3496 /* Now see if there is any following study data. */
3497
3498 if (true_study_size != 0)
3499 {
3500 pcre_study_data *psd;
3501
3502 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3503 extra->flags = PCRE_EXTRA_STUDY_DATA;
3504
3505 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3506 extra->study_data = psd;
3507
3508 if (fread(psd, 1, true_study_size, f) != true_study_size)
3509 {
3510 FAIL_READ:
3511 fprintf(outfile, "Failed to read data from %s\n", p);
3512 if (extra != NULL)
3513 {
3514 PCRE_FREE_STUDY(extra);
3515 }
3516 new_free(re);
3517 fclose(f);
3518 continue;
3519 }
3520 fprintf(outfile, "Study data loaded from %s\n", p);
3521 do_study = 1; /* To get the data output if requested */
3522 }
3523 else fprintf(outfile, "No study data\n");
3524
3525 /* Flip the necessary bytes. */
3526 if (do_flip)
3527 {
3528 int rc;
3529 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3530 if (rc == PCRE_ERROR_BADMODE)
3531 {
3532 /* Simulate the result of the function call below. */
3533 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3534 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3535 PCRE_INFO_OPTIONS);
3536 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3537 "%d-bit mode\n", 8 * CHAR_SIZE,
3538 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3539 new_free(re);
3540 fclose(f);
3541 continue;
3542 }
3543 }
3544
3545 /* Need to know if UTF-8 for printing data strings. */
3546
3547 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3548 {
3549 new_free(re);
3550 fclose(f);
3551 continue;
3552 }
3553 use_utf = (get_options & PCRE_UTF8) != 0;
3554
3555 fclose(f);
3556 goto SHOW_INFO;
3557 }
3558
3559 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3560 the pattern; if it isn't complete, read more. */
3561
3562 delimiter = *p++;
3563
3564 if (isalnum(delimiter) || delimiter == '\\')
3565 {
3566 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3567 goto SKIP_DATA;
3568 }
3569
3570 pp = p;
3571 poffset = (int)(p - buffer);
3572
3573 for(;;)
3574 {
3575 while (*pp != 0)
3576 {
3577 if (*pp == '\\' && pp[1] != 0) pp++;
3578 else if (*pp == delimiter) break;
3579 pp++;
3580 }
3581 if (*pp != 0) break;
3582 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3583 {
3584 fprintf(outfile, "** Unexpected EOF\n");
3585 done = 1;
3586 goto CONTINUE;
3587 }
3588 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3589 }
3590
3591 /* The buffer may have moved while being extended; reset the start of data
3592 pointer to the correct relative point in the buffer. */
3593
3594 p = buffer + poffset;
3595
3596 /* If the first character after the delimiter is backslash, make
3597 the pattern end with backslash. This is purely to provide a way
3598 of testing for the error message when a pattern ends with backslash. */
3599
3600 if (pp[1] == '\\') *pp++ = '\\';
3601
3602 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3603 for callouts. */
3604
3605 *pp++ = 0;
3606 strcpy((char *)pbuffer, (char *)p);
3607
3608 /* Look for options after final delimiter */
3609
3610 options = 0;
3611 study_options = force_study_options;
3612 log_store = showstore; /* default from command line */
3613
3614 while (*pp != 0)
3615 {
3616 switch (*pp++)
3617 {
3618 case 'f': options |= PCRE_FIRSTLINE; break;
3619 case 'g': do_g = 1; break;
3620 case 'i': options |= PCRE_CASELESS; break;
3621 case 'm': options |= PCRE_MULTILINE; break;
3622 case 's': options |= PCRE_DOTALL; break;
3623 case 'x': options |= PCRE_EXTENDED; break;
3624
3625 case '+':
3626 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3627 break;
3628
3629 case '=': do_allcaps = 1; break;
3630 case 'A': options |= PCRE_ANCHORED; break;
3631 case 'B': do_debug = 1; break;
3632 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3633 case 'D': do_debug = do_showinfo = 1; break;
3634 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3635 case 'F': do_flip = 1; break;
3636 case 'G': do_G = 1; break;
3637 case 'I': do_showinfo = 1; break;
3638 case 'J': options |= PCRE_DUPNAMES; break;
3639 case 'K': do_mark = 1; break;
3640 case 'M': log_store = 1; break;
3641 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3642
3643 #if !defined NOPOSIX
3644 case 'P': do_posix = 1; break;
3645 #endif
3646
3647 case 'S':
3648 do_study = 1;
3649 for (;;)
3650 {
3651 switch (*pp++)
3652 {
3653 case 'S':
3654 do_study = 0;
3655 no_force_study = 1;
3656 break;
3657
3658 case '!':
3659 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3660 break;
3661
3662 case '+':
3663 if (*pp == '+')
3664 {
3665 verify_jit = TRUE;
3666 pp++;
3667 }
3668 if (*pp >= '1' && *pp <= '7')
3669 study_options |= jit_study_bits[*pp++ - '1'];
3670 else
3671 study_options |= jit_study_bits[6];
3672 break;
3673
3674 case '-':
3675 study_options &= ~PCRE_STUDY_ALLJIT;
3676 break;
3677
3678 default:
3679 pp--;
3680 goto ENDLOOP;
3681 }
3682 }
3683 ENDLOOP:
3684 break;
3685
3686 case 'U': options |= PCRE_UNGREEDY; break;
3687 case 'W': options |= PCRE_UCP; break;
3688 case 'X': options |= PCRE_EXTRA; break;
3689 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3690 case 'Z': debug_lengths = 0; break;
3691 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3692 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3693
3694 case 'T':
3695 switch (*pp++)
3696 {
3697 case '0': tables = tables0; break;
3698 case '1': tables = tables1; break;
3699
3700 case '\r':
3701 case '\n':
3702 case ' ':
3703 case 0:
3704 fprintf(outfile, "** Missing table number after /T\n");
3705 goto SKIP_DATA;
3706
3707 default:
3708 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3709 goto SKIP_DATA;
3710 }
3711 break;
3712
3713 case 'L':
3714 ppp = pp;
3715 /* The '\r' test here is so that it works on Windows. */
3716 /* The test for 0 (NUL) is just in case this is an unterminated line. */
3717 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3718 *ppp = 0;
3719 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3720 {
3721 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3722 goto SKIP_DATA;
3723 }
3724 locale_set = 1;
3725 tables = PCRE_MAKETABLES;
3726 pp = ppp;
3727 break;
3728
3729 case '>':
3730 to_file = pp;
3731 while (*pp != 0) pp++;
3732 while (isspace(pp[-1])) pp--;
3733 *pp = 0;
3734 break;
3735
3736 case '<':
3737 {
3738 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3739 {
3740 options |= PCRE_JAVASCRIPT_COMPAT;
3741 pp += 3;
3742 }
3743 else
3744 {
3745 int x = check_newline(pp, outfile);
3746 if (x == 0) goto SKIP_DATA;
3747 options |= x;
3748 while (*pp++ != '>');
3749 }
3750 }
3751 break;
3752
3753 case '\r': /* So that it works in Windows */
3754 case '\n':
3755 case ' ':
3756 break;
3757
3758 default:
3759 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3760 goto SKIP_DATA;
3761 }
3762 }
3763
3764 /* Handle compiling via the POSIX interface, which doesn't support the
3765 timing, showing, or debugging options, nor the ability to pass over
3766 local character tables. Neither does it have 16-bit support. */
3767
3768 #if !defined NOPOSIX
3769 if (posix || do_posix)
3770 {
3771 int rc;
3772 int cflags = 0;
3773
3774 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3775 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3776 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3777 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3778 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3779 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3780 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3781
3782 first_gotten_store = 0;
3783 rc = regcomp(&preg, (char *)p, cflags);
3784
3785 /* Compilation failed; go back for another re, skipping to blank line
3786 if non-interactive. */
3787
3788 if (rc != 0)
3789 {
3790 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3791 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3792 goto SKIP_DATA;
3793 }
3794 }
3795
3796 /* Handle compiling via the native interface */
3797
3798 else
3799 #endif /* !defined NOPOSIX */
3800
3801 {
3802 /* In 16- or 32-bit mode, convert the input. */
3803
3804 #ifdef SUPPORT_PCRE16
3805 if (pcre_mode == PCRE16_MODE)
3806 {
3807 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3808 {
3809 case -1:
3810 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3811 "converted to UTF-16\n");
3812 goto SKIP_DATA;
3813
3814 case -2:
3815 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3816 "cannot be converted to UTF-16\n");
3817 goto SKIP_DATA;
3818
3819 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3820 fprintf(outfile, "**Failed: character value greater than 0xffff "
3821 "cannot be converted to 16-bit in non-UTF mode\n");
3822 goto SKIP_DATA;
3823
3824 default:
3825 break;
3826 }
3827 p = (pcre_uint8 *)buffer16;
3828 }
3829 #endif
3830
3831 #ifdef SUPPORT_PCRE32
3832 if (pcre_mode == PCRE32_MODE)
3833 {
3834 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3835 {
3836 case -1:
3837 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3838 "converted to UTF-32\n");
3839 goto SKIP_DATA;
3840
3841 case -2:
3842 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3843 "cannot be converted to UTF-32\n");
3844 goto SKIP_DATA;
3845
3846 case -3:
3847 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3848 goto SKIP_DATA;
3849
3850 default:
3851 break;
3852 }
3853 p = (pcre_uint8 *)buffer32;
3854 }
3855 #endif
3856
3857 /* Compile many times when timing */
3858
3859 if (timeit > 0)
3860 {
3861 register int i;
3862 clock_t time_taken;
3863 clock_t start_time = clock();
3864 for (i = 0; i < timeit; i++)
3865 {
3866 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3867 if (re != NULL) free(re);
3868 }
3869 time_taken = clock() - start_time;
3870 fprintf(outfile, "Compile time %.4f milliseconds\n",
3871 (((double)time_taken * 1000.0) / (double)timeit) /
3872 (double)CLOCKS_PER_SEC);
3873 }
3874
3875 first_gotten_store = 0;
3876 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3877
3878 /* Compilation failed; go back for another re, skipping to blank line
3879 if non-interactive. */
3880
3881 if (re == NULL)
3882 {
3883 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3884 SKIP_DATA:
3885 if (infile != stdin)
3886 {
3887 for (;;)
3888 {
3889 if (extend_inputline(infile, buffer, NULL) == NULL)
3890 {
3891 done = 1;
3892 goto CONTINUE;
3893 }
3894 len = (int)strlen((char *)buffer);
3895 while (len > 0 && isspace(buffer[len-1])) len--;
3896 if (len == 0) break;
3897 }
3898 fprintf(outfile, "\n");
3899 }
3900 goto CONTINUE;
3901 }
3902
3903 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3904 within the regex; check for this so that we know how to process the data
3905 lines. */
3906
3907 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3908 goto SKIP_DATA;
3909 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3910
3911 /* Extract the size for possible writing before possibly flipping it,
3912 and remember the store that was got. */
3913
3914 true_size = REAL_PCRE_SIZE(re);
3915 regex_gotten_store = first_gotten_store;
3916
3917 /* Output code size information if requested */
3918
3919 if (log_store)
3920 {
3921 int name_count, name_entry_size, real_pcre_size;
3922
3923 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3924 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3925 real_pcre_size = 0;
3926 #ifdef SUPPORT_PCRE8
3927 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3928 real_pcre_size = sizeof(real_pcre);
3929 #endif
3930 #ifdef SUPPORT_PCRE16
3931 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3932 real_pcre_size = sizeof(real_pcre16);
3933 #endif
3934 #ifdef SUPPORT_PCRE32
3935 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3936 real_pcre_size = sizeof(real_pcre32);
3937 #endif
3938 fprintf(outfile, "Memory allocation (code space): %d\n",
3939 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3940 }
3941
3942 /* If -s or /S was present, study the regex to generate additional info to
3943 help with the matching, unless the pattern has the SS option, which
3944 suppresses the effect of /S (used for a few test patterns where studying is
3945 never sensible). */
3946
3947 if (do_study || (force_study >= 0 && !no_force_study))
3948 {
3949 if (timeit > 0)
3950 {
3951 register int i;
3952 clock_t time_taken;
3953 clock_t start_time = clock();
3954 for (i = 0; i < timeit; i++)
3955 {
3956 PCRE_STUDY(extra, re, study_options, &error);
3957 }
3958 time_taken = clock() - start_time;
3959 if (extra != NULL)
3960 {
3961 PCRE_FREE_STUDY(extra);
3962 }
3963 fprintf(outfile, " Study time %.4f milliseconds\n",
3964 (((double)time_taken * 1000.0) / (double)timeit) /
3965 (double)CLOCKS_PER_SEC);
3966 }
3967 PCRE_STUDY(extra, re, study_options, &error);
3968 if (error != NULL)
3969 fprintf(outfile, "Failed to study: %s\n", error);
3970 else if (extra != NULL)
3971 {
3972 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3973 if (log_store)
3974 {
3975 size_t jitsize;
3976 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3977 jitsize != 0)
3978 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3979 }
3980 }
3981 }
3982
3983 /* If /K was present, we set up for handling MARK data. */
3984
3985 if (do_mark)
3986 {
3987 if (extra == NULL)
3988 {
3989 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3990 extra->flags = 0;
3991 }
3992 extra->mark = &markptr;
3993 extra->flags |= PCRE_EXTRA_MARK;
3994 }
3995
3996 /* Extract and display information from the compiled data if required. */
3997
3998 SHOW_INFO:
3999
4000 if (do_debug)
4001 {
4002 fprintf(outfile, "------------------------------------------------------------------\n");
4003 PCRE_PRINTINT(re, outfile, debug_lengths);
4004 }
4005
4006 /* We already have the options in get_options (see above) */
4007
4008 if (do_showinfo)
4009 {
4010 unsigned long int all_options;
4011 pcre_uint32 first_char, need_char;
4012 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4013 hascrorlf, maxlookbehind;
4014 int nameentrysize, namecount;
4015 const pcre_uint8 *nametable;
4016
4017 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4018 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4019 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4020 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4021 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4022 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4023 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4024 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4025 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4026 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4027 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4028 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4029 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4030 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4031 != 0)
4032 goto SKIP_DATA;
4033
4034 if (size != regex_gotten_store) fprintf(outfile,
4035 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4036 (int)size, (int)regex_gotten_store);
4037
4038 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4039 if (backrefmax > 0)
4040 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4041
4042 if (namecount > 0)
4043 {
4044 fprintf(outfile, "Named capturing subpatterns:\n");
4045 while (namecount-- > 0)
4046 {
4047 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4048 int length = (int)STRLEN(nametable + imm2_size);
4049 fprintf(outfile, " ");
4050 PCHARSV(nametable, imm2_size, length, outfile);
4051 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4052 #ifdef SUPPORT_PCRE32
4053 if (pcre_mode == PCRE32_MODE)
4054 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4055 #endif
4056 #ifdef SUPPORT_PCRE16
4057 if (pcre_mode == PCRE16_MODE)
4058 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4059 #endif
4060 #ifdef SUPPORT_PCRE8
4061 if (pcre_mode == PCRE8_MODE)
4062 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4063 #endif
4064 nametable += nameentrysize * CHAR_SIZE;
4065 }
4066 }
4067
4068 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4069 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4070
4071 all_options = REAL_PCRE_OPTIONS(re);
4072 if (do_flip) all_options = swap_uint32(all_options);
4073
4074 if (get_options == 0) fprintf(outfile, "No options\n");
4075 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4076 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4077 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4078 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4079 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4080 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4081 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4082 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4083 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4084 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4085 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4086 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4087 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4088 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4089 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4090 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4091 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4092 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4093
4094 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4095
4096 switch (get_options & PCRE_NEWLINE_BITS)
4097 {
4098 case PCRE_NEWLINE_CR:
4099 fprintf(outfile, "Forced newline sequence: CR\n");
4100 break;
4101
4102 case PCRE_NEWLINE_LF:
4103 fprintf(outfile, "Forced newline sequence: LF\n");
4104 break;
4105
4106 case PCRE_NEWLINE_CRLF:
4107 fprintf(outfile, "Forced newline sequence: CRLF\n");
4108 break;
4109
4110 case PCRE_NEWLINE_ANYCRLF:
4111 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4112 break;
4113
4114 case PCRE_NEWLINE_ANY:
4115 fprintf(outfile, "Forced newline sequence: ANY\n");
4116 break;
4117
4118 default:
4119 break;
4120 }
4121
4122 if (first_char_set == 2)
4123 {
4124 fprintf(outfile, "First char at start or follows newline\n");
4125 }
4126 else if (first_char_set == 1)
4127 {
4128 const char *caseless =
4129 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4130 "" : " (caseless)";
4131
4132 if (PRINTOK(first_char))
4133 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4134 else
4135 {
4136 fprintf(outfile, "First char = ");
4137 pchar(first_char, outfile);
4138 fprintf(outfile, "%s\n", caseless);
4139 }
4140 }
4141 else
4142 {
4143 fprintf(outfile, "No first char\n");
4144 }
4145
4146 if (need_char_set == 0)
4147 {
4148 fprintf(outfile, "No need char\n");
4149 }
4150 else
4151 {
4152 const char *caseless =
4153 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4154 "" : " (caseless)";
4155
4156 if (PRINTOK(need_char))
4157 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4158 else
4159 {
4160 fprintf(outfile, "Need char = ");
4161 pchar(need_char, outfile);
4162 fprintf(outfile, "%s\n", caseless);
4163 }
4164 }
4165
4166 if (maxlookbehind > 0)
4167 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4168
4169 /* Don't output study size; at present it is in any case a fixed
4170 value, but it varies, depending on the computer architecture, and
4171 so messes up the test suite. (And with the /F option, it might be
4172 flipped.) If study was forced by an external -s, don't show this
4173 information unless -i or -d was also present. This means that, except
4174 when auto-callouts are involved, the output from runs with and without
4175 -s should be identical. */
4176
4177 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4178 {
4179 if (extra == NULL)
4180 fprintf(outfile, "Study returned NULL\n");
4181 else
4182 {
4183 pcre_uint8 *start_bits = NULL;
4184 int minlength;
4185
4186 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4187 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4188
4189 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4190 {
4191 if (start_bits == NULL)
4192 fprintf(outfile, "No set of starting bytes\n");
4193 else
4194 {
4195 int i;
4196 int c = 24;
4197 fprintf(outfile, "Starting byte set: ");
4198 for (i = 0; i < 256; i++)
4199 {
4200 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4201 {
4202 if (c > 75)
4203 {
4204 fprintf(outfile, "\n ");
4205 c = 2;
4206 }
4207 if (PRINTOK(i) && i != ' ')
4208 {
4209 fprintf(outfile, "%c ", i);
4210 c += 2;
4211 }
4212 else
4213 {
4214 fprintf(outfile, "\\x%02x ", i);
4215 c += 5;
4216 }
4217 }
4218 }
4219 fprintf(outfile, "\n");
4220 }
4221 }
4222 }
4223
4224 /* Show this only if the JIT was set by /S, not by -s. */
4225
4226 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4227 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4228 {
4229 int jit;
4230 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4231 {
4232 if (jit)
4233 fprintf(outfile, "JIT study was successful\n");
4234 else
4235 #ifdef SUPPORT_JIT
4236 fprintf(outfile, "JIT study was not successful\n");
4237 #else
4238 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4239 #endif
4240 }
4241 }
4242 }
4243 }
4244
4245 /* If the '>' option was present, we write out the regex to a file, and
4246 that is all. The first 8 bytes of the file are the regex length and then
4247 the study length, in big-endian order. */
4248
4249 if (to_file != NULL)
4250 {
4251 FILE *f = fopen((char *)to_file, "wb");
4252 if (f == NULL)
4253 {
4254 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4255 }
4256 else
4257 {
4258 pcre_uint8 sbuf[8];
4259
4260 if (do_flip) regexflip(re, extra);
4261 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4262 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4263 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4264 sbuf[3] = (pcre_uint8)((true_size) & 255);
4265 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4266 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4267 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4268 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4269
4270 if (fwrite(sbuf, 1, 8, f) < 8 ||
4271 fwrite(re, 1, true_size, f) < true_size)
4272 {
4273 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4274 }
4275 else
4276 {
4277 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4278
4279 /* If there is study data, write it. */
4280
4281 if (extra != NULL)
4282 {
4283 if (fwrite(extra->study_data, 1, true_study_size, f) <
4284 true_study_size)
4285 {
4286 fprintf(outfile, "Write error on %s: %s\n", to_file,
4287 strerror(errno));
4288 }
4289 else fprintf(outfile, "Study data written to %s\n", to_file);
4290 }
4291 }
4292 fclose(f);
4293 }
4294
4295 new_free(re);
4296 if (extra != NULL)
4297 {
4298 PCRE_FREE_STUDY(extra);
4299 }
4300 if (locale_set)
4301 {
4302 new_free((void *)tables);
4303 setlocale(LC_CTYPE, "C");
4304 locale_set = 0;
4305 }
4306 continue; /* With next regex */
4307 }
4308 } /* End of non-POSIX compile */
4309
4310 /* Read data lines and test them */
4311
4312 for (;;)
4313 {
4314 #ifdef SUPPORT_PCRE8
4315 pcre_uint8 *q8;
4316 #endif
4317 #ifdef SUPPORT_PCRE16
4318 pcre_uint16 *q16;
4319 #endif
4320 #ifdef SUPPORT_PCRE32
4321 pcre_uint32 *q32;
4322 #endif
4323 pcre_uint8 *bptr;
4324 int *use_offsets = offsets;
4325 int use_size_offsets = size_offsets;
4326 int callout_data = 0;
4327 int callout_data_set = 0;
4328 int count;
4329 pcre_uint32 c;
4330 int copystrings = 0;
4331 int find_match_limit = default_find_match_limit;
4332 int getstrings = 0;
4333 int getlist = 0;
4334 int gmatched = 0;
4335 int start_offset = 0;
4336 int start_offset_sign = 1;
4337 int g_notempty = 0;
4338 int use_dfa = 0;
4339
4340 *copynames = 0;
4341 *getnames = 0;
4342
4343 #ifdef SUPPORT_PCRE32
4344 cn32ptr = copynames;
4345 gn32ptr = getnames;
4346 #endif
4347 #ifdef SUPPORT_PCRE16
4348 cn16ptr = copynames16;
4349 gn16ptr = getnames16;
4350 #endif
4351 #ifdef SUPPORT_PCRE8
4352 cn8ptr = copynames8;
4353 gn8ptr = getnames8;
4354 #endif
4355
4356 SET_PCRE_CALLOUT(callout);
4357 first_callout = 1;
4358 last_callout_mark = NULL;
4359 callout_extra = 0;
4360 callout_count = 0;
4361 callout_fail_count = 999999;
4362 callout_fail_id = -1;
4363 show_malloc = 0;
4364 options = 0;
4365
4366 if (extra != NULL) extra->flags &=
4367 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4368
4369 len = 0;
4370 for (;;)
4371 {
4372 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4373 {
4374 if (len > 0) /* Reached EOF without hitting a newline */
4375 {
4376 fprintf(outfile, "\n");
4377 break;
4378 }
4379 done = 1;
4380 goto CONTINUE;
4381 }
4382 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4383 len = (int)strlen((char *)buffer);
4384 if (buffer[len-1] == '\n') break;
4385 }
4386
4387 while (len > 0 && isspace(buffer[len-1])) len--;
4388 buffer[len] = 0;
4389 if (len == 0) break;
4390
4391 p = buffer;
4392 while (isspace(*p)) p++;
4393
4394 #ifndef NOUTF
4395 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4396 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4397 if (use_utf)
4398 {
4399 pcre_uint8 *q;
4400 pcre_uint32 cc;
4401 int n = 1;
4402
4403 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4404 if (n <= 0)
4405 {
4406 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4407 goto NEXT_DATA;
4408 }
4409 }
4410 #endif
4411
4412 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4413 the number of pcre_uchar units that will be needed. */
4414 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4415 {
4416 dbuffer_size *= 2;
4417 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4418 if (dbuffer == NULL)
4419 {
4420 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4421 exit(1);
4422 }
4423 }
4424
4425 #ifdef SUPPORT_PCRE8
4426 q8 = (pcre_uint8 *) dbuffer;
4427 #endif
4428 #ifdef SUPPORT_PCRE16
4429 q16 = (pcre_uint16 *) dbuffer;
4430 #endif
4431 #ifdef SUPPORT_PCRE32
4432 q32 = (pcre_uint32 *) dbuffer;
4433 #endif
4434
4435 while ((c = *p++) != 0)
4436 {
4437 int i = 0;
4438 int n = 0;
4439
4440 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4441 In non-UTF mode, allow the value of the byte to fall through to later,
4442 where values greater than 127 are turned into UTF-8 when running in
4443 16-bit or 32-bit mode. */
4444
4445 if (c != '\\')
4446 {
4447 #ifndef NOUTF
4448 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4449 #endif
4450 }
4451
4452 /* Handle backslash escapes */
4453
4454 else switch ((c = *p++))
4455 {
4456 case 'a': c = 7; break;
4457 case 'b': c = '\b'; break;
4458 case 'e': c = 27; break;
4459 case 'f': c = '\f'; break;
4460 case 'n': c = '\n'; break;
4461 case 'r': c = '\r'; break;
4462 case 't': c = '\t'; break;
4463 case 'v': c = '\v'; break;
4464
4465 case '0': case '1': case '2': case '3':
4466 case '4': case '5': case '6': case '7':
4467 c -= '0';
4468 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4469 c = c * 8 + *p++ - '0';
4470 break;
4471
4472 case 'x':
4473 if (*p == '{')
4474 {
4475 pcre_uint8 *pt = p;
4476 c = 0;
4477
4478 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4479 when isxdigit() is a macro that refers to its argument more than
4480 once. This is banned by the C Standard, but apparently happens in at
4481 least one MacOS environment. */
4482
4483 for (pt++; isxdigit(*pt); pt++)
4484 {
4485 if (++i == 9)
4486 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4487 "using only the first eight.\n");
4488 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4489 }
4490 if (*pt == '}')
4491 {
4492 p = pt + 1;
4493 break;
4494 }
4495 /* Not correct form for \x{...}; fall through */
4496 }
4497
4498 /* \x without {} always defines just one byte in 8-bit mode. This
4499 allows UTF-8 characters to be constructed byte by byte, and also allows
4500 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4501 Otherwise, pass it down to later code so that it can be turned into
4502 UTF-8 when running in 16/32-bit mode. */
4503
4504 c = 0;
4505 while (i++ < 2 && isxdigit(*p))
4506 {
4507 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4508 p++;
4509 }
4510 #if !defined NOUTF && defined SUPPORT_PCRE8
4511 if (use_utf && (pcre_mode == PCRE8_MODE))
4512 {
4513 *q8++ = c;
4514 continue;
4515 }
4516 #endif
4517 break;
4518
4519 case 0: /* \ followed by EOF allows for an empty line */
4520 p--;
4521 continue;
4522
4523 case '>':
4524 if (*p == '-')
4525 {
4526 start_offset_sign = -1;
4527 p++;
4528 }
4529 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4530 start_offset *= start_offset_sign;
4531 continue;
4532
4533 case 'A': /* Option setting */
4534 options |= PCRE_ANCHORED;
4535 continue;
4536
4537 case 'B':
4538 options |= PCRE_NOTBOL;
4539 continue;
4540
4541 case 'C':
4542 if (isdigit(*p)) /* Set copy string */
4543 {
4544 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4545 copystrings |= 1 << n;
4546 }
4547 else if (isalnum(*p))
4548 {
4549 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4550 }
4551 else if (*p == '+')
4552 {
4553 callout_extra = 1;
4554 p++;
4555 }
4556 else if (*p == '-')
4557 {
4558 SET_PCRE_CALLOUT(NULL);
4559 p++;
4560 }
4561 else if (*p == '!')
4562 {
4563 callout_fail_id = 0;
4564 p++;
4565 while(isdigit(*p))
4566 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4567 callout_fail_count = 0;
4568 if (*p == '!')
4569 {
4570 p++;
4571 while(isdigit(*p))
4572 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4573 }
4574 }
4575 else if (*p == '*')
4576 {
4577 int sign = 1;
4578 callout_data = 0;
4579 if (*(++p) == '-') { sign = -1; p++; }
4580 while(isdigit(*p))
4581 callout_data = callout_data * 10 + *p++ - '0';
4582 callout_data *= sign;
4583 callout_data_set = 1;
4584 }
4585 continue;
4586
4587 #if !defined NODFA
4588 case 'D':
4589 #if !defined NOPOSIX
4590 if (posix || do_posix)
4591 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4592 else
4593 #endif
4594 use_dfa = 1;
4595 continue;
4596 #endif
4597
4598 #if !defined NODFA
4599 case 'F':
4600 options |= PCRE_DFA_SHORTEST;
4601 continue;
4602 #endif
4603
4604 case 'G':
4605 if (isdigit(*p))
4606 {
4607 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4608 getstrings |= 1 << n;
4609 }
4610 else if (isalnum(*p))
4611 {
4612 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4613 }
4614 continue;
4615
4616 case 'J':
4617 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4618 if (extra != NULL
4619 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4620 && extra->executable_jit != NULL)
4621 {
4622 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4623 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4624 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4625 }
4626 continue;
4627
4628 case 'L':
4629 getlist = 1;
4630 continue;
4631
4632 case 'M':
4633 find_match_limit = 1;
4634 continue;
4635
4636 case 'N':
4637 if ((options & PCRE_NOTEMPTY) != 0)
4638 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4639 else
4640 options |= PCRE_NOTEMPTY;
4641 continue;
4642
4643 case 'O':
4644 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4645 if (n > size_offsets_max)
4646 {
4647 size_offsets_max = n;
4648 free(offsets);
4649 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4650 if (offsets == NULL)
4651 {
4652 printf("** Failed to get %d bytes of memory for offsets vector\n",
4653 (int)(size_offsets_max * sizeof(int)));
4654 yield = 1;
4655 goto EXIT;
4656 }
4657 }
4658 use_size_offsets = n;
4659 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4660 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4661 continue;
4662
4663 case 'P':
4664 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4665 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4666 continue;
4667
4668 case 'Q':
4669 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4670 if (extra == NULL)
4671 {
4672 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4673 extra->flags = 0;
4674 }
4675 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4676 extra->match_limit_recursion = n;
4677 continue;
4678
4679 case 'q':
4680 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4681 if (extra == NULL)
4682 {
4683 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4684 extra->flags = 0;
4685 }
4686 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4687 extra->match_limit = n;
4688 continue;
4689
4690 #if !defined NODFA
4691 case 'R':
4692 options |= PCRE_DFA_RESTART;
4693 continue;
4694 #endif
4695
4696 case 'S':
4697 show_malloc = 1;
4698 continue;
4699
4700 case 'Y':
4701 options |= PCRE_NO_START_OPTIMIZE;
4702 continue;
4703
4704 case 'Z':
4705 options |= PCRE_NOTEOL;
4706 continue;
4707
4708 case '?':
4709 options |= PCRE_NO_UTF8_CHECK;
4710 continue;
4711
4712 case '<':
4713 {
4714 int x = check_newline(p, outfile);
4715 if (x == 0) goto NEXT_DATA;
4716 options |= x;
4717 while (*p++ != '>');
4718 }
4719 continue;
4720 }
4721
4722 /* We now have a character value in c that may be greater than 255.
4723 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4724 than 127 in UTF mode must have come from \x{...} or octal constructs
4725 because values from \x.. get this far only in non-UTF mode. */
4726
4727 #ifdef SUPPORT_PCRE8
4728 if (pcre_mode == PCRE8_MODE)
4729 {
4730 #ifndef NOUTF
4731 if (use_utf)
4732 {
4733 q8 += ord2utf8(c, q8);
4734 }
4735 else
4736 #endif
4737 {
4738 if (c > 0xffu)
4739 {
4740 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4741 "and UTF-8 mode is not enabled.\n", c);
4742 fprintf(outfile, "** Truncation will probably give the wrong "
4743 "result.\n");
4744 }
4745 *q8++ = c;
4746 }
4747 }
4748 #endif
4749 #ifdef SUPPORT_PCRE16
4750 if (pcre_mode == PCRE16_MODE)
4751 {
4752 #ifndef NOUTF
4753 if (use_utf)
4754 {
4755 if (c > 0x10ffffu)
4756 {
4757 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4758 "cannot be converted to UTF-16\n");
4759 goto NEXT_DATA;
4760 }
4761 else if (c >= 0x10000u)
4762 {
4763 c-= 0x10000u;
4764 *q16++ = 0xD800 | (c >> 10);
4765 *q16++ = 0xDC00 | (c & 0x3ff);
4766 }
4767 else
4768 *q16++ = c;
4769 }
4770 else
4771 #endif
4772 {
4773 if (c > 0xffffu)
4774 {
4775 fprintf(outfile, "** Character value is greater than 0xffff "
4776 "and UTF-16 mode is not enabled.\n");
4777 fprintf(outfile, "** Truncation will probably give the wrong "
4778 "result.\n");
4779 }
4780
4781 *q16++ = c;
4782 }
4783 }
4784 #endif
4785 #ifdef SUPPORT_PCRE32
4786 if (pcre_mode == PCRE32_MODE)
4787 {
4788 *q32++ = c;
4789 }
4790 #endif
4791
4792 }
4793
4794 /* Reached end of subject string */
4795
4796 #ifdef SUPPORT_PCRE8
4797 if (pcre_mode == PCRE8_MODE)
4798 {
4799 *q8 = 0;
4800 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4801 }
4802 #endif
4803 #ifdef SUPPORT_PCRE16
4804 if (pcre_mode == PCRE16_MODE)
4805 {
4806 *q16 = 0;
4807 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4808 }
4809 #endif
4810 #ifdef SUPPORT_PCRE32
4811 if (pcre_mode == PCRE32_MODE)
4812 {
4813 *q32 = 0;
4814 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4815 }
4816 #endif
4817
4818 #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
4819 /* If we're requested to test UTF-32 masking of high bits, change the data
4820 string to have high bits set, unless the string is invalid UTF-32.
4821 Since the JIT doesn't support this yet, only do it when not JITing. */
4822 if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
4823 valid_utf32((pcre_uint32 *)dbuffer, len))
4824 {
4825 for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
4826 *q32 |= ~(pcre_uint32)UTF32_MASK;
4827
4828 /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
4829 options |= PCRE_NO_UTF32_CHECK;
4830 }
4831 #endif
4832
4833 /* Move the data to the end of the buffer so that a read over the end of
4834 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4835 we are using the POSIX interface, we must include the terminating zero. */
4836
4837 bptr = dbuffer;
4838
4839 #if !defined NOPOSIX
4840 if (posix || do_posix)
4841 {
4842 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4843 bptr += dbuffer_size - len - 1;
4844 }
4845 else
4846 #endif
4847 {
4848 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4849 }
4850
4851 if ((all_use_dfa || use_dfa) && find_match_limit)
4852 {
4853 printf("**Match limit not relevant for DFA matching: ignored\n");
4854 find_match_limit = 0;
4855 }
4856
4857 /* Handle matching via the POSIX interface, which does not
4858 support timing or playing with the match limit or callout data. */
4859
4860 #if !defined NOPOSIX
4861 if (posix || do_posix)
4862 {
4863 int rc;
4864 int eflags = 0;
4865 regmatch_t *pmatch = NULL;
4866 if (use_size_offsets > 0)
4867 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4868 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4869 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4870 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4871
4872 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4873
4874 if (rc != 0)
4875 {
4876 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4877 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4878 }
4879 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4880 {
4881 fprintf(outfile, "Matched with REG_NOSUB\n");
4882 }
4883 else
4884 {
4885 size_t i;
4886 for (i = 0; i < (size_t)use_size_offsets; i++)
4887 {
4888 if (pmatch[i].rm_so >= 0)
4889 {
4890 fprintf(outfile, "%2d: ", (int)i);
4891 PCHARSV(dbuffer, pmatch[i].rm_so,
4892 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4893 fprintf(outfile, "\n");
4894 if (do_showcaprest || (i == 0 && do_showrest))
4895 {
4896 fprintf(outfile, "%2d+ ", (int)i);
4897 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4898 outfile);
4899 fprintf(outfile, "\n");
4900 }
4901 }
4902 }
4903 }
4904 free(pmatch);
4905 goto NEXT_DATA;
4906 }
4907
4908 #endif /* !defined NOPOSIX */
4909
4910 /* Handle matching via the native interface - repeats for /g and /G */
4911
4912 /* Ensure that there is a JIT callback if we want to verify that JIT was
4913 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4914
4915 if (verify_jit && jit_stack == NULL && extra != NULL)
4916 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4917
4918 for (;; gmatched++) /* Loop for /g or /G */
4919 {
4920 markptr = NULL;
4921 jit_was_used = FALSE;
4922
4923 if (timeitm > 0)
4924 {
4925 register int i;
4926 clock_t time_taken;
4927 clock_t start_time = clock();
4928
4929 #if !defined NODFA
4930 if (all_use_dfa || use_dfa)
4931 {
4932 if ((options & PCRE_DFA_RESTART) != 0)
4933 {
4934 fprintf(outfile, "Timing DFA restarts is not supported\n");
4935 break;
4936 }
4937 if (dfa_workspace == NULL)
4938 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4939 for (i = 0; i < timeitm; i++)
4940 {
4941 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4942 (options | g_notempty), use_offsets, use_size_offsets,
4943 dfa_workspace, DFA_WS_DIMENSION);
4944 }
4945 }
4946 else
4947 #endif
4948
4949 for (i = 0; i < timeitm; i++)
4950 {
4951 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4952 (options | g_notempty), use_offsets, use_size_offsets);
4953 }
4954 time_taken = clock() - start_time;
4955 fprintf(outfile, "Execute time %.4f milliseconds\n",
4956 (((double)time_taken * 1000.0) / (double)timeitm) /
4957 (double)CLOCKS_PER_SEC);
4958 }
4959
4960 /* If find_match_limit is set, we want to do repeated matches with
4961 varying limits in order to find the minimum value for the match limit and
4962 for the recursion limit. The match limits are relevant only to the normal
4963 running of pcre_exec(), so disable the JIT optimization. This makes it
4964 possible to run the same set of tests with and without JIT externally
4965 requested. */
4966
4967 if (find_match_limit)
4968 {
4969 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4970 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4971 extra->flags = 0;
4972
4973 (void)check_match_limit(re, extra, bptr, len, start_offset,
4974 options|g_notempty, use_offsets, use_size_offsets,
4975 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4976 PCRE_ERROR_MATCHLIMIT, "match()");
4977
4978 count = check_match_limit(re, extra, bptr, len, start_offset,
4979 options|g_notempty, use_offsets, use_size_offsets,
4980 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4981 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4982 }
4983
4984 /* If callout_data is set, use the interface with additional data */
4985
4986 else if (callout_data_set)
4987 {
4988 if (extra == NULL)
4989 {
4990 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4991 extra->flags = 0;
4992 }
4993 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4994 extra->callout_data = &callout_data;
4995 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4996 options | g_notempty, use_offsets, use_size_offsets);
4997 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4998 }
4999
5000 /* The normal case is just to do the match once, with the default
5001 value of match_limit. */
5002
5003 #if !defined NODFA
5004 else if (all_use_dfa || use_dfa)
5005 {
5006 if (dfa_workspace == NULL)
5007 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5008 if (dfa_matched++ == 0)
5009 dfa_workspace[0] = -1; /* To catch bad restart */
5010 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5011 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5012 DFA_WS_DIMENSION);
5013 if (count == 0)
5014 {
5015 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5016 count = use_size_offsets/2;
5017 }
5018 }
5019 #endif
5020
5021 else
5022 {
5023 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5024 options | g_notempty, use_offsets, use_size_offsets);
5025 if (count == 0)
5026 {
5027 fprintf(outfile, "Matched, but too many substrings\n");
5028 count = use_size_offsets/3;
5029 }
5030 }
5031
5032 /* Matched */
5033
5034 if (count >= 0)
5035 {
5036 int i, maxcount;
5037 void *cnptr, *gnptr;
5038
5039 #if !defined NODFA
5040 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5041 #endif
5042 maxcount = use_size_offsets/3;
5043
5044 /* This is a check against a lunatic return value. */
5045
5046 if (count > maxcount)
5047 {
5048 fprintf(outfile,
5049 "** PCRE error: returned count %d is too big for offset size %d\n",
5050 count, use_size_offsets);
5051 count = use_size_offsets/3;
5052 if (do_g || do_G)
5053 {
5054 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5055 do_g = do_G = FALSE; /* Break g/G loop */
5056 }
5057 }
5058
5059 /* do_allcaps requests showing of all captures in the pattern, to check
5060 unset ones at the end. */
5061
5062 if (do_allcaps)
5063 {
5064 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5065 goto SKIP_DATA;
5066 count++; /* Allow for full match */
5067 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5068 }
5069
5070 /* Output the captured substrings */
5071
5072 for (i = 0; i < count * 2; i += 2)
5073 {
5074 if (use_offsets[i] < 0)
5075 {
5076 if (use_offsets[i] != -1)
5077 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5078 use_offsets[i], i);
5079 if (use_offsets[i+1] != -1)
5080 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5081 use_offsets[i+1], i+1);
5082 fprintf(outfile, "%2d: <unset>\n", i/2);
5083 }
5084 else
5085 {
5086 fprintf(outfile, "%2d: ", i/2);
5087 PCHARSV(bptr, use_offsets[i],
5088 use_offsets[i+1] - use_offsets[i], outfile);
5089 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5090 fprintf(outfile, "\n");
5091 if (do_showcaprest || (i == 0 && do_showrest))
5092 {
5093 fprintf(outfile, "%2d+ ", i/2);
5094 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5095 outfile);
5096 fprintf(outfile, "\n");
5097 }
5098 }
5099 }
5100
5101 if (markptr != NULL)
5102 {
5103 fprintf(outfile, "MK: ");
5104 PCHARSV(markptr, 0, -1, outfile);
5105 fprintf(outfile, "\n");
5106 }
5107
5108 for (i = 0; i < 32; i++)
5109 {
5110 if ((copystrings & (1 << i)) != 0)
5111 {
5112 int rc;
5113 char copybuffer[256];
5114 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5115 copybuffer, sizeof(copybuffer));
5116 if (rc < 0)
5117 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5118 else
5119 {
5120 fprintf(outfile, "%2dC ", i);
5121 PCHARSV(copybuffer, 0, rc, outfile);
5122 fprintf(outfile, " (%d)\n", rc);
5123 }
5124 }
5125 }
5126
5127 cnptr = copynames;
5128 for (;;)
5129 {
5130 int rc;
5131 char copybuffer[256];
5132
5133 #ifdef SUPPORT_PCRE32
5134 if (pcre_mode == PCRE32_MODE)
5135 {
5136 if (*(pcre_uint32 *)cnptr == 0) break;
5137 }
5138 #endif
5139 #ifdef SUPPORT_PCRE16
5140 if (pcre_mode == PCRE16_MODE)
5141 {
5142 if (*(pcre_uint16 *)cnptr == 0) break;
5143 }
5144 #endif
5145 #ifdef SUPPORT_PCRE8
5146 if (pcre_mode == PCRE8_MODE)
5147 {
5148 if (*(pcre_uint8 *)cnptr == 0) break;
5149 }
5150 #endif
5151
5152 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5153 cnptr, copybuffer, sizeof(copybuffer));
5154
5155 if (rc < 0)
5156 {
5157 fprintf(outfile, "copy substring ");
5158 PCHARSV(cnptr, 0, -1, outfile);
5159 fprintf(outfile, " failed %d\n", rc);
5160 }
5161 else
5162 {
5163 fprintf(outfile, " C ");
5164 PCHARSV(copybuffer, 0, rc, outfile);
5165 fprintf(outfile, " (%d) ", rc);
5166 PCHARSV(cnptr, 0, -1, outfile);
5167 putc('\n', outfile);
5168 }
5169
5170 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5171 }
5172
5173 for (i = 0; i < 32; i++)
5174 {
5175 if ((getstrings & (1 << i)) != 0)
5176 {
5177 int rc;
5178 const char *substring;
5179 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5180 if (rc < 0)
5181 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5182 else
5183 {
5184 fprintf(outfile, "%2dG ", i);
5185 PCHARSV(substring, 0, rc, outfile);
5186 fprintf(outfile, " (%d)\n", rc);
5187 PCRE_FREE_SUBSTRING(substring);
5188 }
5189 }
5190 }
5191
5192 gnptr = getnames;
5193 for (;;)
5194 {
5195 int rc;
5196 const char *substring;
5197
5198 #ifdef SUPPORT_PCRE32
5199 if (pcre_mode == PCRE32_MODE)
5200 {
5201 if (*(pcre_uint32 *)gnptr == 0) break;
5202 }
5203 #endif
5204 #ifdef SUPPORT_PCRE16
5205 if (pcre_mode == PCRE16_MODE)
5206 {
5207 if (*(pcre_uint16 *)gnptr == 0) break;
5208 }
5209 #endif
5210 #ifdef SUPPORT_PCRE8
5211 if (pcre_mode == PCRE8_MODE)
5212 {
5213 if (*(pcre_uint8 *)gnptr == 0) break;
5214 }
5215 #endif
5216
5217 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5218 gnptr, &substring);
5219 if (rc < 0)
5220 {
5221 fprintf(outfile, "get substring ");
5222 PCHARSV(gnptr, 0, -1, outfile);
5223 fprintf(outfile, " failed %d\n", rc);
5224 }
5225 else
5226 {
5227 fprintf(outfile, " G ");
5228 PCHARSV(substring, 0, rc, outfile);
5229 fprintf(outfile, " (%d) ", rc);
5230 PCHARSV(gnptr, 0, -1, outfile);
5231 PCRE_FREE_SUBSTRING(substring);
5232 putc('\n', outfile);
5233 }
5234
5235 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5236 }
5237
5238 if (getlist)
5239 {
5240 int rc;
5241 const char **stringlist;
5242 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5243 if (rc < 0)
5244 fprintf(outfile, "get substring list failed %d\n", rc);
5245 else
5246 {
5247 for (i = 0; i < count; i++)
5248 {
5249 fprintf(outfile, "%2dL ", i);
5250 PCHARSV(stringlist[i], 0, -1, outfile);
5251 putc('\n', outfile);
5252 }
5253 if (stringlist[i] != NULL)
5254 fprintf(outfile, "string list not terminated by NULL\n");
5255 PCRE_FREE_SUBSTRING_LIST(stringlist);
5256 }
5257 }
5258 }
5259
5260 /* There was a partial match */
5261
5262 else if (count == PCRE_ERROR_PARTIAL)
5263 {
5264 if (markptr == NULL) fprintf(outfile, "Partial match");
5265 else
5266 {
5267 fprintf(outfile, "Partial match, mark=");
5268 PCHARSV(markptr, 0, -1, outfile);
5269 }
5270 if (use_size_offsets > 1)
5271 {
5272 fprintf(outfile, ": ");
5273 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5274 outfile);
5275 }
5276 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5277 fprintf(outfile, "\n");
5278 break; /* Out of the /g loop */
5279 }
5280
5281 /* Failed to match. If this is a /g or /G loop and we previously set
5282 g_notempty after a null match, this is not necessarily the end. We want
5283 to advance the start offset, and continue. We won't be at the end of the
5284 string - that was checked before setting g_notempty.
5285
5286 Complication arises in the case when the newline convention is "any",
5287 "crlf", or "anycrlf". If the previous match was at the end of a line
5288 terminated by CRLF, an advance of one character just passes the \r,
5289 whereas we should prefer the longer newline sequence, as does the code in
5290 pcre_exec(). Fudge the offset value to achieve this. We check for a
5291 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5292 find the default.
5293
5294 Otherwise, in the case of UTF-8 matching, the advance must be one
5295 character, not one byte. */
5296
5297 else
5298 {
5299 if (g_notempty != 0)
5300 {
5301 int onechar = 1;
5302 unsigned int obits = REAL_PCRE_OPTIONS(re);
5303 use_offsets[0] = start_offset;
5304 if ((obits & PCRE_NEWLINE_BITS) == 0)
5305 {
5306 int d;
5307 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5308 /* Note that these values are always the ASCII ones, even in
5309 EBCDIC environments. CR = 13, NL = 10. */
5310 obits = (d == 13)? PCRE_NEWLINE_CR :
5311 (d == 10)? PCRE_NEWLINE_LF :
5312 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5313 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5314 (d == -1)? PCRE_NEWLINE_ANY : 0;
5315 }
5316 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5317 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5318 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5319 &&
5320 start_offset < len - 1 && (
5321 #ifdef SUPPORT_PCRE8
5322 (pcre_mode == PCRE8_MODE &&
5323 bptr[start_offset] == '\r' &&
5324 bptr[start_offset + 1] == '\n') ||
5325 #endif
5326 #ifdef SUPPORT_PCRE16
5327 (pcre_mode == PCRE16_MODE &&
5328 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5329 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5330 #endif
5331 #ifdef SUPPORT_PCRE32
5332 (pcre_mode == PCRE32_MODE &&
5333 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5334 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5335 #endif
5336 0))
5337 onechar++;
5338 else if (use_utf)
5339 {
5340 while (start_offset + onechar < len)
5341 {
5342 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5343 onechar++;
5344 }
5345 }
5346 use_offsets[1] = start_offset + onechar;
5347 }
5348 else
5349 {
5350 switch(count)
5351 {
5352 case PCRE_ERROR_NOMATCH:
5353 if (gmatched == 0)
5354 {
5355 if (markptr == NULL)
5356 {
5357 fprintf(outfile, "No match");
5358 }
5359 else
5360 {
5361 fprintf(outfile, "No match, mark = ");
5362 PCHARSV(markptr, 0, -1, outfile);
5363 }
5364 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5365 putc('\n', outfile);
5366 }
5367 break;
5368
5369 case PCRE_ERROR_BADUTF8:
5370 case PCRE_ERROR_SHORTUTF8:
5371 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5372 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5373 8 * CHAR_SIZE);
5374 if (use_size_offsets >= 2)
5375 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5376 use_offsets[1]);
5377 fprintf(outfile, "\n");
5378 break;
5379
5380 case PCRE_ERROR_BADUTF8_OFFSET:
5381 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5382 8 * CHAR_SIZE);
5383 break;
5384
5385 default:
5386 if (count < 0 &&
5387 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5388 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5389 else
5390 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5391 break;
5392 }
5393
5394 break; /* Out of the /g loop */
5395 }
5396 }
5397
5398 /* If not /g or /G we are done */
5399
5400 if (!do_g && !do_G) break;
5401
5402 /* If we have matched an empty string, first check to see if we are at
5403 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5404 Perl's /g option does. This turns out to be rather cunning. First we set
5405 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5406 same point. If this fails (picked up above) we advance to the next
5407 character. */
5408
5409 g_notempty = 0;
5410
5411 if (use_offsets[0] == use_offsets[1])
5412 {
5413 if (use_offsets[0] == len) break;
5414 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5415 }
5416
5417 /* For /g, update the start offset, leaving the rest alone */
5418
5419 if (do_g) start_offset = use_offsets[1];
5420
5421 /* For /G, update the pointer and length */
5422
5423 else
5424 {
5425 bptr += use_offsets[1] * CHAR_SIZE;
5426 len -= use_offsets[1];
5427 }
5428 } /* End of loop for /g and /G */
5429
5430 NEXT_DATA: continue;
5431 } /* End of loop for data lines */
5432
5433 CONTINUE:
5434
5435 #if !defined NOPOSIX
5436 if (posix || do_posix) regfree(&preg);
5437 #endif
5438
5439 if (re != NULL) new_free(re);
5440 if (extra != NULL)
5441 {
5442 PCRE_FREE_STUDY(extra);
5443 }
5444 if (locale_set)
5445 {
5446 new_free((void *)tables);
5447 setlocale(LC_CTYPE, "C");
5448 locale_set = 0;
5449 }
5450 if (jit_stack != NULL)
5451 {
5452 PCRE_JIT_STACK_FREE(jit_stack);
5453 jit_stack = NULL;
5454 }
5455 }
5456
5457 if (infile == stdin) fprintf(outfile, "\n");
5458
5459 EXIT:
5460
5461 if (infile != NULL && infile != stdin) fclose(infile);
5462 if (outfile != NULL && outfile != stdout) fclose(outfile);
5463
5464 free(buffer);
5465 free(dbuffer);
5466 free(pbuffer);
5467 free(offsets);
5468
5469 #ifdef SUPPORT_PCRE16
5470 if (buffer16 != NULL) free(buffer16);
5471 #endif
5472 #ifdef SUPPORT_PCRE32
5473 if (buffer32 != NULL) free(buffer32);
5474 #endif
5475
5476 #if !defined NODFA
5477 if (dfa_workspace != NULL)
5478 free(dfa_workspace);
5479 #endif
5480
5481 return yield;
5482 }
5483
5484 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5