/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1142 - (show annotations)
Fri Oct 19 15:45:43 2012 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 163394 byte(s)
Fix bugs in pcretest when different combinations of 8-, 16-, and 32-bit 
libraries were compiled. For example, test 2 segfaulted when only 16- and 
32-bit libraries were compiled.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136 #include "pcre_internal.h"
137
138 /* The pcre_printint() function, which prints the internal form of a compiled
139 regex, is held in a separate file so that (a) it can be compiled in either
140 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
141 when that is compiled in debug mode. */
142
143 #ifdef SUPPORT_PCRE8
144 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
145 #endif
146 #ifdef SUPPORT_PCRE16
147 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
148 #endif
149 #ifdef SUPPORT_PCRE32
150 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152
153 /* We need access to some of the data tables that PCRE uses. So as not to have
154 to keep two copies, we include the source files here, changing the names of the
155 external symbols to prevent clashes. */
156
157 #define PCRE_INCLUDED
158
159 #include "pcre_tables.c"
160 #include "pcre_ucd.c"
161
162 /* The definition of the macro PRINTABLE, which determines whether to print an
163 output character as-is or as a hex value when showing compiled patterns, is
164 the same as in the printint.src file. We use it here in cases when the locale
165 has not been explicitly changed, so as to get consistent output from systems
166 that differ in their output from isprint() even in the "C" locale. */
167
168 #ifdef EBCDIC
169 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
170 #else
171 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
172 #endif
173
174 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
175
176 /* Posix support is disabled in 16 or 32 bit only mode. */
177 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
178 #define NOPOSIX
179 #endif
180
181 /* It is possible to compile this test program without including support for
182 testing the POSIX interface, though this is not available via the standard
183 Makefile. */
184
185 #if !defined NOPOSIX
186 #include "pcreposix.h"
187 #endif
188
189 /* It is also possible, originally for the benefit of a version that was
190 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
191 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
192 automatically cut out the UTF support if PCRE is built without it. */
193
194 #ifndef SUPPORT_UTF
195 #ifndef NOUTF
196 #define NOUTF
197 #endif
198 #endif
199
200 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
201 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
202 only from one place and is handled differently). I couldn't dream up any way of
203 using a single macro to do this in a generic way, because of the many different
204 argument requirements. We know that at least one of SUPPORT_PCRE8,
205 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
206 individual mode; then use these in the definitions of generic macros.
207
208 **** Special note about the PCHARSxxx macros: the address of the string to be
209 printed is always given as two arguments: a base address followed by an offset.
210 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
211 offset is in units of this size. If the string were given as base+offset in one
212 argument, the casting might be incorrectly applied. */
213
214 #ifdef SUPPORT_PCRE8
215
216 #define PCHARS8(lv, p, offset, len, f) \
217 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
218
219 #define PCHARSV8(p, offset, len, f) \
220 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
221
222 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
223 p = read_capture_name8(p, cn8, re)
224
225 #define STRLEN8(p) ((int)strlen((char *)p))
226
227 #define SET_PCRE_CALLOUT8(callout) \
228 pcre_callout = callout
229
230 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
231 pcre_assign_jit_stack(extra, callback, userdata)
232
233 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
234 re = pcre_compile((char *)pat, options, error, erroffset, tables)
235
236 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
237 namesptr, cbuffer, size) \
238 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
239 (char *)namesptr, cbuffer, size)
240
241 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
242 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
243
244 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
245 offsets, size_offsets, workspace, size_workspace) \
246 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
247 offsets, size_offsets, workspace, size_workspace)
248
249 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
250 offsets, size_offsets) \
251 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
252 offsets, size_offsets)
253
254 #define PCRE_FREE_STUDY8(extra) \
255 pcre_free_study(extra)
256
257 #define PCRE_FREE_SUBSTRING8(substring) \
258 pcre_free_substring(substring)
259
260 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
261 pcre_free_substring_list(listptr)
262
263 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
264 getnamesptr, subsptr) \
265 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
266 (char *)getnamesptr, subsptr)
267
268 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
269 n = pcre_get_stringnumber(re, (char *)ptr)
270
271 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
272 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
273
274 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
275 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
276
277 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
278 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
279
280 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
281 pcre_printint(re, outfile, debug_lengths)
282
283 #define PCRE_STUDY8(extra, re, options, error) \
284 extra = pcre_study(re, options, error)
285
286 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
287 pcre_jit_stack_alloc(startsize, maxsize)
288
289 #define PCRE_JIT_STACK_FREE8(stack) \
290 pcre_jit_stack_free(stack)
291
292 #define pcre8_maketables pcre_maketables
293
294 #endif /* SUPPORT_PCRE8 */
295
296 /* -----------------------------------------------------------*/
297
298 #ifdef SUPPORT_PCRE16
299
300 #define PCHARS16(lv, p, offset, len, f) \
301 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302
303 #define PCHARSV16(p, offset, len, f) \
304 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305
306 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
307 p = read_capture_name16(p, cn16, re)
308
309 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310
311 #define SET_PCRE_CALLOUT16(callout) \
312 pcre16_callout = (int (*)(pcre16_callout_block *))callout
313
314 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315 pcre16_assign_jit_stack((pcre16_extra *)extra, \
316 (pcre16_jit_callback)callback, userdata)
317
318 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320 tables)
321
322 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323 namesptr, cbuffer, size) \
324 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
326
327 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329 (PCRE_UCHAR16 *)cbuffer, size/2)
330
331 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332 offsets, size_offsets, workspace, size_workspace) \
333 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335 workspace, size_workspace)
336
337 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338 offsets, size_offsets) \
339 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340 len, start_offset, options, offsets, size_offsets)
341
342 #define PCRE_FREE_STUDY16(extra) \
343 pcre16_free_study((pcre16_extra *)extra)
344
345 #define PCRE_FREE_SUBSTRING16(substring) \
346 pcre16_free_substring((PCRE_SPTR16)substring)
347
348 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350
351 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352 getnamesptr, subsptr) \
353 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355
/* Call pcre16_get_stringnumber() for the name at `ptr` and store its result
in `n`.

NOTE(review): the second parameter is spelled `rc` and does not appear in the
expansion; the compiled pattern is taken from a variable named `re` that must
be in scope wherever the macro is used. That is kept as-is for existing
callers. `re` is cast to (pcre16 *) in the same way as in the other 16-bit
wrappers that take a compiled pattern (e.g. PCRE_EXEC16), so the call is
type-correct when `re` is declared as a plain `pcre *`. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
358
359 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361 (PCRE_SPTR16 *)(void*)subsptr)
362
363 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365 (PCRE_SPTR16 **)(void*)listptr)
366
367 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369 tables)
370
371 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372 pcre16_printint(re, outfile, debug_lengths)
373
374 #define PCRE_STUDY16(extra, re, options, error) \
375 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376
377 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379
380 #define PCRE_JIT_STACK_FREE16(stack) \
381 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382
383 #endif /* SUPPORT_PCRE16 */
384
385 /* -----------------------------------------------------------*/
386
387 #ifdef SUPPORT_PCRE32
388
389 #define PCHARS32(lv, p, offset, len, f) \
390 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
391
392 #define PCHARSV32(p, offset, len, f) \
393 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
394
395 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396 p = read_capture_name32(p, cn32, re)
397
398 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
399
400 #define SET_PCRE_CALLOUT32(callout) \
401 pcre32_callout = (int (*)(pcre32_callout_block *))callout
402
403 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
404 pcre32_assign_jit_stack((pcre32_extra *)extra, \
405 (pcre32_jit_callback)callback, userdata)
406
407 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
408 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
409 tables)
410
/* Call pcre32_copy_named_substring() for the group named by `namesptr`,
copying into `cbuffer`, and store the library's return value in `rc`.

The buffer capacity handed to the library is size/4. The 8-bit wrapper
forwards `size` unchanged and the 16-bit wrapper forwards size/2, so `size` is
presumably a byte count (NOTE(review): confirm against the callers); the
32-bit function receives a PCRE_UCHAR32 buffer, which holds a quarter as many
elements as it has bytes. Passing size/2 here would overstate the capacity by
a factor of two. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
415
/* Call pcre32_copy_substring() for captured substring number `i`, copying
into `cbuffer`, and store the library's return value in `rc`.

The buffer capacity handed to the library is size/4. The 8-bit wrapper
forwards `size` unchanged and the 16-bit wrapper forwards size/2, so `size` is
presumably a byte count (NOTE(review): confirm against the callers); a
PCRE_UCHAR32 buffer of that many bytes holds size/4 elements. Passing size/2
would let the library write past the end of the buffer. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
419
420 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
421 offsets, size_offsets, workspace, size_workspace) \
422 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
423 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
424 workspace, size_workspace)
425
426 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427 offsets, size_offsets) \
428 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
429 len, start_offset, options, offsets, size_offsets)
430
431 #define PCRE_FREE_STUDY32(extra) \
432 pcre32_free_study((pcre32_extra *)extra)
433
434 #define PCRE_FREE_SUBSTRING32(substring) \
435 pcre32_free_substring((PCRE_SPTR32)substring)
436
437 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
438 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
439
440 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
441 getnamesptr, subsptr) \
442 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
443 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
444
/* Call pcre32_get_stringnumber() for the name at `ptr` and store its result
in `n`.

NOTE(review): the second parameter is spelled `rc` and does not appear in the
expansion; the compiled pattern is taken from a variable named `re` that must
be in scope wherever the macro is used. That is kept as-is for existing
callers. `re` is cast to (pcre32 *) in the same way as in the other 32-bit
wrappers that take a compiled pattern (e.g. PCRE_EXEC32), so the call is
type-correct when `re` is declared as a plain `pcre *`. */

#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
447
448 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
449 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
450 (PCRE_SPTR32 *)(void*)subsptr)
451
452 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
453 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
454 (PCRE_SPTR32 **)(void*)listptr)
455
456 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
457 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
458 tables)
459
460 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
461 pcre32_printint(re, outfile, debug_lengths)
462
463 #define PCRE_STUDY32(extra, re, options, error) \
464 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
465
466 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
467 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
468
469 #define PCRE_JIT_STACK_FREE32(stack) \
470 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
471
472 #endif /* SUPPORT_PCRE32 */
473
474
475 /* ----- More than one mode is supported; a runtime test is needed, except for
476 pcre_config(), and the JIT stack functions, when it doesn't matter which
477 available version is called. ----- */
478
479 enum {
480 PCRE8_MODE,
481 PCRE16_MODE,
482 PCRE32_MODE
483 };
484
485 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486 defined (SUPPORT_PCRE32)) >= 2
487
488 #define CHAR_SIZE (1 << pcre_mode)
489
490 /* There doesn't seem to be an easy way of writing these macros that can cope
491 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492 cases separately. */
493
494 /* ----- All three modes supported ----- */
495
496 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
497
498 #define PCHARS(lv, p, offset, len, f) \
499 if (pcre_mode == PCRE32_MODE) \
500 PCHARS32(lv, p, offset, len, f); \
501 else if (pcre_mode == PCRE16_MODE) \
502 PCHARS16(lv, p, offset, len, f); \
503 else \
504 PCHARS8(lv, p, offset, len, f)
505
506 #define PCHARSV(p, offset, len, f) \
507 if (pcre_mode == PCRE32_MODE) \
508 PCHARSV32(p, offset, len, f); \
509 else if (pcre_mode == PCRE16_MODE) \
510 PCHARSV16(p, offset, len, f); \
511 else \
512 PCHARSV8(p, offset, len, f)
513
514 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
515 if (pcre_mode == PCRE32_MODE) \
516 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
517 else if (pcre_mode == PCRE16_MODE) \
518 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
519 else \
520 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
521
522 #define SET_PCRE_CALLOUT(callout) \
523 if (pcre_mode == PCRE32_MODE) \
524 SET_PCRE_CALLOUT32(callout); \
525 else if (pcre_mode == PCRE16_MODE) \
526 SET_PCRE_CALLOUT16(callout); \
527 else \
528 SET_PCRE_CALLOUT8(callout)
529
530 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
531
532 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
533 if (pcre_mode == PCRE32_MODE) \
534 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
535 else if (pcre_mode == PCRE16_MODE) \
536 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
537 else \
538 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
539
540 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
541 if (pcre_mode == PCRE32_MODE) \
542 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
543 else if (pcre_mode == PCRE16_MODE) \
544 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
545 else \
546 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
547
548 #define PCRE_CONFIG pcre_config
549
550 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
551 namesptr, cbuffer, size) \
552 if (pcre_mode == PCRE32_MODE) \
553 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size); \
555 else if (pcre_mode == PCRE16_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else \
559 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size)
561
562 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
563 if (pcre_mode == PCRE32_MODE) \
564 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
565 else if (pcre_mode == PCRE16_MODE) \
566 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
567 else \
568 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
569
570 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
571 offsets, size_offsets, workspace, size_workspace) \
572 if (pcre_mode == PCRE32_MODE) \
573 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace); \
575 else if (pcre_mode == PCRE16_MODE) \
576 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else \
579 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace)
581
582 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets) \
584 if (pcre_mode == PCRE32_MODE) \
585 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets); \
587 else if (pcre_mode == PCRE16_MODE) \
588 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else \
591 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets)
593
594 #define PCRE_FREE_STUDY(extra) \
595 if (pcre_mode == PCRE32_MODE) \
596 PCRE_FREE_STUDY32(extra); \
597 else if (pcre_mode == PCRE16_MODE) \
598 PCRE_FREE_STUDY16(extra); \
599 else \
600 PCRE_FREE_STUDY8(extra)
601
602 #define PCRE_FREE_SUBSTRING(substring) \
603 if (pcre_mode == PCRE32_MODE) \
604 PCRE_FREE_SUBSTRING32(substring); \
605 else if (pcre_mode == PCRE16_MODE) \
606 PCRE_FREE_SUBSTRING16(substring); \
607 else \
608 PCRE_FREE_SUBSTRING8(substring)
609
610 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
611 if (pcre_mode == PCRE32_MODE) \
612 PCRE_FREE_SUBSTRING_LIST32(listptr); \
613 else if (pcre_mode == PCRE16_MODE) \
614 PCRE_FREE_SUBSTRING_LIST16(listptr); \
615 else \
616 PCRE_FREE_SUBSTRING_LIST8(listptr)
617
618 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
619 getnamesptr, subsptr) \
620 if (pcre_mode == PCRE32_MODE) \
621 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr); \
623 else if (pcre_mode == PCRE16_MODE) \
624 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else \
627 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr)
629
630 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
631 if (pcre_mode == PCRE32_MODE) \
632 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
633 else if (pcre_mode == PCRE16_MODE) \
634 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
635 else \
636 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
637
638 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
639 if (pcre_mode == PCRE32_MODE) \
640 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
641 else if (pcre_mode == PCRE16_MODE) \
642 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
643 else \
644 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
645
646 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
647 if (pcre_mode == PCRE32_MODE) \
648 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
649 else if (pcre_mode == PCRE16_MODE) \
650 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
651 else \
652 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
653
654 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
655 (pcre_mode == PCRE32_MODE ? \
656 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
657 : pcre_mode == PCRE16_MODE ? \
658 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
659 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
660
661 #define PCRE_JIT_STACK_FREE(stack) \
662 if (pcre_mode == PCRE32_MODE) \
663 PCRE_JIT_STACK_FREE32(stack); \
664 else if (pcre_mode == PCRE16_MODE) \
665 PCRE_JIT_STACK_FREE16(stack); \
666 else \
667 PCRE_JIT_STACK_FREE8(stack)
668
669 #define PCRE_MAKETABLES \
670 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
671
672 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
673 if (pcre_mode == PCRE32_MODE) \
674 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
675 else if (pcre_mode == PCRE16_MODE) \
676 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
677 else \
678 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
679
680 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
681 if (pcre_mode == PCRE32_MODE) \
682 PCRE_PRINTINT32(re, outfile, debug_lengths); \
683 else if (pcre_mode == PCRE16_MODE) \
684 PCRE_PRINTINT16(re, outfile, debug_lengths); \
685 else \
686 PCRE_PRINTINT8(re, outfile, debug_lengths)
687
688 #define PCRE_STUDY(extra, re, options, error) \
689 if (pcre_mode == PCRE32_MODE) \
690 PCRE_STUDY32(extra, re, options, error); \
691 else if (pcre_mode == PCRE16_MODE) \
692 PCRE_STUDY16(extra, re, options, error); \
693 else \
694 PCRE_STUDY8(extra, re, options, error)
695
696
697 /* ----- Two out of three modes are supported ----- */
698
699 #else
700
701 /* We can use some macro trickery to make a single set of definitions work in
702 the three different cases. */
703
704 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
705
706 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707 #define BITONE 32
708 #define BITTWO 16
709
710 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
711
712 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713 #define BITONE 32
714 #define BITTWO 8
715
716 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
717
718 #else
719 #define BITONE 16
720 #define BITTWO 8
721 #endif
722
723 #define glue(a,b) a##b
724 #define G(a,b) glue(a,b)
725
726
727 /* ----- Common macros for two-mode cases ----- */
728
729 #define PCHARS(lv, p, offset, len, f) \
730 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731 G(PCHARS,BITONE)(lv, p, offset, len, f); \
732 else \
733 G(PCHARS,BITTWO)(lv, p, offset, len, f)
734
735 #define PCHARSV(p, offset, len, f) \
736 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737 G(PCHARSV,BITONE)(p, offset, len, f); \
738 else \
739 G(PCHARSV,BITTWO)(p, offset, len, f)
740
741 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744 else \
745 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
746
747 #define SET_PCRE_CALLOUT(callout) \
748 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749 G(SET_PCRE_CALLOUT,BITONE)(callout); \
750 else \
751 G(SET_PCRE_CALLOUT,BITTWO)(callout)
752
753 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
755
756 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759 else \
760 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
761
762 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765 else \
766 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
767
768 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
769
770 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771 namesptr, cbuffer, size) \
772 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774 namesptr, cbuffer, size); \
775 else \
776 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777 namesptr, cbuffer, size)
778
779 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782 else \
783 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
784
785 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786 offsets, size_offsets, workspace, size_workspace) \
787 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789 offsets, size_offsets, workspace, size_workspace); \
790 else \
791 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792 offsets, size_offsets, workspace, size_workspace)
793
/* Two-mode dispatch of the matching call, chosen by pcre_mode. Expands to a
single statement. */
#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)
802
/* Two-mode dispatch of the free-study call, chosen by pcre_mode. Expands to a
single statement. */
#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_STUDY,BITONE)(extra); \
    else \
      G(PCRE_FREE_STUDY,BITTWO)(extra); \
  } while (0)
808
/* Two-mode dispatch of the free-substring call, chosen by pcre_mode. Expands
to a single statement. */
#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
    else \
      G(PCRE_FREE_SUBSTRING,BITTWO)(substring); \
  } while (0)
814
/* Two-mode dispatch of the free-substring-list call, chosen by pcre_mode.
Expands to a single statement. */
#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
    else \
      G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr); \
  } while (0)
820
/* Two-mode dispatch of the get-named-substring call, chosen by pcre_mode.
Expands to a single statement. */
#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)
829
/* Two-mode dispatch of the get-stringnumber call, chosen by pcre_mode. Expands
to a single statement. */
#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
    else \
      G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr); \
  } while (0)
835
/* Two-mode dispatch of the get-substring call, chosen by pcre_mode. Expands to
a single statement. */
#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)
841
/* Two-mode dispatch of the get-substring-list call, chosen by pcre_mode.
Expands to a single statement. */
#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
    else \
      G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr); \
  } while (0)
847
/* Two-mode dispatch of the JIT stack allocation, chosen by pcre_mode. This is
an expression macro (it yields the selected variant's value), so the whole
conditional is parenthesised: without the outer parentheses any operator
written next to the macro at the use site would bind into the condition of the
?: instead of applying to its result. */
#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
      G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
852
/* Two-mode dispatch of the JIT stack release, chosen by pcre_mode. Expands to
a single statement. */
#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
    else \
      G(PCRE_JIT_STACK_FREE,BITTWO)(stack); \
  } while (0)
858
/* Two-mode dispatch of the maketables call, chosen by pcre_mode. This is an
expression macro, so the whole conditional is parenthesised to stop operators
at the use site from binding into the condition of the ?:. */
#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
      G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
862
/* Two-mode dispatch of the pattern byte-order conversion, chosen by pcre_mode.
Expands to a single statement. */
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
    else \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables); \
  } while (0)
868
/* Two-mode dispatch of the internal-form printing call, chosen by pcre_mode.
Expands to a single statement. */
#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
    else \
      G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths); \
  } while (0)
874
/* Two-mode dispatch of the study call, chosen by pcre_mode. Expands to a
single statement. */
#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_STUDY,BITONE)(extra, re, options, error); \
    else \
      G(PCRE_STUDY,BITTWO)(extra, re, options, error); \
  } while (0)
880
881 #endif /* Two out of three modes */
882
883 /* ----- End of cases where more than one mode is supported ----- */
884
885
886 /* ----- Only 8-bit mode is supported ----- */
887
888 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library built, each generic name is a direct alias for
its 8-bit counterpart; pcre_mode is not consulted. CHAR_SIZE is 1 here. */
#define CHAR_SIZE 1
#define PCHARS PCHARS8
#define PCHARSV PCHARSV8
#define READ_CAPTURE_NAME READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
#define STRLEN STRLEN8
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE PCRE_COMPILE8
#define PCRE_CONFIG pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC PCRE_DFA_EXEC8
#define PCRE_EXEC PCRE_EXEC8
#define PCRE_FREE_STUDY PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
/* Object-like macro: the call parentheses are part of the expansion. */
#define PCRE_MAKETABLES pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT PCRE_PRINTINT8
#define PCRE_STUDY PCRE_STUDY8
915
916 /* ----- Only 16-bit mode is supported ----- */
917
918 #elif defined SUPPORT_PCRE16
/* With only the 16-bit library built, each generic name is a direct alias for
its 16-bit counterpart; pcre_mode is not consulted. CHAR_SIZE is 2 here. */
#define CHAR_SIZE 2
#define PCHARS PCHARS16
#define PCHARSV PCHARSV16
#define READ_CAPTURE_NAME READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
#define STRLEN STRLEN16
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE PCRE_COMPILE16
#define PCRE_CONFIG pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC PCRE_DFA_EXEC16
#define PCRE_EXEC PCRE_EXEC16
#define PCRE_FREE_STUDY PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
/* Object-like macro: the call parentheses are part of the expansion. */
#define PCRE_MAKETABLES pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT PCRE_PRINTINT16
#define PCRE_STUDY PCRE_STUDY16
945
946 /* ----- Only 32-bit mode is supported ----- */
947
948 #elif defined SUPPORT_PCRE32
/* With only the 32-bit library built, each generic name is a direct alias for
its 32-bit counterpart; pcre_mode is not consulted. CHAR_SIZE is 4 here. */
#define CHAR_SIZE 4
#define PCHARS PCHARS32
#define PCHARSV PCHARSV32
#define READ_CAPTURE_NAME READ_CAPTURE_NAME32
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
#define STRLEN STRLEN32
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
#define PCRE_COMPILE PCRE_COMPILE32
#define PCRE_CONFIG pcre32_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
#define PCRE_DFA_EXEC PCRE_DFA_EXEC32
#define PCRE_EXEC PCRE_EXEC32
#define PCRE_FREE_STUDY PCRE_FREE_STUDY32
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
/* Object-like macro: the call parentheses are part of the expansion. */
#define PCRE_MAKETABLES pcre32_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
#define PCRE_PRINTINT PCRE_PRINTINT32
#define PCRE_STUDY PCRE_STUDY32
975
976 #endif
977
978 /* ----- End of mode-specific function call macros ----- */
979
980
/* Other parameters */

/* Supply CLOCKS_PER_SEC when the headers did not define it: use CLK_TCK if
that exists, otherwise fall back to 100. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* DFA_WS_DIMENSION is only defined when NODFA is not set.
NOTE(review): presumably the size of the DFA workspace vector - confirm at the
use site. */

#if !defined NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
998
/* Static variables. All are file-local; those without an initializer start at
zero/NULL as static storage. NOTE(review): most are read and written outside
this part of the file, so their exact roles should be confirmed at the use
sites. */

static FILE *outfile;
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int jit_was_used;                /* set to TRUE by jit_callback() */
static int locale_set = 0;
static int show_malloc;
static int use_utf;
static size_t gotten_store;
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;

/* The buffers grow automatically if very long input lines are encountered.
buffer_size holds the initial size; both pointers start out NULL. */

static int buffer_size = 50000;
static pcre_uint8 *buffer = NULL;
static pcre_uint8 *pbuffer = NULL;
1022
/* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set.
Either one being defined stops the build with an explicit error. */

#ifdef COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

#ifdef COMPILE_PCRE32
#error COMPILE_PCRE32 must not be set when compiling pcretest.c
#endif
1032
/* We need buffers for building 16/32-bit strings, and the tables of operator
lengths that are used for 16/32-bit compiling, in order to swap bytes in a
pattern for saving/reloading testing. Luckily, the data for these tables is
defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
are used in the tables) are adjusted appropriately for the 16/32-bit world.
LINK_SIZE is also used later in this program. */

#ifdef SUPPORT_PCRE16
#undef IMM2_SIZE
#define IMM2_SIZE 1

/* Re-express LINK_SIZE for 16-bit compiling: 2 becomes 1, and 3 or 4 become 2.
Any other incoming value is a build error. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

/* buffer16 starts out unallocated and is grown on demand by to16().
OP_lengths16 is built from OP_LENGTHS with the values defined just above. */

static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
1058
/* 32-bit counterpart of the 16-bit setup: IMM2_SIZE and LINK_SIZE are both
forced to 1 before OP_lengths32 is built from OP_LENGTHS. The redefinition is
unconditional, so whenever SUPPORT_PCRE32 is defined the value of LINK_SIZE
seen by the rest of this file is 1, regardless of what it was before. */

#ifdef SUPPORT_PCRE32
#undef IMM2_SIZE
#define IMM2_SIZE 1
#undef LINK_SIZE
#define LINK_SIZE 1

/* buffer32 starts out unallocated and is grown on demand by to32(). */

static int buffer32_size = 0;
static pcre_uint32 *buffer32 = NULL;
static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE32 */
1069
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be changed by an option. If there is no 8-bit support, there
must be 16- or 32-bit support, so default to 16-bit mode if it is available
and to 32-bit mode otherwise. */

#if defined SUPPORT_PCRE8
static int pcre_mode = PCRE8_MODE;
#elif defined SUPPORT_PCRE16
static int pcre_mode = PCRE16_MODE;
#elif defined SUPPORT_PCRE32
static int pcre_mode = PCRE32_MODE;
#endif
1081
/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. The seven entries
run through every non-empty combination of the three JIT compile options in
the order 1..7: bit 0 of n selects JIT_COMPILE, bit 1 PARTIAL_SOFT and bit 2
PARTIAL_HARD. NOTE(review): presumably indexed by n - '1' - confirm at the use
site. */

static int jit_study_bits[] =
{
  PCRE_STUDY_JIT_COMPILE,
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
};

/* Mask with all three JIT study options set. */

#define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1098
/* Textual explanations for runtime error codes. A NULL entry marks a code that
has no generic text here (see the per-entry notes). NOTE(review): the table is
presumably indexed by the negated return code (entry 0 is "no error", entry 1
is NOMATCH) - confirm at the use site. */

static const char *errtexts[] = {
  NULL,                  /* 0 is no error */
  NULL,                  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,                  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,                  /* BADUTF8/16 is handled specially */
  NULL,                  /* BADUTF8/16 offset is handled specially */
  NULL,                  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,                  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error",
  "pattern compiled with other endianness",
  "invalid data in workspace for DFA restart"
};
1134
1135
1136 /*************************************************
1137 * Alternate character tables *
1138 *************************************************/
1139
1140 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1141 using the default tables of the library. However, the T option can be used to
1142 select alternate sets of tables, for different kinds of testing. Note also that
1143 the L (locale) option also adjusts the tables. */
1144
/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. The array holds four consecutive sections: a 256-byte
lower casing table, a 256-byte case flipping table, ten 32-byte class bit maps
(320 bytes) and a 256-byte character type table. */

static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/

0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1316
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. The array has the
same total size as tables0 (256 + 256 + 320 + 256 bytes); the section comments
below assume it also follows the same layout. */

static const pcre_uint8 tables1[] = {

/* Lower casing table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps: ten maps of 32 bytes each (320 bytes). */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table (256 bytes). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1459
1460
1461
1462
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (e.g. SunOS4) whose C library lacks strerror() but
does export the sys_nerr/sys_errlist pair. Numbers outside 0..sys_nerr-1 get a
fixed "unknown" text. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1482
1483
1484
1485 /*************************************************
1486 * Print newline configuration *
1487 *************************************************/
1488
1489 /*
1490 Arguments:
1491 rc the return code from PCRE_CONFIG_NEWLINE
1492 isc TRUE if called from "-C newline"
1493 Returns: nothing
1494 */
1495
1496 static void
1497 print_newline_config(int rc, BOOL isc)
1498 {
1499 const char *s = NULL;
1500 if (!isc) printf(" Newline sequence is ");
1501 switch(rc)
1502 {
1503 case CHAR_CR: s = "CR"; break;
1504 case CHAR_LF: s = "LF"; break;
1505 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1506 case -1: s = "ANY"; break;
1507 case -2: s = "ANYCRLF"; break;
1508
1509 default:
1510 printf("a non-standard value: 0x%04x\n", rc);
1511 return;
1512 }
1513
1514 printf("%s\n", s);
1515 }
1516
1517
1518
1519 /*************************************************
1520 * JIT memory callback *
1521 *************************************************/
1522
1523 static pcre_jit_stack* jit_callback(void *arg)
1524 {
1525 jit_was_used = TRUE;
1526 return (pcre_jit_stack *)arg;
1527 }
1528
1529
1530 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1531 /*************************************************
1532 * Convert UTF-8 string to value *
1533 *************************************************/
1534
1535 /* This function takes one or more bytes that represents a UTF-8 character,
1536 and returns the value of the character.
1537
1538 Argument:
1539 utf8bytes a pointer to the byte vector
1540 vptr a pointer to an int to receive the value
1541
1542 Returns: > 0 => the number of bytes consumed
1543 -6 to 0 => malformed UTF-8 character at offset = (-return)
1544 */
1545
1546 static int
1547 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1548 {
1549 pcre_uint32 c = *utf8bytes++;
1550 pcre_uint32 d = c;
1551 int i, j, s;
1552
1553 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1554 {
1555 if ((d & 0x80) == 0) break;
1556 d <<= 1;
1557 }
1558
1559 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1560 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1561
1562 /* i now has a value in the range 1-5 */
1563
1564 s = 6*i;
1565 d = (c & utf8_table3[i]) << s;
1566
1567 for (j = 0; j < i; j++)
1568 {
1569 c = *utf8bytes++;
1570 if ((c & 0xc0) != 0x80) return -(j+1);
1571 s -= 6;
1572 d |= (c & 0x3f) << s;
1573 }
1574
1575 /* Check that encoding was the correct unique one */
1576
1577 for (j = 0; j < utf8_table1_size; j++)
1578 if (d <= (pcre_uint32)utf8_table1[j]) break;
1579 if (j != i) return -(i+1);
1580
1581 /* Valid value */
1582
1583 *vptr = d;
1584 return i+1;
1585 }
1586 #endif /* NOUTF || SUPPORT_PCRE16 */
1587
1588
1589
#if defined SUPPORT_PCRE8 && !defined NOUTF
/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* Encodes a character value in the range 0 - 0x7fffffff as a UTF-8 sequence
of 1 to 6 bytes.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer,
             or -1 (nothing written) if cvalue is greater than 0x7fffffff
*/

static int
ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
{
  int extra = 0;
  pcre_uint8 *out;

  if (cvalue > 0x7fffffffu)
    return -1;

  /* Find how many continuation bytes the value needs. */
  while (extra < utf8_table1_size && cvalue > (pcre_uint32)utf8_table1[extra])
    extra++;

  /* Fill the continuation bytes from the last one back towards the first. */
  for (out = utf8bytes + extra; out > utf8bytes; out--) {
    *out = 0x80 | (cvalue & 0x3f);
    cvalue >>= 6;
  }

  /* What is left of the value goes into the lead byte. */
  *utf8bytes = utf8_table2[extra] | cvalue;
  return extra + 1;
}
#endif
1623
1624
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Converts a byte string into a zero-terminated string of 16-bit units held
in buffer16, which is (re)allocated here when it is too small. The allocation
is 2*len + 2 bytes, i.e. len + 1 units: in non-UTF mode each byte becomes one
unit, and in UTF mode a character that needs two UTF-16 units occupies four
bytes of UTF-8, so the output never has more units than the input has bytes.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *out;
  int needed = 2*len + 2;

  if (buffer16_size < needed) {
    free(buffer16);
    buffer16_size = needed;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL) {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
    }
  }

  out = buffer16;

  if (utf || data) {
    /* Input is UTF-8: decode one character at a time. */
    pcre_uint32 c = 0;
    while (len > 0) {
      int used = utf82ord(p, &c);
      if (used <= 0) return -1;
      if (c > 0x10ffff) return -2;
      p += used;
      len -= used;
      if (c >= 0x10000) {
        /* Needs a surrogate pair, which is only allowed in UTF mode. */
        if (!utf) return -3;
        c -= 0x10000;
        *out++ = 0xD800 | (c >> 10);
        *out++ = 0xDC00 | (c & 0x3ff);
      } else {
        *out++ = c;
      }
    }
  } else {
    /* Non-UTF pattern: widen each byte to one 16-bit unit. */
    for (; len > 0; len--) *out++ = *p++;
  }

  *out = 0;
  return out - buffer16;
}
#endif
1703
1704 #ifdef SUPPORT_PCRE32
1705 /*************************************************
1706 * Convert a string to 32-bit *
1707 *************************************************/
1708
1709 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1710 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1711 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1712 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1713 result is always left in buffer32.
1714
1715 Note that this function does not object to surrogate values. This is
1716 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1717 for the purpose of testing that they are correctly faulted.
1718
1719 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1720 in UTF-8 so that values greater than 255 can be handled.
1721
1722 Arguments:
1723 data TRUE if converting a data line; FALSE for a regex
1724 p points to a byte string
1725 utf true if UTF-8 (to be converted to UTF-32)
1726 len number of bytes in the string (excluding trailing zero)
1727
1728 Returns: number of 32-bit data items used (excluding trailing zero)
1729 OR -1 if a UTF-8 string is malformed
1730 OR -2 if a value > 0x10ffff is encountered
1731 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1732 */
1733
1734 static int
1735 to32(int data, pcre_uint8 *p, int utf, int len)
1736 {
1737 pcre_uint32 *pp;
1738
1739 if (buffer32_size < 4*len + 4)
1740 {
1741 if (buffer32 != NULL) free(buffer32);
1742 buffer32_size = 4*len + 4;
1743 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1744 if (buffer32 == NULL)
1745 {
1746 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1747 exit(1);
1748 }
1749 }
1750
1751 pp = buffer32;
1752
1753 if (!utf && !data)
1754 {
1755 while (len-- > 0) *pp++ = *p++;
1756 }
1757
1758 else
1759 {
1760 pcre_uint32 c = 0;
1761 while (len > 0)
1762 {
1763 int chlen = utf82ord(p, &c);
1764 if (chlen <= 0) return -1;
1765 if (utf)
1766 {
1767 if (c > 0x10ffff) return -2;
1768 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1769 }
1770
1771 p += chlen;
1772 len -= chlen;
1773 *pp++ = c;
1774 }
1775 }
1776
1777 *pp = 0;
1778 return pp - buffer32;
1779 }
1780
1781 /* Check that a 32-bit character string is valid UTF-32.
1782
1783 Arguments:
1784 string points to the string
1785 length length of string, or -1 if the string is zero-terminated
1786
1787 Returns: TRUE if the string is a valid UTF-32 string
1788 FALSE otherwise
1789 */
1790
1791 #ifdef SUPPORT_UTF
1792 static BOOL
1793 valid_utf32(pcre_uint32 *string, int length)
1794 {
1795 register pcre_uint32 *p;
1796 register pcre_uint32 c;
1797
1798 for (p = string; length-- > 0; p++)
1799 {
1800 c = *p;
1801
1802 if (c > 0x10ffffu)
1803 return FALSE;
1804
1805 /* A surrogate */
1806 if ((c & 0xfffff800u) == 0xd800u)
1807 return FALSE;
1808
1809 /* Non-character */
1810 if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1811 return FALSE;
1812 }
1813
1814 return TRUE;
1815 }
1816 #endif /* SUPPORT_UTF */
1817
1818 #endif
1819
1820 /*************************************************
1821 * Read or extend an input line *
1822 *************************************************/
1823
1824 /* Input lines are read into buffer, but both patterns and data lines can be
1825 continued over multiple input lines. In addition, if the buffer fills up, we
1826 want to automatically expand it so as to be able to handle extremely large
1827 lines that are needed for certain stress tests. When the input buffer is
1828 expanded, pbuffer must also be expanded likewise, and the
1829 contents of pbuffer, which are a copy of the input for callouts, must be
1830 preserved (for when expansion happens for a data line). This is not the
1831 best way of handling this, but hey, this is just a test program!
1832
1833 Arguments:
1834 f the file to read
1835 start where in buffer to start (this *must* be within buffer)
1836 prompt for stdin or readline()
1837
1838 Returns: pointer to the start of new data
1839 could be a copy of start, or could be moved
1840 NULL if no data read and EOF reached
1841 */
1842
1843 static pcre_uint8 *
1844 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1845 {
1846 pcre_uint8 *here = start;
1847
1848 for (;;)
1849 {
1850 size_t rlen = (size_t)(buffer_size - (here - buffer));
1851
1852 if (rlen > 1000)
1853 {
1854 int dlen;
1855
1856 /* If libreadline or libedit support is required, use readline() to read a
1857 line if the input is a terminal. Note that readline() removes the trailing
1858 newline, so we must put it back again, to be compatible with fgets(). */
1859
1860 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1861 if (isatty(fileno(f)))
1862 {
1863 size_t len;
1864 char *s = readline(prompt);
1865 if (s == NULL) return (here == start)? NULL : start;
1866 len = strlen(s);
1867 if (len > 0) add_history(s);
1868 if (len > rlen - 1) len = rlen - 1;
1869 memcpy(here, s, len);
1870 here[len] = '\n';
1871 here[len+1] = 0;
1872 free(s);
1873 }
1874 else
1875 #endif
1876
1877 /* Read the next line by normal means, prompting if the file is stdin. */
1878
1879 {
1880 if (f == stdin) printf("%s", prompt);
1881 if (fgets((char *)here, rlen, f) == NULL)
1882 return (here == start)? NULL : start;
1883 }
1884
1885 dlen = (int)strlen((char *)here);
1886 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1887 here += dlen;
1888 }
1889
1890 else
1891 {
1892 int new_buffer_size = 2*buffer_size;
1893 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1894 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1895
1896 if (new_buffer == NULL || new_pbuffer == NULL)
1897 {
1898 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1899 exit(1);
1900 }
1901
1902 memcpy(new_buffer, buffer, buffer_size);
1903 memcpy(new_pbuffer, pbuffer, buffer_size);
1904
1905 buffer_size = new_buffer_size;
1906
1907 start = new_buffer + (start - buffer);
1908 here = new_buffer + (here - buffer);
1909
1910 free(buffer);
1911 free(pbuffer);
1912
1913 buffer = new_buffer;
1914 pbuffer = new_pbuffer;
1915 }
1916 }
1917
1918 return NULL; /* Control never gets here */
1919 }
1920
1921
1922
1923 /*************************************************
1924 * Read number from string *
1925 *************************************************/
1926
1927 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1928 around with conditional compilation, just do the job by hand. It is only used
1929 for unpicking arguments, so just keep it simple.
1930
1931 Arguments:
1932 str string to be converted
1933 endptr where to put the end pointer
1934
1935 Returns: the converted value as an int (0 if no digits are found)
1936 */
1937
1938 static int
1939 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1940 {
1941 int result = 0;
1942 while(*str != 0 && isspace(*str)) str++;
1943 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1944 *endptr = str;
1945 return(result);
1946 }
1947
1948
1949
1950 /*************************************************
1951 * Print one character *
1952 *************************************************/
1953
1954 /* Print a single character either literally, or as a hex escape. */
1955
1956 static int pchar(pcre_uint32 c, FILE *f)
1957 {
1958 int n = 0;
1959 if (PRINTOK(c))
1960 {
1961 if (f != NULL) fprintf(f, "%c", c);
1962 return 1;
1963 }
1964
1965 if (c < 0x100)
1966 {
1967 if (use_utf)
1968 {
1969 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1970 return 6;
1971 }
1972 else
1973 {
1974 if (f != NULL) fprintf(f, "\\x%02x", c);
1975 return 4;
1976 }
1977 }
1978
1979 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1980 return n >= 0 ? n : 0;
1981 }
1982
1983
1984
1985 #ifdef SUPPORT_PCRE8
1986 /*************************************************
1987 * Print 8-bit character string *
1988 *************************************************/
1989
1990 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1991 If handed a NULL file, just counts chars without printing. */
1992
1993 static int pchars(pcre_uint8 *p, int length, FILE *f)
1994 {
1995 pcre_uint32 c = 0;
1996 int yield = 0;
1997
1998 if (length < 0)
1999 length = strlen((char *)p);
2000
2001 while (length-- > 0)
2002 {
2003 #if !defined NOUTF
2004 if (use_utf)
2005 {
2006 int rc = utf82ord(p, &c);
2007 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2008 {
2009 length -= rc - 1;
2010 p += rc;
2011 yield += pchar(c, f);
2012 continue;
2013 }
2014 }
2015 #endif
2016 c = *p++;
2017 yield += pchar(c, f);
2018 }
2019
2020 return yield;
2021 }
2022 #endif
2023
2024
2025
2026 #ifdef SUPPORT_PCRE16
2027 /*************************************************
2028 * Find length of 0-terminated 16-bit string *
2029 *************************************************/
2030
2031 static int strlen16(PCRE_SPTR16 p)
2032 {
2033 int len = 0;
2034 while (*p++ != 0) len++;
2035 return len;
2036 }
2037 #endif /* SUPPORT_PCRE16 */
2038
2039
2040
2041 #ifdef SUPPORT_PCRE32
2042 /*************************************************
2043 * Find length of 0-terminated 32-bit string *
2044 *************************************************/
2045
2046 static int strlen32(PCRE_SPTR32 p)
2047 {
2048 int len = 0;
2049 while (*p++ != 0) len++;
2050 return len;
2051 }
2052 #endif /* SUPPORT_PCRE32 */
2053
2054
2055
2056 #ifdef SUPPORT_PCRE16
2057 /*************************************************
2058 * Print 16-bit character string *
2059 *************************************************/
2060
2061 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2062 If handed a NULL file, just counts chars without printing. */
2063
2064 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2065 {
2066 int yield = 0;
2067
2068 if (length < 0)
2069 length = strlen16(p);
2070
2071 while (length-- > 0)
2072 {
2073 pcre_uint32 c = *p++ & 0xffff;
2074 #if !defined NOUTF
2075 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2076 {
2077 int d = *p & 0xffff;
2078 if (d >= 0xDC00 && d < 0xDFFF)
2079 {
2080 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2081 length--;
2082 p++;
2083 }
2084 }
2085 #endif
2086 yield += pchar(c, f);
2087 }
2088
2089 return yield;
2090 }
2091 #endif /* SUPPORT_PCRE16 */
2092
2093
2094
2095 #ifdef SUPPORT_PCRE32
2096 /*************************************************
2097 * Print 32-bit character string *
2098 *************************************************/
2099
2100 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2101 If handed a NULL file, just counts chars without printing. */
2102
2103 #define UTF32_MASK (0x1fffffu)
2104
2105 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2106 {
2107 int yield = 0;
2108
2109 if (length < 0)
2110 length = strlen32(p);
2111
2112 while (length-- > 0)
2113 {
2114 pcre_uint32 c = *p++;
2115 if (utf) c &= UTF32_MASK;
2116 yield += pchar(c, f);
2117 }
2118
2119 return yield;
2120 }
2121 #endif /* SUPPORT_PCRE32 */
2122
2123
2124
2125 #ifdef SUPPORT_PCRE8
2126 /*************************************************
2127 * Read a capture name (8-bit) and check it *
2128 *************************************************/
2129
2130 static pcre_uint8 *
2131 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132 {
2133 pcre_uint8 *npp = *pp;
2134 while (isalnum(*p)) *npp++ = *p++;
2135 *npp++ = 0;
2136 *npp = 0;
2137 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138 {
2139 fprintf(outfile, "no parentheses with name \"");
2140 PCHARSV(*pp, 0, -1, outfile);
2141 fprintf(outfile, "\"\n");
2142 }
2143
2144 *pp = npp;
2145 return p;
2146 }
2147 #endif /* SUPPORT_PCRE8 */
2148
2149
2150
2151 #ifdef SUPPORT_PCRE16
2152 /*************************************************
2153 * Read a capture name (16-bit) and check it *
2154 *************************************************/
2155
2156 /* Note that the text being read is 8-bit. */
2157
2158 static pcre_uint8 *
2159 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160 {
2161 pcre_uint16 *npp = *pp;
2162 while (isalnum(*p)) *npp++ = *p++;
2163 *npp++ = 0;
2164 *npp = 0;
2165 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166 {
2167 fprintf(outfile, "no parentheses with name \"");
2168 PCHARSV(*pp, 0, -1, outfile);
2169 fprintf(outfile, "\"\n");
2170 }
2171 *pp = npp;
2172 return p;
2173 }
2174 #endif /* SUPPORT_PCRE16 */
2175
2176
2177
2178 #ifdef SUPPORT_PCRE32
2179 /*************************************************
2180 * Read a capture name (32-bit) and check it *
2181 *************************************************/
2182
2183 /* Note that the text being read is 8-bit. */
2184
2185 static pcre_uint8 *
2186 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2187 {
2188 pcre_uint32 *npp = *pp;
2189 while (isalnum(*p)) *npp++ = *p++;
2190 *npp++ = 0;
2191 *npp = 0;
2192 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2193 {
2194 fprintf(outfile, "no parentheses with name \"");
2195 PCHARSV(*pp, 0, -1, outfile);
2196 fprintf(outfile, "\"\n");
2197 }
2198 *pp = npp;
2199 return p;
2200 }
2201 #endif /* SUPPORT_PCRE32 */
2202
2203
2204
2205 /*************************************************
2206 * Callout function *
2207 *************************************************/
2208
2209 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2210 the match. Yield zero unless more callouts than the fail count, or the callout
2211 data is not zero. */
2212
2213 static int callout(pcre_callout_block *cb)
2214 {
2215 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2216 int i, pre_start, post_start, subject_length;
2217
2218 if (callout_extra)
2219 {
2220 fprintf(f, "Callout %d: last capture = %d\n",
2221 cb->callout_number, cb->capture_last);
2222
2223 for (i = 0; i < cb->capture_top * 2; i += 2)
2224 {
2225 if (cb->offset_vector[i] < 0)
2226 fprintf(f, "%2d: <unset>\n", i/2);
2227 else
2228 {
2229 fprintf(f, "%2d: ", i/2);
2230 PCHARSV(cb->subject, cb->offset_vector[i],
2231 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2232 fprintf(f, "\n");
2233 }
2234 }
2235 }
2236
2237 /* Re-print the subject in canonical form, the first time or if giving full
2238 datails. On subsequent calls in the same match, we use pchars just to find the
2239 printed lengths of the substrings. */
2240
2241 if (f != NULL) fprintf(f, "--->");
2242
2243 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2244 PCHARS(post_start, cb->subject, cb->start_match,
2245 cb->current_position - cb->start_match, f);
2246
2247 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2248
2249 PCHARSV(cb->subject, cb->current_position,
2250 cb->subject_length - cb->current_position, f);
2251
2252 if (f != NULL) fprintf(f, "\n");
2253
2254 /* Always print appropriate indicators, with callout number if not already
2255 shown. For automatic callouts, show the pattern offset. */
2256
2257 if (cb->callout_number == 255)
2258 {
2259 fprintf(outfile, "%+3d ", cb->pattern_position);
2260 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2261 }
2262 else
2263 {
2264 if (callout_extra) fprintf(outfile, " ");
2265 else fprintf(outfile, "%3d ", cb->callout_number);
2266 }
2267
2268 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2269 fprintf(outfile, "^");
2270
2271 if (post_start > 0)
2272 {
2273 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2274 fprintf(outfile, "^");
2275 }
2276
2277 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2278 fprintf(outfile, " ");
2279
2280 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2281 pbuffer + cb->pattern_position);
2282
2283 fprintf(outfile, "\n");
2284 first_callout = 0;
2285
2286 if (cb->mark != last_callout_mark)
2287 {
2288 if (cb->mark == NULL)
2289 fprintf(outfile, "Latest Mark: <unset>\n");
2290 else
2291 {
2292 fprintf(outfile, "Latest Mark: ");
2293 PCHARSV(cb->mark, 0, -1, outfile);
2294 putc('\n', outfile);
2295 }
2296 last_callout_mark = cb->mark;
2297 }
2298
2299 if (cb->callout_data != NULL)
2300 {
2301 int callout_data = *((int *)(cb->callout_data));
2302 if (callout_data != 0)
2303 {
2304 fprintf(outfile, "Callout data = %d\n", callout_data);
2305 return callout_data;
2306 }
2307 }
2308
2309 return (cb->callout_number != callout_fail_id)? 0 :
2310 (++callout_count >= callout_fail_count)? 1 : 0;
2311 }
2312
2313
2314 /*************************************************
2315 * Local malloc functions *
2316 *************************************************/
2317
2318 /* Alternative malloc function, to test functionality and save the size of a
2319 compiled re, which is the first store request that pcre_compile() makes. The
2320 show_malloc variable is set only during matching. */
2321
2322 static void *new_malloc(size_t size)
2323 {
2324 void *block = malloc(size);
2325 gotten_store = size;
2326 if (first_gotten_store == 0) first_gotten_store = size;
2327 if (show_malloc)
2328 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2329 return block;
2330 }
2331
2332 static void new_free(void *block)
2333 {
2334 if (show_malloc)
2335 fprintf(outfile, "free %p\n", block);
2336 free(block);
2337 }
2338
2339 /* For recursion malloc/free, to test stacking calls */
2340
2341 static void *stack_malloc(size_t size)
2342 {
2343 void *block = malloc(size);
2344 if (show_malloc)
2345 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346 return block;
2347 }
2348
2349 static void stack_free(void *block)
2350 {
2351 if (show_malloc)
2352 fprintf(outfile, "stack_free %p\n", block);
2353 free(block);
2354 }
2355
2356
2357 /*************************************************
2358 * Call pcre_fullinfo() *
2359 *************************************************/
2360
2361 /* Get one piece of information from the pcre_fullinfo() function. When only
2362 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2363 value, but the code is defensive.
2364
2365 Arguments:
2366 re compiled regex
2367 study study data
2368 option PCRE_INFO_xxx option
2369 ptr where to put the data
2370
2371 Returns: 0 when OK, < 0 on error
2372 */
2373
2374 static int
2375 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376 {
2377 int rc;
2378
2379 if (pcre_mode == PCRE32_MODE)
2380 #ifdef SUPPORT_PCRE32
2381 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2382 #else
2383 rc = PCRE_ERROR_BADMODE;
2384 #endif
2385 else if (pcre_mode == PCRE16_MODE)
2386 #ifdef SUPPORT_PCRE16
2387 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388 #else
2389 rc = PCRE_ERROR_BADMODE;
2390 #endif
2391 else
2392 #ifdef SUPPORT_PCRE8
2393 rc = pcre_fullinfo(re, study, option, ptr);
2394 #else
2395 rc = PCRE_ERROR_BADMODE;
2396 #endif
2397
2398 if (rc < 0)
2399 {
2400 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2401 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2402 if (rc == PCRE_ERROR_BADMODE)
2403 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2404 "%d-bit mode\n", 8 * CHAR_SIZE,
2405 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2406 }
2407
2408 return rc;
2409 }
2410
2411
2412
2413 /*************************************************
2414 * Swap byte functions *
2415 *************************************************/
2416
2417 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418 value, respectively.
2419
2420 Arguments:
2421 value any number
2422
2423 Returns: the byte swapped value
2424 */
2425
2426 static pcre_uint32
2427 swap_uint32(pcre_uint32 value)
2428 {
2429 return ((value & 0x000000ff) << 24) |
2430 ((value & 0x0000ff00) << 8) |
2431 ((value & 0x00ff0000) >> 8) |
2432 (value >> 24);
2433 }
2434
2435 static pcre_uint16
2436 swap_uint16(pcre_uint16 value)
2437 {
2438 return (value >> 8) | (value << 8);
2439 }
2440
2441
2442
2443 /*************************************************
2444 * Flip bytes in a compiled pattern *
2445 *************************************************/
2446
2447 /* This function is called if the 'F' option was present on a pattern that is
2448 to be written to a file. We flip the bytes of all the integer fields in the
2449 regex data block and the study block. In 16-bit mode this also flips relevant
2450 bytes in the pattern itself. This is to make it possible to test PCRE's
2451 ability to reload byte-flipped patterns, e.g. those compiled on a different
2452 architecture. */
2453
2454 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2455 static void
2456 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2457 {
2458 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2459 #ifdef SUPPORT_PCRE16
2460 int op;
2461 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2462 int length = re->name_count * re->name_entry_size;
2463 #ifdef SUPPORT_UTF
2464 BOOL utf = (re->options & PCRE_UTF16) != 0;
2465 BOOL utf16_char = FALSE;
2466 #endif /* SUPPORT_UTF */
2467 #endif /* SUPPORT_PCRE16 */
2468
2469 /* Always flip the bytes in the main data block and study blocks. */
2470
2471 re->magic_number = REVERSED_MAGIC_NUMBER;
2472 re->size = swap_uint32(re->size);
2473 re->options = swap_uint32(re->options);
2474 re->flags = swap_uint16(re->flags);
2475 re->top_bracket = swap_uint16(re->top_bracket);
2476 re->top_backref = swap_uint16(re->top_backref);
2477 re->first_char = swap_uint16(re->first_char);
2478 re->req_char = swap_uint16(re->req_char);
2479 re->name_table_offset = swap_uint16(re->name_table_offset);
2480 re->name_entry_size = swap_uint16(re->name_entry_size);
2481 re->name_count = swap_uint16(re->name_count);
2482
2483 if (extra != NULL)
2484 {
2485 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2486 rsd->size = swap_uint32(rsd->size);
2487 rsd->flags = swap_uint32(rsd->flags);
2488 rsd->minlength = swap_uint32(rsd->minlength);
2489 }
2490
2491 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2492 in the name table, if present, and then in the pattern itself. */
2493
2494 #ifdef SUPPORT_PCRE16
2495 if (pcre_mode != PCRE16_MODE) return;
2496
2497 while(TRUE)
2498 {
2499 /* Swap previous characters. */
2500 while (length-- > 0)
2501 {
2502 *ptr = swap_uint16(*ptr);
2503 ptr++;
2504 }
2505 #ifdef SUPPORT_UTF
2506 if (utf16_char)
2507 {
2508 if ((ptr[-1] & 0xfc00) == 0xd800)
2509 {
2510 /* We know that there is only one extra character in UTF-16. */
2511 *ptr = swap_uint16(*ptr);
2512 ptr++;
2513 }
2514 }
2515 utf16_char = FALSE;
2516 #endif /* SUPPORT_UTF */
2517
2518 /* Get next opcode. */
2519
2520 length = 0;
2521 op = *ptr;
2522 *ptr++ = swap_uint16(op);
2523
2524 switch (op)
2525 {
2526 case OP_END:
2527 return;
2528
2529 #ifdef SUPPORT_UTF
2530 case OP_CHAR:
2531 case OP_CHARI:
2532 case OP_NOT:
2533 case OP_NOTI:
2534 case OP_STAR:
2535 case OP_MINSTAR:
2536 case OP_PLUS:
2537 case OP_MINPLUS:
2538 case OP_QUERY:
2539 case OP_MINQUERY:
2540 case OP_UPTO:
2541 case OP_MINUPTO:
2542 case OP_EXACT:
2543 case OP_POSSTAR:
2544 case OP_POSPLUS:
2545 case OP_POSQUERY:
2546 case OP_POSUPTO:
2547 case OP_STARI:
2548 case OP_MINSTARI:
2549 case OP_PLUSI:
2550 case OP_MINPLUSI:
2551 case OP_QUERYI:
2552 case OP_MINQUERYI:
2553 case OP_UPTOI:
2554 case OP_MINUPTOI:
2555 case OP_EXACTI:
2556 case OP_POSSTARI:
2557 case OP_POSPLUSI:
2558 case OP_POSQUERYI:
2559 case OP_POSUPTOI:
2560 case OP_NOTSTAR:
2561 case OP_NOTMINSTAR:
2562 case OP_NOTPLUS:
2563 case OP_NOTMINPLUS:
2564 case OP_NOTQUERY:
2565 case OP_NOTMINQUERY:
2566 case OP_NOTUPTO:
2567 case OP_NOTMINUPTO:
2568 case OP_NOTEXACT:
2569 case OP_NOTPOSSTAR:
2570 case OP_NOTPOSPLUS:
2571 case OP_NOTPOSQUERY:
2572 case OP_NOTPOSUPTO:
2573 case OP_NOTSTARI:
2574 case OP_NOTMINSTARI:
2575 case OP_NOTPLUSI:
2576 case OP_NOTMINPLUSI:
2577 case OP_NOTQUERYI:
2578 case OP_NOTMINQUERYI:
2579 case OP_NOTUPTOI:
2580 case OP_NOTMINUPTOI:
2581 case OP_NOTEXACTI:
2582 case OP_NOTPOSSTARI:
2583 case OP_NOTPOSPLUSI:
2584 case OP_NOTPOSQUERYI:
2585 case OP_NOTPOSUPTOI:
2586 if (utf) utf16_char = TRUE;
2587 #endif
2588 /* Fall through. */
2589
2590 default:
2591 length = OP_lengths16[op] - 1;
2592 break;
2593
2594 case OP_CLASS:
2595 case OP_NCLASS:
2596 /* Skip the character bit map. */
2597 ptr += 32/sizeof(pcre_uint16);
2598 length = 0;
2599 break;
2600
2601 case OP_XCLASS:
2602 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2603 if (LINK_SIZE > 1)
2604 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2605 - (1 + LINK_SIZE + 1));
2606 else
2607 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2608
2609 /* Reverse the size of the XCLASS instance. */
2610 *ptr = swap_uint16(*ptr);
2611 ptr++;
2612 if (LINK_SIZE > 1)
2613 {
2614 *ptr = swap_uint16(*ptr);
2615 ptr++;
2616 }
2617
2618 op = *ptr;
2619 *ptr = swap_uint16(op);
2620 ptr++;
2621 if ((op & XCL_MAP) != 0)
2622 {
2623 /* Skip the character bit map. */
2624 ptr += 32/sizeof(pcre_uint16);
2625 length -= 32/sizeof(pcre_uint16);
2626 }
2627 break;
2628 }
2629 }
2630 /* Control should never reach here in 16 bit mode. */
2631 #endif /* SUPPORT_PCRE16 */
2632 }
2633 #endif /* SUPPORT_PCRE[8|16] */
2634
2635
2636
2637 #if defined SUPPORT_PCRE32
2638 static void
2639 regexflip_32(pcre *ere, pcre_extra *extra)
2640 {
2641 real_pcre32 *re = (real_pcre32 *)ere;
2642 int op;
2643 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2644 int length = re->name_count * re->name_entry_size;
2645
2646 /* Always flip the bytes in the main data block and study blocks. */
2647
2648 re->magic_number = REVERSED_MAGIC_NUMBER;
2649 re->size = swap_uint32(re->size);
2650 re->options = swap_uint32(re->options);
2651 re->flags = swap_uint16(re->flags);
2652 re->top_bracket = swap_uint16(re->top_bracket);
2653 re->top_backref = swap_uint16(re->top_backref);
2654 re->first_char = swap_uint32(re->first_char);
2655 re->req_char = swap_uint32(re->req_char);
2656 re->name_table_offset = swap_uint16(re->name_table_offset);
2657 re->name_entry_size = swap_uint16(re->name_entry_size);
2658 re->name_count = swap_uint16(re->name_count);
2659
2660 if (extra != NULL)
2661 {
2662 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2663 rsd->size = swap_uint32(rsd->size);
2664 rsd->flags = swap_uint32(rsd->flags);
2665 rsd->minlength = swap_uint32(rsd->minlength);
2666 }
2667
2668 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2669 the pattern itself. */
2670
2671 while(TRUE)
2672 {
2673 /* Swap previous characters. */
2674 while (length-- > 0)
2675 {
2676 *ptr = swap_uint32(*ptr);
2677 ptr++;
2678 }
2679
2680 /* Get next opcode. */
2681
2682 length = 0;
2683 op = *ptr;
2684 *ptr++ = swap_uint32(op);
2685
2686 switch (op)
2687 {
2688 case OP_END:
2689 return;
2690
2691 default:
2692 length = OP_lengths32[op] - 1;
2693 break;
2694
2695 case OP_CLASS:
2696 case OP_NCLASS:
2697 /* Skip the character bit map. */
2698 ptr += 32/sizeof(pcre_uint32);
2699 length = 0;
2700 break;
2701
2702 case OP_XCLASS:
2703 /* LINK_SIZE can only be 1 in 32-bit mode. */
2704 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2705
2706 /* Reverse the size of the XCLASS instance. */
2707 *ptr = swap_uint32(*ptr);
2708 ptr++;
2709
2710 op = *ptr;
2711 *ptr = swap_uint32(op);
2712 ptr++;
2713 if ((op & XCL_MAP) != 0)
2714 {
2715 /* Skip the character bit map. */
2716 ptr += 32/sizeof(pcre_uint32);
2717 length -= 32/sizeof(pcre_uint32);
2718 }
2719 break;
2720 }
2721 }
2722 /* Control should never reach here in 32 bit mode. */
2723 }
2724
2725 #endif /* SUPPORT_PCRE32 */
2726
2727
2728
2729 static void
2730 regexflip(pcre *ere, pcre_extra *extra)
2731 {
2732 #if defined SUPPORT_PCRE32
2733 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2734 regexflip_32(ere, extra);
2735 #endif
2736 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2737 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2738 regexflip8_or_16(ere, extra);
2739 #endif
2740 }
2741
2742
2743
2744 /*************************************************
2745 * Check match or recursion limit *
2746 *************************************************/
2747
2748 static int
2749 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2750 int start_offset, int options, int *use_offsets, int use_size_offsets,
2751 int flag, unsigned long int *limit, int errnumber, const char *msg)
2752 {
2753 int count;
2754 int min = 0;
2755 int mid = 64;
2756 int max = -1;
2757
2758 extra->flags |= flag;
2759
2760 for (;;)
2761 {
2762 *limit = mid;
2763
2764 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2765 use_offsets, use_size_offsets);
2766
2767 if (count == errnumber)
2768 {
2769 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2770 min = mid;
2771 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2772 }
2773
2774 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2775 count == PCRE_ERROR_PARTIAL)
2776 {
2777 if (mid == min + 1)
2778 {
2779 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2780 break;
2781 }
2782 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2783 max = mid;
2784 mid = (min + mid)/2;
2785 }
2786 else break; /* Some other error */
2787 }
2788
2789 extra->flags &= ~flag;
2790 return count;
2791 }
2792
2793
2794
2795 /*************************************************
2796 * Case-independent strncmp() function *
2797 *************************************************/
2798
2799 /*
2800 Arguments:
2801 s first string
2802 t second string
2803 n number of characters to compare
2804
2805 Returns: < 0, = 0, or > 0, according to the comparison
2806 */
2807
2808 static int
2809 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2810 {
2811 while (n--)
2812 {
2813 int c = tolower(*s++) - tolower(*t++);
2814 if (c) return c;
2815 }
2816 return 0;
2817 }
2818
2819
2820
2821 /*************************************************
2822 * Check newline indicator *
2823 *************************************************/
2824
2825 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2826 a message and return 0 if there is no match.
2827
2828 Arguments:
2829 p points after the leading '<'
2830 f file for error message
2831
2832 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2833 */
2834
2835 static int
2836 check_newline(pcre_uint8 *p, FILE *f)
2837 {
2838 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2839 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2840 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2841 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2842 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2843 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2844 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2845 fprintf(f, "Unknown newline type at: <%s\n", p);
2846 return 0;
2847 }
2848
2849
2850
2851 /*************************************************
2852 * Usage function *
2853 *************************************************/
2854
/* Write the command-line help text to stdout. Lines for optional features
appear only when the corresponding feature was compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
fputs(" -32 use the 32-bit library\n", stdout);
#endif
fputs(" -b show compiled code\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -C arg show a specific compile-time option\n", stdout);
fputs(" and exit with its value. The arg can be:\n", stdout);
fputs(" linksize internal link size [2, 3, 4]\n", stdout);
fputs(" pcre8 8 bit library support enabled [0, 1]\n", stdout);
fputs(" pcre16 16 bit library support enabled [0, 1]\n", stdout);
fputs(" pcre32 32 bit library support enabled [0, 1]\n", stdout);
fputs(" utf Unicode Transformation Format supported [0, 1]\n", stdout);
fputs(" ucp Unicode Properties supported [0, 1]\n", stdout);
fputs(" jit Just-in-time compiler supported [0, 1]\n", stdout);
fputs(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n"
  " -M find MATCH_LIMIT minimum for each subject\n"
  " -m output memory used information\n"
  " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n"
  " -s+ force each pattern to be studied, using JIT if available\n"
  " -s++ ditto, verifying when JIT was actually used\n"
  " -s+n force each pattern to be studied, using JIT if available,\n"
  " where 1 <= n <= 7 selects JIT options\n"
  " -s++n ditto, verifying when JIT was actually used\n"
  " -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2909
2910
2911
2912 /*************************************************
2913 * Main Program *
2914 *************************************************/
2915
2916 /* Read lines from named file or stdin and write to named file or stdout; lines
2917 consist of a regular expression, in delimiters and optionally followed by
2918 options, followed by a set of test data, terminated by an empty line. */
2919
2920 int main(int argc, char **argv)
2921 {
2922 FILE *infile = stdin;
2923 const char *version;
2924 int options = 0;
2925 int study_options = 0;
2926 int default_find_match_limit = FALSE;
2927 int op = 1;
2928 int timeit = 0;
2929 int timeitm = 0;
2930 int showinfo = 0;
2931 int showstore = 0;
2932 int force_study = -1;
2933 int force_study_options = 0;
2934 int quiet = 0;
2935 int size_offsets = 45;
2936 int size_offsets_max;
2937 int *offsets = NULL;
2938 int debug = 0;
2939 int done = 0;
2940 int all_use_dfa = 0;
2941 int verify_jit = 0;
2942 int yield = 0;
2943 #ifdef SUPPORT_PCRE32
2944 int mask_utf32 = 0;
2945 #endif
2946 int stack_size;
2947 pcre_uint8 *dbuffer = NULL;
2948 size_t dbuffer_size = 1u << 14;
2949
2950 #if !defined NOPOSIX
2951 int posix = 0;
2952 #endif
2953 #if !defined NODFA
2954 int *dfa_workspace = NULL;
2955 #endif
2956
2957 pcre_jit_stack *jit_stack = NULL;
2958
2959 /* These vectors store, end-to-end, a list of zero-terminated captured
2960 substring names, each list itself being terminated by an empty name. Assume
2961 that 1024 is plenty long enough for the few names we'll be testing. It is
2962 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2963 for the actual memory, to ensure alignment. */
2964
2965 pcre_uint32 copynames[1024];
2966 pcre_uint32 getnames[1024];
2967
2968 #ifdef SUPPORT_PCRE32
2969 pcre_uint32 *cn32ptr;
2970 pcre_uint32 *gn32ptr;
2971 #endif
2972
2973 #ifdef SUPPORT_PCRE16
2974 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2975 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2976 pcre_uint16 *cn16ptr;
2977 pcre_uint16 *gn16ptr;
2978 #endif
2979
2980 #ifdef SUPPORT_PCRE8
2981 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2982 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2983 pcre_uint8 *cn8ptr;
2984 pcre_uint8 *gn8ptr;
2985 #endif
2986
2987 /* Get buffers from malloc() so that valgrind will check their misuse when
2988 debugging. They grow automatically when very long lines are read. The 16-
2989 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2990
2991 buffer = (pcre_uint8 *)malloc(buffer_size);
2992 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2993
2994 /* The outfile variable is static so that new_malloc can use it. */
2995
2996 outfile = stdout;
2997
2998 /* The following _setmode() stuff is some Windows magic that tells its runtime
2999 library to translate CRLF into a single LF character. At least, that's what
3000 I've been told: never having used Windows I take this all on trust. Originally
3001 it set 0x8000, but then I was advised that _O_BINARY was better. */
3002
3003 #if defined(_WIN32) || defined(WIN32)
3004 _setmode( _fileno( stdout ), _O_BINARY );
3005 #endif
3006
3007 /* Get the version number: both pcre_version() and pcre16_version() give the
3008 same answer. We just need to ensure that we call one that is available. */
3009
3010 #if defined SUPPORT_PCRE8
3011 version = pcre_version();
3012 #elif defined SUPPORT_PCRE16
3013 version = pcre16_version();
3014 #elif defined SUPPORT_PCRE32
3015 version = pcre32_version();
3016 #endif
3017
3018 /* Scan options */
3019
3020 while (argc > 1 && argv[op][0] == '-')
3021 {
3022 pcre_uint8 *endptr;
3023 char *arg = argv[op];
3024
3025 if (strcmp(arg, "-m") == 0) showstore = 1;
3026 else if (strcmp(arg, "-s") == 0) force_study = 0;
3027
3028 else if (strncmp(arg, "-s+", 3) == 0)
3029 {
3030 arg += 3;
3031 if (*arg == '+') { arg++; verify_jit = TRUE; }
3032 force_study = 1;
3033 if (*arg == 0)
3034 force_study_options = jit_study_bits[6];
3035 else if (*arg >= '1' && *arg <= '7')
3036 force_study_options = jit_study_bits[*arg - '1'];
3037 else goto BAD_ARG;
3038 }
3039 else if (strcmp(arg, "-8") == 0)
3040 {
3041 #ifdef SUPPORT_PCRE8
3042 pcre_mode = PCRE8_MODE;
3043 #else
3044 printf("** This version of PCRE was built without 8-bit support\n");
3045 exit(1);
3046 #endif
3047 }
3048 else if (strcmp(arg, "-16") == 0)
3049 {
3050 #ifdef SUPPORT_PCRE16
3051 pcre_mode = PCRE16_MODE;
3052 #else
3053 printf("** This version of PCRE was built without 16-bit support\n");
3054 exit(1);
3055 #endif
3056 }
3057 else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
3058 {
3059 #ifdef SUPPORT_PCRE32
3060 pcre_mode = PCRE32_MODE;
3061 mask_utf32 = (strcmp(arg, "-32+") == 0);
3062 #else
3063 printf("** This version of PCRE was built without 32-bit support\n");
3064 exit(1);
3065 #endif
3066 }
3067 else if (strcmp(arg, "-q") == 0) quiet = 1;
3068 else if (strcmp(arg, "-b") == 0) debug = 1;
3069 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3070 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3071 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3072 #if !defined NODFA
3073 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3074 #endif
3075 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3076 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3077 *endptr == 0))
3078 {
3079 op++;
3080 argc--;
3081 }
3082 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3083 {
3084 int both = arg[2] == 0;
3085 int temp;
3086 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3087 *endptr == 0))
3088 {
3089 timeitm = temp;
3090 op++;
3091 argc--;
3092 }
3093 else timeitm = LOOPREPEAT;
3094 if (both) timeit = timeitm;
3095 }
3096 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3097 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3098 *endptr == 0))
3099 {
3100 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3101 printf("PCRE: -S not supported on this OS\n");
3102 exit(1);
3103 #else
3104 int rc;
3105 struct rlimit rlim;
3106 getrlimit(RLIMIT_STACK, &rlim);
3107 rlim.rlim_cur = stack_size * 1024 * 1024;
3108 rc = setrlimit(RLIMIT_STACK, &rlim);
3109 if (rc != 0)
3110 {
3111 printf("PCRE: setrlimit() failed with error %d\n", rc);
3112 exit(1);
3113 }
3114 op++;
3115 argc--;
3116 #endif
3117 }
3118 #if !defined NOPOSIX
3119 else if (strcmp(arg, "-p") == 0) posix = 1;
3120 #endif
3121 else if (strcmp(arg, "-C") == 0)
3122 {
3123 int rc;
3124 unsigned long int lrc;
3125
3126 if (argc > 2)
3127 {
3128 if (strcmp(argv[op + 1], "linksize") == 0)
3129 {
3130 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3131 printf("%d\n", rc);
3132 yield = rc;
3133 }
3134 else if (strcmp(argv[op + 1], "pcre8") == 0)
3135 {
3136 #ifdef SUPPORT_PCRE8
3137 printf("1\n");
3138 yield = 1;
3139 #else
3140 printf("0\n");
3141 yield = 0;
3142 #endif
3143 }
3144 else if (strcmp(argv[op + 1], "pcre16") == 0)
3145 {
3146 #ifdef SUPPORT_PCRE16
3147 printf("1\n");
3148 yield = 1;
3149 #else
3150 printf("0\n");
3151 yield = 0;
3152 #endif
3153 }
3154 else if (strcmp(argv[op + 1], "pcre32") == 0)
3155 {
3156 #ifdef SUPPORT_PCRE32
3157 printf("1\n");
3158 yield = 1;
3159 #else
3160 printf("0\n");
3161 yield = 0;
3162 #endif
3163 goto EXIT;
3164 }
3165 if (strcmp(argv[op + 1], "utf") == 0)
3166 {
3167 #ifdef SUPPORT_PCRE8
3168 if (pcre_mode == PCRE8_MODE)
3169 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3170 #endif
3171 #ifdef SUPPORT_PCRE16
3172 if (pcre_mode == PCRE16_MODE)
3173 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3174 #endif
3175 #ifdef SUPPORT_PCRE32
3176 if (pcre_mode == PCRE32_MODE)
3177 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3178 #endif
3179 printf("%d\n", rc);
3180 yield = rc;
3181 goto EXIT;
3182 }
3183 else if (strcmp(argv[op + 1], "ucp") == 0)
3184 {
3185 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3186 printf("%d\n", rc);
3187 yield = rc;
3188 }
3189 else if (strcmp(argv[op + 1], "jit") == 0)
3190 {
3191 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3192 printf("%d\n", rc);
3193 yield = rc;
3194 }
3195 else if (strcmp(argv[op + 1], "newline") == 0)
3196 {
3197 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3198 print_newline_config(rc, TRUE);
3199 }
3200 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3201 {
3202 #ifdef EBCDIC
3203 printf("1\n");
3204 yield = 1;
3205 #else
3206 printf("0\n");
3207 #endif
3208 }
3209 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3210 {
3211 #ifdef EBCDIC
3212 printf("0x%02x\n", CHAR_LF);
3213 #else
3214 printf("0\n");
3215 #endif
3216 }
3217 else
3218 {
3219 printf("Unknown -C option: %s\n", argv[op + 1]);
3220 }
3221 goto EXIT;
3222 }
3223
3224 /* No argument for -C: output all configuration information. */
3225
3226 printf("PCRE version %s\n", version);
3227 printf("Compiled with\n");
3228
3229 #ifdef EBCDIC
3230 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3231 #endif
3232
3233 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3234 are set, either both UTFs are supported or both are not supported. */
3235
3236 #ifdef SUPPORT_PCRE8
3237 printf(" 8-bit support\n");
3238 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3239 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3240 #endif
3241 #ifdef SUPPORT_PCRE16
3242 printf(" 16-bit support\n");
3243 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3244 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3245 #endif
3246 #ifdef SUPPORT_PCRE32
3247 printf(" 32-bit support\n");
3248 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3249 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3250 #endif
3251
3252 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3253 printf(" %sUnicode properties support\n", rc? "" : "No ");
3254 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3255 if (rc)
3256 {
3257 const char *arch;
3258 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3259 printf(" Just-in-time compiler support: %s\n", arch);
3260 }
3261 else
3262 printf(" No just-in-time compiler support\n");
3263 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3264 print_newline_config(rc, FALSE);
3265 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3266 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3267 "all Unicode newlines");
3268 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3269 printf(" Internal link size = %d\n", rc);
3270 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3271 printf(" POSIX malloc threshold = %d\n", rc);
3272 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3273 printf(" Default match limit = %ld\n", lrc);
3274 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3275 printf(" Default recursion depth limit = %ld\n", lrc);
3276 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3277 printf(" Match recursion uses %s", rc? "stack" : "heap");
3278 if (showstore)
3279 {
3280 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3281 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3282 }
3283 printf("\n");
3284 goto EXIT;
3285 }
3286 else if (strcmp(arg, "-help") == 0 ||
3287 strcmp(arg, "--help") == 0)
3288 {
3289 usage();
3290 goto EXIT;
3291 }
3292 else
3293 {
3294 BAD_ARG:
3295 printf("** Unknown or malformed option %s\n", arg);
3296 usage();
3297 yield = 1;
3298 goto EXIT;
3299 }
3300 op++;
3301 argc--;
3302 }
3303
3304 /* Get the store for the offsets vector, and remember what it was */
3305
3306 size_offsets_max = size_offsets;
3307 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3308 if (offsets == NULL)
3309 {
3310 printf("** Failed to get %d bytes of memory for offsets vector\n",
3311 (int)(size_offsets_max * sizeof(int)));
3312 yield = 1;
3313 goto EXIT;
3314 }
3315
3316 /* Sort out the input and output files */
3317
3318 if (argc > 1)
3319 {
3320 infile = fopen(argv[op], INPUT_MODE);
3321 if (infile == NULL)
3322 {
3323 printf("** Failed to open %s\n", argv[op]);
3324 yield = 1;
3325 goto EXIT;
3326 }
3327 }
3328
3329 if (argc > 2)
3330 {
3331 outfile = fopen(argv[op+1], OUTPUT_MODE);
3332 if (outfile == NULL)
3333 {
3334 printf("** Failed to open %s\n", argv[op+1]);
3335 yield = 1;
3336 goto EXIT;
3337 }
3338 }
3339
3340 /* Set alternative malloc function */
3341
3342 #ifdef SUPPORT_PCRE8
3343 pcre_malloc = new_malloc;
3344 pcre_free = new_free;
3345 pcre_stack_malloc = stack_malloc;
3346 pcre_stack_free = stack_free;
3347 #endif
3348
3349 #ifdef SUPPORT_PCRE16
3350 pcre16_malloc = new_malloc;
3351 pcre16_free = new_free;
3352 pcre16_stack_malloc = stack_malloc;
3353 pcre16_stack_free = stack_free;
3354 #endif
3355
3356 #ifdef SUPPORT_PCRE32
3357 pcre32_malloc = new_malloc;
3358 pcre32_free = new_free;
3359 pcre32_stack_malloc = stack_malloc;
3360 pcre32_stack_free = stack_free;
3361 #endif
3362
3363 /* Heading line unless quiet, then prompt for first regex if stdin */
3364
3365 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3366
3367 /* Main loop */
3368
3369 while (!done)
3370 {
3371 pcre *re = NULL;
3372 pcre_extra *extra = NULL;
3373
3374 #if !defined NOPOSIX /* There are still compilers that require no indent */
3375 regex_t preg;
3376 int do_posix = 0;
3377 #endif
3378
3379 const char *error;
3380 pcre_uint8 *markptr;
3381 pcre_uint8 *p, *pp, *ppp;
3382 pcre_uint8 *to_file = NULL;
3383 const pcre_uint8 *tables = NULL;
3384 unsigned long int get_options;
3385 unsigned long int true_size, true_study_size = 0;
3386 size_t size, regex_gotten_store;
3387 int do_allcaps = 0;
3388 int do_mark = 0;
3389 int do_study = 0;
3390 int no_force_study = 0;
3391 int do_debug = debug;
3392 int do_G = 0;
3393 int do_g = 0;
3394 int do_showinfo = showinfo;
3395 int do_showrest = 0;
3396 int do_showcaprest = 0;
3397 int do_flip = 0;
3398 int erroroffset, len, delimiter, poffset;
3399
3400 #if !defined NODFA
3401 int dfa_matched = 0;
3402 #endif
3403
3404 use_utf = 0;
3405 debug_lengths = 1;
3406
3407 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3408 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3409 fflush(outfile);
3410
3411 p = buffer;
3412 while (isspace(*p)) p++;
3413 if (*p == 0) continue;
3414
3415 /* See if the pattern is to be loaded pre-compiled from a file. */
3416
3417 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3418 {
3419 pcre_uint32 magic;
3420 pcre_uint8 sbuf[8];
3421 FILE *f;
3422
3423 p++;
3424 if (*p == '!')
3425 {
3426 do_debug = TRUE;
3427 do_showinfo = TRUE;
3428 p++;
3429 }
3430
3431 pp = p + (int)strlen((char *)p);
3432 while (isspace(pp[-1])) pp--;
3433 *pp = 0;
3434
3435 f = fopen((char *)p, "rb");
3436 if (f == NULL)
3437 {
3438 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3439 continue;
3440 }
3441
3442 first_gotten_store = 0;
3443 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3444
3445 true_size =
3446 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3447 true_study_size =
3448 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3449
3450 re = (pcre *)new_malloc(true_size);
3451 if (re == NULL)
3452 {
3453 printf("** Failed to get %d bytes of memory for pcre object\n",
3454 (int)true_size);
3455 yield = 1;
3456 goto EXIT;
3457 }
3458 regex_gotten_store = first_gotten_store;
3459
3460 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3461
3462 magic = REAL_PCRE_MAGIC(re);
3463 if (magic != MAGIC_NUMBER)
3464 {
3465 if (swap_uint32(magic) == MAGIC_NUMBER)
3466 {
3467 do_flip = 1;
3468 }
3469 else
3470 {
3471 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3472 new_free(re);
3473 fclose(f);
3474 continue;
3475 }
3476 }
3477
3478 /* We hide the byte-invert info for little and big endian tests. */
3479 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3480 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3481
3482 /* Now see if there is any following study data. */
3483
3484 if (true_study_size != 0)
3485 {
3486 pcre_study_data *psd;
3487
3488 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3489 extra->flags = PCRE_EXTRA_STUDY_DATA;
3490
3491 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3492 extra->study_data = psd;
3493
3494 if (fread(psd, 1, true_study_size, f) != true_study_size)
3495 {
3496 FAIL_READ:
3497 fprintf(outfile, "Failed to read data from %s\n", p);
3498 if (extra != NULL)
3499 {
3500 PCRE_FREE_STUDY(extra);
3501 }
3502 new_free(re);
3503 fclose(f);
3504 continue;
3505 }
3506 fprintf(outfile, "Study data loaded from %s\n", p);
3507 do_study = 1; /* To get the data output if requested */
3508 }
3509 else fprintf(outfile, "No study data\n");
3510
3511 /* Flip the necessary bytes. */
3512 if (do_flip)
3513 {
3514 int rc;
3515 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3516 if (rc == PCRE_ERROR_BADMODE)
3517 {
3518 /* Simulate the result of the function call below. */
3519 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3520 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3521 PCRE_INFO_OPTIONS);
3522 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3523 "%d-bit mode\n", 8 * CHAR_SIZE,
3524 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3525 new_free(re);
3526 fclose(f);
3527 continue;
3528 }
3529 }
3530
3531 /* Need to know if UTF-8 for printing data strings. */
3532
3533 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3534 {
3535 new_free(re);
3536 fclose(f);
3537 continue;
3538 }
3539 use_utf = (get_options & PCRE_UTF8) != 0;
3540
3541 fclose(f);
3542 goto SHOW_INFO;
3543 }
3544
3545 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3546 the pattern; if it isn't complete, read more. */
3547
3548 delimiter = *p++;
3549
3550 if (isalnum(delimiter) || delimiter == '\\')
3551 {
3552 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3553 goto SKIP_DATA;
3554 }
3555
3556 pp = p;
3557 poffset = (int)(p - buffer);
3558
3559 for(;;)
3560 {
3561 while (*pp != 0)
3562 {
3563 if (*pp == '\\' && pp[1] != 0) pp++;
3564 else if (*pp == delimiter) break;
3565 pp++;
3566 }
3567 if (*pp != 0) break;
3568 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3569 {
3570 fprintf(outfile, "** Unexpected EOF\n");
3571 done = 1;
3572 goto CONTINUE;
3573 }
3574 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3575 }
3576
3577 /* The buffer may have moved while being extended; reset the start of data
3578 pointer to the correct relative point in the buffer. */
3579
3580 p = buffer + poffset;
3581
3582 /* If the first character after the delimiter is backslash, make
3583 the pattern end with backslash. This is purely to provide a way
3584 of testing for the error message when a pattern ends with backslash. */
3585
3586 if (pp[1] == '\\') *pp++ = '\\';
3587
3588 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3589 for callouts. */
3590
3591 *pp++ = 0;
3592 strcpy((char *)pbuffer, (char *)p);
3593
3594 /* Look for options after final delimiter */
3595
3596 options = 0;
3597 study_options = force_study_options;
3598 log_store = showstore; /* default from command line */
3599
3600 while (*pp != 0)
3601 {
3602 switch (*pp++)
3603 {
3604 case 'f': options |= PCRE_FIRSTLINE; break;
3605 case 'g': do_g = 1; break;
3606 case 'i': options |= PCRE_CASELESS; break;
3607 case 'm': options |= PCRE_MULTILINE; break;
3608 case 's': options |= PCRE_DOTALL; break;
3609 case 'x': options |= PCRE_EXTENDED; break;
3610
3611 case '+':
3612 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3613 break;
3614
3615 case '=': do_allcaps = 1; break;
3616 case 'A': options |= PCRE_ANCHORED; break;
3617 case 'B': do_debug = 1; break;
3618 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3619 case 'D': do_debug = do_showinfo = 1; break;
3620 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3621 case 'F': do_flip = 1; break;
3622 case 'G': do_G = 1; break;
3623 case 'I': do_showinfo = 1; break;
3624 case 'J': options |= PCRE_DUPNAMES; break;
3625 case 'K': do_mark = 1; break;
3626 case 'M': log_store = 1; break;
3627 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3628
3629 #if !defined NOPOSIX
3630 case 'P': do_posix = 1; break;
3631 #endif
3632
3633 case 'S':
3634 do_study = 1;
3635 for (;;)
3636 {
3637 switch (*pp++)
3638 {
3639 case 'S':
3640 do_study = 0;
3641 no_force_study = 1;
3642 break;
3643
3644 case '!':
3645 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3646 break;
3647
3648 case '+':
3649 if (*pp == '+')
3650 {
3651 verify_jit = TRUE;
3652 pp++;
3653 }
3654 if (*pp >= '1' && *pp <= '7')
3655 study_options |= jit_study_bits[*pp++ - '1'];
3656 else
3657 study_options |= jit_study_bits[6];
3658 break;
3659
3660 case '-':
3661 study_options &= ~PCRE_STUDY_ALLJIT;
3662 break;
3663
3664 default:
3665 pp--;
3666 goto ENDLOOP;
3667 }
3668 }
3669 ENDLOOP:
3670 break;
3671
3672 case 'U': options |= PCRE_UNGREEDY; break;
3673 case 'W': options |= PCRE_UCP; break;
3674 case 'X': options |= PCRE_EXTRA; break;
3675 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3676 case 'Z': debug_lengths = 0; break;
3677 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3678 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3679
3680 case 'T':
3681 switch (*pp++)
3682 {
3683 case '0': tables = tables0; break;
3684 case '1': tables = tables1; break;
3685
3686 case '\r':
3687 case '\n':
3688 case ' ':
3689 case 0:
3690 fprintf(outfile, "** Missing table number after /T\n");
3691 goto SKIP_DATA;
3692
3693 default:
3694 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3695 goto SKIP_DATA;
3696 }
3697 break;
3698
3699 case 'L':
3700 ppp = pp;
3701 /* The '\r' test here is so that it works on Windows. */
3702 /* The '0' test is just in case this is an unterminated line. */
3703 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3704 *ppp = 0;
3705 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3706 {
3707 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3708 goto SKIP_DATA;
3709 }
3710 locale_set = 1;
3711 tables = PCRE_MAKETABLES;
3712 pp = ppp;
3713 break;
3714
3715 case '>':
3716 to_file = pp;
3717 while (*pp != 0) pp++;
3718 while (isspace(pp[-1])) pp--;
3719 *pp = 0;
3720 break;
3721
3722 case '<':
3723 {
3724 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3725 {
3726 options |= PCRE_JAVASCRIPT_COMPAT;
3727 pp += 3;
3728 }
3729 else
3730 {
3731 int x = check_newline(pp, outfile);
3732 if (x == 0) goto SKIP_DATA;
3733 options |= x;
3734 while (*pp++ != '>');
3735 }
3736 }
3737 break;
3738
3739 case '\r': /* So that it works in Windows */
3740 case '\n':
3741 case ' ':
3742 break;
3743
3744 default:
3745 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3746 goto SKIP_DATA;
3747 }
3748 }
3749
3750 /* Handle compiling via the POSIX interface, which doesn't support the
3751 timing, showing, or debugging options, nor the ability to pass over
3752 local character tables. Neither does it have 16-bit support. */
3753
3754 #if !defined NOPOSIX
3755 if (posix || do_posix)
3756 {
3757 int rc;
3758 int cflags = 0;
3759
3760 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3761 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3762 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3763 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3764 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3765 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3766 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3767
3768 first_gotten_store = 0;
3769 rc = regcomp(&preg, (char *)p, cflags);
3770
3771 /* Compilation failed; go back for another re, skipping to blank line
3772 if non-interactive. */
3773
3774 if (rc != 0)
3775 {
3776 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3777 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3778 goto SKIP_DATA;
3779 }
3780 }
3781
3782 /* Handle compiling via the native interface */
3783
3784 else
3785 #endif /* !defined NOPOSIX */
3786
3787 {
3788 /* In 16- or 32-bit mode, convert the input. */
3789
3790 #ifdef SUPPORT_PCRE16
3791 if (pcre_mode == PCRE16_MODE)
3792 {
3793 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3794 {
3795 case -1:
3796 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3797 "converted to UTF-16\n");
3798 goto SKIP_DATA;
3799
3800 case -2:
3801 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3802 "cannot be converted to UTF-16\n");
3803 goto SKIP_DATA;
3804
3805 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3806 fprintf(outfile, "**Failed: character value greater than 0xffff "
3807 "cannot be converted to 16-bit in non-UTF mode\n");
3808 goto SKIP_DATA;
3809
3810 default:
3811 break;
3812 }
3813 p = (pcre_uint8 *)buffer16;
3814 }
3815 #endif
3816
3817 #ifdef SUPPORT_PCRE32
3818 if (pcre_mode == PCRE32_MODE)
3819 {
3820 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3821 {
3822 case -1:
3823 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3824 "converted to UTF-32\n");
3825 goto SKIP_DATA;
3826
3827 case -2:
3828 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3829 "cannot be converted to UTF-32\n");
3830 goto SKIP_DATA;
3831
3832 case -3:
3833 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3834 goto SKIP_DATA;
3835
3836 default:
3837 break;
3838 }
3839 p = (pcre_uint8 *)buffer32;
3840 }
3841 #endif
3842
3843 /* Compile many times when timing */
3844
3845 if (timeit > 0)
3846 {
3847 register int i;
3848 clock_t time_taken;
3849 clock_t start_time = clock();
3850 for (i = 0; i < timeit; i++)
3851 {
3852 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3853 if (re != NULL) free(re);
3854 }
3855 time_taken = clock() - start_time;
3856 fprintf(outfile, "Compile time %.4f milliseconds\n",
3857 (((double)time_taken * 1000.0) / (double)timeit) /
3858 (double)CLOCKS_PER_SEC);
3859 }
3860
3861 first_gotten_store = 0;
3862 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3863
3864 /* Compilation failed; go back for another re, skipping to blank line
3865 if non-interactive. */
3866
3867 if (re == NULL)
3868 {
3869 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3870 SKIP_DATA:
3871 if (infile != stdin)
3872 {
3873 for (;;)
3874 {
3875 if (extend_inputline(infile, buffer, NULL) == NULL)
3876 {
3877 done = 1;
3878 goto CONTINUE;
3879 }
3880 len = (int)strlen((char *)buffer);
3881 while (len > 0 && isspace(buffer[len-1])) len--;
3882 if (len == 0) break;
3883 }
3884 fprintf(outfile, "\n");
3885 }
3886 goto CONTINUE;
3887 }
3888
3889 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3890 within the regex; check for this so that we know how to process the data
3891 lines. */
3892
3893 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3894 goto SKIP_DATA;
3895 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3896
3897 /* Extract the size for possible writing before possibly flipping it,
3898 and remember the store that was got. */
3899
3900 true_size = REAL_PCRE_SIZE(re);
3901 regex_gotten_store = first_gotten_store;
3902
3903 /* Output code size information if requested */
3904
3905 if (log_store)
3906 {
3907 int name_count, name_entry_size, real_pcre_size;
3908
3909 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3910 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3911 real_pcre_size = 0;
3912 #ifdef SUPPORT_PCRE8
3913 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3914 real_pcre_size = sizeof(real_pcre);
3915 #endif
3916 #ifdef SUPPORT_PCRE16
3917 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3918 real_pcre_size = sizeof(real_pcre16);
3919 #endif
3920 #ifdef SUPPORT_PCRE32
3921 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3922 real_pcre_size = sizeof(real_pcre32);
3923 #endif
3924 fprintf(outfile, "Memory allocation (code space): %d\n",
3925 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3926 }
3927
3928 /* If -s or /S was present, study the regex to generate additional info to
3929 help with the matching, unless the pattern has the SS option, which
3930 suppresses the effect of /S (used for a few test patterns where studying is
3931 never sensible). */
3932
3933 if (do_study || (force_study >= 0 && !no_force_study))
3934 {
3935 if (timeit > 0)
3936 {
3937 register int i;
3938 clock_t time_taken;
3939 clock_t start_time = clock();
3940 for (i = 0; i < timeit; i++)
3941 {
3942 PCRE_STUDY(extra, re, study_options, &error);
3943 }
3944 time_taken = clock() - start_time;
3945 if (extra != NULL)
3946 {
3947 PCRE_FREE_STUDY(extra);
3948 }
3949 fprintf(outfile, " Study time %.4f milliseconds\n",
3950 (((double)time_taken * 1000.0) / (double)timeit) /
3951 (double)CLOCKS_PER_SEC);
3952 }
3953 PCRE_STUDY(extra, re, study_options, &error);
3954 if (error != NULL)
3955 fprintf(outfile, "Failed to study: %s\n", error);
3956 else if (extra != NULL)
3957 {
3958 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3959 if (log_store)
3960 {
3961 size_t jitsize;
3962 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3963 jitsize != 0)
3964 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3965 }
3966 }
3967 }
3968
3969 /* If /K was present, we set up for handling MARK data. */
3970
3971 if (do_mark)
3972 {
3973 if (extra == NULL)
3974 {
3975 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3976 extra->flags = 0;
3977 }
3978 extra->mark = &markptr;
3979 extra->flags |= PCRE_EXTRA_MARK;
3980 }
3981
3982 /* Extract and display information from the compiled data if required. */
3983
3984 SHOW_INFO:
3985
3986 if (do_debug)
3987 {
3988 fprintf(outfile, "------------------------------------------------------------------\n");
3989 PCRE_PRINTINT(re, outfile, debug_lengths);
3990 }
3991
3992 /* We already have the options in get_options (see above) */
3993
3994 if (do_showinfo)
3995 {
3996 unsigned long int all_options;
3997 pcre_uint32 first_char, need_char;
3998 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3999 hascrorlf, maxlookbehind;
4000 int nameentrysize, namecount;
4001 const pcre_uint8 *nametable;
4002
4003 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4004 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4005 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4006 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4007 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4008 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4009 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4010 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4011 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4012 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4013 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4014 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4015 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4016 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4017 != 0)
4018 goto SKIP_DATA;
4019
4020 if (size != regex_gotten_store) fprintf(outfile,
4021 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4022 (int)size, (int)regex_gotten_store);
4023
4024 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4025 if (backrefmax > 0)
4026 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4027
4028 if (namecount > 0)
4029 {
4030 fprintf(outfile, "Named capturing subpatterns:\n");
4031 while (namecount-- > 0)
4032 {
4033 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4034 int length = (int)STRLEN(nametable + imm2_size);
4035 fprintf(outfile, " ");
4036 PCHARSV(nametable, imm2_size, length, outfile);
4037 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4038 #ifdef SUPPORT_PCRE32
4039 if (pcre_mode == PCRE32_MODE)
4040 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4041 #endif
4042 #ifdef SUPPORT_PCRE16
4043 if (pcre_mode == PCRE16_MODE)
4044 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4045 #endif
4046 #ifdef SUPPORT_PCRE8
4047 if (pcre_mode == PCRE8_MODE)
4048 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4049 #endif
4050 nametable += nameentrysize * CHAR_SIZE;
4051 }
4052 }
4053
4054 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4055 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4056
4057 all_options = REAL_PCRE_OPTIONS(re);
4058 if (do_flip) all_options = swap_uint32(all_options);
4059
4060 if (get_options == 0) fprintf(outfile, "No options\n");
4061 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4062 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4063 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4064 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4065 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4066 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4067 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4068 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4069 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4070 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4071 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4072 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4073 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4074 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4075 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4076 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4077 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4078 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4079
4080 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4081
4082 switch (get_options & PCRE_NEWLINE_BITS)
4083 {
4084 case PCRE_NEWLINE_CR:
4085 fprintf(outfile, "Forced newline sequence: CR\n");
4086 break;
4087
4088 case PCRE_NEWLINE_LF:
4089 fprintf(outfile, "Forced newline sequence: LF\n");
4090 break;
4091
4092 case PCRE_NEWLINE_CRLF:
4093 fprintf(outfile, "Forced newline sequence: CRLF\n");
4094 break;
4095
4096 case PCRE_NEWLINE_ANYCRLF:
4097 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4098 break;
4099
4100 case PCRE_NEWLINE_ANY:
4101 fprintf(outfile, "Forced newline sequence: ANY\n");
4102 break;
4103
4104 default:
4105 break;
4106 }
4107
4108 if (first_char_set == 2)
4109 {
4110 fprintf(outfile, "First char at start or follows newline\n");
4111 }
4112 else if (first_char_set == 1)
4113 {
4114 const char *caseless =
4115 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4116 "" : " (caseless)";
4117
4118 if (PRINTOK(first_char))
4119 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4120 else
4121 {
4122 fprintf(outfile, "First char = ");
4123 pchar(first_char, outfile);
4124 fprintf(outfile, "%s\n", caseless);
4125 }
4126 }
4127 else
4128 {
4129 fprintf(outfile, "No first char\n");
4130 }
4131
4132 if (need_char_set == 0)
4133 {
4134 fprintf(outfile, "No need char\n");
4135 }
4136 else
4137 {
4138 const char *caseless =
4139 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4140 "" : " (caseless)";
4141
4142 if (PRINTOK(need_char))
4143 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4144 else
4145 {
4146 fprintf(outfile, "Need char = ");
4147 pchar(need_char, outfile);
4148 fprintf(outfile, "%s\n", caseless);
4149 }
4150 }
4151
4152 if (maxlookbehind > 0)
4153 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4154
4155 /* Don't output study size; at present it is in any case a fixed
4156 value, but it varies, depending on the computer architecture, and
4157 so messes up the test suite. (And with the /F option, it might be
4158 flipped.) If study was forced by an external -s, don't show this
4159 information unless -i or -d was also present. This means that, except
4160 when auto-callouts are involved, the output from runs with and without
4161 -s should be identical. */
4162
4163 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4164 {
4165 if (extra == NULL)
4166 fprintf(outfile, "Study returned NULL\n");
4167 else
4168 {
4169 pcre_uint8 *start_bits = NULL;
4170 int minlength;
4171
4172 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4173 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4174
4175 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4176 {
4177 if (start_bits == NULL)
4178 fprintf(outfile, "No set of starting bytes\n");
4179 else
4180 {
4181 int i;
4182 int c = 24;
4183 fprintf(outfile, "Starting byte set: ");
4184 for (i = 0; i < 256; i++)
4185 {
4186 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4187 {
4188 if (c > 75)
4189 {
4190 fprintf(outfile, "\n ");
4191 c = 2;
4192 }
4193 if (PRINTOK(i) && i != ' ')
4194 {
4195 fprintf(outfile, "%c ", i);
4196 c += 2;
4197 }
4198 else
4199 {
4200 fprintf(outfile, "\\x%02x ", i);
4201 c += 5;
4202 }
4203 }
4204 }
4205 fprintf(outfile, "\n");
4206 }
4207 }
4208 }
4209
4210 /* Show this only if the JIT was set by /S, not by -s. */
4211
4212 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4213 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4214 {
4215 int jit;
4216 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4217 {
4218 if (jit)
4219 fprintf(outfile, "JIT study was successful\n");
4220 else
4221 #ifdef SUPPORT_JIT
4222 fprintf(outfile, "JIT study was not successful\n");
4223 #else
4224 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4225 #endif
4226 }
4227 }
4228 }
4229 }
4230
4231 /* If the '>' option was present, we write out the regex to a file, and
4232 that is all. The first 8 bytes of the file are the regex length and then
4233 the study length, in big-endian order. */
4234
4235 if (to_file != NULL)
4236 {
4237 FILE *f = fopen((char *)to_file, "wb");
4238 if (f == NULL)
4239 {
4240 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4241 }
4242 else
4243 {
4244 pcre_uint8 sbuf[8];
4245
4246 if (do_flip) regexflip(re, extra);
4247 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4248 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4249 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4250 sbuf[3] = (pcre_uint8)((true_size) & 255);
4251 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4252 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4253 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4254 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4255
4256 if (fwrite(sbuf, 1, 8, f) < 8 ||
4257 fwrite(re, 1, true_size, f) < true_size)
4258 {
4259 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4260 }
4261 else
4262 {
4263 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4264
4265 /* If there is study data, write it. */
4266
4267 if (extra != NULL)
4268 {
4269 if (fwrite(extra->study_data, 1, true_study_size, f) <
4270 true_study_size)
4271 {
4272 fprintf(outfile, "Write error on %s: %s\n", to_file,
4273 strerror(errno));
4274 }
4275 else fprintf(outfile, "Study data written to %s\n", to_file);
4276 }
4277 }
4278 fclose(f);
4279 }
4280
4281 new_free(re);
4282 if (extra != NULL)
4283 {
4284 PCRE_FREE_STUDY(extra);
4285 }
4286 if (locale_set)
4287 {
4288 new_free((void *)tables);
4289 setlocale(LC_CTYPE, "C");
4290 locale_set = 0;
4291 }
4292 continue; /* With next regex */
4293 }
4294 } /* End of non-POSIX compile */
4295
4296 /* Read data lines and test them */
4297
4298 for (;;)
4299 {
4300 #ifdef SUPPORT_PCRE8
4301 pcre_uint8 *q8;
4302 #endif
4303 #ifdef SUPPORT_PCRE16
4304 pcre_uint16 *q16;
4305 #endif
4306 #ifdef SUPPORT_PCRE32
4307 pcre_uint32 *q32;
4308 #endif
4309 pcre_uint8 *bptr;
4310 int *use_offsets = offsets;
4311 int use_size_offsets = size_offsets;
4312 int callout_data = 0;
4313 int callout_data_set = 0;
4314 int count;
4315 pcre_uint32 c;
4316 int copystrings = 0;
4317 int find_match_limit = default_find_match_limit;
4318 int getstrings = 0;
4319 int getlist = 0;
4320 int gmatched = 0;
4321 int start_offset = 0;
4322 int start_offset_sign = 1;
4323 int g_notempty = 0;
4324 int use_dfa = 0;
4325
4326 *copynames = 0;
4327 *getnames = 0;
4328
4329 #ifdef SUPPORT_PCRE32
4330 cn32ptr = copynames;
4331 gn32ptr = getnames;
4332 #endif
4333 #ifdef SUPPORT_PCRE16
4334 cn16ptr = copynames16;
4335 gn16ptr = getnames16;
4336 #endif
4337 #ifdef SUPPORT_PCRE8
4338 cn8ptr = copynames8;
4339 gn8ptr = getnames8;
4340 #endif
4341
4342 SET_PCRE_CALLOUT(callout);
4343 first_callout = 1;
4344 last_callout_mark = NULL;
4345 callout_extra = 0;
4346 callout_count = 0;
4347 callout_fail_count = 999999;
4348 callout_fail_id = -1;
4349 show_malloc = 0;
4350 options = 0;
4351
4352 if (extra != NULL) extra->flags &=
4353 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4354
4355 len = 0;
4356 for (;;)
4357 {
4358 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4359 {
4360 if (len > 0) /* Reached EOF without hitting a newline */
4361 {
4362 fprintf(outfile, "\n");
4363 break;
4364 }
4365 done = 1;
4366 goto CONTINUE;
4367 }
4368 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4369 len = (int)strlen((char *)buffer);
4370 if (buffer[len-1] == '\n') break;
4371 }
4372
4373 while (len > 0 && isspace(buffer[len-1])) len--;
4374 buffer[len] = 0;
4375 if (len == 0) break;
4376
4377 p = buffer;
4378 while (isspace(*p)) p++;
4379
4380 #ifndef NOUTF
4381 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4382 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4383 if (use_utf)
4384 {
4385 pcre_uint8 *q;
4386 pcre_uint32 cc;
4387 int n = 1;
4388
4389 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4390 if (n <= 0)
4391 {
4392 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4393 goto NEXT_DATA;
4394 }
4395 }
4396 #endif
4397
4398 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4399 the number of pcre_uchar units that will be needed. */
4400 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4401 {
4402 dbuffer_size *= 2;
4403 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4404 if (dbuffer == NULL)
4405 {
4406 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4407 exit(1);
4408 }
4409 }
4410
4411 #ifdef SUPPORT_PCRE8
4412 q8 = (pcre_uint8 *) dbuffer;
4413 #endif
4414 #ifdef SUPPORT_PCRE16
4415 q16 = (pcre_uint16 *) dbuffer;
4416 #endif
4417 #ifdef SUPPORT_PCRE32
4418 q32 = (pcre_uint32 *) dbuffer;
4419 #endif
4420
4421 while ((c = *p++) != 0)
4422 {
4423 int i = 0;
4424 int n = 0;
4425
4426 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4427 In non-UTF mode, allow the value of the byte to fall through to later,
4428 where values greater than 127 are turned into UTF-8 when running in
4429 16-bit or 32-bit mode. */
4430
4431 if (c != '\\')
4432 {
4433 #ifndef NOUTF
4434 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4435 #endif
4436 }
4437
4438 /* Handle backslash escapes */
4439
4440 else switch ((c = *p++))
4441 {
4442 case 'a': c = 7; break;
4443 case 'b': c = '\b'; break;
4444 case 'e': c = 27; break;
4445 case 'f': c = '\f'; break;
4446 case 'n': c = '\n'; break;
4447 case 'r': c = '\r'; break;
4448 case 't': c = '\t'; break;
4449 case 'v': c = '\v'; break;
4450
4451 case '0': case '1': case '2': case '3':
4452 case '4': case '5': case '6': case '7':
4453 c -= '0';
4454 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4455 c = c * 8 + *p++ - '0';
4456 break;
4457
4458 case 'x':
4459 if (*p == '{')
4460 {
4461 pcre_uint8 *pt = p;
4462 c = 0;
4463
4464 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4465 when isxdigit() is a macro that refers to its argument more than
4466 once. This is banned by the C Standard, but apparently happens in at
4467 least one MacOS environment. */
4468
4469 for (pt++; isxdigit(*pt); pt++)
4470 {
4471 if (++i == 9)
4472 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4473 "using only the first eight.\n");
4474 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4475 }
4476 if (*pt == '}')
4477 {
4478 p = pt + 1;
4479 break;
4480 }
4481 /* Not correct form for \x{...}; fall through */
4482 }
4483
4484 /* \x without {} always defines just one byte in 8-bit mode. This
4485 allows UTF-8 characters to be constructed byte by byte, and also allows
4486 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4487 Otherwise, pass it down to later code so that it can be turned into
4488 UTF-8 when running in 16/32-bit mode. */
4489
4490 c = 0;
4491 while (i++ < 2 && isxdigit(*p))
4492 {
4493 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4494 p++;
4495 }
4496 #if !defined NOUTF && defined SUPPORT_PCRE8
4497 if (use_utf && (pcre_mode == PCRE8_MODE))
4498 {
4499 *q8++ = c;
4500 continue;
4501 }
4502 #endif
4503 break;
4504
4505 case 0: /* \ followed by EOF allows for an empty line */
4506 p--;
4507 continue;
4508
4509 case '>':
4510 if (*p == '-')
4511 {
4512 start_offset_sign = -1;
4513 p++;
4514 }
4515 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4516 start_offset *= start_offset_sign;
4517 continue;
4518
4519 case 'A': /* Option setting */
4520 options |= PCRE_ANCHORED;
4521 continue;
4522
4523 case 'B':
4524 options |= PCRE_NOTBOL;
4525 continue;
4526
4527 case 'C':
4528 if (isdigit(*p)) /* Set copy string */
4529 {
4530 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4531 copystrings |= 1 << n;
4532 }
4533 else if (isalnum(*p))
4534 {
4535 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4536 }
4537 else if (*p == '+')
4538 {
4539 callout_extra = 1;
4540 p++;
4541 }
4542 else if (*p == '-')
4543 {
4544 SET_PCRE_CALLOUT(NULL);
4545 p++;
4546 }
4547 else if (*p == '!')
4548 {
4549 callout_fail_id = 0;
4550 p++;
4551 while(isdigit(*p))
4552 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4553 callout_fail_count = 0;
4554 if (*p == '!')
4555 {
4556 p++;
4557 while(isdigit(*p))
4558 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4559 }
4560 }
4561 else if (*p == '*')
4562 {
4563 int sign = 1;
4564 callout_data = 0;
4565 if (*(++p) == '-') { sign = -1; p++; }
4566 while(isdigit(*p))
4567 callout_data = callout_data * 10 + *p++ - '0';
4568 callout_data *= sign;
4569 callout_data_set = 1;
4570 }
4571 continue;
4572
4573 #if !defined NODFA
4574 case 'D':
4575 #if !defined NOPOSIX
4576 if (posix || do_posix)
4577 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4578 else
4579 #endif
4580 use_dfa = 1;
4581 continue;
4582 #endif
4583
4584 #if !defined NODFA
4585 case 'F':
4586 options |= PCRE_DFA_SHORTEST;
4587 continue;
4588 #endif
4589
4590 case 'G':
4591 if (isdigit(*p))
4592 {
4593 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4594 getstrings |= 1 << n;
4595 }
4596 else if (isalnum(*p))
4597 {
4598 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4599 }
4600 continue;
4601
4602 case 'J':
4603 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4604 if (extra != NULL
4605 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4606 && extra->executable_jit != NULL)
4607 {
4608 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4609 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4610 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4611 }
4612 continue;
4613
4614 case 'L':
4615 getlist = 1;
4616 continue;
4617
4618 case 'M':
4619 find_match_limit = 1;
4620 continue;
4621
4622 case 'N':
4623 if ((options & PCRE_NOTEMPTY) != 0)
4624 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4625 else
4626 options |= PCRE_NOTEMPTY;
4627 continue;
4628
4629 case 'O':
4630 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4631 if (n > size_offsets_max)
4632 {
4633 size_offsets_max = n;
4634 free(offsets);
4635 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4636 if (offsets == NULL)
4637 {
4638 printf("** Failed to get %d bytes of memory for offsets vector\n",
4639 (int)(size_offsets_max * sizeof(int)));
4640 yield = 1;
4641 goto EXIT;
4642 }
4643 }
4644 use_size_offsets = n;
4645 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4646 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4647 continue;
4648
4649 case 'P':
4650 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4651 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4652 continue;
4653
4654 case 'Q':
4655 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4656 if (extra == NULL)
4657 {
4658 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4659 extra->flags = 0;
4660 }
4661 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4662 extra->match_limit_recursion = n;
4663 continue;
4664
4665 case 'q':
4666 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4667 if (extra == NULL)
4668 {
4669 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4670 extra->flags = 0;
4671 }
4672 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4673 extra->match_limit = n;
4674 continue;
4675
4676 #if !defined NODFA
4677 case 'R':
4678 options |= PCRE_DFA_RESTART;
4679 continue;
4680 #endif
4681
4682 case 'S':
4683 show_malloc = 1;
4684 continue;
4685
4686 case 'Y':
4687 options |= PCRE_NO_START_OPTIMIZE;
4688 continue;
4689
4690 case 'Z':
4691 options |= PCRE_NOTEOL;
4692 continue;
4693
4694 case '?':
4695 options |= PCRE_NO_UTF8_CHECK;
4696 continue;
4697
4698 case '<':
4699 {
4700 int x = check_newline(p, outfile);
4701 if (x == 0) goto NEXT_DATA;
4702 options |= x;
4703 while (*p++ != '>');
4704 }
4705 continue;
4706 }
4707
4708 /* We now have a character value in c that may be greater than 255.
4709 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4710 than 127 in UTF mode must have come from \x{...} or octal constructs
4711 because values from \x.. get this far only in non-UTF mode. */
4712
4713 #ifdef SUPPORT_PCRE8
4714 if (pcre_mode == PCRE8_MODE)
4715 {
4716 #ifndef NOUTF
4717 if (use_utf)
4718 {
4719 q8 += ord2utf8(c, q8);
4720 }
4721 else
4722 #endif
4723 {
4724 if (c > 0xffu)
4725 {
4726 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4727 "and UTF-8 mode is not enabled.\n", c);
4728 fprintf(outfile, "** Truncation will probably give the wrong "
4729 "result.\n");
4730 }
4731 *q8++ = c;
4732 }
4733 }
4734 #endif
4735 #ifdef SUPPORT_PCRE16
4736 if (pcre_mode == PCRE16_MODE)
4737 {
4738 #ifndef NOUTF
4739 if (use_utf)
4740 {
4741 if (c > 0x10ffffu)
4742 {
4743 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4744 "cannot be converted to UTF-16\n");
4745 goto NEXT_DATA;
4746 }
4747 else if (c >= 0x10000u)
4748 {
4749 c-= 0x10000u;
4750 *q16++ = 0xD800 | (c >> 10);
4751 *q16++ = 0xDC00 | (c & 0x3ff);
4752 }
4753 else
4754 *q16++ = c;
4755 }
4756 else
4757 #endif
4758 {
4759 if (c > 0xffffu)
4760 {
4761 fprintf(outfile, "** Character value is greater than 0xffff "
4762 "and UTF-16 mode is not enabled.\n");
4763 fprintf(outfile, "** Truncation will probably give the wrong "
4764 "result.\n");
4765 }
4766
4767 *q16++ = c;
4768 }
4769 }
4770 #endif
4771 #ifdef SUPPORT_PCRE32
4772 if (pcre_mode == PCRE32_MODE)
4773 {
4774 *q32++ = c;
4775 }
4776 #endif
4777
4778 }
4779
4780 /* Reached end of subject string */
4781
4782 #ifdef SUPPORT_PCRE8
4783 if (pcre_mode == PCRE8_MODE)
4784 {
4785 *q8 = 0;
4786 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4787 }
4788 #endif
4789 #ifdef SUPPORT_PCRE16
4790 if (pcre_mode == PCRE16_MODE)
4791 {
4792 *q16 = 0;
4793 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4794 }
4795 #endif
4796 #ifdef SUPPORT_PCRE32
4797 if (pcre_mode == PCRE32_MODE)
4798 {
4799 *q32 = 0;
4800 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4801 }
4802 #endif
4803
4804 #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
4805 /* If we're requested to test UTF-32 masking of high bits, change the data
4806 string to have high bits set, unless the string is invalid UTF-32.
4807 Since the JIT doesn't support this yet, only do it when not JITing. */
4808 if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
4809 valid_utf32((pcre_uint32 *)dbuffer, len))
4810 {
4811 for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
4812 *q32 |= ~(pcre_uint32)UTF32_MASK;
4813
4814 /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
4815 options |= PCRE_NO_UTF32_CHECK;
4816 }
4817 #endif
4818
4819 /* Move the data to the end of the buffer so that a read over the end of
4820 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4821 we are using the POSIX interface, we must include the terminating zero. */
4822
4823 bptr = dbuffer;
4824
4825 #if !defined NOPOSIX
4826 if (posix || do_posix)
4827 {
4828 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4829 bptr += dbuffer_size - len - 1;
4830 }
4831 else
4832 #endif
4833 {
4834 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4835 }
4836
4837 if ((all_use_dfa || use_dfa) && find_match_limit)
4838 {
4839 printf("**Match limit not relevant for DFA matching: ignored\n");
4840 find_match_limit = 0;
4841 }
4842
4843 /* Handle matching via the POSIX interface, which does not
4844 support timing or playing with the match limit or callout data. */
4845
4846 #if !defined NOPOSIX
4847 if (posix || do_posix)
4848 {
4849 int rc;
4850 int eflags = 0;
4851 regmatch_t *pmatch = NULL;
4852 if (use_size_offsets > 0)
4853 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4854 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4855 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4856 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4857
4858 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4859
4860 if (rc != 0)
4861 {
4862 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4863 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4864 }
4865 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4866 {
4867 fprintf(outfile, "Matched with REG_NOSUB\n");
4868 }
4869 else
4870 {
4871 size_t i;
4872 for (i = 0; i < (size_t)use_size_offsets; i++)
4873 {
4874 if (pmatch[i].rm_so >= 0)
4875 {
4876 fprintf(outfile, "%2d: ", (int)i);
4877 PCHARSV(dbuffer, pmatch[i].rm_so,
4878 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4879 fprintf(outfile, "\n");
4880 if (do_showcaprest || (i == 0 && do_showrest))
4881 {
4882 fprintf(outfile, "%2d+ ", (int)i);
4883 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4884 outfile);
4885 fprintf(outfile, "\n");
4886 }
4887 }
4888 }
4889 }
4890 free(pmatch);
4891 goto NEXT_DATA;
4892 }
4893
4894 #endif /* !defined NOPOSIX */
4895
4896 /* Handle matching via the native interface - repeats for /g and /G */
4897
4898 /* Ensure that there is a JIT callback if we want to verify that JIT was
4899 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4900
4901 if (verify_jit && jit_stack == NULL && extra != NULL)
4902 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4903
4904 for (;; gmatched++) /* Loop for /g or /G */
4905 {
4906 markptr = NULL;
4907 jit_was_used = FALSE;
4908
4909 if (timeitm > 0)
4910 {
4911 register int i;
4912 clock_t time_taken;
4913 clock_t start_time = clock();
4914
4915 #if !defined NODFA
4916 if (all_use_dfa || use_dfa)
4917 {
4918 if ((options & PCRE_DFA_RESTART) != 0)
4919 {
4920 fprintf(outfile, "Timing DFA restarts is not supported\n");
4921 break;
4922 }
4923 if (dfa_workspace == NULL)
4924 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4925 for (i = 0; i < timeitm; i++)
4926 {
4927 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4928 (options | g_notempty), use_offsets, use_size_offsets,
4929 dfa_workspace, DFA_WS_DIMENSION);
4930 }
4931 }
4932 else
4933 #endif
4934
4935 for (i = 0; i < timeitm; i++)
4936 {
4937 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4938 (options | g_notempty), use_offsets, use_size_offsets);
4939 }
4940 time_taken = clock() - start_time;
4941 fprintf(outfile, "Execute time %.4f milliseconds\n",
4942 (((double)time_taken * 1000.0) / (double)timeitm) /
4943 (double)CLOCKS_PER_SEC);
4944 }
4945
4946 /* If find_match_limit is set, we want to do repeated matches with
4947 varying limits in order to find the minimum value for the match limit and
4948 for the recursion limit. The match limits are relevant only to the normal
4949 running of pcre_exec(), so disable the JIT optimization. This makes it
4950 possible to run the same set of tests with and without JIT externally
4951 requested. */
4952
4953 if (find_match_limit)
4954 {
4955 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4956 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4957 extra->flags = 0;
4958
4959 (void)check_match_limit(re, extra, bptr, len, start_offset,
4960 options|g_notempty, use_offsets, use_size_offsets,
4961 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4962 PCRE_ERROR_MATCHLIMIT, "match()");
4963
4964 count = check_match_limit(re, extra, bptr, len, start_offset,
4965 options|g_notempty, use_offsets, use_size_offsets,
4966 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4967 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4968 }
4969
4970 /* If callout_data is set, use the interface with additional data */
4971
4972 else if (callout_data_set)
4973 {
4974 if (extra == NULL)
4975 {
4976 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4977 extra->flags = 0;
4978 }
4979 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4980 extra->callout_data = &callout_data;
4981 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4982 options | g_notempty, use_offsets, use_size_offsets);
4983 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4984 }
4985
4986 /* The normal case is just to do the match once, with the default
4987 value of match_limit. */
4988
4989 #if !defined NODFA
4990 else if (all_use_dfa || use_dfa)
4991 {
4992 if (dfa_workspace == NULL)
4993 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4994 if (dfa_matched++ == 0)
4995 dfa_workspace[0] = -1; /* To catch bad restart */
4996 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4997 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4998 DFA_WS_DIMENSION);
4999 if (count == 0)
5000 {
5001 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5002 count = use_size_offsets/2;
5003 }
5004 }
5005 #endif
5006
5007 else
5008 {
5009 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5010 options | g_notempty, use_offsets, use_size_offsets);
5011 if (count == 0)
5012 {
5013 fprintf(outfile, "Matched, but too many substrings\n");
5014 count = use_size_offsets/3;
5015 }
5016 }
5017
5018 /* Matched */
5019
5020 if (count >= 0)
5021 {
5022 int i, maxcount;
5023 void *cnptr, *gnptr;
5024
5025 #if !defined NODFA
5026 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5027 #endif
5028 maxcount = use_size_offsets/3;
5029
5030 /* This is a check against a lunatic return value. */
5031
5032 if (count > maxcount)
5033 {
5034 fprintf(outfile,
5035 "** PCRE error: returned count %d is too big for offset size %d\n",
5036 count, use_size_offsets);
5037 count = use_size_offsets/3;
5038 if (do_g || do_G)
5039 {
5040 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5041 do_g = do_G = FALSE; /* Break g/G loop */
5042 }
5043 }
5044
5045 /* do_allcaps requests showing of all captures in the pattern, to check
5046 unset ones at the end. */
5047
5048 if (do_allcaps)
5049 {
5050 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5051 goto SKIP_DATA;
5052 count++; /* Allow for full match */
5053 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5054 }
5055
5056 /* Output the captured substrings */
5057
5058 for (i = 0; i < count * 2; i += 2)
5059 {
5060 if (use_offsets[i] < 0)
5061 {
5062 if (use_offsets[i] != -1)
5063 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5064 use_offsets[i], i);
5065 if (use_offsets[i+1] != -1)
5066 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5067 use_offsets[i+1], i+1);
5068 fprintf(outfile, "%2d: <unset>\n", i/2);
5069 }
5070 else
5071 {
5072 fprintf(outfile, "%2d: ", i/2);
5073 PCHARSV(bptr, use_offsets[i],
5074 use_offsets[i+1] - use_offsets[i], outfile);
5075 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5076 fprintf(outfile, "\n");
5077 if (do_showcaprest || (i == 0 && do_showrest))
5078 {
5079 fprintf(outfile, "%2d+ ", i/2);
5080 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5081 outfile);
5082 fprintf(outfile, "\n");
5083 }
5084 }
5085 }
5086
5087 if (markptr != NULL)
5088 {
5089 fprintf(outfile, "MK: ");
5090 PCHARSV(markptr, 0, -1, outfile);
5091 fprintf(outfile, "\n");
5092 }
5093
5094 for (i = 0; i < 32; i++)
5095 {
5096 if ((copystrings & (1 << i)) != 0)
5097 {
5098 int rc;
5099 char copybuffer[256];
5100 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5101 copybuffer, sizeof(copybuffer));
5102 if (rc < 0)
5103 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5104 else
5105 {
5106 fprintf(outfile, "%2dC ", i);
5107 PCHARSV(copybuffer, 0, rc, outfile);
5108 fprintf(outfile, " (%d)\n", rc);
5109 }
5110 }
5111 }
5112
5113 cnptr = copynames;
5114 for (;;)
5115 {
5116 int rc;
5117 char copybuffer[256];
5118
5119 #ifdef SUPPORT_PCRE32
5120 if (pcre_mode == PCRE32_MODE)
5121 {
5122 if (*(pcre_uint32 *)cnptr == 0) break;
5123 }
5124 #endif
5125 #ifdef SUPPORT_PCRE16
5126 if (pcre_mode == PCRE16_MODE)
5127 {
5128 if (*(pcre_uint16 *)cnptr == 0) break;
5129 }
5130 #endif
5131 #ifdef SUPPORT_PCRE8
5132 if (pcre_mode == PCRE8_MODE)
5133 {
5134 if (*(pcre_uint8 *)cnptr == 0) break;
5135 }
5136 #endif
5137
5138 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5139 cnptr, copybuffer, sizeof(copybuffer));
5140
5141 if (rc < 0)
5142 {
5143 fprintf(outfile, "copy substring ");
5144 PCHARSV(cnptr, 0, -1, outfile);
5145 fprintf(outfile, " failed %d\n", rc);
5146 }
5147 else
5148 {
5149 fprintf(outfile, " C ");
5150 PCHARSV(copybuffer, 0, rc, outfile);
5151 fprintf(outfile, " (%d) ", rc);
5152 PCHARSV(cnptr, 0, -1, outfile);
5153 putc('\n', outfile);
5154 }
5155
5156 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5157 }
5158
5159 for (i = 0; i < 32; i++)
5160 {
5161 if ((getstrings & (1 << i)) != 0)
5162 {
5163 int rc;
5164 const char *substring;
5165 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5166 if (rc < 0)
5167 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5168 else
5169 {
5170 fprintf(outfile, "%2dG ", i);
5171 PCHARSV(substring, 0, rc, outfile);
5172 fprintf(outfile, " (%d)\n", rc);
5173 PCRE_FREE_SUBSTRING(substring);
5174 }
5175 }
5176 }
5177
5178 gnptr = getnames;
5179 for (;;)
5180 {
5181 int rc;
5182 const char *substring;
5183
5184 #ifdef SUPPORT_PCRE32
5185 if (pcre_mode == PCRE32_MODE)
5186 {
5187 if (*(pcre_uint32 *)gnptr == 0) break;
5188 }
5189 #endif
5190 #ifdef SUPPORT_PCRE16
5191 if (pcre_mode == PCRE16_MODE)
5192 {
5193 if (*(pcre_uint16 *)gnptr == 0) break;
5194 }
5195 #endif
5196 #ifdef SUPPORT_PCRE8
5197 if (pcre_mode == PCRE8_MODE)
5198 {
5199 if (*(pcre_uint8 *)gnptr == 0) break;
5200 }
5201 #endif
5202
5203 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5204 gnptr, &substring);
5205 if (rc < 0)
5206 {
5207 fprintf(outfile, "get substring ");
5208 PCHARSV(gnptr, 0, -1, outfile);
5209 fprintf(outfile, " failed %d\n", rc);
5210 }
5211 else
5212 {
5213 fprintf(outfile, " G ");
5214 PCHARSV(substring, 0, rc, outfile);
5215 fprintf(outfile, " (%d) ", rc);
5216 PCHARSV(gnptr, 0, -1, outfile);
5217 PCRE_FREE_SUBSTRING(substring);
5218 putc('\n', outfile);
5219 }
5220
5221 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5222 }
5223
5224 if (getlist)
5225 {
5226 int rc;
5227 const char **stringlist;
5228 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5229 if (rc < 0)
5230 fprintf(outfile, "get substring list failed %d\n", rc);
5231 else
5232 {
5233 for (i = 0; i < count; i++)
5234 {
5235 fprintf(outfile, "%2dL ", i);
5236 PCHARSV(stringlist[i], 0, -1, outfile);
5237 putc('\n', outfile);
5238 }
5239 if (stringlist[i] != NULL)
5240 fprintf(outfile, "string list not terminated by NULL\n");
5241 PCRE_FREE_SUBSTRING_LIST(stringlist);
5242 }
5243 }
5244 }
5245
5246 /* There was a partial match */
5247
5248 else if (count == PCRE_ERROR_PARTIAL)
5249 {
5250 if (markptr == NULL) fprintf(outfile, "Partial match");
5251 else
5252 {
5253 fprintf(outfile, "Partial match, mark=");
5254 PCHARSV(markptr, 0, -1, outfile);
5255 }
5256 if (use_size_offsets > 1)
5257 {
5258 fprintf(outfile, ": ");
5259 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5260 outfile);
5261 }
5262 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5263 fprintf(outfile, "\n");
5264 break; /* Out of the /g loop */
5265 }
5266
5267 /* Failed to match. If this is a /g or /G loop and we previously set
5268 g_notempty after a null match, this is not necessarily the end. We want
5269 to advance the start offset, and continue. We won't be at the end of the
5270 string - that was checked before setting g_notempty.
5271
5272 Complication arises in the case when the newline convention is "any",
5273 "crlf", or "anycrlf". If the previous match was at the end of a line
5274 terminated by CRLF, an advance of one character just passes the \r,
5275 whereas we should prefer the longer newline sequence, as does the code in
5276 pcre_exec(). Fudge the offset value to achieve this. We check for a
5277 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5278 find the default.
5279
5280 Otherwise, in the case of UTF-8 matching, the advance must be one
5281 character, not one byte. */
5282
5283 else
5284 {
5285 if (g_notempty != 0)
5286 {
5287 int onechar = 1;
5288 unsigned int obits = REAL_PCRE_OPTIONS(re);
5289 use_offsets[0] = start_offset;
5290 if ((obits & PCRE_NEWLINE_BITS) == 0)
5291 {
5292 int d;
5293 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5294 /* Note that these values are always the ASCII ones, even in
5295 EBCDIC environments. CR = 13, NL = 10. */
5296 obits = (d == 13)? PCRE_NEWLINE_CR :
5297 (d == 10)? PCRE_NEWLINE_LF :
5298 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5299 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5300 (d == -1)? PCRE_NEWLINE_ANY : 0;
5301 }
5302 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5303 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5304 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5305 &&
5306 start_offset < len - 1 && (
5307 #ifdef SUPPORT_PCRE8
5308 (pcre_mode == PCRE8_MODE &&
5309 bptr[start_offset] == '\r' &&
5310 bptr[start_offset + 1] == '\n') ||
5311 #endif
5312 #ifdef SUPPORT_PCRE16
5313 (pcre_mode == PCRE16_MODE &&
5314 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5315 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5316 #endif
5317 #ifdef SUPPORT_PCRE32
5318 (pcre_mode == PCRE32_MODE &&
5319 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5320 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5321 #endif
5322 0))
5323 onechar++;
5324 else if (use_utf)
5325 {
5326 while (start_offset + onechar < len)
5327 {
5328 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5329 onechar++;
5330 }
5331 }
5332 use_offsets[1] = start_offset + onechar;
5333 }
5334 else
5335 {
5336 switch(count)
5337 {
5338 case PCRE_ERROR_NOMATCH:
5339 if (gmatched == 0)
5340 {
5341 if (markptr == NULL)
5342 {
5343 fprintf(outfile, "No match");
5344 }
5345 else
5346 {
5347 fprintf(outfile, "No match, mark = ");
5348 PCHARSV(markptr, 0, -1, outfile);
5349 }
5350 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5351 putc('\n', outfile);
5352 }
5353 break;
5354
5355 case PCRE_ERROR_BADUTF8:
5356 case PCRE_ERROR_SHORTUTF8:
5357 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5358 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5359 8 * CHAR_SIZE);
5360 if (use_size_offsets >= 2)
5361 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5362 use_offsets[1]);
5363 fprintf(outfile, "\n");
5364 break;
5365
5366 case PCRE_ERROR_BADUTF8_OFFSET:
5367 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5368 8 * CHAR_SIZE);
5369 break;
5370
5371 default:
5372 if (count < 0 &&
5373 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5374 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5375 else
5376 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5377 break;
5378 }
5379
5380 break; /* Out of the /g loop */
5381 }
5382 }
5383
5384 /* If not /g or /G we are done */
5385
5386 if (!do_g && !do_G) break;
5387
5388 /* If we have matched an empty string, first check to see if we are at
5389 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5390 Perl's /g option does. This turns out to be rather cunning. First we set
5391 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5392 same point. If this fails (picked up above) we advance to the next
5393 character. */
5394
5395 g_notempty = 0;
5396
5397 if (use_offsets[0] == use_offsets[1])
5398 {
5399 if (use_offsets[0] == len) break;
5400 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5401 }
5402
5403 /* For /g, update the start offset, leaving the rest alone */
5404
5405 if (do_g) start_offset = use_offsets[1];
5406
5407 /* For /G, update the pointer and length */
5408
5409 else
5410 {
5411 bptr += use_offsets[1] * CHAR_SIZE;
5412 len -= use_offsets[1];
5413 }
5414 } /* End of loop for /g and /G */
5415
5416 NEXT_DATA: continue;
5417 } /* End of loop for data lines */
5418
5419 CONTINUE:
5420
5421 #if !defined NOPOSIX
5422 if (posix || do_posix) regfree(&preg);
5423 #endif
5424
5425 if (re != NULL) new_free(re);
5426 if (extra != NULL)
5427 {
5428 PCRE_FREE_STUDY(extra);
5429 }
5430 if (locale_set)
5431 {
5432 new_free((void *)tables);
5433 setlocale(LC_CTYPE, "C");
5434 locale_set = 0;
5435 }
5436 if (jit_stack != NULL)
5437 {
5438 PCRE_JIT_STACK_FREE(jit_stack);
5439 jit_stack = NULL;
5440 }
5441 }
5442
5443 if (infile == stdin) fprintf(outfile, "\n");
5444
5445 EXIT:
5446
5447 if (infile != NULL && infile != stdin) fclose(infile);
5448 if (outfile != NULL && outfile != stdout) fclose(outfile);
5449
5450 free(buffer);
5451 free(dbuffer);
5452 free(pbuffer);
5453 free(offsets);
5454
5455 #ifdef SUPPORT_PCRE16
5456 if (buffer16 != NULL) free(buffer16);
5457 #endif
5458 #ifdef SUPPORT_PCRE32
5459 if (buffer32 != NULL) free(buffer32);
5460 #endif
5461
5462 #if !defined NODFA
5463 if (dfa_workspace != NULL)
5464 free(dfa_workspace);
5465 #endif
5466
5467 return yield;
5468 }
5469
5470 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5