/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1150 - (show annotations)
Sun Oct 21 06:35:52 2012 UTC (6 years, 10 months ago) by zherczeg
File MIME type: text/plain
File size: 163626 byte(s)
Fix byte order issue when the result of badmode is shown.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136 #include "pcre_internal.h"
137
138 /* The pcre_printint() function, which prints the internal form of a compiled
139 regex, is held in a separate file so that (a) it can be compiled in either
140 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
141 when that is compiled in debug mode. */
142
143 #ifdef SUPPORT_PCRE8
144 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
145 #endif
146 #ifdef SUPPORT_PCRE16
147 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
148 #endif
149 #ifdef SUPPORT_PCRE32
150 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152
153 /* We need access to some of the data tables that PCRE uses. So as not to have
154 to keep two copies, we include the source files here, changing the names of the
155 external symbols to prevent clashes. */
156
157 #define PCRE_INCLUDED
158
159 #include "pcre_tables.c"
160 #include "pcre_ucd.c"
161
162 /* The definition of the macro PRINTABLE, which determines whether to print an
163 output character as-is or as a hex value when showing compiled patterns, is
164 the same as in the printint.src file. We use it here in cases when the locale
165 has not been explicitly changed, so as to get consistent output from systems
166 that differ in their output from isprint() even in the "C" locale. */
167
168 #ifdef EBCDIC
169 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
170 #else
171 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
172 #endif
173
174 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
175
176 /* Posix support is disabled in 16 or 32 bit only mode. */
177 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
178 #define NOPOSIX
179 #endif
180
181 /* It is possible to compile this test program without including support for
182 testing the POSIX interface, though this is not available via the standard
183 Makefile. */
184
185 #if !defined NOPOSIX
186 #include "pcreposix.h"
187 #endif
188
189 /* It is also possible, originally for the benefit of a version that was
190 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
191 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
192 automatically cut out the UTF support if PCRE is built without it. */
193
194 #ifndef SUPPORT_UTF
195 #ifndef NOUTF
196 #define NOUTF
197 #endif
198 #endif
199
200 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
201 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
202 only from one place and is handled differently). I couldn't dream up any way of
203 using a single macro to do this in a generic way, because of the many different
204 argument requirements. We know that at least one of SUPPORT_PCRE8,
205 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
206 individual mode; then use these in the definitions of generic macros.
207
208 **** Special note about the PCHARSxxx macros: the address of the string to be
209 printed is always given as two arguments: a base address followed by an offset.
210 The base address is cast to the correct data size for 8, 16, or 32 bit data; the
211 offset is in units of this size. If the string were given as base+offset in one
212 argument, the casting might be incorrectly applied. */
213
214 #ifdef SUPPORT_PCRE8
215
216 #define PCHARS8(lv, p, offset, len, f) \
217 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
218
219 #define PCHARSV8(p, offset, len, f) \
220 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
221
222 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
223 p = read_capture_name8(p, cn8, re)
224
225 #define STRLEN8(p) ((int)strlen((char *)p))
226
227 #define SET_PCRE_CALLOUT8(callout) \
228 pcre_callout = callout
229
230 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
231 pcre_assign_jit_stack(extra, callback, userdata)
232
233 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
234 re = pcre_compile((char *)pat, options, error, erroffset, tables)
235
236 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
237 namesptr, cbuffer, size) \
238 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
239 (char *)namesptr, cbuffer, size)
240
241 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
242 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
243
244 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
245 offsets, size_offsets, workspace, size_workspace) \
246 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
247 offsets, size_offsets, workspace, size_workspace)
248
249 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
250 offsets, size_offsets) \
251 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
252 offsets, size_offsets)
253
254 #define PCRE_FREE_STUDY8(extra) \
255 pcre_free_study(extra)
256
257 #define PCRE_FREE_SUBSTRING8(substring) \
258 pcre_free_substring(substring)
259
260 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
261 pcre_free_substring_list(listptr)
262
263 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
264 getnamesptr, subsptr) \
265 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
266 (char *)getnamesptr, subsptr)
267 /* Assign to n the number of the named group ptr. NOTE(review): the expansion uses `re` from the expansion site; the `rc` parameter is never referenced. */
268 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
269 n = pcre_get_stringnumber(re, (char *)ptr)
270
271 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
272 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
273
274 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
275 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
276
277 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
278 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
279
280 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
281 pcre_printint(re, outfile, debug_lengths)
282
283 #define PCRE_STUDY8(extra, re, options, error) \
284 extra = pcre_study(re, options, error)
285
286 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
287 pcre_jit_stack_alloc(startsize, maxsize)
288
289 #define PCRE_JIT_STACK_FREE8(stack) \
290 pcre_jit_stack_free(stack)
291
292 #define pcre8_maketables pcre_maketables
293
294 #endif /* SUPPORT_PCRE8 */
295
296 /* -----------------------------------------------------------*/
297
298 #ifdef SUPPORT_PCRE16
299
300 #define PCHARS16(lv, p, offset, len, f) \
301 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302
303 #define PCHARSV16(p, offset, len, f) \
304 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305
306 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
307 p = read_capture_name16(p, cn16, re)
308
309 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310
311 #define SET_PCRE_CALLOUT16(callout) \
312 pcre16_callout = (int (*)(pcre16_callout_block *))callout
313
314 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315 pcre16_assign_jit_stack((pcre16_extra *)extra, \
316 (pcre16_jit_callback)callback, userdata)
317
318 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320 tables)
321
322 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323 namesptr, cbuffer, size) \
324 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
326
327 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329 (PCRE_UCHAR16 *)cbuffer, size/2)
330
331 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332 offsets, size_offsets, workspace, size_workspace) \
333 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335 workspace, size_workspace)
336
337 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338 offsets, size_offsets) \
339 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340 len, start_offset, options, offsets, size_offsets)
341
342 #define PCRE_FREE_STUDY16(extra) \
343 pcre16_free_study((pcre16_extra *)extra)
344
345 #define PCRE_FREE_SUBSTRING16(substring) \
346 pcre16_free_substring((PCRE_SPTR16)substring)
347
348 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350
351 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352 getnamesptr, subsptr) \
353 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355 /* 16-bit variant; ptr is cast to PCRE_SPTR16. NOTE(review): the expansion uses `re` from the expansion site; the `rc` parameter is never referenced. */
356 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
357 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
358
359 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361 (PCRE_SPTR16 *)(void*)subsptr)
362
363 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365 (PCRE_SPTR16 **)(void*)listptr)
366
367 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369 tables)
370
371 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372 pcre16_printint(re, outfile, debug_lengths)
373
374 #define PCRE_STUDY16(extra, re, options, error) \
375 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376
377 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379
380 #define PCRE_JIT_STACK_FREE16(stack) \
381 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382
383 #endif /* SUPPORT_PCRE16 */
384
385 /* -----------------------------------------------------------*/
386
387 #ifdef SUPPORT_PCRE32
388
389 #define PCHARS32(lv, p, offset, len, f) \
390 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
391
392 #define PCHARSV32(p, offset, len, f) \
393 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
394
395 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396 p = read_capture_name32(p, cn32, re)
397
398 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
399
400 #define SET_PCRE_CALLOUT32(callout) \
401 pcre32_callout = (int (*)(pcre32_callout_block *))callout
402
403 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
404 pcre32_assign_jit_stack((pcre32_extra *)extra, \
405 (pcre32_jit_callback)callback, userdata)
406
407 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
408 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
409 tables)
410 /* 32-bit named-substring copy. size is a byte count (the 8-bit macro passes it unchanged, the 16-bit one halves it), so the capacity in 32-bit units is size/4. */
411 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
412 namesptr, cbuffer, size) \
413 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
414 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
415 /* 32-bit numbered-substring copy. size is a byte count (the 8-bit macro passes it unchanged, the 16-bit one halves it), so the capacity in 32-bit units is size/4. */
416 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
417 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
418 (PCRE_UCHAR32 *)cbuffer, (size)/4)
419
420 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
421 offsets, size_offsets, workspace, size_workspace) \
422 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
423 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
424 workspace, size_workspace)
425
426 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427 offsets, size_offsets) \
428 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
429 len, start_offset, options, offsets, size_offsets)
430
431 #define PCRE_FREE_STUDY32(extra) \
432 pcre32_free_study((pcre32_extra *)extra)
433
434 #define PCRE_FREE_SUBSTRING32(substring) \
435 pcre32_free_substring((PCRE_SPTR32)substring)
436
437 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
438 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
439
440 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
441 getnamesptr, subsptr) \
442 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
443 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
444 /* 32-bit variant; ptr is cast to PCRE_SPTR32. NOTE(review): the expansion uses `re` from the expansion site; the `rc` parameter is never referenced. */
445 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
446 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
447
448 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
449 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
450 (PCRE_SPTR32 *)(void*)subsptr)
451
452 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
453 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
454 (PCRE_SPTR32 **)(void*)listptr)
455
456 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
457 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
458 tables)
459
460 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
461 pcre32_printint(re, outfile, debug_lengths)
462
463 #define PCRE_STUDY32(extra, re, options, error) \
464 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
465
466 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
467 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
468
469 #define PCRE_JIT_STACK_FREE32(stack) \
470 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
471
472 #endif /* SUPPORT_PCRE32 */
473
474
475 /* ----- More than one mode is supported; a runtime test is needed, except for
476 pcre_config(), and the JIT stack functions, when it doesn't matter which
477 available version is called. ----- */
478 /* Values compared against pcre_mode by the dispatch macros. The order matters: CHAR_SIZE, where defined below, is (1 << pcre_mode), i.e. 1, 2 and 4 for these three values. */
479 enum {
480 PCRE8_MODE,
481 PCRE16_MODE,
482 PCRE32_MODE
483 };
484
485 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486 defined (SUPPORT_PCRE32)) >= 2
487
488 #define CHAR_SIZE (1 << pcre_mode)
489
490 /* There doesn't seem to be an easy way of writing these macros that can cope
491 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492 cases separately. */
493
494 /* ----- All three modes supported ----- */
495
496 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
497
498 #define PCHARS(lv, p, offset, len, f) \
499 if (pcre_mode == PCRE32_MODE) \
500 PCHARS32(lv, p, offset, len, f); \
501 else if (pcre_mode == PCRE16_MODE) \
502 PCHARS16(lv, p, offset, len, f); \
503 else \
504 PCHARS8(lv, p, offset, len, f)
505
506 #define PCHARSV(p, offset, len, f) \
507 if (pcre_mode == PCRE32_MODE) \
508 PCHARSV32(p, offset, len, f); \
509 else if (pcre_mode == PCRE16_MODE) \
510 PCHARSV16(p, offset, len, f); \
511 else \
512 PCHARSV8(p, offset, len, f)
513
514 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
515 if (pcre_mode == PCRE32_MODE) \
516 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
517 else if (pcre_mode == PCRE16_MODE) \
518 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
519 else \
520 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
521
522 #define SET_PCRE_CALLOUT(callout) \
523 if (pcre_mode == PCRE32_MODE) \
524 SET_PCRE_CALLOUT32(callout); \
525 else if (pcre_mode == PCRE16_MODE) \
526 SET_PCRE_CALLOUT16(callout); \
527 else \
528 SET_PCRE_CALLOUT8(callout)
529
530 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
531
532 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
533 if (pcre_mode == PCRE32_MODE) \
534 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
535 else if (pcre_mode == PCRE16_MODE) \
536 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
537 else \
538 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
539
540 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
541 if (pcre_mode == PCRE32_MODE) \
542 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
543 else if (pcre_mode == PCRE16_MODE) \
544 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
545 else \
546 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
547
548 #define PCRE_CONFIG pcre_config
549
550 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
551 namesptr, cbuffer, size) \
552 if (pcre_mode == PCRE32_MODE) \
553 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size); \
555 else if (pcre_mode == PCRE16_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else \
559 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size)
561
562 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
563 if (pcre_mode == PCRE32_MODE) \
564 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
565 else if (pcre_mode == PCRE16_MODE) \
566 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
567 else \
568 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
569
570 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
571 offsets, size_offsets, workspace, size_workspace) \
572 if (pcre_mode == PCRE32_MODE) \
573 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace); \
575 else if (pcre_mode == PCRE16_MODE) \
576 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else \
579 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace)
581
582 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets) \
584 if (pcre_mode == PCRE32_MODE) \
585 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets); \
587 else if (pcre_mode == PCRE16_MODE) \
588 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else \
591 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets)
593
594 #define PCRE_FREE_STUDY(extra) \
595 if (pcre_mode == PCRE32_MODE) \
596 PCRE_FREE_STUDY32(extra); \
597 else if (pcre_mode == PCRE16_MODE) \
598 PCRE_FREE_STUDY16(extra); \
599 else \
600 PCRE_FREE_STUDY8(extra)
601
602 #define PCRE_FREE_SUBSTRING(substring) \
603 if (pcre_mode == PCRE32_MODE) \
604 PCRE_FREE_SUBSTRING32(substring); \
605 else if (pcre_mode == PCRE16_MODE) \
606 PCRE_FREE_SUBSTRING16(substring); \
607 else \
608 PCRE_FREE_SUBSTRING8(substring)
609
610 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
611 if (pcre_mode == PCRE32_MODE) \
612 PCRE_FREE_SUBSTRING_LIST32(listptr); \
613 else if (pcre_mode == PCRE16_MODE) \
614 PCRE_FREE_SUBSTRING_LIST16(listptr); \
615 else \
616 PCRE_FREE_SUBSTRING_LIST8(listptr)
617
618 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
619 getnamesptr, subsptr) \
620 if (pcre_mode == PCRE32_MODE) \
621 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr); \
623 else if (pcre_mode == PCRE16_MODE) \
624 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else \
627 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr)
629
630 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
631 if (pcre_mode == PCRE32_MODE) \
632 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
633 else if (pcre_mode == PCRE16_MODE) \
634 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
635 else \
636 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
637
638 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
639 if (pcre_mode == PCRE32_MODE) \
640 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
641 else if (pcre_mode == PCRE16_MODE) \
642 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
643 else \
644 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
645
646 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
647 if (pcre_mode == PCRE32_MODE) \
648 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
649 else if (pcre_mode == PCRE16_MODE) \
650 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
651 else \
652 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
653
654 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
655 (pcre_mode == PCRE32_MODE ? \
656 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
657 : pcre_mode == PCRE16_MODE ? \
658 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
659 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
660
661 #define PCRE_JIT_STACK_FREE(stack) \
662 if (pcre_mode == PCRE32_MODE) \
663 PCRE_JIT_STACK_FREE32(stack); \
664 else if (pcre_mode == PCRE16_MODE) \
665 PCRE_JIT_STACK_FREE16(stack); \
666 else \
667 PCRE_JIT_STACK_FREE8(stack)
668
669 #define PCRE_MAKETABLES \
670 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
671
672 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
673 if (pcre_mode == PCRE32_MODE) \
674 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
675 else if (pcre_mode == PCRE16_MODE) \
676 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
677 else \
678 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
679
680 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
681 if (pcre_mode == PCRE32_MODE) \
682 PCRE_PRINTINT32(re, outfile, debug_lengths); \
683 else if (pcre_mode == PCRE16_MODE) \
684 PCRE_PRINTINT16(re, outfile, debug_lengths); \
685 else \
686 PCRE_PRINTINT8(re, outfile, debug_lengths)
687
688 #define PCRE_STUDY(extra, re, options, error) \
689 if (pcre_mode == PCRE32_MODE) \
690 PCRE_STUDY32(extra, re, options, error); \
691 else if (pcre_mode == PCRE16_MODE) \
692 PCRE_STUDY16(extra, re, options, error); \
693 else \
694 PCRE_STUDY8(extra, re, options, error)
695
696
697 /* ----- Two out of three modes are supported ----- */
698
699 #else
700
701 /* We can use some macro trickery to make a single set of definitions work in
702 the three different cases. */
703
704 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
705
706 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707 #define BITONE 32
708 #define BITTWO 16
709
710 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
711
712 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713 #define BITONE 32
714 #define BITTWO 8
715
716 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
717
718 #else
719 #define BITONE 16
720 #define BITTWO 8
721 #endif
722
723 #define glue(a,b) a##b
724 #define G(a,b) glue(a,b)
725
726
727 /* ----- Common macros for two-mode cases ----- */
728
729 #define PCHARS(lv, p, offset, len, f) \
730 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731 G(PCHARS,BITONE)(lv, p, offset, len, f); \
732 else \
733 G(PCHARS,BITTWO)(lv, p, offset, len, f)
734
735 #define PCHARSV(p, offset, len, f) \
736 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737 G(PCHARSV,BITONE)(p, offset, len, f); \
738 else \
739 G(PCHARSV,BITTWO)(p, offset, len, f)
740
741 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744 else \
745 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
746
747 #define SET_PCRE_CALLOUT(callout) \
748 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749 G(SET_PCRE_CALLOUT,BITONE)(callout); \
750 else \
751 G(SET_PCRE_CALLOUT,BITTWO)(callout)
752
753 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
755
756 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759 else \
760 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
761
/* Run-time dispatch macros for a build that contains exactly two of the
libraries. Each function-like macro below compares pcre_mode with the BITONE
mode constant and expands to the BITONE variant of the operation when they
match, and to the BITTWO variant otherwise. The expansions are bare if/else
statements without a final semicolon, so each use has to be written as a
complete statement terminated by ';'. */

762 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765 else \
766 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
767
/* PCRE_CONFIG is not dispatched on pcre_mode: it always names the config
function of the BITONE library. */
768 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
769
770 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771 namesptr, cbuffer, size) \
772 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774 namesptr, cbuffer, size); \
775 else \
776 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777 namesptr, cbuffer, size)
778
779 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782 else \
783 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
784
785 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786 offsets, size_offsets, workspace, size_workspace) \
787 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789 offsets, size_offsets, workspace, size_workspace); \
790 else \
791 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792 offsets, size_offsets, workspace, size_workspace)
793
794 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
795 offsets, size_offsets) \
796 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
797 G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
798 offsets, size_offsets); \
799 else \
800 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
801 offsets, size_offsets)
802
803 #define PCRE_FREE_STUDY(extra) \
804 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
805 G(PCRE_FREE_STUDY,BITONE)(extra); \
806 else \
807 G(PCRE_FREE_STUDY,BITTWO)(extra)
808
809 #define PCRE_FREE_SUBSTRING(substring) \
810 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811 G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
812 else \
813 G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
814
815 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
816 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817 G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
818 else \
819 G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
820
821 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
822 getnamesptr, subsptr) \
823 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
824 G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
825 getnamesptr, subsptr); \
826 else \
827 G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
828 getnamesptr, subsptr)
829
830 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
831 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
832 G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
833 else \
834 G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
835
836 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
837 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838 G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
839 else \
840 G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
841
842 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
843 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844 G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
845 else \
846 G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
847
/* Dispatch PCRE_JIT_STACK_ALLOC to the BITONE or BITTWO variant according to
pcre_mode. Unlike most of the neighbouring dispatch macros this one is an
expression that yields the selected variant's result, so the whole conditional
is wrapped in parentheses: without them, using the macro as an operand of a
larger expression (a comparison, say) would bind to the last arm of the ?:
instead of to its result. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
852
/* Dispatch PCRE_JIT_STACK_FREE to the BITONE variant when pcre_mode equals the
BITONE mode constant, otherwise to the BITTWO variant. The expansion is a bare
if/else, so use it as a statement followed by ';'. */
853 #define PCRE_JIT_STACK_FREE(stack) \
854 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
855 G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
856 else \
857 G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
858
/* Call the maketables function of the library selected by pcre_mode and yield
its result. This is an object-like macro that expands to an expression, so the
conditional is fully parenthesized to keep it intact when it appears inside a
larger expression such as a comparison with NULL. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
862
/* The remaining two-library dispatch macros. Each one tests pcre_mode against
the BITONE mode constant and expands to the BITONE variant on a match, else to
the BITTWO variant. They expand to bare if/else statements, so each use must be
a complete statement ending in ';'. */

863 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
864 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
865 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
866 else \
867 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
868
869 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
870 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871 G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
872 else \
873 G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
874
875 #define PCRE_STUDY(extra, re, options, error) \
876 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877 G(PCRE_STUDY,BITONE)(extra, re, options, error); \
878 else \
879 G(PCRE_STUDY,BITTWO)(extra, re, options, error)
880
881 #endif /* Two out of three modes */
882
883 /* ----- End of cases where more than one mode is supported ----- */
884
885
886 /* ----- Only 8-bit mode is supported ----- */
887
888 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library built in, no run-time test of pcre_mode is
needed: every generic name below is a direct alias for its 8-bit counterpart,
and PCRE_CONFIG / PCRE_MAKETABLES name the 8-bit library functions directly.
CHAR_SIZE is 1 in this configuration (presumably bytes per code unit - confirm
at its uses). */
889 #define CHAR_SIZE 1
890 #define PCHARS PCHARS8
891 #define PCHARSV PCHARSV8
892 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
893 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
894 #define STRLEN STRLEN8
895 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
896 #define PCRE_COMPILE PCRE_COMPILE8
897 #define PCRE_CONFIG pcre_config
898 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
899 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
900 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
901 #define PCRE_EXEC PCRE_EXEC8
902 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
903 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
904 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
905 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
906 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
907 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
908 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
909 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
910 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
911 #define PCRE_MAKETABLES pcre_maketables()
912 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
913 #define PCRE_PRINTINT PCRE_PRINTINT8
914 #define PCRE_STUDY PCRE_STUDY8
915
916 /* ----- Only 16-bit mode is supported ----- */
917
918 #elif defined SUPPORT_PCRE16
/* With only the 16-bit library built in, no run-time test of pcre_mode is
needed: every generic name below is a direct alias for its 16-bit counterpart,
and PCRE_CONFIG / PCRE_MAKETABLES name the 16-bit library functions directly.
CHAR_SIZE is 2 in this configuration (presumably bytes per code unit - confirm
at its uses). */
919 #define CHAR_SIZE 2
920 #define PCHARS PCHARS16
921 #define PCHARSV PCHARSV16
922 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
923 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
924 #define STRLEN STRLEN16
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
926 #define PCRE_COMPILE PCRE_COMPILE16
927 #define PCRE_CONFIG pcre16_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
931 #define PCRE_EXEC PCRE_EXEC16
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
941 #define PCRE_MAKETABLES pcre16_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
943 #define PCRE_PRINTINT PCRE_PRINTINT16
944 #define PCRE_STUDY PCRE_STUDY16
945
946 /* ----- Only 32-bit mode is supported ----- */
947
948 #elif defined SUPPORT_PCRE32
/* With only the 32-bit library built in, no run-time test of pcre_mode is
needed: every generic name below is a direct alias for its 32-bit counterpart,
and PCRE_CONFIG / PCRE_MAKETABLES name the 32-bit library functions directly.
CHAR_SIZE is 4 in this configuration (presumably bytes per code unit - confirm
at its uses). */
949 #define CHAR_SIZE 4
950 #define PCHARS PCHARS32
951 #define PCHARSV PCHARSV32
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
954 #define STRLEN STRLEN32
955 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
956 #define PCRE_COMPILE PCRE_COMPILE32
957 #define PCRE_CONFIG pcre32_config
958 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
959 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
960 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
961 #define PCRE_EXEC PCRE_EXEC32
962 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
963 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
964 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
965 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
966 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
967 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
968 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
969 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
970 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
971 #define PCRE_MAKETABLES pcre32_maketables()
972 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
973 #define PCRE_PRINTINT PCRE_PRINTINT32
974 #define PCRE_STUDY PCRE_STUDY32
975
976 #endif
977
978 /* ----- End of mode-specific function call macros ----- */
979
980
981 /* Other parameters */
982
/* Make sure CLOCKS_PER_SEC exists: if it has not been defined by this point,
use CLK_TCK when that is available, and fall back to 100 otherwise. */
983 #ifndef CLOCKS_PER_SEC
984 #ifdef CLK_TCK
985 #define CLOCKS_PER_SEC CLK_TCK
986 #else
987 #define CLOCKS_PER_SEC 100
988 #endif
989 #endif
990
/* DFA_WS_DIMENSION is only defined when NODFA is not set. NOTE(review):
presumably the number of entries in the DFA matching workspace - confirm at
its use. */
991 #if !defined NODFA
992 #define DFA_WS_DIMENSION 1000
993 #endif
994
995 /* This is the default loop count for timing. */
996
997 #define LOOPREPEAT 500000
998
999 /* Static variables */
1000
/* File-scope state shared by the functions of this program. Variables without
an initializer start at zero/NULL like any static object. Of these,
jit_was_used is set to TRUE by jit_callback(). NOTE(review): judging by their
names the callout_* and first_callout variables hold callout bookkeeping, and
show_malloc / gotten_store / first_gotten_store relate to memory tracing -
confirm against their uses elsewhere in the file. */
1001 static FILE *outfile;
1002 static int log_store = 0;
1003 static int callout_count;
1004 static int callout_extra;
1005 static int callout_fail_count;
1006 static int callout_fail_id;
1007 static int debug_lengths;
1008 static int first_callout;
1009 static int jit_was_used;
1010 static int locale_set = 0;
1011 static int show_malloc;
1012 static int use_utf;
1013 static size_t gotten_store;
1014 static size_t first_gotten_store = 0;
1015 static const unsigned char *last_callout_mark = NULL;
1016
1017 /* The buffers grow automatically if very long input lines are encountered. */
1018
/* buffer_size is the starting size; both buffers start out unallocated. */
1019 static int buffer_size = 50000;
1020 static pcre_uint8 *buffer = NULL;
1021 static pcre_uint8 *pbuffer = NULL;
1022
1023 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1024
1025 #ifdef COMPILE_PCRE16
1026 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1027 #endif
1028
1029 #ifdef COMPILE_PCRE32
1030 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1031 #endif
1032
1033 /* We need buffers for building 16/32-bit strings, and the tables of operator
1034 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035 pattern for saving/reloading testing. Luckily, the data for these tables is
1036 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038 LINK_SIZE is also used later in this program. */
1039
/* 16-bit section. IMM2_SIZE becomes 1 and LINK_SIZE is remapped (2 -> 1,
3 or 4 -> 2; anything else is a compile-time error) before OP_LENGTHS is
expanded into OP_lengths16, so that table is built with the 16-bit values. */
1040 #ifdef SUPPORT_PCRE16
1041 #undef IMM2_SIZE
1042 #define IMM2_SIZE 1
1043
1044 #if LINK_SIZE == 2
1045 #undef LINK_SIZE
1046 #define LINK_SIZE 1
1047 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1048 #undef LINK_SIZE
1049 #define LINK_SIZE 2
1050 #else
1051 #error LINK_SIZE must be either 2, 3, or 4
1052 #endif
1053
/* buffer16 starts unallocated; to16() allocates and enlarges it on demand and
records the allocated size, in bytes, in buffer16_size. */
1054 static int buffer16_size = 0;
1055 static pcre_uint16 *buffer16 = NULL;
1056 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1057 #endif /* SUPPORT_PCRE16 */
1058
/* 32-bit section. Both IMM2_SIZE and LINK_SIZE are forced to 1 before
OP_LENGTHS is expanded into OP_lengths32. Note that when the 16-bit section
above was also compiled, these definitions replace the 16-bit ones, so any code
after this point sees LINK_SIZE == 1. */
1059 #ifdef SUPPORT_PCRE32
1060 #undef IMM2_SIZE
1061 #define IMM2_SIZE 1
1062 #undef LINK_SIZE
1063 #define LINK_SIZE 1
1064
/* buffer32 starts unallocated; to32() allocates and enlarges it on demand and
records the allocated size, in bytes, in buffer32_size. */
1065 static int buffer32_size = 0;
1066 static pcre_uint32 *buffer32 = NULL;
1067 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1068 #endif /* SUPPORT_PCRE32 */
1069
1070 /* Choose the initial value of pcre_mode. If we have 8-bit support, default
to it; if there is also 16- or 32-bit support, the mode can be changed by an
option. If there is no 8-bit support, there must be 16- or 32-bit support, so
default to PCRE16_MODE when the 16-bit library is present and to PCRE32_MODE
otherwise. */
1073
1074 #if defined SUPPORT_PCRE8
1075 static int pcre_mode = PCRE8_MODE;
1076 #elif defined SUPPORT_PCRE16
1077 static int pcre_mode = PCRE16_MODE;
1078 #elif defined SUPPORT_PCRE32
1079 static int pcre_mode = PCRE32_MODE;
1080 #endif
1081
1082 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1083
/* Seven entries, one for each combination of the three JIT compile options
with at least one option present. NOTE(review): presumably entry n-1 is the
option set for digit n - confirm at the point of use. */
1084 static int jit_study_bits[] =
1085 {
1086 PCRE_STUDY_JIT_COMPILE,
1087 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1088 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1089 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1090 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1091 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1092 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1093 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1094 };
1095
/* Mask containing all three JIT study option bits. */
1096 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1097 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1098
1099 /* Textual explanations for runtime error codes */
1100
/* The table has 31 entries (indices 0 to 30). A NULL entry marks a code that
has no fixed text here, for the reason given in the comment beside it.
NOTE(review): presumably indexed by the negated PCRE error code - confirm at
the point of use. The strings are emitted at run time, so their wording must
not be changed casually. */
1101 static const char *errtexts[] = {
1102 NULL, /* 0 is no error */
1103 NULL, /* NOMATCH is handled specially */
1104 "NULL argument passed",
1105 "bad option value",
1106 "magic number missing",
1107 "unknown opcode - pattern overwritten?",
1108 "no more memory",
1109 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1110 "match limit exceeded",
1111 "callout error code",
1112 NULL, /* BADUTF8/16 is handled specially */
1113 NULL, /* BADUTF8/16 offset is handled specially */
1114 NULL, /* PARTIAL is handled specially */
1115 "not used - internal error",
1116 "internal error - pattern overwritten?",
1117 "bad count value",
1118 "item unsupported for DFA matching",
1119 "backreference condition or recursion test not supported for DFA matching",
1120 "match limit not supported for DFA matching",
1121 "workspace size exceeded in DFA matching",
1122 "too much recursion for DFA matching",
1123 "recursion limit exceeded",
1124 "not used - internal error",
1125 "invalid combination of newline options",
1126 "bad offset value",
1127 NULL, /* SHORTUTF8/16 is handled specially */
1128 "nested recursion at the same subject position",
1129 "JIT stack limit reached",
1130 "pattern compiled in wrong mode: 8-bit/16-bit error",
1131 "pattern compiled with other endianness",
1132 "invalid data in workspace for DFA restart"
1133 };
1134
1135
1136 /*************************************************
1137 * Alternate character tables *
1138 *************************************************/
1139
1140 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1141 using the default tables of the library. However, the T option can be used to
1142 select alternate sets of tables, for different kinds of testing. Note also that
1143 the L (locale) option also adjusts the tables. */
1144
1145 /* This is the set of tables distributed as default with PCRE. It recognizes
1146 only ASCII characters. */
1147
/* tables0 consists of four sub-tables laid end to end: 256 bytes of lower
casing, 256 bytes of case flipping, 320 bytes of character class bit maps (ten
maps of 32 bytes each) and 256 bytes of character type flags - 1088 bytes in
all. Each sub-table is introduced by its own comment below. */
1148 static const pcre_uint8 tables0[] = {
1149
1150 /* This table is a lower casing table. */
1151
1152 0, 1, 2, 3, 4, 5, 6, 7,
1153 8, 9, 10, 11, 12, 13, 14, 15,
1154 16, 17, 18, 19, 20, 21, 22, 23,
1155 24, 25, 26, 27, 28, 29, 30, 31,
1156 32, 33, 34, 35, 36, 37, 38, 39,
1157 40, 41, 42, 43, 44, 45, 46, 47,
1158 48, 49, 50, 51, 52, 53, 54, 55,
1159 56, 57, 58, 59, 60, 61, 62, 63,
1160 64, 97, 98, 99,100,101,102,103,
1161 104,105,106,107,108,109,110,111,
1162 112,113,114,115,116,117,118,119,
1163 120,121,122, 91, 92, 93, 94, 95,
1164 96, 97, 98, 99,100,101,102,103,
1165 104,105,106,107,108,109,110,111,
1166 112,113,114,115,116,117,118,119,
1167 120,121,122,123,124,125,126,127,
1168 128,129,130,131,132,133,134,135,
1169 136,137,138,139,140,141,142,143,
1170 144,145,146,147,148,149,150,151,
1171 152,153,154,155,156,157,158,159,
1172 160,161,162,163,164,165,166,167,
1173 168,169,170,171,172,173,174,175,
1174 176,177,178,179,180,181,182,183,
1175 184,185,186,187,188,189,190,191,
1176 192,193,194,195,196,197,198,199,
1177 200,201,202,203,204,205,206,207,
1178 208,209,210,211,212,213,214,215,
1179 216,217,218,219,220,221,222,223,
1180 224,225,226,227,228,229,230,231,
1181 232,233,234,235,236,237,238,239,
1182 240,241,242,243,244,245,246,247,
1183 248,249,250,251,252,253,254,255,
1184
1185 /* This table is a case flipping table. */
1186
1187 0, 1, 2, 3, 4, 5, 6, 7,
1188 8, 9, 10, 11, 12, 13, 14, 15,
1189 16, 17, 18, 19, 20, 21, 22, 23,
1190 24, 25, 26, 27, 28, 29, 30, 31,
1191 32, 33, 34, 35, 36, 37, 38, 39,
1192 40, 41, 42, 43, 44, 45, 46, 47,
1193 48, 49, 50, 51, 52, 53, 54, 55,
1194 56, 57, 58, 59, 60, 61, 62, 63,
1195 64, 97, 98, 99,100,101,102,103,
1196 104,105,106,107,108,109,110,111,
1197 112,113,114,115,116,117,118,119,
1198 120,121,122, 91, 92, 93, 94, 95,
1199 96, 65, 66, 67, 68, 69, 70, 71,
1200 72, 73, 74, 75, 76, 77, 78, 79,
1201 80, 81, 82, 83, 84, 85, 86, 87,
1202 88, 89, 90,123,124,125,126,127,
1203 128,129,130,131,132,133,134,135,
1204 136,137,138,139,140,141,142,143,
1205 144,145,146,147,148,149,150,151,
1206 152,153,154,155,156,157,158,159,
1207 160,161,162,163,164,165,166,167,
1208 168,169,170,171,172,173,174,175,
1209 176,177,178,179,180,181,182,183,
1210 184,185,186,187,188,189,190,191,
1211 192,193,194,195,196,197,198,199,
1212 200,201,202,203,204,205,206,207,
1213 208,209,210,211,212,213,214,215,
1214 216,217,218,219,220,221,222,223,
1215 224,225,226,227,228,229,230,231,
1216 232,233,234,235,236,237,238,239,
1217 240,241,242,243,244,245,246,247,
1218 248,249,250,251,252,253,254,255,
1219
1220 /* This table contains bit maps for various character classes. Each map is 32
1221 bytes long and the bits run from the least significant end of each byte. The
1222 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1223 graph, print, punct, and cntrl. Other classes are built from combinations. */
1224
1225 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1226 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1227 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1228 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1229
1230 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1231 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1232 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1233 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1234
1235 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1236 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1237 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1238 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1239
1240 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1241 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1243 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1244
1245 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1246 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1248 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1249
1250 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1251 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1252 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1253 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1254
1255 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1256 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1257 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1258 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1259
1260 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1261 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1262 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1263 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1264
1265 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1266 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1267 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1268 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1269
1270 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1271 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1272 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274
1275 /* This table identifies various classes of character by individual bits:
1276 0x01 white space character
1277 0x02 letter
1278 0x04 decimal digit
1279 0x08 hexadecimal digit
1280 0x10 alphanumeric or '_'
1281 0x80 regular expression metacharacter or binary zero
1282 */
1283
1284 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1285 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1286 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1287 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1288 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1289 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1290 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1291 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1292 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1293 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1294 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1295 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1296 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1297 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1298 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1299 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1311 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1316
1317 /* This is a set of tables that came originally from a Windows user. It
seems to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc. The data
has the same overall size and arrangement as tables0: 256 bytes of lower
casing, 256 bytes of case flipping, 320 bytes of class bit maps and 256 bytes
of character type flags, written in decimal. */
1320
1321 static const pcre_uint8 tables1[] = {
/* Lower casing table (256 entries). */
1322 0,1,2,3,4,5,6,7,
1323 8,9,10,11,12,13,14,15,
1324 16,17,18,19,20,21,22,23,
1325 24,25,26,27,28,29,30,31,
1326 32,33,34,35,36,37,38,39,
1327 40,41,42,43,44,45,46,47,
1328 48,49,50,51,52,53,54,55,
1329 56,57,58,59,60,61,62,63,
1330 64,97,98,99,100,101,102,103,
1331 104,105,106,107,108,109,110,111,
1332 112,113,114,115,116,117,118,119,
1333 120,121,122,91,92,93,94,95,
1334 96,97,98,99,100,101,102,103,
1335 104,105,106,107,108,109,110,111,
1336 112,113,114,115,116,117,118,119,
1337 120,121,122,123,124,125,126,127,
1338 128,129,130,131,132,133,134,135,
1339 136,137,138,139,140,141,142,143,
1340 144,145,146,147,148,149,150,151,
1341 152,153,154,155,156,157,158,159,
1342 160,161,162,163,164,165,166,167,
1343 168,169,170,171,172,173,174,175,
1344 176,177,178,179,180,181,182,183,
1345 184,185,186,187,188,189,190,191,
1346 224,225,226,227,228,229,230,231,
1347 232,233,234,235,236,237,238,239,
1348 240,241,242,243,244,245,246,215,
1349 248,249,250,251,252,253,254,223,
1350 224,225,226,227,228,229,230,231,
1351 232,233,234,235,236,237,238,239,
1352 240,241,242,243,244,245,246,247,
1353 248,249,250,251,252,253,254,255,
/* Case flipping table (256 entries). */
1354 0,1,2,3,4,5,6,7,
1355 8,9,10,11,12,13,14,15,
1356 16,17,18,19,20,21,22,23,
1357 24,25,26,27,28,29,30,31,
1358 32,33,34,35,36,37,38,39,
1359 40,41,42,43,44,45,46,47,
1360 48,49,50,51,52,53,54,55,
1361 56,57,58,59,60,61,62,63,
1362 64,97,98,99,100,101,102,103,
1363 104,105,106,107,108,109,110,111,
1364 112,113,114,115,116,117,118,119,
1365 120,121,122,91,92,93,94,95,
1366 96,65,66,67,68,69,70,71,
1367 72,73,74,75,76,77,78,79,
1368 80,81,82,83,84,85,86,87,
1369 88,89,90,123,124,125,126,127,
1370 128,129,130,131,132,133,134,135,
1371 136,137,138,139,140,141,142,143,
1372 144,145,146,147,148,149,150,151,
1373 152,153,154,155,156,157,158,159,
1374 160,161,162,163,164,165,166,167,
1375 168,169,170,171,172,173,174,175,
1376 176,177,178,179,180,181,182,183,
1377 184,185,186,187,188,189,190,191,
1378 224,225,226,227,228,229,230,231,
1379 232,233,234,235,236,237,238,239,
1380 240,241,242,243,244,245,246,215,
1381 248,249,250,251,252,253,254,223,
1382 192,193,194,195,196,197,198,199,
1383 200,201,202,203,204,205,206,207,
1384 208,209,210,211,212,213,214,247,
1385 216,217,218,219,220,221,222,255,
/* Character class bit maps: ten maps of 32 bytes (four rows) each.
NOTE(review): presumably in the same class order as the maps in tables0
(space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl) -
confirm. */
1386 0,62,0,0,1,0,0,0,
1387 0,0,0,0,0,0,0,0,
1388 32,0,0,0,1,0,0,0,
1389 0,0,0,0,0,0,0,0,
1390 0,0,0,0,0,0,255,3,
1391 126,0,0,0,126,0,0,0,
1392 0,0,0,0,0,0,0,0,
1393 0,0,0,0,0,0,0,0,
1394 0,0,0,0,0,0,255,3,
1395 0,0,0,0,0,0,0,0,
1396 0,0,0,0,0,0,12,2,
1397 0,0,0,0,0,0,0,0,
1398 0,0,0,0,0,0,0,0,
1399 254,255,255,7,0,0,0,0,
1400 0,0,0,0,0,0,0,0,
1401 255,255,127,127,0,0,0,0,
1402 0,0,0,0,0,0,0,0,
1403 0,0,0,0,254,255,255,7,
1404 0,0,0,0,0,4,32,4,
1405 0,0,0,128,255,255,127,255,
1406 0,0,0,0,0,0,255,3,
1407 254,255,255,135,254,255,255,7,
1408 0,0,0,0,0,4,44,6,
1409 255,255,127,255,255,255,127,255,
1410 0,0,0,0,254,255,255,255,
1411 255,255,255,255,255,255,255,127,
1412 0,0,0,0,254,255,255,255,
1413 255,255,255,255,255,255,255,255,
1414 0,2,0,0,255,255,255,255,
1415 255,255,255,255,255,255,255,127,
1416 0,0,0,0,255,255,255,255,
1417 255,255,255,255,255,255,255,255,
1418 0,0,0,0,254,255,0,252,
1419 1,0,0,248,1,0,0,120,
1420 0,0,0,0,254,255,255,255,
1421 0,0,128,0,0,0,128,0,
1422 255,255,255,255,0,0,0,0,
1423 0,0,0,0,0,0,0,128,
1424 255,255,255,255,0,0,0,0,
1425 0,0,0,0,0,0,0,0,
/* Character type flags (256 entries), one byte per character. */
1426 128,0,0,0,0,0,0,0,
1427 0,1,1,0,1,1,0,0,
1428 0,0,0,0,0,0,0,0,
1429 0,0,0,0,0,0,0,0,
1430 1,0,0,0,128,0,0,0,
1431 128,128,128,128,0,0,128,0,
1432 28,28,28,28,28,28,28,28,
1433 28,28,0,0,0,0,0,128,
1434 0,26,26,26,26,26,26,18,
1435 18,18,18,18,18,18,18,18,
1436 18,18,18,18,18,18,18,18,
1437 18,18,18,128,128,0,128,16,
1438 0,26,26,26,26,26,26,18,
1439 18,18,18,18,18,18,18,18,
1440 18,18,18,18,18,18,18,18,
1441 18,18,18,128,128,0,0,0,
1442 0,0,0,0,0,1,0,0,
1443 0,0,0,0,0,0,0,0,
1444 0,0,0,0,0,0,0,0,
1445 0,0,0,0,0,0,0,0,
1446 1,0,0,0,0,0,0,0,
1447 0,0,18,0,0,0,0,0,
1448 0,0,20,20,0,18,0,0,
1449 0,20,18,0,0,0,0,0,
1450 18,18,18,18,18,18,18,18,
1451 18,18,18,18,18,18,18,18,
1452 18,18,18,18,18,18,18,0,
1453 18,18,18,18,18,18,18,18,
1454 18,18,18,18,18,18,18,18,
1455 18,18,18,18,18,18,18,18,
1456 18,18,18,18,18,18,18,0,
1457 18,18,18,18,18,18,18,18
1458 };
1459
1460
1461
1462
1463 #ifndef HAVE_STRERROR
/*************************************************
*    Fallback strerror() for older libraries     *
*************************************************/

/* A few elderly systems (SunOS4 is one example) ship a C library without
strerror(), yet still export the message table sys_errlist[] together with its
length sys_nerr. This replacement simply looks the message up in that table.

Argument:   n    an error number
Returns:    the n-th entry of sys_errlist[], or the fixed text
              "unknown error number" if n is outside 0 .. sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1481 #endif /* HAVE_STRERROR */
1482
1483
1484
1485 /*************************************************
1486 * Print newline configuration *
1487 *************************************************/
1488
1489 /*
1490 Arguments:
1491 rc the return code from PCRE_CONFIG_NEWLINE
1492 isc TRUE if called from "-C newline"
1493 Returns: nothing
1494 */
1495
1496 static void
1497 print_newline_config(int rc, BOOL isc)
1498 {
1499 const char *s = NULL;
1500 if (!isc) printf(" Newline sequence is ");
1501 switch(rc)
1502 {
1503 case CHAR_CR: s = "CR"; break;
1504 case CHAR_LF: s = "LF"; break;
1505 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1506 case -1: s = "ANY"; break;
1507 case -2: s = "ANYCRLF"; break;
1508
1509 default:
1510 printf("a non-standard value: 0x%04x\n", rc);
1511 return;
1512 }
1513
1514 printf("%s\n", s);
1515 }
1516
1517
1518
1519 /*************************************************
1520 * JIT memory callback *
1521 *************************************************/
1522
1523 static pcre_jit_stack* jit_callback(void *arg)
1524 {
1525 jit_was_used = TRUE;
1526 return (pcre_jit_stack *)arg;
1527 }
1528
1529
1530 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1531 /*************************************************
1532 * Convert UTF-8 string to value *
1533 *************************************************/
1534
1535 /* This function takes one or more bytes that represents a UTF-8 character,
1536 and returns the value of the character.
1537
1538 Argument:
1539 utf8bytes a pointer to the byte vector
1540 vptr a pointer to an int to receive the value
1541
1542 Returns: > 0 => the number of bytes consumed
1543 -6 to 0 => malformed UTF-8 character at offset = (-return)
1544 */
1545
1546 static int
1547 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1548 {
1549 pcre_uint32 c = *utf8bytes++;
1550 pcre_uint32 d = c;
1551 int i, j, s;
1552
1553 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1554 {
1555 if ((d & 0x80) == 0) break;
1556 d <<= 1;
1557 }
1558
1559 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1560 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1561
1562 /* i now has a value in the range 1-5 */
1563
1564 s = 6*i;
1565 d = (c & utf8_table3[i]) << s;
1566
1567 for (j = 0; j < i; j++)
1568 {
1569 c = *utf8bytes++;
1570 if ((c & 0xc0) != 0x80) return -(j+1);
1571 s -= 6;
1572 d |= (c & 0x3f) << s;
1573 }
1574
1575 /* Check that encoding was the correct unique one */
1576
1577 for (j = 0; j < utf8_table1_size; j++)
1578 if (d <= (pcre_uint32)utf8_table1[j]) break;
1579 if (j != i) return -(i+1);
1580
1581 /* Valid value */
1582
1583 *vptr = d;
1584 return i+1;
1585 }
1586 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1587
1588
1589
1590 #if defined SUPPORT_PCRE8 && !defined NOUTF
1591 /*************************************************
1592 * Convert character value to UTF-8 *
1593 *************************************************/
1594
1595 /* This function takes an integer value in the range 0 - 0x7fffffff
1596 and encodes it as a UTF-8 character in 0 to 6 bytes.
1597
1598 Arguments:
1599 cvalue the character value
1600 utf8bytes pointer to buffer for result - at least 6 bytes long
1601
1602 Returns: number of characters placed in the buffer
1603 */
1604
1605 static int
1606 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1607 {
1608 register int i, j;
1609 if (cvalue > 0x7fffffffu)
1610 return -1;
1611 for (i = 0; i < utf8_table1_size; i++)
1612 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1613 utf8bytes += i;
1614 for (j = i; j > 0; j--)
1615 {
1616 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1617 cvalue >>= 6;
1618 }
1619 *utf8bytes = utf8_table2[i] | cvalue;
1620 return i + 1;
1621 }
1622 #endif
1623
1624
1625 #ifdef SUPPORT_PCRE16
1626 /*************************************************
1627 * Convert a string to 16-bit *
1628 *************************************************/
1629
1630 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1631 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1632 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1633 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1634 result is always left in buffer16.
1635
1636 Note that this function does not object to surrogate values. This is
1637 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1638 for the purpose of testing that they are correctly faulted.
1639
1640 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1641 in UTF-8 so that values greater than 255 can be handled.
1642
1643 Arguments:
1644 data TRUE if converting a data line; FALSE for a regex
1645 p points to a byte string
1646 utf true if UTF-8 (to be converted to UTF-16)
1647 len number of bytes in the string (excluding trailing zero)
1648
1649 Returns: number of 16-bit data items used (excluding trailing zero)
1650 OR -1 if a UTF-8 string is malformed
1651 OR -2 if a value > 0x10ffff is encountered
1652 OR -3 if a value > 0xffff is encountered when not in UTF mode
1653 */
1654
1655 static int
1656 to16(int data, pcre_uint8 *p, int utf, int len)
1657 {
1658 pcre_uint16 *pp;
1659
1660 if (buffer16_size < 2*len + 2)
1661 {
1662 if (buffer16 != NULL) free(buffer16);
1663 buffer16_size = 2*len + 2;
1664 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1665 if (buffer16 == NULL)
1666 {
1667 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1668 exit(1);
1669 }
1670 }
1671
1672 pp = buffer16;
1673
1674 if (!utf && !data)
1675 {
1676 while (len-- > 0) *pp++ = *p++;
1677 }
1678
1679 else
1680 {
1681 pcre_uint32 c = 0;
1682 while (len > 0)
1683 {
1684 int chlen = utf82ord(p, &c);
1685 if (chlen <= 0) return -1;
1686 if (c > 0x10ffff) return -2;
1687 p += chlen;
1688 len -= chlen;
1689 if (c < 0x10000) *pp++ = c; else
1690 {
1691 if (!utf) return -3;
1692 c -= 0x10000;
1693 *pp++ = 0xD800 | (c >> 10);
1694 *pp++ = 0xDC00 | (c & 0x3ff);
1695 }
1696 }
1697 }
1698
1699 *pp = 0;
1700 return pp - buffer16;
1701 }
1702 #endif
1703
1704 #ifdef SUPPORT_PCRE32
1705 /*************************************************
1706 * Convert a string to 32-bit *
1707 *************************************************/
1708
1709 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1710 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1711 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1712 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1713 result is always left in buffer32.
1714
1715 Note that this function does not object to surrogate values. This is
1716 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1717 for the purpose of testing that they are correctly faulted.
1718
1719 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1720 in UTF-8 so that values greater than 255 can be handled.
1721
1722 Arguments:
1723 data TRUE if converting a data line; FALSE for a regex
1724 p points to a byte string
1725 utf true if UTF-8 (to be converted to UTF-32)
1726 len number of bytes in the string (excluding trailing zero)
1727
1728 Returns: number of 32-bit data items used (excluding trailing zero)
1729 OR -1 if a UTF-8 string is malformed
1730 OR -2 if a value > 0x10ffff is encountered
1731 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1732 */
1733
1734 static int
1735 to32(int data, pcre_uint8 *p, int utf, int len)
1736 {
1737 pcre_uint32 *pp;
1738
1739 if (buffer32_size < 4*len + 4)
1740 {
1741 if (buffer32 != NULL) free(buffer32);
1742 buffer32_size = 4*len + 4;
1743 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1744 if (buffer32 == NULL)
1745 {
1746 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1747 exit(1);
1748 }
1749 }
1750
1751 pp = buffer32;
1752
1753 if (!utf && !data)
1754 {
1755 while (len-- > 0) *pp++ = *p++;
1756 }
1757
1758 else
1759 {
1760 pcre_uint32 c = 0;
1761 while (len > 0)
1762 {
1763 int chlen = utf82ord(p, &c);
1764 if (chlen <= 0) return -1;
1765 if (utf)
1766 {
1767 if (c > 0x10ffff) return -2;
1768 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1769 }
1770
1771 p += chlen;
1772 len -= chlen;
1773 *pp++ = c;
1774 }
1775 }
1776
1777 *pp = 0;
1778 return pp - buffer32;
1779 }
1780
1781 /* Check that a 32-bit character string is valid UTF-32.
1782
1783 Arguments:
1784 string points to the string
1785 length length of string, or -1 if the string is zero-terminated
1786
1787 Returns: TRUE if the string is a valid UTF-32 string
1788 FALSE otherwise
1789 */
1790
1791 #ifdef SUPPORT_UTF
1792 static BOOL
1793 valid_utf32(pcre_uint32 *string, int length)
1794 {
1795 register pcre_uint32 *p;
1796 register pcre_uint32 c;
1797
1798 for (p = string; length-- > 0; p++)
1799 {
1800 c = *p;
1801
1802 if (c > 0x10ffffu)
1803 return FALSE;
1804
1805 /* A surrogate */
1806 if ((c & 0xfffff800u) == 0xd800u)
1807 return FALSE;
1808
1809 /* Non-character */
1810 if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1811 return FALSE;
1812 }
1813
1814 return TRUE;
1815 }
1816 #endif /* SUPPORT_UTF */
1817
1818 #endif
1819
1820 /*************************************************
1821 * Read or extend an input line *
1822 *************************************************/
1823
1824 /* Input lines are read into buffer, but both patterns and data lines can be
1825 continued over multiple input lines. In addition, if the buffer fills up, we
1826 want to automatically expand it so as to be able to handle extremely large
1827 lines that are needed for certain stress tests. When the input buffer is
1828 expanded, the other two buffers must also be expanded likewise, and the
1829 contents of pbuffer, which are a copy of the input for callouts, must be
1830 preserved (for when expansion happens for a data line). This is not the most
1831 optimal way of handling this, but hey, this is just a test program!
1832
1833 Arguments:
1834 f the file to read
1835 start where in buffer to start (this *must* be within buffer)
1836 prompt for stdin or readline()
1837
1838 Returns: pointer to the start of new data
1839 could be a copy of start, or could be moved
1840 NULL if no data read and EOF reached
1841 */
1842
1843 static pcre_uint8 *
1844 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1845 {
1846 pcre_uint8 *here = start;
1847
1848 for (;;)
1849 {
1850 size_t rlen = (size_t)(buffer_size - (here - buffer));
1851
1852 if (rlen > 1000)
1853 {
1854 int dlen;
1855
1856 /* If libreadline or libedit support is required, use readline() to read a
1857 line if the input is a terminal. Note that readline() removes the trailing
1858 newline, so we must put it back again, to be compatible with fgets(). */
1859
1860 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1861 if (isatty(fileno(f)))
1862 {
1863 size_t len;
1864 char *s = readline(prompt);
1865 if (s == NULL) return (here == start)? NULL : start;
1866 len = strlen(s);
1867 if (len > 0) add_history(s);
1868 if (len > rlen - 1) len = rlen - 1;
1869 memcpy(here, s, len);
1870 here[len] = '\n';
1871 here[len+1] = 0;
1872 free(s);
1873 }
1874 else
1875 #endif
1876
1877 /* Read the next line by normal means, prompting if the file is stdin. */
1878
1879 {
1880 if (f == stdin) printf("%s", prompt);
1881 if (fgets((char *)here, rlen, f) == NULL)
1882 return (here == start)? NULL : start;
1883 }
1884
1885 dlen = (int)strlen((char *)here);
1886 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1887 here += dlen;
1888 }
1889
1890 else
1891 {
1892 int new_buffer_size = 2*buffer_size;
1893 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1894 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1895
1896 if (new_buffer == NULL || new_pbuffer == NULL)
1897 {
1898 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1899 exit(1);
1900 }
1901
1902 memcpy(new_buffer, buffer, buffer_size);
1903 memcpy(new_pbuffer, pbuffer, buffer_size);
1904
1905 buffer_size = new_buffer_size;
1906
1907 start = new_buffer + (start - buffer);
1908 here = new_buffer + (here - buffer);
1909
1910 free(buffer);
1911 free(pbuffer);
1912
1913 buffer = new_buffer;
1914 pbuffer = new_pbuffer;
1915 }
1916 }
1917
1918 return NULL; /* Control never gets here */
1919 }
1920
1921
1922
1923 /*************************************************
1924 * Read number from string *
1925 *************************************************/
1926
1927 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1928 around with conditional compilation, just do the job by hand. It is only used
1929 for unpicking arguments, so just keep it simple.
1930
1931 Arguments:
1932 str string to be converted
1933 endptr where to put the end pointer
1934
1935 Returns: the value, as an int
1936 */
1937
1938 static int
1939 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1940 {
1941 int result = 0;
1942 while(*str != 0 && isspace(*str)) str++;
1943 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1944 *endptr = str;
1945 return(result);
1946 }
1947
1948
1949
1950 /*************************************************
1951 * Print one character *
1952 *************************************************/
1953
1954 /* Print a single character either literally, or as a hex escape. */
1955
1956 static int pchar(pcre_uint32 c, FILE *f)
1957 {
1958 int n = 0;
1959 if (PRINTOK(c))
1960 {
1961 if (f != NULL) fprintf(f, "%c", c);
1962 return 1;
1963 }
1964
1965 if (c < 0x100)
1966 {
1967 if (use_utf)
1968 {
1969 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1970 return 6;
1971 }
1972 else
1973 {
1974 if (f != NULL) fprintf(f, "\\x%02x", c);
1975 return 4;
1976 }
1977 }
1978
1979 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1980 return n >= 0 ? n : 0;
1981 }
1982
1983
1984
1985 #ifdef SUPPORT_PCRE8
1986 /*************************************************
1987 * Print 8-bit character string *
1988 *************************************************/
1989
1990 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1991 If handed a NULL file, just counts chars without printing. */
1992
1993 static int pchars(pcre_uint8 *p, int length, FILE *f)
1994 {
1995 pcre_uint32 c = 0;
1996 int yield = 0;
1997
1998 if (length < 0)
1999 length = strlen((char *)p);
2000
2001 while (length-- > 0)
2002 {
2003 #if !defined NOUTF
2004 if (use_utf)
2005 {
2006 int rc = utf82ord(p, &c);
2007 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2008 {
2009 length -= rc - 1;
2010 p += rc;
2011 yield += pchar(c, f);
2012 continue;
2013 }
2014 }
2015 #endif
2016 c = *p++;
2017 yield += pchar(c, f);
2018 }
2019
2020 return yield;
2021 }
2022 #endif
2023
2024
2025
2026 #ifdef SUPPORT_PCRE16
2027 /*************************************************
2028 * Find length of 0-terminated 16-bit string *
2029 *************************************************/
2030
2031 static int strlen16(PCRE_SPTR16 p)
2032 {
2033 int len = 0;
2034 while (*p++ != 0) len++;
2035 return len;
2036 }
2037 #endif /* SUPPORT_PCRE16 */
2038
2039
2040
2041 #ifdef SUPPORT_PCRE32
2042 /*************************************************
2043 * Find length of 0-terminated 32-bit string *
2044 *************************************************/
2045
2046 static int strlen32(PCRE_SPTR32 p)
2047 {
2048 int len = 0;
2049 while (*p++ != 0) len++;
2050 return len;
2051 }
2052 #endif /* SUPPORT_PCRE32 */
2053
2054
2055
2056 #ifdef SUPPORT_PCRE16
2057 /*************************************************
2058 * Print 16-bit character string *
2059 *************************************************/
2060
2061 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2062 If handed a NULL file, just counts chars without printing. */
2063
2064 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2065 {
2066 int yield = 0;
2067
2068 if (length < 0)
2069 length = strlen16(p);
2070
2071 while (length-- > 0)
2072 {
2073 pcre_uint32 c = *p++ & 0xffff;
2074 #if !defined NOUTF
2075 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2076 {
2077 int d = *p & 0xffff;
2078 if (d >= 0xDC00 && d < 0xDFFF)
2079 {
2080 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2081 length--;
2082 p++;
2083 }
2084 }
2085 #endif
2086 yield += pchar(c, f);
2087 }
2088
2089 return yield;
2090 }
2091 #endif /* SUPPORT_PCRE16 */
2092
2093
2094
2095 #ifdef SUPPORT_PCRE32
2096 /*************************************************
2097 * Print 32-bit character string *
2098 *************************************************/
2099
2100 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2101 If handed a NULL file, just counts chars without printing. */
2102
2103 #define UTF32_MASK (0x1fffffu)
2104
2105 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2106 {
2107 int yield = 0;
2108
2109 if (length < 0)
2110 length = strlen32(p);
2111
2112 while (length-- > 0)
2113 {
2114 pcre_uint32 c = *p++;
2115 if (utf) c &= UTF32_MASK;
2116 yield += pchar(c, f);
2117 }
2118
2119 return yield;
2120 }
2121 #endif /* SUPPORT_PCRE32 */
2122
2123
2124
2125 #ifdef SUPPORT_PCRE8
2126 /*************************************************
2127 * Read a capture name (8-bit) and check it *
2128 *************************************************/
2129
2130 static pcre_uint8 *
2131 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132 {
2133 pcre_uint8 *npp = *pp;
2134 while (isalnum(*p)) *npp++ = *p++;
2135 *npp++ = 0;
2136 *npp = 0;
2137 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138 {
2139 fprintf(outfile, "no parentheses with name \"");
2140 PCHARSV(*pp, 0, -1, outfile);
2141 fprintf(outfile, "\"\n");
2142 }
2143
2144 *pp = npp;
2145 return p;
2146 }
2147 #endif /* SUPPORT_PCRE8 */
2148
2149
2150
2151 #ifdef SUPPORT_PCRE16
2152 /*************************************************
2153 * Read a capture name (16-bit) and check it *
2154 *************************************************/
2155
2156 /* Note that the text being read is 8-bit. */
2157
2158 static pcre_uint8 *
2159 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160 {
2161 pcre_uint16 *npp = *pp;
2162 while (isalnum(*p)) *npp++ = *p++;
2163 *npp++ = 0;
2164 *npp = 0;
2165 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166 {
2167 fprintf(outfile, "no parentheses with name \"");
2168 PCHARSV(*pp, 0, -1, outfile);
2169 fprintf(outfile, "\"\n");
2170 }
2171 *pp = npp;
2172 return p;
2173 }
2174 #endif /* SUPPORT_PCRE16 */
2175
2176
2177
2178 #ifdef SUPPORT_PCRE32
2179 /*************************************************
2180 * Read a capture name (32-bit) and check it *
2181 *************************************************/
2182
2183 /* Note that the text being read is 8-bit. */
2184
2185 static pcre_uint8 *
2186 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2187 {
2188 pcre_uint32 *npp = *pp;
2189 while (isalnum(*p)) *npp++ = *p++;
2190 *npp++ = 0;
2191 *npp = 0;
2192 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2193 {
2194 fprintf(outfile, "no parentheses with name \"");
2195 PCHARSV(*pp, 0, -1, outfile);
2196 fprintf(outfile, "\"\n");
2197 }
2198 *pp = npp;
2199 return p;
2200 }
2201 #endif /* SUPPORT_PCRE32 */
2202
2203
2204
2205 /*************************************************
2206 * Callout function *
2207 *************************************************/
2208
2209 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2210 the match. Yield zero unless more callouts than the fail count, or the callout
2211 data is not zero. */
2212
2213 static int callout(pcre_callout_block *cb)
2214 {
2215 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2216 int i, pre_start, post_start, subject_length;
2217
2218 if (callout_extra)
2219 {
2220 fprintf(f, "Callout %d: last capture = %d\n",
2221 cb->callout_number, cb->capture_last);
2222
2223 for (i = 0; i < cb->capture_top * 2; i += 2)
2224 {
2225 if (cb->offset_vector[i] < 0)
2226 fprintf(f, "%2d: <unset>\n", i/2);
2227 else
2228 {
2229 fprintf(f, "%2d: ", i/2);
2230 PCHARSV(cb->subject, cb->offset_vector[i],
2231 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2232 fprintf(f, "\n");
2233 }
2234 }
2235 }
2236
2237 /* Re-print the subject in canonical form, the first time or if giving full
2238 datails. On subsequent calls in the same match, we use pchars just to find the
2239 printed lengths of the substrings. */
2240
2241 if (f != NULL) fprintf(f, "--->");
2242
2243 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2244 PCHARS(post_start, cb->subject, cb->start_match,
2245 cb->current_position - cb->start_match, f);
2246
2247 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2248
2249 PCHARSV(cb->subject, cb->current_position,
2250 cb->subject_length - cb->current_position, f);
2251
2252 if (f != NULL) fprintf(f, "\n");
2253
2254 /* Always print appropriate indicators, with callout number if not already
2255 shown. For automatic callouts, show the pattern offset. */
2256
2257 if (cb->callout_number == 255)
2258 {
2259 fprintf(outfile, "%+3d ", cb->pattern_position);
2260 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2261 }
2262 else
2263 {
2264 if (callout_extra) fprintf(outfile, " ");
2265 else fprintf(outfile, "%3d ", cb->callout_number);
2266 }
2267
2268 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2269 fprintf(outfile, "^");
2270
2271 if (post_start > 0)
2272 {
2273 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2274 fprintf(outfile, "^");
2275 }
2276
2277 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2278 fprintf(outfile, " ");
2279
2280 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2281 pbuffer + cb->pattern_position);
2282
2283 fprintf(outfile, "\n");
2284 first_callout = 0;
2285
2286 if (cb->mark != last_callout_mark)
2287 {
2288 if (cb->mark == NULL)
2289 fprintf(outfile, "Latest Mark: <unset>\n");
2290 else
2291 {
2292 fprintf(outfile, "Latest Mark: ");
2293 PCHARSV(cb->mark, 0, -1, outfile);
2294 putc('\n', outfile);
2295 }
2296 last_callout_mark = cb->mark;
2297 }
2298
2299 if (cb->callout_data != NULL)
2300 {
2301 int callout_data = *((int *)(cb->callout_data));
2302 if (callout_data != 0)
2303 {
2304 fprintf(outfile, "Callout data = %d\n", callout_data);
2305 return callout_data;
2306 }
2307 }
2308
2309 return (cb->callout_number != callout_fail_id)? 0 :
2310 (++callout_count >= callout_fail_count)? 1 : 0;
2311 }
2312
2313
2314 /*************************************************
2315 * Local malloc functions *
2316 *************************************************/
2317
2318 /* Alternative malloc function, to test functionality and save the size of a
2319 compiled re, which is the first store request that pcre_compile() makes. The
2320 show_malloc variable is set only during matching. */
2321
2322 static void *new_malloc(size_t size)
2323 {
2324 void *block = malloc(size);
2325 gotten_store = size;
2326 if (first_gotten_store == 0) first_gotten_store = size;
2327 if (show_malloc)
2328 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2329 return block;
2330 }
2331
2332 static void new_free(void *block)
2333 {
2334 if (show_malloc)
2335 fprintf(outfile, "free %p\n", block);
2336 free(block);
2337 }
2338
2339 /* For recursion malloc/free, to test stacking calls */
2340
2341 static void *stack_malloc(size_t size)
2342 {
2343 void *block = malloc(size);
2344 if (show_malloc)
2345 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346 return block;
2347 }
2348
2349 static void stack_free(void *block)
2350 {
2351 if (show_malloc)
2352 fprintf(outfile, "stack_free %p\n", block);
2353 free(block);
2354 }
2355
2356
2357 /*************************************************
2358 * Call pcre_fullinfo() *
2359 *************************************************/
2360
2361 /* Get one piece of information from the pcre_fullinfo() function. When only
2362 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2363 value, but the code is defensive.
2364
2365 Arguments:
2366 re compiled regex
2367 study study data
2368 option PCRE_INFO_xxx option
2369 ptr where to put the data
2370
2371 Returns: 0 when OK, < 0 on error
2372 */
2373
2374 static int
2375 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376 {
2377 int rc;
2378
2379 if (pcre_mode == PCRE32_MODE)
2380 #ifdef SUPPORT_PCRE32
2381 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2382 #else
2383 rc = PCRE_ERROR_BADMODE;
2384 #endif
2385 else if (pcre_mode == PCRE16_MODE)
2386 #ifdef SUPPORT_PCRE16
2387 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388 #else
2389 rc = PCRE_ERROR_BADMODE;
2390 #endif
2391 else
2392 #ifdef SUPPORT_PCRE8
2393 rc = pcre_fullinfo(re, study, option, ptr);
2394 #else
2395 rc = PCRE_ERROR_BADMODE;
2396 #endif
2397
2398 if (rc < 0)
2399 {
2400 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2401 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2402 if (rc == PCRE_ERROR_BADMODE)
2403 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2404 "%d-bit mode\n", 8 * CHAR_SIZE,
2405 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2406 }
2407
2408 return rc;
2409 }
2410
2411
2412
2413 /*************************************************
2414 * Swap byte functions *
2415 *************************************************/
2416
2417 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418 value, respectively.
2419
2420 Arguments:
2421 value any number
2422
2423 Returns: the byte swapped value
2424 */
2425
2426 static pcre_uint32
2427 swap_uint32(pcre_uint32 value)
2428 {
2429 return ((value & 0x000000ff) << 24) |
2430 ((value & 0x0000ff00) << 8) |
2431 ((value & 0x00ff0000) >> 8) |
2432 (value >> 24);
2433 }
2434
2435 static pcre_uint16
2436 swap_uint16(pcre_uint16 value)
2437 {
2438 return (value >> 8) | (value << 8);
2439 }
2440
2441
2442
2443 /*************************************************
2444 * Flip bytes in a compiled pattern *
2445 *************************************************/
2446
2447 /* This function is called if the 'F' option was present on a pattern that is
2448 to be written to a file. We flip the bytes of all the integer fields in the
2449 regex data block and the study block. In 16-bit mode this also flips relevant
2450 bytes in the pattern itself. This is to make it possible to test PCRE's
2451 ability to reload byte-flipped patterns, e.g. those compiled on a different
2452 architecture. */
2453
2454 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2455 static void
2456 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2457 {
2458 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2459 #ifdef SUPPORT_PCRE16
2460 int op;
2461 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2462 int length = re->name_count * re->name_entry_size;
2463 #ifdef SUPPORT_UTF
2464 BOOL utf = (re->options & PCRE_UTF16) != 0;
2465 BOOL utf16_char = FALSE;
2466 #endif /* SUPPORT_UTF */
2467 #endif /* SUPPORT_PCRE16 */
2468
2469 /* Always flip the bytes in the main data block and study blocks. */
2470
2471 re->magic_number = REVERSED_MAGIC_NUMBER;
2472 re->size = swap_uint32(re->size);
2473 re->options = swap_uint32(re->options);
2474 re->flags = swap_uint16(re->flags);
2475 re->top_bracket = swap_uint16(re->top_bracket);
2476 re->top_backref = swap_uint16(re->top_backref);
2477 re->first_char = swap_uint16(re->first_char);
2478 re->req_char = swap_uint16(re->req_char);
2479 re->name_table_offset = swap_uint16(re->name_table_offset);
2480 re->name_entry_size = swap_uint16(re->name_entry_size);
2481 re->name_count = swap_uint16(re->name_count);
2482
2483 if (extra != NULL)
2484 {
2485 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2486 rsd->size = swap_uint32(rsd->size);
2487 rsd->flags = swap_uint32(rsd->flags);
2488 rsd->minlength = swap_uint32(rsd->minlength);
2489 }
2490
2491 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2492 in the name table, if present, and then in the pattern itself. */
2493
2494 #ifdef SUPPORT_PCRE16
2495 if (pcre_mode != PCRE16_MODE) return;
2496
2497 while(TRUE)
2498 {
2499 /* Swap previous characters. */
2500 while (length-- > 0)
2501 {
2502 *ptr = swap_uint16(*ptr);
2503 ptr++;
2504 }
2505 #ifdef SUPPORT_UTF
2506 if (utf16_char)
2507 {
2508 if ((ptr[-1] & 0xfc00) == 0xd800)
2509 {
2510 /* We know that there is only one extra character in UTF-16. */
2511 *ptr = swap_uint16(*ptr);
2512 ptr++;
2513 }
2514 }
2515 utf16_char = FALSE;
2516 #endif /* SUPPORT_UTF */
2517
2518 /* Get next opcode. */
2519
2520 length = 0;
2521 op = *ptr;
2522 *ptr++ = swap_uint16(op);
2523
2524 switch (op)
2525 {
2526 case OP_END:
2527 return;
2528
2529 #ifdef SUPPORT_UTF
2530 case OP_CHAR:
2531 case OP_CHARI:
2532 case OP_NOT:
2533 case OP_NOTI:
2534 case OP_STAR:
2535 case OP_MINSTAR:
2536 case OP_PLUS:
2537 case OP_MINPLUS:
2538 case OP_QUERY:
2539 case OP_MINQUERY:
2540 case OP_UPTO:
2541 case OP_MINUPTO:
2542 case OP_EXACT:
2543 case OP_POSSTAR:
2544 case OP_POSPLUS:
2545 case OP_POSQUERY:
2546 case OP_POSUPTO:
2547 case OP_STARI:
2548 case OP_MINSTARI:
2549 case OP_PLUSI:
2550 case OP_MINPLUSI:
2551 case OP_QUERYI:
2552 case OP_MINQUERYI:
2553 case OP_UPTOI:
2554 case OP_MINUPTOI:
2555 case OP_EXACTI:
2556 case OP_POSSTARI:
2557 case OP_POSPLUSI:
2558 case OP_POSQUERYI:
2559 case OP_POSUPTOI:
2560 case OP_NOTSTAR:
2561 case OP_NOTMINSTAR:
2562 case OP_NOTPLUS:
2563 case OP_NOTMINPLUS:
2564 case OP_NOTQUERY:
2565 case OP_NOTMINQUERY:
2566 case OP_NOTUPTO:
2567 case OP_NOTMINUPTO:
2568 case OP_NOTEXACT:
2569 case OP_NOTPOSSTAR:
2570 case OP_NOTPOSPLUS:
2571 case OP_NOTPOSQUERY:
2572 case OP_NOTPOSUPTO:
2573 case OP_NOTSTARI:
2574 case OP_NOTMINSTARI:
2575 case OP_NOTPLUSI:
2576 case OP_NOTMINPLUSI:
2577 case OP_NOTQUERYI:
2578 case OP_NOTMINQUERYI:
2579 case OP_NOTUPTOI:
2580 case OP_NOTMINUPTOI:
2581 case OP_NOTEXACTI:
2582 case OP_NOTPOSSTARI:
2583 case OP_NOTPOSPLUSI:
2584 case OP_NOTPOSQUERYI:
2585 case OP_NOTPOSUPTOI:
2586 if (utf) utf16_char = TRUE;
2587 #endif
2588 /* Fall through. */
2589
2590 default:
2591 length = OP_lengths16[op] - 1;
2592 break;
2593
2594 case OP_CLASS:
2595 case OP_NCLASS:
2596 /* Skip the character bit map. */
2597 ptr += 32/sizeof(pcre_uint16);
2598 length = 0;
2599 break;
2600
2601 case OP_XCLASS:
2602 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2603 if (LINK_SIZE > 1)
2604 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2605 - (1 + LINK_SIZE + 1));
2606 else
2607 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2608
2609 /* Reverse the size of the XCLASS instance. */
2610 *ptr = swap_uint16(*ptr);
2611 ptr++;
2612 if (LINK_SIZE > 1)
2613 {
2614 *ptr = swap_uint16(*ptr);
2615 ptr++;
2616 }
2617
2618 op = *ptr;
2619 *ptr = swap_uint16(op);
2620 ptr++;
2621 if ((op & XCL_MAP) != 0)
2622 {
2623 /* Skip the character bit map. */
2624 ptr += 32/sizeof(pcre_uint16);
2625 length -= 32/sizeof(pcre_uint16);
2626 }
2627 break;
2628 }
2629 }
2630 /* Control should never reach here in 16 bit mode. */
2631 #endif /* SUPPORT_PCRE16 */
2632 }
2633 #endif /* SUPPORT_PCRE[8|16] */
2634
2635
2636
2637 #if defined SUPPORT_PCRE32
2638 static void
2639 regexflip_32(pcre *ere, pcre_extra *extra)
2640 {
2641 real_pcre32 *re = (real_pcre32 *)ere;
2642 int op;
2643 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2644 int length = re->name_count * re->name_entry_size;
2645
2646 /* Always flip the bytes in the main data block and study blocks. */
2647
2648 re->magic_number = REVERSED_MAGIC_NUMBER;
2649 re->size = swap_uint32(re->size);
2650 re->options = swap_uint32(re->options);
2651 re->flags = swap_uint16(re->flags);
2652 re->top_bracket = swap_uint16(re->top_bracket);
2653 re->top_backref = swap_uint16(re->top_backref);
2654 re->first_char = swap_uint32(re->first_char);
2655 re->req_char = swap_uint32(re->req_char);
2656 re->name_table_offset = swap_uint16(re->name_table_offset);
2657 re->name_entry_size = swap_uint16(re->name_entry_size);
2658 re->name_count = swap_uint16(re->name_count);
2659
2660 if (extra != NULL)
2661 {
2662 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2663 rsd->size = swap_uint32(rsd->size);
2664 rsd->flags = swap_uint32(rsd->flags);
2665 rsd->minlength = swap_uint32(rsd->minlength);
2666 }
2667
2668 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2669 the pattern itself. */
2670
2671 while(TRUE)
2672 {
2673 /* Swap previous characters. */
2674 while (length-- > 0)
2675 {
2676 *ptr = swap_uint32(*ptr);
2677 ptr++;
2678 }
2679
2680 /* Get next opcode. */
2681
2682 length = 0;
2683 op = *ptr;
2684 *ptr++ = swap_uint32(op);
2685
2686 switch (op)
2687 {
2688 case OP_END:
2689 return;
2690
2691 default:
2692 length = OP_lengths32[op] - 1;
2693 break;
2694
2695 case OP_CLASS:
2696 case OP_NCLASS:
2697 /* Skip the character bit map. */
2698 ptr += 32/sizeof(pcre_uint32);
2699 length = 0;
2700 break;
2701
2702 case OP_XCLASS:
2703 /* LINK_SIZE can only be 1 in 32-bit mode. */
2704 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2705
2706 /* Reverse the size of the XCLASS instance. */
2707 *ptr = swap_uint32(*ptr);
2708 ptr++;
2709
2710 op = *ptr;
2711 *ptr = swap_uint32(op);
2712 ptr++;
2713 if ((op & XCL_MAP) != 0)
2714 {
2715 /* Skip the character bit map. */
2716 ptr += 32/sizeof(pcre_uint32);
2717 length -= 32/sizeof(pcre_uint32);
2718 }
2719 break;
2720 }
2721 }
2722 /* Control should never reach here in 32 bit mode. */
2723 }
2724
2725 #endif /* SUPPORT_PCRE32 */
2726
2727
2728
2729 static void
2730 regexflip(pcre *ere, pcre_extra *extra)
2731 {
2732 #if defined SUPPORT_PCRE32
2733 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2734 regexflip_32(ere, extra);
2735 #endif
2736 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2737 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2738 regexflip8_or_16(ere, extra);
2739 #endif
2740 }
2741
2742
2743
2744 /*************************************************
2745 * Check match or recursion limit *
2746 *************************************************/
2747
2748 static int
2749 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2750 int start_offset, int options, int *use_offsets, int use_size_offsets,
2751 int flag, unsigned long int *limit, int errnumber, const char *msg)
2752 {
2753 int count;
2754 int min = 0;
2755 int mid = 64;
2756 int max = -1;
2757
2758 extra->flags |= flag;
2759
2760 for (;;)
2761 {
2762 *limit = mid;
2763
2764 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2765 use_offsets, use_size_offsets);
2766
2767 if (count == errnumber)
2768 {
2769 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2770 min = mid;
2771 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2772 }
2773
2774 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2775 count == PCRE_ERROR_PARTIAL)
2776 {
2777 if (mid == min + 1)
2778 {
2779 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2780 break;
2781 }
2782 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2783 max = mid;
2784 mid = (min + mid)/2;
2785 }
2786 else break; /* Some other error */
2787 }
2788
2789 extra->flags &= ~flag;
2790 return count;
2791 }
2792
2793
2794
2795 /*************************************************
2796 * Case-independent strncmp() function *
2797 *************************************************/
2798
2799 /*
2800 Arguments:
2801 s first string
2802 t second string
2803 n number of characters to compare
2804
2805 Returns: < 0, = 0, or > 0, according to the comparison
2806 */
2807
2808 static int
2809 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2810 {
2811 while (n--)
2812 {
2813 int c = tolower(*s++) - tolower(*t++);
2814 if (c) return c;
2815 }
2816 return 0;
2817 }
2818
2819
2820
2821 /*************************************************
2822 * Check newline indicator *
2823 *************************************************/
2824
2825 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2826 a message and return 0 if there is no match.
2827
2828 Arguments:
2829 p points after the leading '<'
2830 f file for error message
2831
2832 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2833 */
2834
2835 static int
2836 check_newline(pcre_uint8 *p, FILE *f)
2837 {
2838 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2839 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2840 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2841 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2842 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2843 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2844 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2845 fprintf(f, "Unknown newline type at: <%s\n", p);
2846 return 0;
2847 }
2848
2849
2850
2851 /*************************************************
2852 * Usage function *
2853 *************************************************/
2854
/* Writes the command-line help text to stdout. Options that depend on how
pcretest was built are listed only when the corresponding support is compiled
in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);
fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " pcre32 32 bit library support enabled [0, 1]\n", stdout);
fputs(" utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n", stdout);
fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2909
2910
2911
2912 /*************************************************
2913 * Main Program *
2914 *************************************************/
2915
2916 /* Read lines from named file or stdin and write to named file or stdout; lines
2917 consist of a regular expression, in delimiters and optionally followed by
2918 options, followed by a set of test data, terminated by an empty line. */
2919
2920 int main(int argc, char **argv)
2921 {
2922 FILE *infile = stdin;
2923 const char *version;
2924 int options = 0;
2925 int study_options = 0;
2926 int default_find_match_limit = FALSE;
2927 int op = 1;
2928 int timeit = 0;
2929 int timeitm = 0;
2930 int showinfo = 0;
2931 int showstore = 0;
2932 int force_study = -1;
2933 int force_study_options = 0;
2934 int quiet = 0;
2935 int size_offsets = 45;
2936 int size_offsets_max;
2937 int *offsets = NULL;
2938 int debug = 0;
2939 int done = 0;
2940 int all_use_dfa = 0;
2941 int verify_jit = 0;
2942 int yield = 0;
2943 #ifdef SUPPORT_PCRE32
2944 int mask_utf32 = 0;
2945 #endif
2946 int stack_size;
2947 pcre_uint8 *dbuffer = NULL;
2948 size_t dbuffer_size = 1u << 14;
2949
2950 #if !defined NOPOSIX
2951 int posix = 0;
2952 #endif
2953 #if !defined NODFA
2954 int *dfa_workspace = NULL;
2955 #endif
2956
2957 pcre_jit_stack *jit_stack = NULL;
2958
2959 /* These vectors store, end-to-end, a list of zero-terminated captured
2960 substring names, each list itself being terminated by an empty name. Assume
2961 that 1024 is plenty long enough for the few names we'll be testing. It is
2962 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2963 for the actual memory, to ensure alignment. */
2964
2965 pcre_uint32 copynames[1024];
2966 pcre_uint32 getnames[1024];
2967
2968 #ifdef SUPPORT_PCRE32
2969 pcre_uint32 *cn32ptr;
2970 pcre_uint32 *gn32ptr;
2971 #endif
2972
2973 #ifdef SUPPORT_PCRE16
2974 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2975 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2976 pcre_uint16 *cn16ptr;
2977 pcre_uint16 *gn16ptr;
2978 #endif
2979
2980 #ifdef SUPPORT_PCRE8
2981 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2982 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2983 pcre_uint8 *cn8ptr;
2984 pcre_uint8 *gn8ptr;
2985 #endif
2986
2987 /* Get buffers from malloc() so that valgrind will check their misuse when
2988 debugging. They grow automatically when very long lines are read. The 16-
2989 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2990
2991 buffer = (pcre_uint8 *)malloc(buffer_size);
2992 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2993
2994 /* The outfile variable is static so that new_malloc can use it. */
2995
2996 outfile = stdout;
2997
2998 /* The following _setmode() stuff is some Windows magic that tells its runtime
2999 library to translate CRLF into a single LF character. At least, that's what
3000 I've been told: never having used Windows I take this all on trust. Originally
3001 it set 0x8000, but then I was advised that _O_BINARY was better. */
3002
3003 #if defined(_WIN32) || defined(WIN32)
3004 _setmode( _fileno( stdout ), _O_BINARY );
3005 #endif
3006
3007 /* Get the version number: both pcre_version() and pcre16_version() give the
3008 same answer. We just need to ensure that we call one that is available. */
3009
3010 #if defined SUPPORT_PCRE8
3011 version = pcre_version();
3012 #elif defined SUPPORT_PCRE16
3013 version = pcre16_version();
3014 #elif defined SUPPORT_PCRE32
3015 version = pcre32_version();
3016 #endif
3017
3018 /* Scan options */
3019
3020 while (argc > 1 && argv[op][0] == '-')
3021 {
3022 pcre_uint8 *endptr;
3023 char *arg = argv[op];
3024
3025 if (strcmp(arg, "-m") == 0) showstore = 1;
3026 else if (strcmp(arg, "-s") == 0) force_study = 0;
3027
3028 else if (strncmp(arg, "-s+", 3) == 0)
3029 {
3030 arg += 3;
3031 if (*arg == '+') { arg++; verify_jit = TRUE; }
3032 force_study = 1;
3033 if (*arg == 0)
3034 force_study_options = jit_study_bits[6];
3035 else if (*arg >= '1' && *arg <= '7')
3036 force_study_options = jit_study_bits[*arg - '1'];
3037 else goto BAD_ARG;
3038 }
3039 else if (strcmp(arg, "-8") == 0)
3040 {
3041 #ifdef SUPPORT_PCRE8
3042 pcre_mode = PCRE8_MODE;
3043 #else
3044 printf("** This version of PCRE was built without 8-bit support\n");
3045 exit(1);
3046 #endif
3047 }
3048 else if (strcmp(arg, "-16") == 0)
3049 {
3050 #ifdef SUPPORT_PCRE16
3051 pcre_mode = PCRE16_MODE;
3052 #else
3053 printf("** This version of PCRE was built without 16-bit support\n");
3054 exit(1);
3055 #endif
3056 }
3057 else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
3058 {
3059 #ifdef SUPPORT_PCRE32
3060 pcre_mode = PCRE32_MODE;
3061 mask_utf32 = (strcmp(arg, "-32+") == 0);
3062 #else
3063 printf("** This version of PCRE was built without 32-bit support\n");
3064 exit(1);
3065 #endif
3066 }
3067 else if (strcmp(arg, "-q") == 0) quiet = 1;
3068 else if (strcmp(arg, "-b") == 0) debug = 1;
3069 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3070 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3071 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3072 #if !defined NODFA
3073 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3074 #endif
3075 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3076 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3077 *endptr == 0))
3078 {
3079 op++;
3080 argc--;
3081 }
3082 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3083 {
3084 int both = arg[2] == 0;
3085 int temp;
3086 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3087 *endptr == 0))
3088 {
3089 timeitm = temp;
3090 op++;
3091 argc--;
3092 }
3093 else timeitm = LOOPREPEAT;
3094 if (both) timeit = timeitm;
3095 }
3096 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3097 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3098 *endptr == 0))
3099 {
3100 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3101 printf("PCRE: -S not supported on this OS\n");
3102 exit(1);
3103 #else
3104 int rc;
3105 struct rlimit rlim;
3106 getrlimit(RLIMIT_STACK, &rlim);
3107 rlim.rlim_cur = stack_size * 1024 * 1024;
3108 rc = setrlimit(RLIMIT_STACK, &rlim);
3109 if (rc != 0)
3110 {
3111 printf("PCRE: setrlimit() failed with error %d\n", rc);
3112 exit(1);
3113 }
3114 op++;
3115 argc--;
3116 #endif
3117 }
3118 #if !defined NOPOSIX
3119 else if (strcmp(arg, "-p") == 0) posix = 1;
3120 #endif
3121 else if (strcmp(arg, "-C") == 0)
3122 {
3123 int rc;
3124 unsigned long int lrc;
3125
3126 if (argc > 2)
3127 {
3128 if (strcmp(argv[op + 1], "linksize") == 0)
3129 {
3130 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3131 printf("%d\n", rc);
3132 yield = rc;
3133 }
3134 else if (strcmp(argv[op + 1], "pcre8") == 0)
3135 {
3136 #ifdef SUPPORT_PCRE8
3137 printf("1\n");
3138 yield = 1;
3139 #else
3140 printf("0\n");
3141 yield = 0;
3142 #endif
3143 }
3144 else if (strcmp(argv[op + 1], "pcre16") == 0)
3145 {
3146 #ifdef SUPPORT_PCRE16
3147 printf("1\n");
3148 yield = 1;
3149 #else
3150 printf("0\n");
3151 yield = 0;
3152 #endif
3153 }
3154 else if (strcmp(argv[op + 1], "pcre32") == 0)
3155 {
3156 #ifdef SUPPORT_PCRE32
3157 printf("1\n");
3158 yield = 1;
3159 #else
3160 printf("0\n");
3161 yield = 0;
3162 #endif
3163 goto EXIT;
3164 }
3165 if (strcmp(argv[op + 1], "utf") == 0)
3166 {
3167 #ifdef SUPPORT_PCRE8
3168 if (pcre_mode == PCRE8_MODE)
3169 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3170 #endif
3171 #ifdef SUPPORT_PCRE16
3172 if (pcre_mode == PCRE16_MODE)
3173 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3174 #endif
3175 #ifdef SUPPORT_PCRE32
3176 if (pcre_mode == PCRE32_MODE)
3177 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3178 #endif
3179 printf("%d\n", rc);
3180 yield = rc;
3181 goto EXIT;
3182 }
3183 else if (strcmp(argv[op + 1], "ucp") == 0)
3184 {
3185 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3186 printf("%d\n", rc);
3187 yield = rc;
3188 }
3189 else if (strcmp(argv[op + 1], "jit") == 0)
3190 {
3191 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3192 printf("%d\n", rc);
3193 yield = rc;
3194 }
3195 else if (strcmp(argv[op + 1], "newline") == 0)
3196 {
3197 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3198 print_newline_config(rc, TRUE);
3199 }
3200 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3201 {
3202 #ifdef EBCDIC
3203 printf("1\n");
3204 yield = 1;
3205 #else
3206 printf("0\n");
3207 #endif
3208 }
3209 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3210 {
3211 #ifdef EBCDIC
3212 printf("0x%02x\n", CHAR_LF);
3213 #else
3214 printf("0\n");
3215 #endif
3216 }
3217 else
3218 {
3219 printf("Unknown -C option: %s\n", argv[op + 1]);
3220 }
3221 goto EXIT;
3222 }
3223
3224 /* No argument for -C: output all configuration information. */
3225
3226 printf("PCRE version %s\n", version);
3227 printf("Compiled with\n");
3228
3229 #ifdef EBCDIC
3230 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3231 #endif
3232
3233 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3234 are set, either both UTFs are supported or both are not supported. */
3235
3236 #ifdef SUPPORT_PCRE8
3237 printf(" 8-bit support\n");
3238 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3239 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3240 #endif
3241 #ifdef SUPPORT_PCRE16
3242 printf(" 16-bit support\n");
3243 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3244 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3245 #endif
3246 #ifdef SUPPORT_PCRE32
3247 printf(" 32-bit support\n");
3248 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3249 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3250 #endif
3251
3252 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3253 printf(" %sUnicode properties support\n", rc? "" : "No ");
3254 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3255 if (rc)
3256 {
3257 const char *arch;
3258 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3259 printf(" Just-in-time compiler support: %s\n", arch);
3260 }
3261 else
3262 printf(" No just-in-time compiler support\n");
3263 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3264 print_newline_config(rc, FALSE);
3265 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3266 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3267 "all Unicode newlines");
3268 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3269 printf(" Internal link size = %d\n", rc);
3270 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3271 printf(" POSIX malloc threshold = %d\n", rc);
3272 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3273 printf(" Default match limit = %ld\n", lrc);
3274 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3275 printf(" Default recursion depth limit = %ld\n", lrc);
3276 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3277 printf(" Match recursion uses %s", rc? "stack" : "heap");
3278 if (showstore)
3279 {
3280 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3281 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3282 }
3283 printf("\n");
3284 goto EXIT;
3285 }
3286 else if (strcmp(arg, "-help") == 0 ||
3287 strcmp(arg, "--help") == 0)
3288 {
3289 usage();
3290 goto EXIT;
3291 }
3292 else
3293 {
3294 BAD_ARG:
3295 printf("** Unknown or malformed option %s\n", arg);
3296 usage();
3297 yield = 1;
3298 goto EXIT;
3299 }
3300 op++;
3301 argc--;
3302 }
3303
3304 /* Get the store for the offsets vector, and remember what it was */
3305
3306 size_offsets_max = size_offsets;
3307 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3308 if (offsets == NULL)
3309 {
3310 printf("** Failed to get %d bytes of memory for offsets vector\n",
3311 (int)(size_offsets_max * sizeof(int)));
3312 yield = 1;
3313 goto EXIT;
3314 }
3315
3316 /* Sort out the input and output files */
3317
3318 if (argc > 1)
3319 {
3320 infile = fopen(argv[op], INPUT_MODE);
3321 if (infile == NULL)
3322 {
3323 printf("** Failed to open %s\n", argv[op]);
3324 yield = 1;
3325 goto EXIT;
3326 }
3327 }
3328
3329 if (argc > 2)
3330 {
3331 outfile = fopen(argv[op+1], OUTPUT_MODE);
3332 if (outfile == NULL)
3333 {
3334 printf("** Failed to open %s\n", argv[op+1]);
3335 yield = 1;
3336 goto EXIT;
3337 }
3338 }
3339
3340 /* Set alternative malloc function */
3341
3342 #ifdef SUPPORT_PCRE8
3343 pcre_malloc = new_malloc;
3344 pcre_free = new_free;
3345 pcre_stack_malloc = stack_malloc;
3346 pcre_stack_free = stack_free;
3347 #endif
3348
3349 #ifdef SUPPORT_PCRE16
3350 pcre16_malloc = new_malloc;
3351 pcre16_free = new_free;
3352 pcre16_stack_malloc = stack_malloc;
3353 pcre16_stack_free = stack_free;
3354 #endif
3355
3356 #ifdef SUPPORT_PCRE32
3357 pcre32_malloc = new_malloc;
3358 pcre32_free = new_free;
3359 pcre32_stack_malloc = stack_malloc;
3360 pcre32_stack_free = stack_free;
3361 #endif
3362
3363 /* Heading line unless quiet, then prompt for first regex if stdin */
3364
3365 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3366
3367 /* Main loop */
3368
3369 while (!done)
3370 {
3371 pcre *re = NULL;
3372 pcre_extra *extra = NULL;
3373
3374 #if !defined NOPOSIX /* There are still compilers that require no indent */
3375 regex_t preg;
3376 int do_posix = 0;
3377 #endif
3378
3379 const char *error;
3380 pcre_uint8 *markptr;
3381 pcre_uint8 *p, *pp, *ppp;
3382 pcre_uint8 *to_file = NULL;
3383 const pcre_uint8 *tables = NULL;
3384 unsigned long int get_options;
3385 unsigned long int true_size, true_study_size = 0;
3386 size_t size, regex_gotten_store;
3387 int do_allcaps = 0;
3388 int do_mark = 0;
3389 int do_study = 0;
3390 int no_force_study = 0;
3391 int do_debug = debug;
3392 int do_G = 0;
3393 int do_g = 0;
3394 int do_showinfo = showinfo;
3395 int do_showrest = 0;
3396 int do_showcaprest = 0;
3397 int do_flip = 0;
3398 int erroroffset, len, delimiter, poffset;
3399
3400 #if !defined NODFA
3401 int dfa_matched = 0;
3402 #endif
3403
3404 use_utf = 0;
3405 debug_lengths = 1;
3406
3407 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3408 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3409 fflush(outfile);
3410
3411 p = buffer;
3412 while (isspace(*p)) p++;
3413 if (*p == 0) continue;
3414
3415 /* See if the pattern is to be loaded pre-compiled from a file. */
3416
3417 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3418 {
3419 pcre_uint32 magic;
3420 pcre_uint8 sbuf[8];
3421 FILE *f;
3422
3423 p++;
3424 if (*p == '!')
3425 {
3426 do_debug = TRUE;
3427 do_showinfo = TRUE;
3428 p++;
3429 }
3430
3431 pp = p + (int)strlen((char *)p);
3432 while (isspace(pp[-1])) pp--;
3433 *pp = 0;
3434
3435 f = fopen((char *)p, "rb");
3436 if (f == NULL)
3437 {
3438 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3439 continue;
3440 }
3441
3442 first_gotten_store = 0;
3443 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3444
3445 true_size =
3446 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3447 true_study_size =
3448 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3449
3450 re = (pcre *)new_malloc(true_size);
3451 if (re == NULL)
3452 {
3453 printf("** Failed to get %d bytes of memory for pcre object\n",
3454 (int)true_size);
3455 yield = 1;
3456 goto EXIT;
3457 }
3458 regex_gotten_store = first_gotten_store;
3459
3460 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3461
3462 magic = REAL_PCRE_MAGIC(re);
3463 if (magic != MAGIC_NUMBER)
3464 {
3465 if (swap_uint32(magic) == MAGIC_NUMBER)
3466 {
3467 do_flip = 1;
3468 }
3469 else
3470 {
3471 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3472 new_free(re);
3473 fclose(f);
3474 continue;
3475 }
3476 }
3477
3478 /* We hide the byte-invert info for little and big endian tests. */
3479 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3480 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3481
3482 /* Now see if there is any following study data. */
3483
3484 if (true_study_size != 0)
3485 {
3486 pcre_study_data *psd;
3487
3488 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3489 extra->flags = PCRE_EXTRA_STUDY_DATA;
3490
3491 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3492 extra->study_data = psd;
3493
3494 if (fread(psd, 1, true_study_size, f) != true_study_size)
3495 {
3496 FAIL_READ:
3497 fprintf(outfile, "Failed to read data from %s\n", p);
3498 if (extra != NULL)
3499 {
3500 PCRE_FREE_STUDY(extra);
3501 }
3502 new_free(re);
3503 fclose(f);
3504 continue;
3505 }
3506 fprintf(outfile, "Study data loaded from %s\n", p);
3507 do_study = 1; /* To get the data output if requested */
3508 }
3509 else fprintf(outfile, "No study data\n");
3510
3511 /* Flip the necessary bytes. */
3512 if (do_flip)
3513 {
3514 int rc;
3515 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3516 if (rc == PCRE_ERROR_BADMODE)
3517 {
3518 pcre_uint16 flags_in_host_byte_order;
3519 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3520 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3521 else
3522 flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3523 /* Simulate the result of the function call below. */
3524 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3525 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3526 PCRE_INFO_OPTIONS);
3527 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3528 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3529 new_free(re);
3530 fclose(f);
3531 continue;
3532 }
3533 }
3534
3535 /* Need to know if UTF-8 for printing data strings. */
3536
3537 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3538 {
3539 new_free(re);
3540 fclose(f);
3541 continue;
3542 }
3543 use_utf = (get_options & PCRE_UTF8) != 0;
3544
3545 fclose(f);
3546 goto SHOW_INFO;
3547 }
3548
3549 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3550 the pattern; if it isn't complete, read more. */
3551
3552 delimiter = *p++;
3553
3554 if (isalnum(delimiter) || delimiter == '\\')
3555 {
3556 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3557 goto SKIP_DATA;
3558 }
3559
3560 pp = p;
3561 poffset = (int)(p - buffer);
3562
3563 for(;;)
3564 {
3565 while (*pp != 0)
3566 {
3567 if (*pp == '\\' && pp[1] != 0) pp++;
3568 else if (*pp == delimiter) break;
3569 pp++;
3570 }
3571 if (*pp != 0) break;
3572 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3573 {
3574 fprintf(outfile, "** Unexpected EOF\n");
3575 done = 1;
3576 goto CONTINUE;
3577 }
3578 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3579 }
3580
3581 /* The buffer may have moved while being extended; reset the start of data
3582 pointer to the correct relative point in the buffer. */
3583
3584 p = buffer + poffset;
3585
3586 /* If the first character after the delimiter is backslash, make
3587 the pattern end with backslash. This is purely to provide a way
3588 of testing for the error message when a pattern ends with backslash. */
3589
3590 if (pp[1] == '\\') *pp++ = '\\';
3591
3592 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3593 for callouts. */
3594
3595 *pp++ = 0;
3596 strcpy((char *)pbuffer, (char *)p);
3597
3598 /* Look for options after final delimiter */
3599
3600 options = 0;
3601 study_options = force_study_options;
3602 log_store = showstore; /* default from command line */
3603
3604 while (*pp != 0)
3605 {
3606 switch (*pp++)
3607 {
3608 case 'f': options |= PCRE_FIRSTLINE; break;
3609 case 'g': do_g = 1; break;
3610 case 'i': options |= PCRE_CASELESS; break;
3611 case 'm': options |= PCRE_MULTILINE; break;
3612 case 's': options |= PCRE_DOTALL; break;
3613 case 'x': options |= PCRE_EXTENDED; break;
3614
3615 case '+':
3616 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3617 break;
3618
3619 case '=': do_allcaps = 1; break;
3620 case 'A': options |= PCRE_ANCHORED; break;
3621 case 'B': do_debug = 1; break;
3622 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3623 case 'D': do_debug = do_showinfo = 1; break;
3624 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3625 case 'F': do_flip = 1; break;
3626 case 'G': do_G = 1; break;
3627 case 'I': do_showinfo = 1; break;
3628 case 'J': options |= PCRE_DUPNAMES; break;
3629 case 'K': do_mark = 1; break;
3630 case 'M': log_store = 1; break;
3631 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3632
3633 #if !defined NOPOSIX
3634 case 'P': do_posix = 1; break;
3635 #endif
3636
3637 case 'S':
3638 do_study = 1;
3639 for (;;)
3640 {
3641 switch (*pp++)
3642 {
3643 case 'S':
3644 do_study = 0;
3645 no_force_study = 1;
3646 break;
3647
3648 case '!':
3649 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3650 break;
3651
3652 case '+':
3653 if (*pp == '+')
3654 {
3655 verify_jit = TRUE;
3656 pp++;
3657 }
3658 if (*pp >= '1' && *pp <= '7')
3659 study_options |= jit_study_bits[*pp++ - '1'];
3660 else
3661 study_options |= jit_study_bits[6];
3662 break;
3663
3664 case '-':
3665 study_options &= ~PCRE_STUDY_ALLJIT;
3666 break;
3667
3668 default:
3669 pp--;
3670 goto ENDLOOP;
3671 }
3672 }
3673 ENDLOOP:
3674 break;
3675
3676 case 'U': options |= PCRE_UNGREEDY; break;
3677 case 'W': options |= PCRE_UCP; break;
3678 case 'X': options |= PCRE_EXTRA; break;
3679 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3680 case 'Z': debug_lengths = 0; break;
3681 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3682 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3683
3684 case 'T':
3685 switch (*pp++)
3686 {
3687 case '0': tables = tables0; break;
3688 case '1': tables = tables1; break;
3689
3690 case '\r':
3691 case '\n':
3692 case ' ':
3693 case 0:
3694 fprintf(outfile, "** Missing table number after /T\n");
3695 goto SKIP_DATA;
3696
3697 default:
3698 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3699 goto SKIP_DATA;
3700 }
3701 break;
3702
3703 case 'L':
3704 ppp = pp;
3705 /* The '\r' test here is so that it works on Windows. */
3706 /* The '0' test is just in case this is an unterminated line. */
3707 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3708 *ppp = 0;
3709 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3710 {
3711 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3712 goto SKIP_DATA;
3713 }
3714 locale_set = 1;
3715 tables = PCRE_MAKETABLES;
3716 pp = ppp;
3717 break;
3718
3719 case '>':
3720 to_file = pp;
3721 while (*pp != 0) pp++;
3722 while (isspace(pp[-1])) pp--;
3723 *pp = 0;
3724 break;
3725
3726 case '<':
3727 {
3728 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3729 {
3730 options |= PCRE_JAVASCRIPT_COMPAT;
3731 pp += 3;
3732 }
3733 else
3734 {
3735 int x = check_newline(pp, outfile);
3736 if (x == 0) goto SKIP_DATA;
3737 options |= x;
3738 while (*pp++ != '>');
3739 }
3740 }
3741 break;
3742
3743 case '\r': /* So that it works in Windows */
3744 case '\n':
3745 case ' ':
3746 break;
3747
3748 default:
3749 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3750 goto SKIP_DATA;
3751 }
3752 }
3753
3754 /* Handle compiling via the POSIX interface, which doesn't support the
3755 timing, showing, or debugging options, nor the ability to pass over
3756 local character tables. Neither does it have 16-bit support. */
3757
3758 #if !defined NOPOSIX
3759 if (posix || do_posix)
3760 {
3761 int rc;
3762 int cflags = 0;
3763
3764 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3765 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3766 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3767 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3768 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3769 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3770 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3771
3772 first_gotten_store = 0;
3773 rc = regcomp(&preg, (char *)p, cflags);
3774
3775 /* Compilation failed; go back for another re, skipping to blank line
3776 if non-interactive. */
3777
3778 if (rc != 0)
3779 {
3780 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3781 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3782 goto SKIP_DATA;
3783 }
3784 }
3785
3786 /* Handle compiling via the native interface */
3787
3788 else
3789 #endif /* !defined NOPOSIX */
3790
3791 {
3792 /* In 16- or 32-bit mode, convert the input. */
3793
3794 #ifdef SUPPORT_PCRE16
3795 if (pcre_mode == PCRE16_MODE)
3796 {
3797 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3798 {
3799 case -1:
3800 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3801 "converted to UTF-16\n");
3802 goto SKIP_DATA;
3803
3804 case -2:
3805 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3806 "cannot be converted to UTF-16\n");
3807 goto SKIP_DATA;
3808
3809 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3810 fprintf(outfile, "**Failed: character value greater than 0xffff "
3811 "cannot be converted to 16-bit in non-UTF mode\n");
3812 goto SKIP_DATA;
3813
3814 default:
3815 break;
3816 }
3817 p = (pcre_uint8 *)buffer16;
3818 }
3819 #endif
3820
3821 #ifdef SUPPORT_PCRE32
3822 if (pcre_mode == PCRE32_MODE)
3823 {
3824 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3825 {
3826 case -1:
3827 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3828 "converted to UTF-32\n");
3829 goto SKIP_DATA;
3830
3831 case -2:
3832 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3833 "cannot be converted to UTF-32\n");
3834 goto SKIP_DATA;
3835
3836 case -3:
3837 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3838 goto SKIP_DATA;
3839
3840 default:
3841 break;
3842 }
3843 p = (pcre_uint8 *)buffer32;
3844 }
3845 #endif
3846
3847 /* Compile many times when timing */
3848
3849 if (timeit > 0)
3850 {
3851 register int i;
3852 clock_t time_taken;
3853 clock_t start_time = clock();
3854 for (i = 0; i < timeit; i++)
3855 {
3856 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3857 if (re != NULL) free(re);
3858 }
3859 time_taken = clock() - start_time;
3860 fprintf(outfile, "Compile time %.4f milliseconds\n",
3861 (((double)time_taken * 1000.0) / (double)timeit) /
3862 (double)CLOCKS_PER_SEC);
3863 }
3864
3865 first_gotten_store = 0;
3866 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3867
3868 /* Compilation failed; go back for another re, skipping to blank line
3869 if non-interactive. */
3870
3871 if (re == NULL)
3872 {
3873 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3874 SKIP_DATA:
3875 if (infile != stdin)
3876 {
3877 for (;;)
3878 {
3879 if (extend_inputline(infile, buffer, NULL) == NULL)
3880 {
3881 done = 1;
3882 goto CONTINUE;
3883 }
3884 len = (int)strlen((char *)buffer);
3885 while (len > 0 && isspace(buffer[len-1])) len--;
3886 if (len == 0) break;
3887 }
3888 fprintf(outfile, "\n");
3889 }
3890 goto CONTINUE;
3891 }
3892
3893 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3894 within the regex; check for this so that we know how to process the data
3895 lines. */
3896
3897 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3898 goto SKIP_DATA;
3899 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3900
3901 /* Extract the size for possible writing before possibly flipping it,
3902 and remember the store that was got. */
3903
3904 true_size = REAL_PCRE_SIZE(re);
3905 regex_gotten_store = first_gotten_store;
3906
3907 /* Output code size information if requested */
3908
3909 if (log_store)
3910 {
3911 int name_count, name_entry_size, real_pcre_size;
3912
3913 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3914 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3915 real_pcre_size = 0;
3916 #ifdef SUPPORT_PCRE8
3917 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3918 real_pcre_size = sizeof(real_pcre);
3919 #endif
3920 #ifdef SUPPORT_PCRE16
3921 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3922 real_pcre_size = sizeof(real_pcre16);
3923 #endif
3924 #ifdef SUPPORT_PCRE32
3925 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3926 real_pcre_size = sizeof(real_pcre32);
3927 #endif
3928 fprintf(outfile, "Memory allocation (code space): %d\n",
3929 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3930 }
3931
3932 /* If -s or /S was present, study the regex to generate additional info to
3933 help with the matching, unless the pattern has the SS option, which
3934 suppresses the effect of /S (used for a few test patterns where studying is
3935 never sensible). */
3936
3937 if (do_study || (force_study >= 0 && !no_force_study))
3938 {
3939 if (timeit > 0)
3940 {
3941 register int i;
3942 clock_t time_taken;
3943 clock_t start_time = clock();
3944 for (i = 0; i < timeit; i++)
3945 {
3946 PCRE_STUDY(extra, re, study_options, &error);
3947 }
3948 time_taken = clock() - start_time;
3949 if (extra != NULL)
3950 {
3951 PCRE_FREE_STUDY(extra);
3952 }
3953 fprintf(outfile, " Study time %.4f milliseconds\n",
3954 (((double)time_taken * 1000.0) / (double)timeit) /
3955 (double)CLOCKS_PER_SEC);
3956 }
3957 PCRE_STUDY(extra, re, study_options, &error);
3958 if (error != NULL)
3959 fprintf(outfile, "Failed to study: %s\n", error);
3960 else if (extra != NULL)
3961 {
3962 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3963 if (log_store)
3964 {
3965 size_t jitsize;
3966 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3967 jitsize != 0)
3968 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3969 }
3970 }
3971 }
3972
3973 /* If /K was present, we set up for handling MARK data. */
3974
3975 if (do_mark)
3976 {
3977 if (extra == NULL)
3978 {
3979 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3980 extra->flags = 0;
3981 }
3982 extra->mark = &markptr;
3983 extra->flags |= PCRE_EXTRA_MARK;
3984 }
3985
3986 /* Extract and display information from the compiled data if required. */
3987
3988 SHOW_INFO:
3989
3990 if (do_debug)
3991 {
3992 fprintf(outfile, "------------------------------------------------------------------\n");
3993 PCRE_PRINTINT(re, outfile, debug_lengths);
3994 }
3995
3996 /* We already have the options in get_options (see above) */
3997
3998 if (do_showinfo)
3999 {
4000 unsigned long int all_options;
4001 pcre_uint32 first_char, need_char;
4002 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4003 hascrorlf, maxlookbehind;
4004 int nameentrysize, namecount;
4005 const pcre_uint8 *nametable;
4006
4007 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4008 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4009 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4010 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4011 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4012 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4013 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4014 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4015 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4016 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4017 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4018 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4019 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4020 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4021 != 0)
4022 goto SKIP_DATA;
4023
4024 if (size != regex_gotten_store) fprintf(outfile,
4025 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4026 (int)size, (int)regex_gotten_store);
4027
4028 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4029 if (backrefmax > 0)
4030 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4031
4032 if (namecount > 0)
4033 {
4034 fprintf(outfile, "Named capturing subpatterns:\n");
4035 while (namecount-- > 0)
4036 {
4037 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4038 int length = (int)STRLEN(nametable + imm2_size);
4039 fprintf(outfile, " ");
4040 PCHARSV(nametable, imm2_size, length, outfile);
4041 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4042 #ifdef SUPPORT_PCRE32
4043 if (pcre_mode == PCRE32_MODE)
4044 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4045 #endif
4046 #ifdef SUPPORT_PCRE16
4047 if (pcre_mode == PCRE16_MODE)
4048 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4049 #endif
4050 #ifdef SUPPORT_PCRE8
4051 if (pcre_mode == PCRE8_MODE)
4052 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4053 #endif
4054 nametable += nameentrysize * CHAR_SIZE;
4055 }
4056 }
4057
4058 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4059 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4060
4061 all_options = REAL_PCRE_OPTIONS(re);
4062 if (do_flip) all_options = swap_uint32(all_options);
4063
4064 if (get_options == 0) fprintf(outfile, "No options\n");
4065 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4066 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4067 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4068 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4069 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4070 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4071 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4072 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4073 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4074 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4075 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4076 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4077 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4078 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4079 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4080 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4081 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4082 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4083
4084 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4085
4086 switch (get_options & PCRE_NEWLINE_BITS)
4087 {
4088 case PCRE_NEWLINE_CR:
4089 fprintf(outfile, "Forced newline sequence: CR\n");
4090 break;
4091
4092 case PCRE_NEWLINE_LF:
4093 fprintf(outfile, "Forced newline sequence: LF\n");
4094 break;
4095
4096 case PCRE_NEWLINE_CRLF:
4097 fprintf(outfile, "Forced newline sequence: CRLF\n");
4098 break;
4099
4100 case PCRE_NEWLINE_ANYCRLF:
4101 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4102 break;
4103
4104 case PCRE_NEWLINE_ANY:
4105 fprintf(outfile, "Forced newline sequence: ANY\n");
4106 break;
4107
4108 default:
4109 break;
4110 }
4111
4112 if (first_char_set == 2)
4113 {
4114 fprintf(outfile, "First char at start or follows newline\n");
4115 }
4116 else if (first_char_set == 1)
4117 {
4118 const char *caseless =
4119 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4120 "" : " (caseless)";
4121
4122 if (PRINTOK(first_char))
4123 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4124 else
4125 {
4126 fprintf(outfile, "First char = ");
4127 pchar(first_char, outfile);
4128 fprintf(outfile, "%s\n", caseless);
4129 }
4130 }
4131 else
4132 {
4133 fprintf(outfile, "No first char\n");
4134 }
4135
4136 if (need_char_set == 0)
4137 {
4138 fprintf(outfile, "No need char\n");
4139 }
4140 else
4141 {
4142 const char *caseless =
4143 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4144 "" : " (caseless)";
4145
4146 if (PRINTOK(need_char))
4147 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4148 else
4149 {
4150 fprintf(outfile, "Need char = ");
4151 pchar(need_char, outfile);
4152 fprintf(outfile, "%s\n", caseless);
4153 }
4154 }
4155
4156 if (maxlookbehind > 0)
4157 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4158
4159 /* Don't output study size; at present it is in any case a fixed
4160 value, but it varies, depending on the computer architecture, and
4161 so messes up the test suite. (And with the /F option, it might be
4162 flipped.) If study was forced by an external -s, don't show this
4163 information unless -i or -d was also present. This means that, except
4164 when auto-callouts are involved, the output from runs with and without
4165 -s should be identical. */
4166
4167 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4168 {
4169 if (extra == NULL)
4170 fprintf(outfile, "Study returned NULL\n");
4171 else
4172 {
4173 pcre_uint8 *start_bits = NULL;
4174 int minlength;
4175
4176 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4177 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4178
4179 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4180 {
4181 if (start_bits == NULL)
4182 fprintf(outfile, "No set of starting bytes\n");
4183 else
4184 {
4185 int i;
4186 int c = 24;
4187 fprintf(outfile, "Starting byte set: ");
4188 for (i = 0; i < 256; i++)
4189 {
4190 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4191 {
4192 if (c > 75)
4193 {
4194 fprintf(outfile, "\n ");
4195 c = 2;
4196 }
4197 if (PRINTOK(i) && i != ' ')
4198 {
4199 fprintf(outfile, "%c ", i);
4200 c += 2;
4201 }
4202 else
4203 {
4204 fprintf(outfile, "\\x%02x ", i);
4205 c += 5;
4206 }
4207 }
4208 }
4209 fprintf(outfile, "\n");
4210 }
4211 }
4212 }
4213
4214 /* Show this only if the JIT was set by /S, not by -s. */
4215
4216 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4217 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4218 {
4219 int jit;
4220 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4221 {
4222 if (jit)
4223 fprintf(outfile, "JIT study was successful\n");
4224 else
4225 #ifdef SUPPORT_JIT
4226 fprintf(outfile, "JIT study was not successful\n");
4227 #else
4228 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4229 #endif
4230 }
4231 }
4232 }
4233 }
4234
4235 /* If the '>' option was present, we write out the regex to a file, and
4236 that is all. The first 8 bytes of the file are the regex length and then
4237 the study length, in big-endian order. */
4238
4239 if (to_file != NULL)
4240 {
4241 FILE *f = fopen((char *)to_file, "wb");
4242 if (f == NULL)
4243 {
4244 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4245 }
4246 else
4247 {
4248 pcre_uint8 sbuf[8];
4249
4250 if (do_flip) regexflip(re, extra);
4251 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4252 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4253 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4254 sbuf[3] = (pcre_uint8)((true_size) & 255);
4255 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4256 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4257 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4258 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4259
4260 if (fwrite(sbuf, 1, 8, f) < 8 ||
4261 fwrite(re, 1, true_size, f) < true_size)
4262 {
4263 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4264 }
4265 else
4266 {
4267 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4268
4269 /* If there is study data, write it. */
4270
4271 if (extra != NULL)
4272 {
4273 if (fwrite(extra->study_data, 1, true_study_size, f) <
4274 true_study_size)
4275 {
4276 fprintf(outfile, "Write error on %s: %s\n", to_file,
4277 strerror(errno));
4278 }
4279 else fprintf(outfile, "Study data written to %s\n", to_file);
4280 }
4281 }
4282 fclose(f);
4283 }
4284
4285 new_free(re);
4286 if (extra != NULL)
4287 {
4288 PCRE_FREE_STUDY(extra);
4289 }
4290 if (locale_set)
4291 {
4292 new_free((void *)tables);
4293 setlocale(LC_CTYPE, "C");
4294 locale_set = 0;
4295 }
4296 continue; /* With next regex */
4297 }
4298 } /* End of non-POSIX compile */
4299
4300 /* Read data lines and test them */
4301
4302 for (;;)
4303 {
4304 #ifdef SUPPORT_PCRE8
4305 pcre_uint8 *q8;
4306 #endif
4307 #ifdef SUPPORT_PCRE16
4308 pcre_uint16 *q16;
4309 #endif
4310 #ifdef SUPPORT_PCRE32
4311 pcre_uint32 *q32;
4312 #endif
4313 pcre_uint8 *bptr;
4314 int *use_offsets = offsets;
4315 int use_size_offsets = size_offsets;
4316 int callout_data = 0;
4317 int callout_data_set = 0;
4318 int count;
4319 pcre_uint32 c;
4320 int copystrings = 0;
4321 int find_match_limit = default_find_match_limit;
4322 int getstrings = 0;
4323 int getlist = 0;
4324 int gmatched = 0;
4325 int start_offset = 0;
4326 int start_offset_sign = 1;
4327 int g_notempty = 0;
4328 int use_dfa = 0;
4329
4330 *copynames = 0;
4331 *getnames = 0;
4332
4333 #ifdef SUPPORT_PCRE32
4334 cn32ptr = copynames;
4335 gn32ptr = getnames;
4336 #endif
4337 #ifdef SUPPORT_PCRE16
4338 cn16ptr = copynames16;
4339 gn16ptr = getnames16;
4340 #endif
4341 #ifdef SUPPORT_PCRE8
4342 cn8ptr = copynames8;
4343 gn8ptr = getnames8;
4344 #endif
4345
4346 SET_PCRE_CALLOUT(callout);
4347 first_callout = 1;
4348 last_callout_mark = NULL;
4349 callout_extra = 0;
4350 callout_count = 0;
4351 callout_fail_count = 999999;
4352 callout_fail_id = -1;
4353 show_malloc = 0;
4354 options = 0;
4355
4356 if (extra != NULL) extra->flags &=
4357 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4358
4359 len = 0;
4360 for (;;)
4361 {
4362 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4363 {
4364 if (len > 0) /* Reached EOF without hitting a newline */
4365 {
4366 fprintf(outfile, "\n");
4367 break;
4368 }
4369 done = 1;
4370 goto CONTINUE;
4371 }
4372 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4373 len = (int)strlen((char *)buffer);
4374 if (buffer[len-1] == '\n') break;
4375 }
4376
4377 while (len > 0 && isspace(buffer[len-1])) len--;
4378 buffer[len] = 0;
4379 if (len == 0) break;
4380
4381 p = buffer;
4382 while (isspace(*p)) p++;
4383
4384 #ifndef NOUTF
4385 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4386 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4387 if (use_utf)
4388 {
4389 pcre_uint8 *q;
4390 pcre_uint32 cc;
4391 int n = 1;
4392
4393 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4394 if (n <= 0)
4395 {
4396 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4397 goto NEXT_DATA;
4398 }
4399 }
4400 #endif
4401
4402 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4403 the number of pcre_uchar units that will be needed. */
4404 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4405 {
4406 dbuffer_size *= 2;
4407 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4408 if (dbuffer == NULL)
4409 {
4410 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4411 exit(1);
4412 }
4413 }
4414
4415 #ifdef SUPPORT_PCRE8
4416 q8 = (pcre_uint8 *) dbuffer;
4417 #endif
4418 #ifdef SUPPORT_PCRE16
4419 q16 = (pcre_uint16 *) dbuffer;
4420 #endif
4421 #ifdef SUPPORT_PCRE32
4422 q32 = (pcre_uint32 *) dbuffer;
4423 #endif
4424
4425 while ((c = *p++) != 0)
4426 {
4427 int i = 0;
4428 int n = 0;
4429
4430 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4431 In non-UTF mode, allow the value of the byte to fall through to later,
4432 where values greater than 127 are turned into UTF-8 when running in
4433 16-bit or 32-bit mode. */
4434
4435 if (c != '\\')
4436 {
4437 #ifndef NOUTF
4438 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4439 #endif
4440 }
4441
4442 /* Handle backslash escapes */
4443
4444 else switch ((c = *p++))
4445 {
4446 case 'a': c = 7; break;
4447 case 'b': c = '\b'; break;
4448 case 'e': c = 27; break;
4449 case 'f': c = '\f'; break;
4450 case 'n': c = '\n'; break;
4451 case 'r': c = '\r'; break;
4452 case 't': c = '\t'; break;
4453 case 'v': c = '\v'; break;
4454
4455 case '0': case '1': case '2': case '3':
4456 case '4': case '5': case '6': case '7':
4457 c -= '0';
4458 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4459 c = c * 8 + *p++ - '0';
4460 break;
4461
4462 case 'x':
4463 if (*p == '{')
4464 {
4465 pcre_uint8 *pt = p;
4466 c = 0;
4467
4468 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4469 when isxdigit() is a macro that refers to its argument more than
4470 once. This is banned by the C Standard, but apparently happens in at
4471 least one MacOS environment. */
4472
4473 for (pt++; isxdigit(*pt); pt++)
4474 {
4475 if (++i == 9)
4476 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4477 "using only the first eight.\n");
4478 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4479 }
4480 if (*pt == '}')
4481 {
4482 p = pt + 1;
4483 break;
4484 }
4485 /* Not correct form for \x{...}; fall through */
4486 }
4487
4488 /* \x without {} always defines just one byte in 8-bit mode. This
4489 allows UTF-8 characters to be constructed byte by byte, and also allows
4490 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4491 Otherwise, pass it down to later code so that it can be turned into
4492 UTF-8 when running in 16/32-bit mode. */
4493
4494 c = 0;
4495 while (i++ < 2 && isxdigit(*p))
4496 {
4497 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4498 p++;
4499 }
4500 #if !defined NOUTF && defined SUPPORT_PCRE8
4501 if (use_utf && (pcre_mode == PCRE8_MODE))
4502 {
4503 *q8++ = c;
4504 continue;
4505 }
4506 #endif
4507 break;
4508
4509 case 0: /* \ followed by EOF allows for an empty line */
4510 p--;
4511 continue;
4512
4513 case '>':
4514 if (*p == '-')
4515 {
4516 start_offset_sign = -1;
4517 p++;
4518 }
4519 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4520 start_offset *= start_offset_sign;
4521 continue;
4522
4523 case 'A': /* Option setting */
4524 options |= PCRE_ANCHORED;
4525 continue;
4526
4527 case 'B':
4528 options |= PCRE_NOTBOL;
4529 continue;
4530
4531 case 'C':
4532 if (isdigit(*p)) /* Set copy string */
4533 {
4534 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4535 copystrings |= 1 << n;
4536 }
4537 else if (isalnum(*p))
4538 {
4539 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4540 }
4541 else if (*p == '+')
4542 {
4543 callout_extra = 1;
4544 p++;
4545 }
4546 else if (*p == '-')
4547 {
4548 SET_PCRE_CALLOUT(NULL);
4549 p++;
4550 }
4551 else if (*p == '!')
4552 {
4553 callout_fail_id = 0;
4554 p++;
4555 while(isdigit(*p))
4556 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4557 callout_fail_count = 0;
4558 if (*p == '!')
4559 {
4560 p++;
4561 while(isdigit(*p))
4562 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4563 }
4564 }
4565 else if (*p == '*')
4566 {
4567 int sign = 1;
4568 callout_data = 0;
4569 if (*(++p) == '-') { sign = -1; p++; }
4570 while(isdigit(*p))
4571 callout_data = callout_data * 10 + *p++ - '0';
4572 callout_data *= sign;
4573 callout_data_set = 1;
4574 }
4575 continue;
4576
4577 #if !defined NODFA
4578 case 'D':
4579 #if !defined NOPOSIX
4580 if (posix || do_posix)
4581 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4582 else
4583 #endif
4584 use_dfa = 1;
4585 continue;
4586 #endif
4587
4588 #if !defined NODFA
4589 case 'F':
4590 options |= PCRE_DFA_SHORTEST;
4591 continue;
4592 #endif
4593
4594 case 'G':
4595 if (isdigit(*p))
4596 {
4597 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4598 getstrings |= 1 << n;
4599 }
4600 else if (isalnum(*p))
4601 {
4602 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4603 }
4604 continue;
4605
4606 case 'J':
4607 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4608 if (extra != NULL
4609 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4610 && extra->executable_jit != NULL)
4611 {
4612 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4613 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4614 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4615 }
4616 continue;
4617
4618 case 'L':
4619 getlist = 1;
4620 continue;
4621
4622 case 'M':
4623 find_match_limit = 1;
4624 continue;
4625
4626 case 'N':
4627 if ((options & PCRE_NOTEMPTY) != 0)
4628 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4629 else
4630 options |= PCRE_NOTEMPTY;
4631 continue;
4632
4633 case 'O':
4634 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4635 if (n > size_offsets_max)
4636 {
4637 size_offsets_max = n;
4638 free(offsets);
4639 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4640 if (offsets == NULL)
4641 {
4642 printf("** Failed to get %d bytes of memory for offsets vector\n",
4643 (int)(size_offsets_max * sizeof(int)));
4644 yield = 1;
4645 goto EXIT;
4646 }
4647 }
4648 use_size_offsets = n;
4649 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4650 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4651 continue;
4652
4653 case 'P':
4654 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4655 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4656 continue;
4657
4658 case 'Q':
4659 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4660 if (extra == NULL)
4661 {
4662 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4663 extra->flags = 0;
4664 }
4665 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4666 extra->match_limit_recursion = n;
4667 continue;
4668
4669 case 'q':
4670 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4671 if (extra == NULL)
4672 {
4673 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4674 extra->flags = 0;
4675 }
4676 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4677 extra->match_limit = n;
4678 continue;
4679
4680 #if !defined NODFA
4681 case 'R':
4682 options |= PCRE_DFA_RESTART;
4683 continue;
4684 #endif
4685
4686 case 'S':
4687 show_malloc = 1;
4688 continue;
4689
4690 case 'Y':
4691 options |= PCRE_NO_START_OPTIMIZE;
4692 continue;
4693
4694 case 'Z':
4695 options |= PCRE_NOTEOL;
4696 continue;
4697
4698 case '?':
4699 options |= PCRE_NO_UTF8_CHECK;
4700 continue;
4701
4702 case '<':
4703 {
4704 int x = check_newline(p, outfile);
4705 if (x == 0) goto NEXT_DATA;
4706 options |= x;
4707 while (*p++ != '>');
4708 }
4709 continue;
4710 }
4711
4712 /* We now have a character value in c that may be greater than 255.
4713 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4714 than 127 in UTF mode must have come from \x{...} or octal constructs
4715 because values from \x.. get this far only in non-UTF mode. */
4716
4717 #ifdef SUPPORT_PCRE8
4718 if (pcre_mode == PCRE8_MODE)
4719 {
4720 #ifndef NOUTF
4721 if (use_utf)
4722 {
4723 q8 += ord2utf8(c, q8);
4724 }
4725 else
4726 #endif
4727 {
4728 if (c > 0xffu)
4729 {
4730 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4731 "and UTF-8 mode is not enabled.\n", c);
4732 fprintf(outfile, "** Truncation will probably give the wrong "
4733 "result.\n");
4734 }
4735 *q8++ = c;
4736 }
4737 }
4738 #endif
4739 #ifdef SUPPORT_PCRE16
4740 if (pcre_mode == PCRE16_MODE)
4741 {
4742 #ifndef NOUTF
4743 if (use_utf)
4744 {
4745 if (c > 0x10ffffu)
4746 {
4747 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4748 "cannot be converted to UTF-16\n");
4749 goto NEXT_DATA;
4750 }
4751 else if (c >= 0x10000u)
4752 {
4753 c-= 0x10000u;
4754 *q16++ = 0xD800 | (c >> 10);
4755 *q16++ = 0xDC00 | (c & 0x3ff);
4756 }
4757 else
4758 *q16++ = c;
4759 }
4760 else
4761 #endif
4762 {
4763 if (c > 0xffffu)
4764 {
4765 fprintf(outfile, "** Character value is greater than 0xffff "
4766 "and UTF-16 mode is not enabled.\n");
4767 fprintf(outfile, "** Truncation will probably give the wrong "
4768 "result.\n");
4769 }
4770
4771 *q16++ = c;
4772 }
4773 }
4774 #endif
4775 #ifdef SUPPORT_PCRE32
4776 if (pcre_mode == PCRE32_MODE)
4777 {
4778 *q32++ = c;
4779 }
4780 #endif
4781
4782 }
4783
4784 /* Reached end of subject string */
4785
4786 #ifdef SUPPORT_PCRE8
4787 if (pcre_mode == PCRE8_MODE)
4788 {
4789 *q8 = 0;
4790 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4791 }
4792 #endif
4793 #ifdef SUPPORT_PCRE16
4794 if (pcre_mode == PCRE16_MODE)
4795 {
4796 *q16 = 0;
4797 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4798 }
4799 #endif
4800 #ifdef SUPPORT_PCRE32
4801 if (pcre_mode == PCRE32_MODE)
4802 {
4803 *q32 = 0;
4804 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4805 }
4806 #endif
4807
4808 #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
4809 /* If we're requested to test UTF-32 masking of high bits, change the data
4810 string to have high bits set, unless the string is invalid UTF-32.
4811 Since the JIT doesn't support this yet, only do it when not JITing. */
4812 if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
4813 valid_utf32((pcre_uint32 *)dbuffer, len))
4814 {
4815 for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
4816 *q32 |= ~(pcre_uint32)UTF32_MASK;
4817
4818 /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
4819 options |= PCRE_NO_UTF32_CHECK;
4820 }
4821 #endif
4822
4823 /* Move the data to the end of the buffer so that a read over the end of
4824 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4825 we are using the POSIX interface, we must include the terminating zero. */
4826
4827 bptr = dbuffer;
4828
4829 #if !defined NOPOSIX
4830 if (posix || do_posix)
4831 {
4832 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4833 bptr += dbuffer_size - len - 1;
4834 }
4835 else
4836 #endif
4837 {
4838 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4839 }
4840
4841 if ((all_use_dfa || use_dfa) && find_match_limit)
4842 {
4843 printf("**Match limit not relevant for DFA matching: ignored\n");
4844 find_match_limit = 0;
4845 }
4846
4847 /* Handle matching via the POSIX interface, which does not
4848 support timing or playing with the match limit or callout data. */
4849
4850 #if !defined NOPOSIX
4851 if (posix || do_posix)
4852 {
4853 int rc;
4854 int eflags = 0;
4855 regmatch_t *pmatch = NULL;
4856 if (use_size_offsets > 0)
4857 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4858 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4859 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4860 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4861
4862 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4863
4864 if (rc != 0)
4865 {
4866 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4867 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4868 }
4869 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4870 {
4871 fprintf(outfile, "Matched with REG_NOSUB\n");
4872 }
4873 else
4874 {
4875 size_t i;
4876 for (i = 0; i < (size_t)use_size_offsets; i++)
4877 {
4878 if (pmatch[i].rm_so >= 0)
4879 {
4880 fprintf(outfile, "%2d: ", (int)i);
4881 PCHARSV(dbuffer, pmatch[i].rm_so,
4882 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4883 fprintf(outfile, "\n");
4884 if (do_showcaprest || (i == 0 && do_showrest))
4885 {
4886 fprintf(outfile, "%2d+ ", (int)i);
4887 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4888 outfile);
4889 fprintf(outfile, "\n");
4890 }
4891 }
4892 }
4893 }
4894 free(pmatch);
4895 goto NEXT_DATA;
4896 }
4897
4898 #endif /* !defined NOPOSIX */
4899
4900 /* Handle matching via the native interface - repeats for /g and /G */
4901
4902 /* Ensure that there is a JIT callback if we want to verify that JIT was
4903 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4904
4905 if (verify_jit && jit_stack == NULL && extra != NULL)
4906 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4907
4908 for (;; gmatched++) /* Loop for /g or /G */
4909 {
4910 markptr = NULL;
4911 jit_was_used = FALSE;
4912
4913 if (timeitm > 0)
4914 {
4915 register int i;
4916 clock_t time_taken;
4917 clock_t start_time = clock();
4918
4919 #if !defined NODFA
4920 if (all_use_dfa || use_dfa)
4921 {
4922 if ((options & PCRE_DFA_RESTART) != 0)
4923 {
4924 fprintf(outfile, "Timing DFA restarts is not supported\n");
4925 break;
4926 }
4927 if (dfa_workspace == NULL)
4928 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4929 for (i = 0; i < timeitm; i++)
4930 {
4931 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4932 (options | g_notempty), use_offsets, use_size_offsets,
4933 dfa_workspace, DFA_WS_DIMENSION);
4934 }
4935 }
4936 else
4937 #endif
4938
4939 for (i = 0; i < timeitm; i++)
4940 {
4941 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4942 (options | g_notempty), use_offsets, use_size_offsets);
4943 }
4944 time_taken = clock() - start_time;
4945 fprintf(outfile, "Execute time %.4f milliseconds\n",
4946 (((double)time_taken * 1000.0) / (double)timeitm) /
4947 (double)CLOCKS_PER_SEC);
4948 }
4949
4950 /* If find_match_limit is set, we want to do repeated matches with
4951 varying limits in order to find the minimum value for the match limit and
4952 for the recursion limit. The match limits are relevant only to the normal
4953 running of pcre_exec(), so disable the JIT optimization. This makes it
4954 possible to run the same set of tests with and without JIT externally
4955 requested. */
4956
4957 if (find_match_limit)
4958 {
4959 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4960 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4961 extra->flags = 0;
4962
4963 (void)check_match_limit(re, extra, bptr, len, start_offset,
4964 options|g_notempty, use_offsets, use_size_offsets,
4965 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4966 PCRE_ERROR_MATCHLIMIT, "match()");
4967
4968 count = check_match_limit(re, extra, bptr, len, start_offset,
4969 options|g_notempty, use_offsets, use_size_offsets,
4970 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4971 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4972 }
4973
4974 /* If callout_data is set, use the interface with additional data */
4975
4976 else if (callout_data_set)
4977 {
4978 if (extra == NULL)
4979 {
4980 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4981 extra->flags = 0;
4982 }
4983 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4984 extra->callout_data = &callout_data;
4985 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4986 options | g_notempty, use_offsets, use_size_offsets);
4987 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4988 }
4989
4990 /* The normal case is just to do the match once, with the default
4991 value of match_limit. */
4992
4993 #if !defined NODFA
4994 else if (all_use_dfa || use_dfa)
4995 {
4996 if (dfa_workspace == NULL)
4997 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4998 if (dfa_matched++ == 0)
4999 dfa_workspace[0] = -1; /* To catch bad restart */
5000 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5001 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5002 DFA_WS_DIMENSION);
5003 if (count == 0)
5004 {
5005 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5006 count = use_size_offsets/2;
5007 }
5008 }
5009 #endif
5010
5011 else
5012 {
5013 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5014 options | g_notempty, use_offsets, use_size_offsets);
5015 if (count == 0)
5016 {
5017 fprintf(outfile, "Matched, but too many substrings\n");
5018 count = use_size_offsets/3;
5019 }
5020 }
5021
5022 /* Matched */
5023
5024 if (count >= 0)
5025 {
5026 int i, maxcount;
5027 void *cnptr, *gnptr;
5028
5029 #if !defined NODFA
5030 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5031 #endif
5032 maxcount = use_size_offsets/3;
5033
5034 /* This is a check against a lunatic return value. */
5035
5036 if (count > maxcount)
5037 {
5038 fprintf(outfile,
5039 "** PCRE error: returned count %d is too big for offset size %d\n",
5040 count, use_size_offsets);
5041 count = use_size_offsets/3;
5042 if (do_g || do_G)
5043 {
5044 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5045 do_g = do_G = FALSE; /* Break g/G loop */
5046 }
5047 }
5048
5049 /* do_allcaps requests showing of all captures in the pattern, to check
5050 unset ones at the end. */
5051
5052 if (do_allcaps)
5053 {
5054 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5055 goto SKIP_DATA;
5056 count++; /* Allow for full match */
5057 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5058 }
5059
5060 /* Output the captured substrings */
5061
5062 for (i = 0; i < count * 2; i += 2)
5063 {
5064 if (use_offsets[i] < 0)
5065 {
5066 if (use_offsets[i] != -1)
5067 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5068 use_offsets[i], i);
5069 if (use_offsets[i+1] != -1)
5070 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5071 use_offsets[i+1], i+1);
5072 fprintf(outfile, "%2d: <unset>\n", i/2);
5073 }
5074 else
5075 {
5076 fprintf(outfile, "%2d: ", i/2);
5077 PCHARSV(bptr, use_offsets[i],
5078 use_offsets[i+1] - use_offsets[i], outfile);
5079 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5080 fprintf(outfile, "\n");
5081 if (do_showcaprest || (i == 0 && do_showrest))
5082 {
5083 fprintf(outfile, "%2d+ ", i/2);
5084 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5085 outfile);
5086 fprintf(outfile, "\n");
5087 }
5088 }
5089 }
5090
5091 if (markptr != NULL)
5092 {
5093 fprintf(outfile, "MK: ");
5094 PCHARSV(markptr, 0, -1, outfile);
5095 fprintf(outfile, "\n");
5096 }
5097
5098 for (i = 0; i < 32; i++)
5099 {
5100 if ((copystrings & (1 << i)) != 0)
5101 {
5102 int rc;
5103 char copybuffer[256];
5104 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5105 copybuffer, sizeof(copybuffer));
5106 if (rc < 0)
5107 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5108 else
5109 {
5110 fprintf(outfile, "%2dC ", i);
5111 PCHARSV(copybuffer, 0, rc, outfile);
5112 fprintf(outfile, " (%d)\n", rc);
5113 }
5114 }
5115 }
5116
5117 cnptr = copynames;
5118 for (;;)
5119 {
5120 int rc;
5121 char copybuffer[256];
5122
5123 #ifdef SUPPORT_PCRE32
5124 if (pcre_mode == PCRE32_MODE)
5125 {
5126 if (*(pcre_uint32 *)cnptr == 0) break;
5127 }
5128 #endif
5129 #ifdef SUPPORT_PCRE16
5130 if (pcre_mode == PCRE16_MODE)
5131 {
5132 if (*(pcre_uint16 *)cnptr == 0) break;
5133 }
5134 #endif
5135 #ifdef SUPPORT_PCRE8
5136 if (pcre_mode == PCRE8_MODE)
5137 {
5138 if (*(pcre_uint8 *)cnptr == 0) break;
5139 }
5140 #endif
5141
5142 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5143 cnptr, copybuffer, sizeof(copybuffer));
5144
5145 if (rc < 0)
5146 {
5147 fprintf(outfile, "copy substring ");
5148 PCHARSV(cnptr, 0, -1, outfile);
5149 fprintf(outfile, " failed %d\n", rc);
5150 }
5151 else
5152 {
5153 fprintf(outfile, " C ");
5154 PCHARSV(copybuffer, 0, rc, outfile);
5155 fprintf(outfile, " (%d) ", rc);
5156 PCHARSV(cnptr, 0, -1, outfile);
5157 putc('\n', outfile);
5158 }
5159
5160 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5161 }
5162
5163 for (i = 0; i < 32; i++)
5164 {
5165 if ((getstrings & (1 << i)) != 0)
5166 {
5167 int rc;
5168 const char *substring;
5169 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5170 if (rc < 0)
5171 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5172 else
5173 {
5174 fprintf(outfile, "%2dG ", i);
5175 PCHARSV(substring, 0, rc, outfile);
5176 fprintf(outfile, " (%d)\n", rc);
5177 PCRE_FREE_SUBSTRING(substring);
5178 }
5179 }
5180 }
5181
5182 gnptr = getnames;
5183 for (;;)
5184 {
5185 int rc;
5186 const char *substring;
5187
5188 #ifdef SUPPORT_PCRE32
5189 if (pcre_mode == PCRE32_MODE)
5190 {
5191 if (*(pcre_uint32 *)gnptr == 0) break;
5192 }
5193 #endif
5194 #ifdef SUPPORT_PCRE16
5195 if (pcre_mode == PCRE16_MODE)
5196 {
5197 if (*(pcre_uint16 *)gnptr == 0) break;
5198 }
5199 #endif
5200 #ifdef SUPPORT_PCRE8
5201 if (pcre_mode == PCRE8_MODE)
5202 {
5203 if (*(pcre_uint8 *)gnptr == 0) break;
5204 }
5205 #endif
5206
5207 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5208 gnptr, &substring);
5209 if (rc < 0)
5210 {
5211 fprintf(outfile, "get substring ");
5212 PCHARSV(gnptr, 0, -1, outfile);
5213 fprintf(outfile, " failed %d\n", rc);
5214 }
5215 else
5216 {
5217 fprintf(outfile, " G ");
5218 PCHARSV(substring, 0, rc, outfile);
5219 fprintf(outfile, " (%d) ", rc);
5220 PCHARSV(gnptr, 0, -1, outfile);
5221 PCRE_FREE_SUBSTRING(substring);
5222 putc('\n', outfile);
5223 }
5224
5225 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5226 }
5227
5228 if (getlist)
5229 {
5230 int rc;
5231 const char **stringlist;
5232 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5233 if (rc < 0)
5234 fprintf(outfile, "get substring list failed %d\n", rc);
5235 else
5236 {
5237 for (i = 0; i < count; i++)
5238 {
5239 fprintf(outfile, "%2dL ", i);
5240 PCHARSV(stringlist[i], 0, -1, outfile);
5241 putc('\n', outfile);
5242 }
5243 if (stringlist[i] != NULL)
5244 fprintf(outfile, "string list not terminated by NULL\n");
5245 PCRE_FREE_SUBSTRING_LIST(stringlist);
5246 }
5247 }
5248 }
5249
5250 /* There was a partial match */
5251
5252 else if (count == PCRE_ERROR_PARTIAL)
5253 {
5254 if (markptr == NULL) fprintf(outfile, "Partial match");
5255 else
5256 {
5257 fprintf(outfile, "Partial match, mark=");
5258 PCHARSV(markptr, 0, -1, outfile);
5259 }
5260 if (use_size_offsets > 1)
5261 {
5262 fprintf(outfile, ": ");
5263 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5264 outfile);
5265 }
5266 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5267 fprintf(outfile, "\n");
5268 break; /* Out of the /g loop */
5269 }
5270
5271 /* Failed to match. If this is a /g or /G loop and we previously set
5272 g_notempty after a null match, this is not necessarily the end. We want
5273 to advance the start offset, and continue. We won't be at the end of the
5274 string - that was checked before setting g_notempty.
5275
5276 Complication arises in the case when the newline convention is "any",
5277 "crlf", or "anycrlf". If the previous match was at the end of a line
5278 terminated by CRLF, an advance of one character just passes the \r,
5279 whereas we should prefer the longer newline sequence, as does the code in
5280 pcre_exec(). Fudge the offset value to achieve this. We check for a
5281 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5282 find the default.
5283
5284 Otherwise, in the case of UTF-8 matching, the advance must be one
5285 character, not one byte. */
5286
5287 else
5288 {
5289 if (g_notempty != 0)
5290 {
5291 int onechar = 1;
5292 unsigned int obits = REAL_PCRE_OPTIONS(re);
5293 use_offsets[0] = start_offset;
5294 if ((obits & PCRE_NEWLINE_BITS) == 0)
5295 {
5296 int d;
5297 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5298 /* Note that these values are always the ASCII ones, even in
5299 EBCDIC environments. CR = 13, NL = 10. */
5300 obits = (d == 13)? PCRE_NEWLINE_CR :
5301 (d == 10)? PCRE_NEWLINE_LF :
5302 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5303 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5304 (d == -1)? PCRE_NEWLINE_ANY : 0;
5305 }
5306 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5307 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5308 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5309 &&
5310 start_offset < len - 1 && (
5311 #ifdef SUPPORT_PCRE8
5312 (pcre_mode == PCRE8_MODE &&
5313 bptr[start_offset] == '\r' &&
5314 bptr[start_offset + 1] == '\n') ||
5315 #endif
5316 #ifdef SUPPORT_PCRE16
5317 (pcre_mode == PCRE16_MODE &&
5318 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5319 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5320 #endif
5321 #ifdef SUPPORT_PCRE32
5322 (pcre_mode == PCRE32_MODE &&
5323 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5324 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5325 #endif
5326 0))
5327 onechar++;
5328 else if (use_utf)
5329 {
5330 while (start_offset + onechar < len)
5331 {
5332 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5333 onechar++;
5334 }
5335 }
5336 use_offsets[1] = start_offset + onechar;
5337 }
5338 else
5339 {
5340 switch(count)
5341 {
5342 case PCRE_ERROR_NOMATCH:
5343 if (gmatched == 0)
5344 {
5345 if (markptr == NULL)
5346 {
5347 fprintf(outfile, "No match");
5348 }
5349 else
5350 {
5351 fprintf(outfile, "No match, mark = ");
5352 PCHARSV(markptr, 0, -1, outfile);
5353 }
5354 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5355 putc('\n', outfile);
5356 }
5357 break;
5358
5359 case PCRE_ERROR_BADUTF8:
5360 case PCRE_ERROR_SHORTUTF8:
5361 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5362 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5363 8 * CHAR_SIZE);
5364 if (use_size_offsets >= 2)
5365 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5366 use_offsets[1]);
5367 fprintf(outfile, "\n");
5368 break;
5369
5370 case PCRE_ERROR_BADUTF8_OFFSET:
5371 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5372 8 * CHAR_SIZE);
5373 break;
5374
5375 default:
5376 if (count < 0 &&
5377 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5378 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5379 else
5380 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5381 break;
5382 }
5383
5384 break; /* Out of the /g loop */
5385 }
5386 }
5387
5388 /* If not /g or /G we are done */
5389
5390 if (!do_g && !do_G) break;
5391
5392 /* If we have matched an empty string, first check to see if we are at
5393 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5394 Perl's /g option does. This turns out to be rather cunning. First we set
5395 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5396 same point. If this fails (picked up above) we advance to the next
5397 character. */
5398
5399 g_notempty = 0;
5400
5401 if (use_offsets[0] == use_offsets[1])
5402 {
5403 if (use_offsets[0] == len) break;
5404 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5405 }
5406
5407 /* For /g, update the start offset, leaving the rest alone */
5408
5409 if (do_g) start_offset = use_offsets[1];
5410
5411 /* For /G, update the pointer and length */
5412
5413 else
5414 {
5415 bptr += use_offsets[1] * CHAR_SIZE;
5416 len -= use_offsets[1];
5417 }
5418 } /* End of loop for /g and /G */
5419
5420 NEXT_DATA: continue;
5421 } /* End of loop for data lines */
5422
5423 CONTINUE:
5424
5425 #if !defined NOPOSIX
5426 if (posix || do_posix) regfree(&preg);
5427 #endif
5428
5429 if (re != NULL) new_free(re);
5430 if (extra != NULL)
5431 {
5432 PCRE_FREE_STUDY(extra);
5433 }
5434 if (locale_set)
5435 {
5436 new_free((void *)tables);
5437 setlocale(LC_CTYPE, "C");
5438 locale_set = 0;
5439 }
5440 if (jit_stack != NULL)
5441 {
5442 PCRE_JIT_STACK_FREE(jit_stack);
5443 jit_stack = NULL;
5444 }
5445 }
5446
5447 if (infile == stdin) fprintf(outfile, "\n");
5448
5449 EXIT:
5450
5451 if (infile != NULL && infile != stdin) fclose(infile);
5452 if (outfile != NULL && outfile != stdout) fclose(outfile);
5453
5454 free(buffer);
5455 free(dbuffer);
5456 free(pbuffer);
5457 free(offsets);
5458
5459 #ifdef SUPPORT_PCRE16
5460 if (buffer16 != NULL) free(buffer16);
5461 #endif
5462 #ifdef SUPPORT_PCRE32
5463 if (buffer32 != NULL) free(buffer32);
5464 #endif
5465
5466 #if !defined NODFA
5467 if (dfa_workspace != NULL)
5468 free(dfa_workspace);
5469 #endif
5470
5471 return yield;
5472 }
5473
5474 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5