/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 838 - (show annotations)
Thu Dec 29 18:27:07 2011 UTC (7 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 128058 byte(s)
Add -C newline to pcretest; update ManyConfigTests to use new -C features; add 
some 16-bit tests with link sizes 3 and 4.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r"
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb"
105 #define OUTPUT_MODE "wb"
106 #endif
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. When the locale has been set, PRINTOK consults isprint() only for values in 0..255, because isprint() is undefined for other values (apart from EOF); anything outside that range is treated as not printable. Both macros evaluate their argument more than once. */
154
155 #ifdef EBCDIC
156 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157 #else
158 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159 #endif
160
161 #define PRINTOK(c) (locale_set? ((unsigned int)(c) <= 255u && isprint((int)(c))) : PRINTABLE(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
181 #ifndef SUPPORT_UTF
182 #ifndef NOUTF
183 #define NOUTF
184 #endif
185 #endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
201 #ifdef SUPPORT_PCRE8
202
203 #define PCHARS8(lv, p, offset, len, f) \
204 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205
206 #define PCHARSV8(p, offset, len, f) \
207 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208
209 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210 p = read_capture_name8(p, cn8, re)
211
212 #define SET_PCRE_CALLOUT8(callout) \
213 pcre_callout = callout
214
215 #define STRLEN8(p) ((int)strlen((char *)p))
216
217
218 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
219 re = pcre_compile((char *)pat, options, error, erroffset, tables)
220
221 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
222 namesptr, cbuffer, size) \
223 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
224 (char *)namesptr, cbuffer, size)
225
226 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
227 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
228
229 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
230 offsets, size_offsets, workspace, size_workspace) \
231 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
232 offsets, size_offsets, workspace, size_workspace)
233
234 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
235 offsets, size_offsets) \
236 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
237 offsets, size_offsets)
238
239 #define PCRE_FREE_STUDY8(extra) \
240 pcre_free_study(extra)
241
242 #define PCRE_FREE_SUBSTRING8(substring) \
243 pcre_free_substring(substring)
244
245 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
246 pcre_free_substring_list(listptr)
247
248 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
249 getnamesptr, subsptr) \
250 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
251 (char *)getnamesptr, subsptr)
252
253 #define PCRE_GET_STRINGNUMBER8(n, re, ptr) /* re: compiled pattern; n receives the call's result */ \
254 n = pcre_get_stringnumber(re, (char *)ptr)
255
256 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
257 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
258
259 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
260 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
261
262 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
263 pcre_pattern_to_host_byte_order(re, extra, tables)
264
265 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
266 pcre_printint(re, outfile, debug_lengths)
267
268 #define PCRE_STUDY8(extra, re, options, error) \
269 extra = pcre_study(re, options, error)
270
271 #endif /* SUPPORT_PCRE8 */
272
273 /* -----------------------------------------------------------*/
274
275 #ifdef SUPPORT_PCRE16
276
277 #define PCHARS16(lv, p, offset, len, f) \
278 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
279
280 #define PCHARSV16(p, offset, len, f) \
281 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
282
283 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
284 p = read_capture_name16(p, cn16, re)
285
286 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
287
288 #define SET_PCRE_CALLOUT16(callout) \
289 pcre16_callout = callout
290
291
292 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
293 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
294
295 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
296 namesptr, cbuffer, size) \
297 rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
298 (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)
299
300 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
301 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
302 (PCRE_SCHAR16 *)cbuffer, size/2)
303
304 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
305 offsets, size_offsets, workspace, size_workspace) \
306 count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
307 options, offsets, size_offsets, workspace, size_workspace)
308
309 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
310 offsets, size_offsets) \
311 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
312 options, offsets, size_offsets)
313
314 #define PCRE_FREE_STUDY16(extra) \
315 pcre16_free_study(extra)
316
317 #define PCRE_FREE_SUBSTRING16(substring) \
318 pcre16_free_substring((PCRE_SPTR16)substring)
319
320 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
321 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
322
323 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324 getnamesptr, subsptr) \
325 rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
326 (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
327
328 #define PCRE_GET_STRINGNUMBER16(n, re, ptr) /* re: compiled pattern; n receives the call's result */ \
329 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
330
331 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
332 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
333 (PCRE_SPTR16 *)(void*)subsptr)
334
335 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
336 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
337 (PCRE_SPTR16 **)(void*)listptr)
338
339 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
340 pcre16_pattern_to_host_byte_order(re, extra, tables)
341
342 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
343 pcre16_printint(re, outfile, debug_lengths)
344
345 #define PCRE_STUDY16(extra, re, options, error) \
346 extra = pcre16_study(re, options, error)
347
348 #endif /* SUPPORT_PCRE16 */
349
350
351 /* ----- Both modes are supported; a runtime test is needed, except for
352 pcre_config(), and the JIT stack functions, when it doesn't matter which
353 version is called. ----- */
354
355 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356
357 #define CHAR_SIZE (use_pcre16? 2:1)
358
359 #define PCHARS(lv, p, offset, len, f) \
360 if (use_pcre16) \
361 PCHARS16(lv, p, offset, len, f); \
362 else \
363 PCHARS8(lv, p, offset, len, f)
364
365 #define PCHARSV(p, offset, len, f) \
366 if (use_pcre16) \
367 PCHARSV16(p, offset, len, f); \
368 else \
369 PCHARSV8(p, offset, len, f)
370
371 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
372 if (use_pcre16) \
373 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
374 else \
375 READ_CAPTURE_NAME8(p, cn8, cn16, re)
376
377 #define SET_PCRE_CALLOUT(callout) \
378 if (use_pcre16) \
379 SET_PCRE_CALLOUT16(callout); \
380 else \
381 SET_PCRE_CALLOUT8(callout)
382
383 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
384
385 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
386
387 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
388 if (use_pcre16) \
389 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
390 else \
391 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
392
393 #define PCRE_CONFIG pcre_config
394
395 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
396 namesptr, cbuffer, size) \
397 if (use_pcre16) \
398 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
399 namesptr, cbuffer, size); \
400 else \
401 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
402 namesptr, cbuffer, size)
403
404 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
405 if (use_pcre16) \
406 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
407 else \
408 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
409
410 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
411 offsets, size_offsets, workspace, size_workspace) \
412 if (use_pcre16) \
413 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
414 offsets, size_offsets, workspace, size_workspace); \
415 else \
416 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
417 offsets, size_offsets, workspace, size_workspace)
418
419 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
420 offsets, size_offsets) \
421 if (use_pcre16) \
422 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
423 offsets, size_offsets); \
424 else \
425 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
426 offsets, size_offsets)
427
428 #define PCRE_FREE_STUDY(extra) \
429 if (use_pcre16) \
430 PCRE_FREE_STUDY16(extra); \
431 else \
432 PCRE_FREE_STUDY8(extra)
433
434 #define PCRE_FREE_SUBSTRING(substring) \
435 if (use_pcre16) \
436 PCRE_FREE_SUBSTRING16(substring); \
437 else \
438 PCRE_FREE_SUBSTRING8(substring)
439
440 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
441 if (use_pcre16) \
442 PCRE_FREE_SUBSTRING_LIST16(listptr); \
443 else \
444 PCRE_FREE_SUBSTRING_LIST8(listptr)
445
446 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
447 getnamesptr, subsptr) \
448 if (use_pcre16) \
449 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
450 getnamesptr, subsptr); \
451 else \
452 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr)
454
455 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
456 if (use_pcre16) \
457 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
458 else \
459 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
460
461 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
462 if (use_pcre16) \
463 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
464 else \
465 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
466
467 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
468 if (use_pcre16) \
469 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
470 else \
471 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
472
473 #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
474 #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
475
476 #define PCRE_MAKETABLES \
477 (use_pcre16? pcre16_maketables() : pcre_maketables())
478
479 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
480 if (use_pcre16) \
481 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
482 else \
483 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
484
485 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
486 if (use_pcre16) \
487 PCRE_PRINTINT16(re, outfile, debug_lengths); \
488 else \
489 PCRE_PRINTINT8(re, outfile, debug_lengths)
490
491 #define PCRE_STUDY(extra, re, options, error) \
492 if (use_pcre16) \
493 PCRE_STUDY16(extra, re, options, error); \
494 else \
495 PCRE_STUDY8(extra, re, options, error)
496
497 /* ----- Only 8-bit mode is supported ----- */
498
499 #elif defined SUPPORT_PCRE8
500 #define CHAR_SIZE 1
501 #define PCHARS PCHARS8
502 #define PCHARSV PCHARSV8
503 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
504 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
505 #define STRLEN STRLEN8
506 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
507 #define PCRE_COMPILE PCRE_COMPILE8
508 #define PCRE_CONFIG pcre_config
509 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
511 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
512 #define PCRE_EXEC PCRE_EXEC8
513 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
514 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
515 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
516 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
517 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
518 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
519 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
520 #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
521 #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
522 #define PCRE_MAKETABLES pcre_maketables()
523 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524 #define PCRE_PRINTINT PCRE_PRINTINT8
525 #define PCRE_STUDY PCRE_STUDY8
526
527 /* ----- Only 16-bit mode is supported ----- */
528
529 #else
530 #define CHAR_SIZE 2
531 #define PCHARS PCHARS16
532 #define PCHARSV PCHARSV16
533 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
534 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
535 #define STRLEN STRLEN16
536 #define PCRE_ASSIGN_JIT_STACK pcre16_assign_jit_stack
537 #define PCRE_COMPILE PCRE_COMPILE16
538 #define PCRE_CONFIG pcre16_config
539 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
541 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
542 #define PCRE_EXEC PCRE_EXEC16
543 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
544 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
545 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
546 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
547 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
548 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
549 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
550 #define PCRE_JIT_STACK_ALLOC pcre16_jit_stack_alloc
551 #define PCRE_JIT_STACK_FREE pcre16_jit_stack_free
552 #define PCRE_MAKETABLES pcre16_maketables()
553 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554 #define PCRE_PRINTINT PCRE_PRINTINT16
555 #define PCRE_STUDY PCRE_STUDY16
556 #endif
557
558 /* ----- End of mode-specific function call macros ----- */
559
560
561 /* Other parameters */
562
563 #ifndef CLOCKS_PER_SEC
564 #ifdef CLK_TCK
565 #define CLOCKS_PER_SEC CLK_TCK
566 #else
567 #define CLOCKS_PER_SEC 100
568 #endif
569 #endif
570
571 /* This is the default loop count for timing. */
572
573 #define LOOPREPEAT 500000
574
575 /* Static variables */
576
577 static FILE *outfile;
578 static int log_store = 0;
579 static int callout_count;
580 static int callout_extra;
581 static int callout_fail_count;
582 static int callout_fail_id;
583 static int debug_lengths;
584 static int first_callout;
585 static int locale_set = 0;
586 static int show_malloc;
587 static int use_utf;
588 static size_t gotten_store;
589 static size_t first_gotten_store = 0;
590 static const unsigned char *last_callout_mark = NULL;
591
592 /* The buffers grow automatically if very long input lines are encountered. */
593
594 static int buffer_size = 50000;
595 static pcre_uint8 *buffer = NULL;
596 static pcre_uint8 *dbuffer = NULL;
597 static pcre_uint8 *pbuffer = NULL;
598
599 /* Another buffer is needed for translation to 16-bit character strings. It will
600 be obtained and extended as required. */
601
602 #ifdef SUPPORT_PCRE16
603 static int buffer16_size = 0;
604 static pcre_uint16 *buffer16 = NULL;
605
606 #ifdef SUPPORT_PCRE8
607
608 /* We need the table of operator lengths that is used for 16-bit compiling, in
609 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611 appropriately for the 16-bit world. Just as a safety check, make sure that
612 COMPILE_PCRE16 is *not* set. */
613
614 #ifdef COMPILE_PCRE16
615 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616 #endif
617
618 #if LINK_SIZE == 2
619 #undef LINK_SIZE
620 #define LINK_SIZE 1
621 #elif LINK_SIZE == 3 || LINK_SIZE == 4
622 #undef LINK_SIZE
623 #define LINK_SIZE 2
624 #else
625 #error LINK_SIZE must be either 2, 3, or 4
626 #endif
627
628 #endif /* SUPPORT_PCRE8 */
629
630 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
631 #endif /* SUPPORT_PCRE16 */
632
633 /* If we have 8-bit support, default use_pcre16 to false; if there is also
634 16-bit support, it can be changed by an option. If there is no 8-bit support,
635 there must be 16-bit support, so default it to 1. */
636
637 #ifdef SUPPORT_PCRE8
638 static int use_pcre16 = 0;
639 #else
640 static int use_pcre16 = 1;
641 #endif
642
643 /* Textual explanations for runtime error codes. Judging by the "0 is no error" and NOMATCH slots, entry i presumably corresponds to PCRE error code -i (TODO confirm against pcre.h). A NULL entry marks a code that is either handled by special-case code or never returned by the matching functions, so it has no text here. */
644
645 static const char *errtexts[] = {
646 NULL, /* 0 is no error */
647 NULL, /* NOMATCH is handled specially */
648 "NULL argument passed",
649 "bad option value",
650 "magic number missing",
651 "unknown opcode - pattern overwritten?",
652 "no more memory",
653 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
654 "match limit exceeded",
655 "callout error code",
656 NULL, /* BADUTF8/16 is handled specially */
657 NULL, /* BADUTF8/16 offset is handled specially */
658 NULL, /* PARTIAL is handled specially */
659 "not used - internal error",
660 "internal error - pattern overwritten?",
661 "bad count value",
662 "item unsupported for DFA matching",
663 "backreference condition or recursion test not supported for DFA matching",
664 "match limit not supported for DFA matching",
665 "workspace size exceeded in DFA matching",
666 "too much recursion for DFA matching",
667 "recursion limit exceeded",
668 "not used - internal error",
669 "invalid combination of newline options",
670 "bad offset value",
671 NULL, /* SHORTUTF8/16 is handled specially */
672 "nested recursion at the same subject position",
673 "JIT stack limit reached",
674 "pattern compiled in wrong mode: 8-bit/16-bit error"
675 };
676
677
678 /*************************************************
679 * Alternate character tables *
680 *************************************************/
681
682 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
683 using the default tables of the library. However, the T option can be used to
684 select alternate sets of tables, for different kinds of testing. Note also that
685 the L (locale) option also adjusts the tables. */
686
687 /* This is the set of tables distributed as default with PCRE. It recognizes
688 only ASCII characters. */
689
690 static const pcre_uint8 tables0[] = {
691
692 /* This table is a lower casing table. */
693
694 0, 1, 2, 3, 4, 5, 6, 7,
695 8, 9, 10, 11, 12, 13, 14, 15,
696 16, 17, 18, 19, 20, 21, 22, 23,
697 24, 25, 26, 27, 28, 29, 30, 31,
698 32, 33, 34, 35, 36, 37, 38, 39,
699 40, 41, 42, 43, 44, 45, 46, 47,
700 48, 49, 50, 51, 52, 53, 54, 55,
701 56, 57, 58, 59, 60, 61, 62, 63,
702 64, 97, 98, 99,100,101,102,103,
703 104,105,106,107,108,109,110,111,
704 112,113,114,115,116,117,118,119,
705 120,121,122, 91, 92, 93, 94, 95,
706 96, 97, 98, 99,100,101,102,103,
707 104,105,106,107,108,109,110,111,
708 112,113,114,115,116,117,118,119,
709 120,121,122,123,124,125,126,127,
710 128,129,130,131,132,133,134,135,
711 136,137,138,139,140,141,142,143,
712 144,145,146,147,148,149,150,151,
713 152,153,154,155,156,157,158,159,
714 160,161,162,163,164,165,166,167,
715 168,169,170,171,172,173,174,175,
716 176,177,178,179,180,181,182,183,
717 184,185,186,187,188,189,190,191,
718 192,193,194,195,196,197,198,199,
719 200,201,202,203,204,205,206,207,
720 208,209,210,211,212,213,214,215,
721 216,217,218,219,220,221,222,223,
722 224,225,226,227,228,229,230,231,
723 232,233,234,235,236,237,238,239,
724 240,241,242,243,244,245,246,247,
725 248,249,250,251,252,253,254,255,
726
727 /* This table is a case flipping table. */
728
729 0, 1, 2, 3, 4, 5, 6, 7,
730 8, 9, 10, 11, 12, 13, 14, 15,
731 16, 17, 18, 19, 20, 21, 22, 23,
732 24, 25, 26, 27, 28, 29, 30, 31,
733 32, 33, 34, 35, 36, 37, 38, 39,
734 40, 41, 42, 43, 44, 45, 46, 47,
735 48, 49, 50, 51, 52, 53, 54, 55,
736 56, 57, 58, 59, 60, 61, 62, 63,
737 64, 97, 98, 99,100,101,102,103,
738 104,105,106,107,108,109,110,111,
739 112,113,114,115,116,117,118,119,
740 120,121,122, 91, 92, 93, 94, 95,
741 96, 65, 66, 67, 68, 69, 70, 71,
742 72, 73, 74, 75, 76, 77, 78, 79,
743 80, 81, 82, 83, 84, 85, 86, 87,
744 88, 89, 90,123,124,125,126,127,
745 128,129,130,131,132,133,134,135,
746 136,137,138,139,140,141,142,143,
747 144,145,146,147,148,149,150,151,
748 152,153,154,155,156,157,158,159,
749 160,161,162,163,164,165,166,167,
750 168,169,170,171,172,173,174,175,
751 176,177,178,179,180,181,182,183,
752 184,185,186,187,188,189,190,191,
753 192,193,194,195,196,197,198,199,
754 200,201,202,203,204,205,206,207,
755 208,209,210,211,212,213,214,215,
756 216,217,218,219,220,221,222,223,
757 224,225,226,227,228,229,230,231,
758 232,233,234,235,236,237,238,239,
759 240,241,242,243,244,245,246,247,
760 248,249,250,251,252,253,254,255,
761
762 /* This table contains bit maps for various character classes. Each map is 32
763 bytes long and the bits run from the least significant end of each byte. The
764 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
765 graph, print, punct, and cntrl. Other classes are built from combinations. */
766
767 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
768 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
769 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
770 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
771
772 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
773 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
774 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
775 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
776
777 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
780 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
781
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
784 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
785 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786
787 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
789 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
790 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791
792 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
793 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
794 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
795 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
796
797 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
798 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
799 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
800 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
801
802 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
803 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806
807 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
808 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811
812 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816
817 /* This table identifies various classes of character by individual bits:
818 0x01 white space character
819 0x02 letter
820 0x04 decimal digit
821 0x08 hexadecimal digit
822 0x10 alphanumeric or '_'
823 0x80 regular expression metacharacter or binary zero
824 */
825
826 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
827 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
828 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
830 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
831 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
832 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
833 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
834 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
835 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
836 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
837 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
838 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
839 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
840 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
841 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
842 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
857 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
858
859 /* This is a set of tables that came originally from a Windows user. It seems to
860 be at least an approximation of ISO 8859. In particular, there are characters
861 greater than 128 that are marked as spaces, letters, etc. The array is four consecutive sub-tables (256 + 256 + 320 + 256 bytes), each labelled at its first row below. */
862
863 static const pcre_uint8 tables1[] = {
864 0,1,2,3,4,5,6,7, /* Bytes 0-255: lower-casing map. Identity, except that 65-90 ('A'-'Z') and 192-222 (but not 215) map to the value 32 higher. */
865 8,9,10,11,12,13,14,15,
866 16,17,18,19,20,21,22,23,
867 24,25,26,27,28,29,30,31,
868 32,33,34,35,36,37,38,39,
869 40,41,42,43,44,45,46,47,
870 48,49,50,51,52,53,54,55,
871 56,57,58,59,60,61,62,63,
872 64,97,98,99,100,101,102,103,
873 104,105,106,107,108,109,110,111,
874 112,113,114,115,116,117,118,119,
875 120,121,122,91,92,93,94,95,
876 96,97,98,99,100,101,102,103,
877 104,105,106,107,108,109,110,111,
878 112,113,114,115,116,117,118,119,
879 120,121,122,123,124,125,126,127,
880 128,129,130,131,132,133,134,135,
881 136,137,138,139,140,141,142,143,
882 144,145,146,147,148,149,150,151,
883 152,153,154,155,156,157,158,159,
884 160,161,162,163,164,165,166,167,
885 168,169,170,171,172,173,174,175,
886 176,177,178,179,180,181,182,183,
887 184,185,186,187,188,189,190,191,
888 224,225,226,227,228,229,230,231,
889 232,233,234,235,236,237,238,239,
890 240,241,242,243,244,245,246,215,
891 248,249,250,251,252,253,254,223,
892 224,225,226,227,228,229,230,231,
893 232,233,234,235,236,237,238,239,
894 240,241,242,243,244,245,246,247,
895 248,249,250,251,252,253,254,255,
896 0,1,2,3,4,5,6,7, /* Bytes 256-511: case-flipping map. As above for upper-case values, and in addition 97-122 and 224-254 (but not 247) map to the value 32 lower. */
897 8,9,10,11,12,13,14,15,
898 16,17,18,19,20,21,22,23,
899 24,25,26,27,28,29,30,31,
900 32,33,34,35,36,37,38,39,
901 40,41,42,43,44,45,46,47,
902 48,49,50,51,52,53,54,55,
903 56,57,58,59,60,61,62,63,
904 64,97,98,99,100,101,102,103,
905 104,105,106,107,108,109,110,111,
906 112,113,114,115,116,117,118,119,
907 120,121,122,91,92,93,94,95,
908 96,65,66,67,68,69,70,71,
909 72,73,74,75,76,77,78,79,
910 80,81,82,83,84,85,86,87,
911 88,89,90,123,124,125,126,127,
912 128,129,130,131,132,133,134,135,
913 136,137,138,139,140,141,142,143,
914 144,145,146,147,148,149,150,151,
915 152,153,154,155,156,157,158,159,
916 160,161,162,163,164,165,166,167,
917 168,169,170,171,172,173,174,175,
918 176,177,178,179,180,181,182,183,
919 184,185,186,187,188,189,190,191,
920 224,225,226,227,228,229,230,231,
921 232,233,234,235,236,237,238,239,
922 240,241,242,243,244,245,246,215,
923 248,249,250,251,252,253,254,223,
924 192,193,194,195,196,197,198,199,
925 200,201,202,203,204,205,206,207,
926 208,209,210,211,212,213,214,247,
927 216,217,218,219,220,221,222,255,
928 0,62,0,0,1,0,0,0, /* Bytes 512-831: 320 bytes, i.e. ten 32-byte bitmaps with one bit per character value. Presumably one bitmap per character class - TODO confirm the class order against PCRE's table layout. */
929 0,0,0,0,0,0,0,0,
930 32,0,0,0,1,0,0,0,
931 0,0,0,0,0,0,0,0,
932 0,0,0,0,0,0,255,3,
933 126,0,0,0,126,0,0,0,
934 0,0,0,0,0,0,0,0,
935 0,0,0,0,0,0,0,0,
936 0,0,0,0,0,0,255,3,
937 0,0,0,0,0,0,0,0,
938 0,0,0,0,0,0,12,2,
939 0,0,0,0,0,0,0,0,
940 0,0,0,0,0,0,0,0,
941 254,255,255,7,0,0,0,0,
942 0,0,0,0,0,0,0,0,
943 255,255,127,127,0,0,0,0,
944 0,0,0,0,0,0,0,0,
945 0,0,0,0,254,255,255,7,
946 0,0,0,0,0,4,32,4,
947 0,0,0,128,255,255,127,255,
948 0,0,0,0,0,0,255,3,
949 254,255,255,135,254,255,255,7,
950 0,0,0,0,0,4,44,6,
951 255,255,127,255,255,255,127,255,
952 0,0,0,0,254,255,255,255,
953 255,255,255,255,255,255,255,127,
954 0,0,0,0,254,255,255,255,
955 255,255,255,255,255,255,255,255,
956 0,2,0,0,255,255,255,255,
957 255,255,255,255,255,255,255,127,
958 0,0,0,0,255,255,255,255,
959 255,255,255,255,255,255,255,255,
960 0,0,0,0,254,255,0,252,
961 1,0,0,248,1,0,0,120,
962 0,0,0,0,254,255,255,255,
963 0,0,128,0,0,0,128,0,
964 255,255,255,255,0,0,0,0,
965 0,0,0,0,0,0,0,128,
966 255,255,255,255,0,0,0,0,
967 0,0,0,0,0,0,0,0,
968 128,0,0,0,0,0,0,0, /* Bytes 832-1087: one flag byte per character value, written in decimal. The values are consistent with the bit meanings listed in the comment inside the preceding table (1 space, 2 letter, 4 digit, 8 hex digit, 16 alphanumeric or '_', 128 metacharacter or zero); unlike that table, some values above 127 are flagged here. */
969 0,1,1,0,1,1,0,0,
970 0,0,0,0,0,0,0,0,
971 0,0,0,0,0,0,0,0,
972 1,0,0,0,128,0,0,0,
973 128,128,128,128,0,0,128,0,
974 28,28,28,28,28,28,28,28,
975 28,28,0,0,0,0,0,128,
976 0,26,26,26,26,26,26,18,
977 18,18,18,18,18,18,18,18,
978 18,18,18,18,18,18,18,18,
979 18,18,18,128,128,0,128,16,
980 0,26,26,26,26,26,26,18,
981 18,18,18,18,18,18,18,18,
982 18,18,18,18,18,18,18,18,
983 18,18,18,128,128,0,0,0,
984 0,0,0,0,0,1,0,0,
985 0,0,0,0,0,0,0,0,
986 0,0,0,0,0,0,0,0,
987 0,0,0,0,0,0,0,0,
988 1,0,0,0,0,0,0,0,
989 0,0,18,0,0,0,0,0,
990 0,0,20,20,0,18,0,0,
991 0,20,18,0,0,0,0,0,
992 18,18,18,18,18,18,18,18,
993 18,18,18,18,18,18,18,18,
994 18,18,18,18,18,18,18,0,
995 18,18,18,18,18,18,18,18,
996 18,18,18,18,18,18,18,18,
997 18,18,18,18,18,18,18,18,
998 18,18,18,18,18,18,18,0,
999 18,18,18,18,18,18,18,18
1000 };
1001
1002
1003
1004
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is one example) but
still export the traditional sys_nerr/sys_errlist pair. An error number that
lies outside the system's message table yields a fixed "unknown" string.

Argument:   n    the error number
Returns:    the corresponding message text
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif /* HAVE_STRERROR */
1024
1025
1026 /*************************************************
1027 * JIT memory callback *
1028 *************************************************/
1029
1030 static pcre_jit_stack* jit_callback(void *arg)
1031 {
1032 return (pcre_jit_stack *)arg;
1033 }
1034
1035
1036 #if !defined NOUTF || defined SUPPORT_PCRE16
1037 /*************************************************
1038 * Convert UTF-8 string to value *
1039 *************************************************/
1040
1041 /* This function takes one or more bytes that represents a UTF-8 character,
1042 and returns the value of the character.
1043
1044 Argument:
1045 utf8bytes a pointer to the byte vector
1046 vptr a pointer to an int to receive the value
1047
1048 Returns: > 0 => the number of bytes consumed
1049 -6 to 0 => malformed UTF-8 character at offset = (-return)
1050 */
1051
1052 static int
1053 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1054 {
1055 int c = *utf8bytes++;
1056 int d = c;
1057 int i, j, s;
1058
1059 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1060 {
1061 if ((d & 0x80) == 0) break;
1062 d <<= 1;
1063 }
1064
1065 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1066 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1067
1068 /* i now has a value in the range 1-5 */
1069
1070 s = 6*i;
1071 d = (c & utf8_table3[i]) << s;
1072
1073 for (j = 0; j < i; j++)
1074 {
1075 c = *utf8bytes++;
1076 if ((c & 0xc0) != 0x80) return -(j+1);
1077 s -= 6;
1078 d |= (c & 0x3f) << s;
1079 }
1080
1081 /* Check that encoding was the correct unique one */
1082
1083 for (j = 0; j < utf8_table1_size; j++)
1084 if (d <= utf8_table1[j]) break;
1085 if (j != i) return -(i+1);
1086
1087 /* Valid value */
1088
1089 *vptr = d;
1090 return i+1;
1091 }
1092 #endif /* NOUTF || SUPPORT_PCRE16 */
1093
1094
1095
1096 #if !defined NOUTF || defined SUPPORT_PCRE16
1097 /*************************************************
1098 * Convert character value to UTF-8 *
1099 *************************************************/
1100
1101 /* This function takes an integer value in the range 0 - 0x7fffffff
1102 and encodes it as a UTF-8 character in 0 to 6 bytes.
1103
1104 Arguments:
1105 cvalue the character value
1106 utf8bytes pointer to buffer for result - at least 6 bytes long
1107
1108 Returns: number of characters placed in the buffer
1109 */
1110
1111 static int
1112 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1113 {
1114 register int i, j;
1115 for (i = 0; i < utf8_table1_size; i++)
1116 if (cvalue <= utf8_table1[i]) break;
1117 utf8bytes += i;
1118 for (j = i; j > 0; j--)
1119 {
1120 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1121 cvalue >>= 6;
1122 }
1123 *utf8bytes = utf8_table2[i] | cvalue;
1124 return i + 1;
1125 }
1126 #endif /* NOUTF || SUPPORT_PCRE16 */
1127
1128
1129
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is reallocated here whenever
it is smaller than 2*len + 2 bytes. In non-UTF mode a 16-bit string needs
exactly double the 8-bit size; a UTF-8 string never needs more than double
when converted to UTF-16.

Surrogate values are deliberately not faulted, so that invalid UTF-16 strings
can be constructed in order to test that they are correctly diagnosed.

Patterns are either plain ASCII or UTF-8; data lines are always treated as
UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* Decode the input one UTF-8 character at a time. */
  int c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is only allowed in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else
      {
      *out++ = c;
      }
    }
  }
else
  {
  /* A non-UTF pattern: widen each byte to one 16-bit unit. */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1208
1209
1210 /*************************************************
1211 * Read or extend an input line *
1212 *************************************************/
1213
1214 /* Input lines are read into buffer, but both patterns and data lines can be
1215 continued over multiple input lines. In addition, if the buffer fills up, we
1216 want to automatically expand it so as to be able to handle extremely large
1217 lines that are needed for certain stress tests. When the input buffer is
1218 expanded, the other two buffers must also be expanded likewise, and the
1219 contents of pbuffer, which are a copy of the input for callouts, must be
1220 preserved (for when expansion happens for a data line). This is not the most
1221 optimal way of handling this, but hey, this is just a test program!
1222
1223 Arguments:
1224 f the file to read
1225 start where in buffer to start (this *must* be within buffer)
1226 prompt for stdin or readline()
1227
1228 Returns: pointer to the start of new data
1229 could be a copy of start, or could be moved
1230 NULL if no data read and EOF reached
1231 */
1232
1233 static pcre_uint8 *
1234 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1235 {
1236 pcre_uint8 *here = start;
1237
1238 for (;;)
1239 {
1240 int rlen = (int)(buffer_size - (here - buffer));
1241
1242 if (rlen > 1000)
1243 {
1244 int dlen;
1245
1246 /* If libreadline support is required, use readline() to read a line if the
1247 input is a terminal. Note that readline() removes the trailing newline, so
1248 we must put it back again, to be compatible with fgets(). */
1249
1250 #ifdef SUPPORT_LIBREADLINE
1251 if (isatty(fileno(f)))
1252 {
1253 size_t len;
1254 char *s = readline(prompt);
1255 if (s == NULL) return (here == start)? NULL : start;
1256 len = strlen(s);
1257 if (len > 0) add_history(s);
1258 if (len > rlen - 1) len = rlen - 1;
1259 memcpy(here, s, len);
1260 here[len] = '\n';
1261 here[len+1] = 0;
1262 free(s);
1263 }
1264 else
1265 #endif
1266
1267 /* Read the next line by normal means, prompting if the file is stdin. */
1268
1269 {
1270 if (f == stdin) printf("%s", prompt);
1271 if (fgets((char *)here, rlen, f) == NULL)
1272 return (here == start)? NULL : start;
1273 }
1274
1275 dlen = (int)strlen((char *)here);
1276 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1277 here += dlen;
1278 }
1279
1280 else
1281 {
1282 int new_buffer_size = 2*buffer_size;
1283 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1284 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1285 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1286
1287 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1288 {
1289 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1290 exit(1);
1291 }
1292
1293 memcpy(new_buffer, buffer, buffer_size);
1294 memcpy(new_pbuffer, pbuffer, buffer_size);
1295
1296 buffer_size = new_buffer_size;
1297
1298 start = new_buffer + (start - buffer);
1299 here = new_buffer + (here - buffer);
1300
1301 free(buffer);
1302 free(dbuffer);
1303 free(pbuffer);
1304
1305 buffer = new_buffer;
1306 dbuffer = new_dbuffer;
1307 pbuffer = new_pbuffer;
1308 }
1309 }
1310
1311 return NULL; /* Control never gets here */
1312 }
1313
1314
1315
/*************************************************
*             Read number from string            *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the job is done by hand. Leading white
space is skipped and then a run of decimal digits is accumulated; there is no
sign handling and no overflow check, which is adequate for unpicking
arguments.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value as an int; zero if there are no digits
*/

static int
get_value(pcre_uint8 *str, pcre_uint8 **endptr)
{
int total = 0;
pcre_uint8 *cursor = str;

for (; *cursor != 0 && isspace(*cursor); cursor++) ;
for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
1340
1341
1342
1343 /*************************************************
1344 * Print one character *
1345 *************************************************/
1346
1347 /* Print a single character either literally, or as a hex escape. */
1348
1349 static int pchar(int c, FILE *f)
1350 {
1351 if (PRINTOK(c))
1352 {
1353 if (f != NULL) fprintf(f, "%c", c);
1354 return 1;
1355 }
1356
1357 if (c < 0x100)
1358 {
1359 if (use_utf)
1360 {
1361 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1362 return 6;
1363 }
1364 else
1365 {
1366 if (f != NULL) fprintf(f, "\\x%02x", c);
1367 return 4;
1368 }
1369 }
1370
1371 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1372 return (c <= 0x000000ff)? 6 :
1373 (c <= 0x00000fff)? 7 :
1374 (c <= 0x0000ffff)? 8 :
1375 (c <= 0x000fffff)? 9 : 10;
1376 }
1377
1378
1379
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. A byte that does not start a valid
UTF-8 character, or whose character would extend beyond the given length, is
output as a single byte value.

Arguments:
  p          the string
  length     its length in bytes, or negative for a zero-terminated string
  f          the output file, or NULL just to count without printing

Returns:     the number of characters printed (or that would be printed)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;

if (length < 0) length = strlen((char *)p);

while (length > 0)
  {
  int used = 1;     /* bytes taken from the string this time round */
  int rc = 0;

#if !defined NOUTF
  if (use_utf) rc = utf82ord(p, &c);
#endif

  /* Accept a decoded character only if it is valid and mustn't run over the
  end; otherwise fall back to the single byte. */

  if (rc > 0 && rc <= length) used = rc; else c = *p;

  p += used;
  length -= used;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1418
1419
1420
1421 #ifdef SUPPORT_PCRE16
1422 /*************************************************
1423 * Find length of 0-terminated 16-bit string *
1424 *************************************************/
1425
/* Count the 16-bit units that precede the terminating zero unit. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
1432 #endif /* SUPPORT_PCRE16 */
1433
1434
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode: a high surrogate (0xD800-0xDBFF)
that is followed, within the given length, by a low surrogate (0xDC00-0xDFFF)
is combined with it into one character. Anything else is output unit by unit.

Arguments:
  p          the string
  length     its length in 16-bit units, or negative if zero-terminated
  f          the output file, or NULL just to count without printing

Returns:     the number of characters printed (or that would be printed)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range is inclusive of 0xDFFF. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif /* SUPPORT_PCRE16 */
1471
1472
1473
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters into the name list, follow it with
two zero bytes, and report on outfile if the compiled pattern has no group of
that name. The name is stored whether or not it is known.

Arguments:
  p          points to the name in the input text
  pp         points to the current position in the name list; updated to
               point at the second of the two zeros
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif /* SUPPORT_PCRE8 */
1497
1498
1499
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit; each alphanumeric byte is widened
to one 16-bit unit as it is copied into the name list. The name is followed
by two zero units, and a message is written to outfile if the compiled
pattern has no group of that name.

Arguments:
  p          points to the name in the (8-bit) input text
  pp         points to the current position in the 16-bit name list; updated
               to point at the second of the two zeros
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber(re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif /* SUPPORT_PCRE16 */
1524
1525
1526
1527 /*************************************************
1528 * Callout function *
1529 *************************************************/
1530
1531 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1532 the match. Yield zero unless more callouts than the fail count, or the callout
1533 data is not zero. */
1534
1535 static int callout(pcre_callout_block *cb)
1536 {
1537 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1538 int i, pre_start, post_start, subject_length;
1539
1540 if (callout_extra)
1541 {
1542 fprintf(f, "Callout %d: last capture = %d\n",
1543 cb->callout_number, cb->capture_last);
1544
1545 for (i = 0; i < cb->capture_top * 2; i += 2)
1546 {
1547 if (cb->offset_vector[i] < 0)
1548 fprintf(f, "%2d: <unset>\n", i/2);
1549 else
1550 {
1551 fprintf(f, "%2d: ", i/2);
1552 PCHARSV(cb->subject, cb->offset_vector[i],
1553 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1554 fprintf(f, "\n");
1555 }
1556 }
1557 }
1558
1559 /* Re-print the subject in canonical form, the first time or if giving full
1560 datails. On subsequent calls in the same match, we use pchars just to find the
1561 printed lengths of the substrings. */
1562
1563 if (f != NULL) fprintf(f, "--->");
1564
1565 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1566 PCHARS(post_start, cb->subject, cb->start_match,
1567 cb->current_position - cb->start_match, f);
1568
1569 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1570
1571 PCHARSV(cb->subject, cb->current_position,
1572 cb->subject_length - cb->current_position, f);
1573
1574 if (f != NULL) fprintf(f, "\n");
1575
1576 /* Always print appropriate indicators, with callout number if not already
1577 shown. For automatic callouts, show the pattern offset. */
1578
1579 if (cb->callout_number == 255)
1580 {
1581 fprintf(outfile, "%+3d ", cb->pattern_position);
1582 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1583 }
1584 else
1585 {
1586 if (callout_extra) fprintf(outfile, " ");
1587 else fprintf(outfile, "%3d ", cb->callout_number);
1588 }
1589
1590 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1591 fprintf(outfile, "^");
1592
1593 if (post_start > 0)
1594 {
1595 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1596 fprintf(outfile, "^");
1597 }
1598
1599 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1600 fprintf(outfile, " ");
1601
1602 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1603 pbuffer + cb->pattern_position);
1604
1605 fprintf(outfile, "\n");
1606 first_callout = 0;
1607
1608 if (cb->mark != last_callout_mark)
1609 {
1610 if (cb->mark == NULL)
1611 fprintf(outfile, "Latest Mark: <unset>\n");
1612 else
1613 {
1614 fprintf(outfile, "Latest Mark: ");
1615 PCHARSV(cb->mark, 0, -1, outfile);
1616 putc('\n', outfile);
1617 }
1618 last_callout_mark = cb->mark;
1619 }
1620
1621 if (cb->callout_data != NULL)
1622 {
1623 int callout_data = *((int *)(cb->callout_data));
1624 if (callout_data != 0)
1625 {
1626 fprintf(outfile, "Callout data = %d\n", callout_data);
1627 return callout_data;
1628 }
1629 }
1630
1631 return (cb->callout_number != callout_fail_id)? 0 :
1632 (++callout_count >= callout_fail_count)? 1 : 0;
1633 }
1634
1635
1636 /*************************************************
1637 * Local malloc functions *
1638 *************************************************/
1639
1640 /* Alternative malloc function, to test functionality and save the size of a
1641 compiled re, which is the first store request that pcre_compile() makes. The
1642 show_malloc variable is set only during matching. */
1643
1644 static void *new_malloc(size_t size)
1645 {
1646 void *block = malloc(size);
1647 gotten_store = size;
1648 if (first_gotten_store == 0) first_gotten_store = size;
1649 if (show_malloc)
1650 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1651 return block;
1652 }
1653
1654 static void new_free(void *block)
1655 {
1656 if (show_malloc)
1657 fprintf(outfile, "free %p\n", block);
1658 free(block);
1659 }
1660
1661 /* For recursion malloc/free, to test stacking calls */
1662
1663 static void *stack_malloc(size_t size)
1664 {
1665 void *block = malloc(size);
1666 if (show_malloc)
1667 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1668 return block;
1669 }
1670
1671 static void stack_free(void *block)
1672 {
1673 if (show_malloc)
1674 fprintf(outfile, "stack_free %p\n", block);
1675 free(block);
1676 }
1677
1678
1679 /*************************************************
1680 * Call pcre_fullinfo() *
1681 *************************************************/
1682
1683 /* Get one piece of information from the pcre_fullinfo() function. When only
1684 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1685 value, but the code is defensive.
1686
1687 Arguments:
1688 re compiled regex
1689 study study data
1690 option PCRE_INFO_xxx option
1691 ptr where to put the data
1692
1693 Returns: 0 when OK, < 0 on error
1694 */
1695
1696 static int
1697 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1698 {
1699 int rc;
1700
1701 if (use_pcre16)
1702 #ifdef SUPPORT_PCRE16
1703 rc = pcre16_fullinfo(re, study, option, ptr);
1704 #else
1705 rc = PCRE_ERROR_BADMODE;
1706 #endif
1707 else
1708 #ifdef SUPPORT_PCRE8
1709 rc = pcre_fullinfo(re, study, option, ptr);
1710 #else
1711 rc = PCRE_ERROR_BADMODE;
1712 #endif
1713
1714 if (rc < 0)
1715 {
1716 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1717 use_pcre16? "16" : "", option);
1718 if (rc == PCRE_ERROR_BADMODE)
1719 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1720 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1721 }
1722
1723 return rc;
1724 }
1725
1726
1727
1728 /*************************************************
1729 * Swap byte functions *
1730 *************************************************/
1731
1732 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1733 value, respectively.
1734
1735 Arguments:
1736 value any number
1737
1738 Returns: the byte swapped value
1739 */
1740
/* Reverse the order of the four low-order bytes of a pcre_uint32. */

static pcre_uint32
swap_uint32(pcre_uint32 value)
{
pcre_uint32 byte0 = value & 0xff;
pcre_uint32 byte1 = (value >> 8) & 0xff;
pcre_uint32 byte2 = (value >> 16) & 0xff;
pcre_uint32 rest  = value >> 24;

return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | rest;
}
1749
/* Exchange the two bytes of a pcre_uint16. */

static pcre_uint16
swap_uint16(pcre_uint16 value)
{
pcre_uint16 upper = value >> 8;
pcre_uint16 lower = value << 8;   /* bits shifted out at the top are dropped */

return upper | lower;
}
1755
1756
1757
1758 /*************************************************
1759 * Flip bytes in a compiled pattern *
1760 *************************************************/
1761
1762 /* This function is called if the 'F' option was present on a pattern that is
1763 to be written to a file. We flip the bytes of all the integer fields in the
1764 regex data block and the study block. In 16-bit mode this also flips relevant
1765 bytes in the pattern itself. This is to make it possible to test PCRE's
1766 ability to reload byte-flipped patterns, e.g. those compiled on a different
1767 architecture. */
1768
1769 static void
1770 regexflip(pcre *ere, pcre_extra *extra)
1771 {
1772 real_pcre *re = (real_pcre *)ere;
1773 #ifdef SUPPORT_PCRE16
1774 int op;
1775 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1776 int length = re->name_count * re->name_entry_size;
1777 #ifdef SUPPORT_UTF
1778 BOOL utf = (re->options & PCRE_UTF16) != 0;
1779 BOOL utf16_char = FALSE;
1780 #endif /* SUPPORT_UTF */
1781 #endif /* SUPPORT_PCRE16 */
1782
1783 /* Always flip the bytes in the main data block and study blocks. */
1784
1785 re->magic_number = REVERSED_MAGIC_NUMBER;
1786 re->size = swap_uint32(re->size);
1787 re->options = swap_uint32(re->options);
1788 re->flags = swap_uint16(re->flags);
1789 re->top_bracket = swap_uint16(re->top_bracket);
1790 re->top_backref = swap_uint16(re->top_backref);
1791 re->first_char = swap_uint16(re->first_char);
1792 re->req_char = swap_uint16(re->req_char);
1793 re->name_table_offset = swap_uint16(re->name_table_offset);
1794 re->name_entry_size = swap_uint16(re->name_entry_size);
1795 re->name_count = swap_uint16(re->name_count);
1796
1797 if (extra != NULL)
1798 {
1799 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1800 rsd->size = swap_uint32(rsd->size);
1801 rsd->flags = swap_uint32(rsd->flags);
1802 rsd->minlength = swap_uint32(rsd->minlength);
1803 }
1804
1805 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1806 in the name table, if present, and then in the pattern itself. */
1807
1808 #ifdef SUPPORT_PCRE16
1809 if (!use_pcre16) return;
1810
1811 while(TRUE)
1812 {
1813 /* Swap previous characters. */
1814 while (length-- > 0)
1815 {
1816 *ptr = swap_uint16(*ptr);
1817 ptr++;
1818 }
1819 #ifdef SUPPORT_UTF
1820 if (utf16_char)
1821 {
1822 if ((ptr[-1] & 0xfc00) == 0xd800)
1823 {
1824 /* We know that there is only one extra character in UTF-16. */
1825 *ptr = swap_uint16(*ptr);
1826 ptr++;
1827 }
1828 }
1829 utf16_char = FALSE;
1830 #endif /* SUPPORT_UTF */
1831
1832 /* Get next opcode. */
1833
1834 length = 0;
1835 op = *ptr;
1836 *ptr++ = swap_uint16(op);
1837
1838 switch (op)
1839 {
1840 case OP_END:
1841 return;
1842
1843 #ifdef SUPPORT_UTF
1844 case OP_CHAR:
1845 case OP_CHARI:
1846 case OP_NOT:
1847 case OP_NOTI:
1848 case OP_STAR:
1849 case OP_MINSTAR:
1850 case OP_PLUS:
1851 case OP_MINPLUS:
1852 case OP_QUERY:
1853 case OP_MINQUERY:
1854 case OP_UPTO:
1855 case OP_MINUPTO:
1856 case OP_EXACT:
1857 case OP_POSSTAR:
1858 case OP_POSPLUS:
1859 case OP_POSQUERY:
1860 case OP_POSUPTO:
1861 case OP_STARI:
1862 case OP_MINSTARI:
1863 case OP_PLUSI:
1864 case OP_MINPLUSI:
1865 case OP_QUERYI:
1866 case OP_MINQUERYI:
1867 case OP_UPTOI:
1868 case OP_MINUPTOI:
1869 case OP_EXACTI:
1870 case OP_POSSTARI:
1871 case OP_POSPLUSI:
1872 case OP_POSQUERYI:
1873 case OP_POSUPTOI:
1874 case OP_NOTSTAR:
1875 case OP_NOTMINSTAR:
1876 case OP_NOTPLUS:
1877 case OP_NOTMINPLUS:
1878 case OP_NOTQUERY:
1879 case OP_NOTMINQUERY:
1880 case OP_NOTUPTO:
1881 case OP_NOTMINUPTO:
1882 case OP_NOTEXACT:
1883 case OP_NOTPOSSTAR:
1884 case OP_NOTPOSPLUS:
1885 case OP_NOTPOSQUERY:
1886 case OP_NOTPOSUPTO:
1887 case OP_NOTSTARI:
1888 case OP_NOTMINSTARI:
1889 case OP_NOTPLUSI:
1890 case OP_NOTMINPLUSI:
1891 case OP_NOTQUERYI:
1892 case OP_NOTMINQUERYI:
1893 case OP_NOTUPTOI:
1894 case OP_NOTMINUPTOI:
1895 case OP_NOTEXACTI:
1896 case OP_NOTPOSSTARI:
1897 case OP_NOTPOSPLUSI:
1898 case OP_NOTPOSQUERYI:
1899 case OP_NOTPOSUPTOI:
1900 if (utf) utf16_char = TRUE;
1901 #endif
1902 /* Fall through. */
1903
1904 default:
1905 length = OP_lengths16[op] - 1;
1906 break;
1907
1908 case OP_CLASS:
1909 case OP_NCLASS:
1910 /* Skip the character bit map. */
1911 ptr += 32/sizeof(pcre_uint16);
1912 length = 0;
1913 break;
1914
1915 case OP_XCLASS:
1916 /* Reverse the size of the XCLASS instance. */
1917 ptr++;
1918 *ptr = swap_uint16(*ptr);
1919 if (LINK_SIZE > 1)
1920 {
1921 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1922 ptr++;
1923 *ptr = swap_uint16(*ptr);
1924 }
1925 ptr++;
1926
1927 if (LINK_SIZE > 1)
1928 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1929 (1 + LINK_SIZE + 1);
1930 else
1931 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1932
1933 op = *ptr;
1934 *ptr = swap_uint16(op);
1935 if ((op & XCL_MAP) != 0)
1936 {
1937 /* Skip the character bit map. */
1938 ptr += 32/sizeof(pcre_uint16);
1939 length -= 32/sizeof(pcre_uint16);
1940 }
1941 break;
1942 }
1943 }
1944 /* Control should never reach here in 16 bit mode. */
1945 #endif /* SUPPORT_PCRE16 */
1946 }
1947
1948
1949
1950 /*************************************************
1951 * Check match or recursion limit *
1952 *************************************************/
1953
1954 static int
1955 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1956 int start_offset, int options, int *use_offsets, int use_size_offsets,
1957 int flag, unsigned long int *limit, int errnumber, const char *msg)
1958 {
1959 int count;
1960 int min = 0;
1961 int mid = 64;
1962 int max = -1;
1963
1964 extra->flags |= flag;
1965
1966 for (;;)
1967 {
1968 *limit = mid;
1969
1970 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1971 use_offsets, use_size_offsets);
1972
1973 if (count == errnumber)
1974 {
1975 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1976 min = mid;
1977 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1978 }
1979
1980 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1981 count == PCRE_ERROR_PARTIAL)
1982 {
1983 if (mid == min + 1)
1984 {
1985 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1986 break;
1987 }
1988 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1989 max = mid;
1990 mid = (min + mid)/2;
1991 }
1992 else break; /* Some other error */
1993 }
1994
1995 extra->flags &= ~flag;
1996 return count;
1997 }
1998
1999
2000
2001 /*************************************************
2002 * Case-independent strncmp() function *
2003 *************************************************/
2004
2005 /*
2006 Arguments:
2007 s first string
2008 t second string
2009 n number of characters to compare
2010
2011 Returns: < 0, = 0, or > 0, according to the comparison
2012 */
2013
2014 static int
2015 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2016 {
2017 while (n--)
2018 {
2019 int c = tolower(*s++) - tolower(*t++);
2020 if (c) return c;
2021 }
2022 return 0;
2023 }
2024
2025
2026
2027 /*************************************************
2028 * Check newline indicator *
2029 *************************************************/
2030
2031 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2032 a message and return 0 if there is no match.
2033
2034 Arguments:
2035 p points after the leading '<'
2036 f file for error message
2037
2038 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2039 */
2040
2041 static int
2042 check_newline(pcre_uint8 *p, FILE *f)
2043 {
2044 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2045 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2046 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2047 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2048 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2049 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2050 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2051 fprintf(f, "Unknown newline type at: <%s\n", p);
2052 return 0;
2053 }
2054
2055
2056
2057 /*************************************************
2058 * Usage function *
2059 *************************************************/
2060
/* Write the command-line help text to stdout. Options that depend on how the
program was built (16-bit support, DFA matching, the POSIX interface) are
listed only when the corresponding feature is compiled in, and the readline
sentence reflects whether SUPPORT_LIBREADLINE is defined. The list of "-C arg"
values includes "newline", which the option scanner in main() accepts. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf(" -16 use 16-bit interface\n");
#endif
printf(" -b show compiled code (bytecode)\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2106
2107
2108
2109 /*************************************************
2110 * Main Program *
2111 *************************************************/
2112
2113 /* Read lines from named file or stdin and write to named file or stdout; lines
2114 consist of a regular expression, in delimiters and optionally followed by
2115 options, followed by a set of test data, terminated by an empty line. */
2116
2117 int main(int argc, char **argv)
2118 {
2119 FILE *infile = stdin;
2120 const char *version;
2121 int options = 0;
2122 int study_options = 0;
2123 int default_find_match_limit = FALSE;
2124 int op = 1;
2125 int timeit = 0;
2126 int timeitm = 0;
2127 int showinfo = 0;
2128 int showstore = 0;
2129 int force_study = -1;
2130 int force_study_options = 0;
2131 int quiet = 0;
2132 int size_offsets = 45;
2133 int size_offsets_max;
2134 int *offsets = NULL;
2135 #if !defined NOPOSIX
2136 int posix = 0;
2137 #endif
2138 int debug = 0;
2139 int done = 0;
2140 int all_use_dfa = 0;
2141 int yield = 0;
2142 int stack_size;
2143
2144 pcre_jit_stack *jit_stack = NULL;
2145
2146 /* These vectors store, end-to-end, a list of zero-terminated captured
2147 substring names, each list itself being terminated by an empty name. Assume
2148 that 1024 is plenty long enough for the few names we'll be testing. It is
2149 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2150 for the actual memory, to ensure alignment. By defining these variables always
2151 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2152 #ifdefs in the code. */
2153
2154 pcre_uint16 copynames[1024];
2155 pcre_uint16 getnames[1024];
2156
2157 pcre_uint16 *cn16ptr;
2158 pcre_uint16 *gn16ptr;
2159
2160 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2161 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2162 pcre_uint8 *cn8ptr;
2163 pcre_uint8 *gn8ptr;
2164
2165 /* Get buffers from malloc() so that valgrind will check their misuse when
2166 debugging. They grow automatically when very long lines are read. The 16-bit
2167 buffer (buffer16) is obtained only if needed. */
2168
2169 buffer = (pcre_uint8 *)malloc(buffer_size);
2170 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2171 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2172
2173 /* The outfile variable is static so that new_malloc can use it. */
2174
2175 outfile = stdout;
2176
2177 /* The following _setmode() stuff is some Windows magic that tells its runtime
2178 library to translate CRLF into a single LF character. At least, that's what
2179 I've been told: never having used Windows I take this all on trust. Originally
2180 it set 0x8000, but then I was advised that _O_BINARY was better. */
2181
2182 #if defined(_WIN32) || defined(WIN32)
2183 _setmode( _fileno( stdout ), _O_BINARY );
2184 #endif
2185
2186 /* Get the version number: both pcre_version() and pcre16_version() give the
2187 same answer. We just need to ensure that we call one that is available. */
2188
2189 #ifdef SUPPORT_PCRE8
2190 version = pcre_version();
2191 #else
2192 version = pcre16_version();
2193 #endif
2194
2195 /* Scan options */
2196
2197 while (argc > 1 && argv[op][0] == '-')
2198 {
2199 pcre_uint8 *endptr;
2200
2201 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2202 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2203 else if (strcmp(argv[op], "-s+") == 0)
2204 {
2205 force_study = 1;
2206 force_study_options = PCRE_STUDY_JIT_COMPILE;
2207 }
2208 else if (strcmp(argv[op], "-16") == 0)
2209 {
2210 #ifdef SUPPORT_PCRE16
2211 use_pcre16 = 1;
2212 #else
2213 printf("** This version of PCRE was built without 16-bit support\n");
2214 exit(1);
2215 #endif
2216 }
2217 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2218 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2219 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2220 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2221 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2222 #if !defined NODFA
2223 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2224 #endif
2225 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2226 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2227 *endptr == 0))
2228 {
2229 op++;
2230 argc--;
2231 }
2232 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2233 {
2234 int both = argv[op][2] == 0;
2235 int temp;
2236 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2237 *endptr == 0))
2238 {
2239 timeitm = temp;
2240 op++;
2241 argc--;
2242 }
2243 else timeitm = LOOPREPEAT;
2244 if (both) timeit = timeitm;
2245 }
2246 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2247 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2248 *endptr == 0))
2249 {
2250 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2251 printf("PCRE: -S not supported on this OS\n");
2252 exit(1);
2253 #else
2254 int rc;
2255 struct rlimit rlim;
2256 getrlimit(RLIMIT_STACK, &rlim);
2257 rlim.rlim_cur = stack_size * 1024 * 1024;
2258 rc = setrlimit(RLIMIT_STACK, &rlim);
2259 if (rc != 0)
2260 {
2261 printf("PCRE: setrlimit() failed with error %d\n", rc);
2262 exit(1);
2263 }
2264 op++;
2265 argc--;
2266 #endif
2267 }
2268 #if !defined NOPOSIX
2269 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2270 #endif
2271 else if (strcmp(argv[op], "-C") == 0)
2272 {
2273 int rc;
2274 unsigned long int lrc;
2275
2276 if (argc > 2)
2277 {
2278 if (strcmp(argv[op + 1], "linksize") == 0)
2279 {
2280 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2281 printf("%d\n", rc);
2282 yield = rc;
2283 goto EXIT;
2284 }
2285 if (strcmp(argv[op + 1], "pcre8") == 0)
2286 {
2287 #ifdef SUPPORT_PCRE8
2288 printf("1\n");
2289 yield = 1;
2290 #else
2291 printf("0\n");
2292 yield = 0;
2293 #endif
2294 goto EXIT;
2295 }
2296 if (strcmp(argv[op + 1], "pcre16") == 0)
2297 {
2298 #ifdef SUPPORT_PCRE16
2299 printf("1\n");
2300 yield = 1;
2301 #else
2302 printf("0\n");
2303 yield = 0;
2304 #endif
2305 goto EXIT;
2306 }
2307 if (strcmp(argv[op + 1], "utf") == 0)
2308 {
2309 #ifdef SUPPORT_PCRE8
2310 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2311 printf("%d\n", rc);
2312 yield = rc;
2313 #else
2314 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2315 printf("%d\n", rc);
2316 yield = rc;
2317 #endif
2318 goto EXIT;
2319 }
2320 if (strcmp(argv[op + 1], "ucp") == 0)
2321 {
2322 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2323 printf("%d\n", rc);
2324 yield = rc;
2325 goto EXIT;
2326 }
2327 if (strcmp(argv[op + 1], "jit") == 0)
2328 {
2329 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2330 printf("%d\n", rc);
2331 yield = rc;
2332 goto EXIT;
2333 }
2334 if (strcmp(argv[op + 1], "newline") == 0)
2335 {
2336 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2337 /* Note that these values are always the ASCII values, even
2338 in EBCDIC environments. CR is 13 and NL is 10. */
2339 printf("%s\n", (rc == 13)? "CR" :
2340 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2341 (rc == -2)? "ANYCRLF" :
2342 (rc == -1)? "ANY" : "???");
2343 goto EXIT;
2344 }
2345 printf("Unknown -C option: %s\n", argv[op + 1]);
2346 goto EXIT;
2347 }
2348
2349 printf("PCRE version %s\n", version);
2350 printf("Compiled with\n");
2351
2352 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2353 are set, either both UTFs are supported or both are not supported. */
2354
2355 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2356 printf(" 8-bit and 16-bit support\n");
2357 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2358 if (rc)
2359 printf(" UTF-8 and UTF-16 support\n");
2360 else
2361 printf(" No UTF-8 or UTF-16 support\n");
2362 #elif defined SUPPORT_PCRE8
2363 printf(" 8-bit support only\n");
2364 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2365 printf(" %sUTF-8 support\n", rc? "" : "No ");
2366 #else
2367 printf(" 16-bit support only\n");
2368 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2369 printf(" %sUTF-16 support\n", rc? "" : "No ");
2370 #endif
2371
2372 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2373 printf(" %sUnicode properties support\n", rc? "" : "No ");
2374 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2375 if (rc)
2376 printf(" Just-in-time compiler support\n");
2377 else
2378 printf(" No just-in-time compiler support\n");
2379 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2380 /* Note that these values are always the ASCII values, even
2381 in EBCDIC environments. CR is 13 and NL is 10. */
2382 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2383 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2384 (rc == -2)? "ANYCRLF" :
2385 (rc == -1)? "ANY" : "???");
2386 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2387 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2388 "all Unicode newlines");
2389 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2390 printf(" Internal link size = %d\n", rc);
2391 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2392 printf(" POSIX malloc threshold = %d\n", rc);
2393 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2394 printf(" Default match limit = %ld\n", lrc);
2395 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2396 printf(" Default recursion depth limit = %ld\n", lrc);
2397 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2398 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2399 goto EXIT;
2400 }
2401 else if (strcmp(argv[op], "-help") == 0 ||
2402 strcmp(argv[op], "--help") == 0)
2403 {
2404 usage();
2405 goto EXIT;
2406 }
2407 else
2408 {
2409 printf("** Unknown or malformed option %s\n", argv[op]);
2410 usage();
2411 yield = 1;
2412 goto EXIT;
2413 }
2414 op++;
2415 argc--;
2416 }
2417
2418 /* Get the store for the offsets vector, and remember what it was */
2419
2420 size_offsets_max = size_offsets;
2421 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2422 if (offsets == NULL)
2423 {
2424 printf("** Failed to get %d bytes of memory for offsets vector\n",
2425 (int)(size_offsets_max * sizeof(int)));
2426 yield = 1;
2427 goto EXIT;
2428 }
2429
2430 /* Sort out the input and output files */
2431
2432 if (argc > 1)
2433 {
2434 infile = fopen(argv[op], INPUT_MODE);
2435 if (infile == NULL)
2436 {
2437 printf("** Failed to open %s\n", argv[op]);
2438 yield = 1;
2439 goto EXIT;
2440 }
2441 }
2442
2443 if (argc > 2)
2444 {
2445 outfile = fopen(argv[op+1], OUTPUT_MODE);
2446 if (outfile == NULL)
2447 {
2448 printf("** Failed to open %s\n", argv[op+1]);
2449 yield = 1;
2450 goto EXIT;
2451 }
2452 }
2453
2454 /* Set alternative malloc function */
2455
2456 #ifdef SUPPORT_PCRE8
2457 pcre_malloc = new_malloc;
2458 pcre_free = new_free;
2459 pcre_stack_malloc = stack_malloc;
2460 pcre_stack_free = stack_free;
2461 #endif
2462
2463 #ifdef SUPPORT_PCRE16
2464 pcre16_malloc = new_malloc;
2465 pcre16_free = new_free;
2466 pcre16_stack_malloc = stack_malloc;
2467 pcre16_stack_free = stack_free;
2468 #endif
2469
2470 /* Heading line unless quiet, then prompt for first regex if stdin */
2471
2472 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2473
2474 /* Main loop */
2475
2476 while (!done)
2477 {
2478 pcre *re = NULL;
2479 pcre_extra *extra = NULL;
2480
2481 #if !defined NOPOSIX /* There are still compilers that require no indent */
2482 regex_t preg;
2483 int do_posix = 0;
2484 #endif
2485
2486 const char *error;
2487 pcre_uint8 *markptr;
2488 pcre_uint8 *p, *pp, *ppp;
2489 pcre_uint8 *to_file = NULL;
2490 const pcre_uint8 *tables = NULL;
2491 unsigned long int true_size, true_study_size = 0;
2492 size_t size, regex_gotten_store;
2493 int do_allcaps = 0;
2494 int do_mark = 0;
2495 int do_study = 0;
2496 int no_force_study = 0;
2497 int do_debug = debug;
2498 int do_G = 0;
2499 int do_g = 0;
2500 int do_showinfo = showinfo;
2501 int do_showrest = 0;
2502 int do_showcaprest = 0;
2503 int do_flip = 0;
2504 int erroroffset, len, delimiter, poffset;
2505
2506 use_utf = 0;
2507 debug_lengths = 1;
2508
2509 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2510 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2511 fflush(outfile);
2512
2513 p = buffer;
2514 while (isspace(*p)) p++;
2515 if (*p == 0) continue;
2516
2517 /* See if the pattern is to be loaded pre-compiled from a file. */
2518
2519 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2520 {
2521 unsigned long int magic, get_options;
2522 pcre_uint8 sbuf[8];
2523 FILE *f;
2524
2525 p++;
2526 pp = p + (int)strlen((char *)p);
2527 while (isspace(pp[-1])) pp--;
2528 *pp = 0;
2529
2530 f = fopen((char *)p, "rb");
2531 if (f == NULL)
2532 {
2533 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2534 continue;
2535 }
2536
2537 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2538
2539 true_size =
2540 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2541 true_study_size =
2542 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2543
2544 re = (real_pcre *)new_malloc(true_size);
2545 regex_gotten_store = first_gotten_store;
2546
2547 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2548
2549 magic = ((real_pcre *)re)->magic_number;
2550 if (magic != MAGIC_NUMBER)
2551 {
2552 if (swap_uint32(magic) == MAGIC_NUMBER)
2553 {
2554 do_flip = 1;
2555 }
2556 else
2557 {
2558 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2559 fclose(f);
2560 continue;
2561 }
2562 }
2563
2564 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2565 do_flip? " (byte-inverted)" : "", p);
2566
2567 /* Now see if there is any following study data. */
2568
2569 if (true_study_size != 0)
2570 {
2571 pcre_study_data *psd;
2572
2573 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2574 extra->flags = PCRE_EXTRA_STUDY_DATA;
2575
2576 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2577 extra->study_data = psd;
2578
2579 if (fread(psd, 1, true_study_size, f) != true_study_size)
2580 {
2581 FAIL_READ:
2582 fprintf(outfile, "Failed to read data from %s\n", p);
2583 if (extra != NULL)
2584 {
2585 PCRE_FREE_STUDY(extra);
2586 }
2587 if (re != NULL) new_free(re);
2588 fclose(f);
2589 continue;
2590 }
2591 fprintf(outfile, "Study data loaded from %s\n", p);
2592 do_study = 1; /* To get the data output if requested */
2593 }
2594 else fprintf(outfile, "No study data\n");
2595
2596 /* Flip the necessary bytes. */
2597 if (do_flip)
2598 {
2599 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2600 }
2601
2602 /* Need to know if UTF-8 for printing data strings. */
2603
2604 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2605 use_utf = (get_options & PCRE_UTF8) != 0;
2606
2607 fclose(f);
2608 goto SHOW_INFO;
2609 }
2610
2611 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2612 the pattern; if it isn't complete, read more. */
2613
2614 delimiter = *p++;
2615
2616 if (isalnum(delimiter) || delimiter == '\\')
2617 {
2618 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2619 goto SKIP_DATA;
2620 }
2621
2622 pp = p;
2623 poffset = (int)(p - buffer);
2624
2625 for(;;)
2626 {
2627 while (*pp != 0)
2628 {
2629 if (*pp == '\\' && pp[1] != 0) pp++;
2630 else if (*pp == delimiter) break;
2631 pp++;
2632 }
2633 if (*pp != 0) break;
2634 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2635 {
2636 fprintf(outfile, "** Unexpected EOF\n");
2637 done = 1;
2638 goto CONTINUE;
2639 }
2640 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2641 }
2642
2643 /* The buffer may have moved while being extended; reset the start of data
2644 pointer to the correct relative point in the buffer. */
2645
2646 p = buffer + poffset;
2647
2648 /* If the first character after the delimiter is backslash, make
2649 the pattern end with backslash. This is purely to provide a way
2650 of testing for the error message when a pattern ends with backslash. */
2651
2652 if (pp[1] == '\\') *pp++ = '\\';
2653
2654 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2655 for callouts. */
2656
2657 *pp++ = 0;
2658 strcpy((char *)pbuffer, (char *)p);
2659
2660 /* Look for options after final delimiter */
2661
2662 options = 0;
2663 study_options = 0;
2664 log_store = showstore; /* default from command line */
2665
2666 while (*pp != 0)
2667 {
2668 switch (*pp++)
2669 {
2670 case 'f': options |= PCRE_FIRSTLINE; break;
2671 case 'g': do_g = 1; break;
2672 case 'i': options |= PCRE_CASELESS; break;
2673 case 'm': options |= PCRE_MULTILINE; break;
2674 case 's': options |= PCRE_DOTALL; break;
2675 case 'x': options |= PCRE_EXTENDED; break;
2676
2677 case '+':
2678 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2679 break;
2680
2681 case '=': do_allcaps = 1; break;
2682 case 'A': options |= PCRE_ANCHORED; break;
2683 case 'B': do_debug = 1; break;
2684 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2685 case 'D': do_debug = do_showinfo = 1; break;
2686 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2687 case 'F': do_flip = 1; break;
2688 case 'G': do_G = 1; break;
2689 case 'I': do_showinfo = 1; break;
2690 case 'J': options |= PCRE_DUPNAMES; break;
2691 case 'K': do_mark = 1; break;
2692 case 'M': log_store = 1; break;
2693 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2694
2695 #if !defined NOPOSIX
2696 case 'P': do_posix = 1; break;
2697 #endif
2698
2699 case 'S':
2700 if (do_study == 0)
2701 {
2702 do_study = 1;
2703 if (*pp == '+')
2704 {
2705 study_options |= PCRE_STUDY_JIT_COMPILE;
2706 pp++;
2707 }
2708 }
2709 else
2710 {
2711 do_study = 0;
2712 no_force_study = 1;
2713 }
2714 break;
2715
2716 case 'U': options |= PCRE_UNGREEDY; break;
2717 case 'W': options |= PCRE_UCP; break;
2718 case 'X': options |= PCRE_EXTRA; break;
2719 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2720 case 'Z': debug_lengths = 0; break;
2721 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2722 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2723
2724 case 'T':
2725 switch (*pp++)
2726 {
2727 case '0': tables = tables0; break;
2728 case '1': tables = tables1; break;
2729
2730 case '\r':
2731 case '\n':
2732 case ' ':
2733 case 0:
2734 fprintf(outfile, "** Missing table number after /T\n");
2735 goto SKIP_DATA;
2736
2737 default:
2738 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2739 goto SKIP_DATA;
2740 }
2741 break;
2742
2743 case 'L':
2744 ppp = pp;
2745 /* The '\r' test here is so that it works on Windows. */
2746 /* The '0' test is just in case this is an unterminated line. */
2747 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2748 *ppp = 0;
2749 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2750 {
2751 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2752 goto SKIP_DATA;
2753 }
2754 locale_set = 1;
2755 tables = PCRE_MAKETABLES;
2756 pp = ppp;
2757 break;
2758
2759 case '>':
2760 to_file = pp;
2761 while (*pp != 0) pp++;
2762 while (isspace(pp[-1])) pp--;
2763 *pp = 0;
2764 break;
2765
2766 case '<':
2767 {
2768 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2769 {
2770 options |= PCRE_JAVASCRIPT_COMPAT;
2771 pp += 3;
2772 }
2773 else
2774 {
2775 int x = check_newline(pp, outfile);
2776 if (x == 0) goto SKIP_DATA;
2777 options |= x;
2778 while (*pp++ != '>');
2779 }
2780 }
2781 break;
2782
2783 case '\r': /* So that it works in Windows */
2784 case '\n':
2785 case ' ':
2786 break;
2787
2788 default:
2789 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2790 goto SKIP_DATA;
2791 }
2792 }
2793
2794 /* Handle compiling via the POSIX interface, which doesn't support the
2795 timing, showing, or debugging options, nor the ability to pass over
2796 local character tables. Neither does it have 16-bit support. */
2797
2798 #if !defined NOPOSIX
2799 if (posix || do_posix)
2800 {
2801 int rc;
2802 int cflags = 0;
2803
2804 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2805 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2806 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2807 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2808 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2809 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2810 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2811
2812 first_gotten_store = 0;
2813 rc = regcomp(&preg, (char *)p, cflags);
2814
2815 /* Compilation failed; go back for another re, skipping to blank line
2816 if non-interactive. */
2817
2818 if (rc != 0)
2819 {
2820 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2821 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2822 goto SKIP_DATA;
2823 }
2824 }
2825
2826 /* Handle compiling via the native interface */
2827
2828 else
2829 #endif /* !defined NOPOSIX */
2830
2831 {
2832 unsigned long int get_options;
2833
2834 /* In 16-bit mode, convert the input. */
2835
2836 #ifdef SUPPORT_PCRE16
2837 if (use_pcre16)
2838 {
2839 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2840 {
2841 case -1:
2842 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2843 "converted to UTF-16\n");
2844 goto SKIP_DATA;
2845
2846 case -2:
2847 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2848 "cannot be converted to UTF-16\n");
2849 goto SKIP_DATA;
2850
2851 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2852 fprintf(outfile, "**Failed: character value greater than 0xffff "
2853 "cannot be converted to 16-bit in non-UTF mode\n");
2854 goto SKIP_DATA;
2855
2856 default:
2857 break;
2858 }
2859 p = (pcre_uint8 *)buffer16;
2860 }
2861 #endif
2862
2863 /* Compile many times when timing */
2864
2865 if (timeit > 0)
2866 {
2867 register int i;
2868 clock_t time_taken;
2869 clock_t start_time = clock();
2870 for (i = 0; i < timeit; i++)
2871 {
2872 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2873 if (re != NULL) free(re);
2874 }
2875 time_taken = clock() - start_time;
2876 fprintf(outfile, "Compile time %.4f milliseconds\n",
2877 (((double)time_taken * 1000.0) / (double)timeit) /
2878 (double)CLOCKS_PER_SEC);
2879 }
2880
2881 first_gotten_store = 0;
2882 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2883
2884 /* Compilation failed; go back for another re, skipping to blank line
2885 if non-interactive. */
2886
2887 if (re == NULL)
2888 {
2889 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2890 SKIP_DATA:
2891 if (infile != stdin)
2892 {
2893 for (;;)
2894 {
2895 if (extend_inputline(infile, buffer, NULL) == NULL)
2896 {
2897 done = 1;
2898 goto CONTINUE;
2899 }
2900 len = (int)strlen((char *)buffer);
2901 while (len > 0 && isspace(buffer[len-1])) len--;
2902 if (len == 0) break;
2903 }
2904 fprintf(outfile, "\n");
2905 }
2906 goto CONTINUE;
2907 }
2908
2909 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2910 within the regex; check for this so that we know how to process the data
2911 lines. */
2912
2913 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2914 goto SKIP_DATA;
2915 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2916
2917 /* Extract the size for possible writing before possibly flipping it,
2918 and remember the store that was got. */
2919
2920 true_size = ((real_pcre *)re)->size;
2921 regex_gotten_store = first_gotten_store;
2922
2923 /* Output code size information if requested */
2924
2925 if (log_store)
2926 fprintf(outfile, "Memory allocation (code space): %d\n",
2927 (int)(first_gotten_store -
2928 sizeof(real_pcre) -
2929 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2930
2931 /* If -s or /S was present, study the regex to generate additional info to
2932 help with the matching, unless the pattern has the SS option, which
2933 suppresses the effect of /S (used for a few test patterns where studying is
2934 never sensible). */
2935
2936 if (do_study || (force_study >= 0 && !no_force_study))
2937 {
2938 if (timeit > 0)
2939 {
2940 register int i;
2941 clock_t time_taken;
2942 clock_t start_time = clock();
2943 for (i = 0; i < timeit; i++)
2944 {
2945 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2946 }
2947 time_taken = clock() - start_time;
2948 if (extra != NULL)
2949 {
2950 PCRE_FREE_STUDY(extra);
2951 }
2952 fprintf(outfile, " Study time %.4f milliseconds\n",
2953 (((double)time_taken * 1000.0) / (double)timeit) /
2954 (double)CLOCKS_PER_SEC);
2955 }
2956 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2957 if (error != NULL)
2958 fprintf(outfile, "Failed to study: %s\n", error);
2959 else if (extra != NULL)
2960 {
2961 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2962 if (log_store)
2963 {
2964 size_t jitsize;
2965 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2966 jitsize != 0)
2967 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2968 }
2969 }
2970 }
2971
2972 /* If /K was present, we set up for handling MARK data. */
2973
2974 if (do_mark)
2975 {
2976 if (extra == NULL)
2977 {
2978 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2979 extra->flags = 0;
2980 }
2981 extra->mark = &markptr;
2982 extra->flags |= PCRE_EXTRA_MARK;
2983 }
2984
2985 /* Extract and display information from the compiled data if required. */
2986
2987 SHOW_INFO:
2988
2989 if (do_debug)
2990 {
2991 fprintf(outfile, "------------------------------------------------------------------\n");
2992 PCRE_PRINTINT(re, outfile, debug_lengths);
2993 }
2994
2995 /* We already have the options in get_options (see above) */
2996
2997 if (do_showinfo)
2998 {
2999 unsigned long int all_options;
3000 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3001 hascrorlf;
3002 int nameentrysize, namecount;
3003 const pcre_uint8 *nametable;
3004
3005 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3006 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3007 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3008 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3009 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3010 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3011 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3012 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3013 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3014 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3015 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3016 != 0)
3017 goto SKIP_DATA;
3018
3019 if (size != regex_gotten_store) fprintf(outfile,
3020 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3021 (int)size, (int)regex_gotten_store);
3022
3023 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3024 if (backrefmax > 0)
3025 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3026
3027 if (namecount > 0)
3028 {
3029 fprintf(outfile, "Named capturing subpatterns:\n");
3030 while (namecount-- > 0)
3031 {
3032 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3033 int imm2_size = use_pcre16 ? 1 : 2;
3034 #else
3035 int imm2_size = IMM2_SIZE;
3036 #endif
3037 int length = (int)STRLEN(nametable + imm2_size);
3038 fprintf(outfile, " ");
3039 PCHARSV(nametable, imm2_size, length, outfile);
3040 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3041 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3042 fprintf(outfile, "%3d\n", use_pcre16?
3043 (int)(((PCRE_SPTR16)nametable)[0])
3044 :((int)nametable[0] << 8) | (int)nametable[1]);
3045 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3046 #else
3047 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3048 #ifdef SUPPORT_PCRE8
3049 nametable += nameentrysize;
3050 #else
3051 nametable += nameentrysize * 2;
3052 #endif
3053 #endif
3054 }
3055 }
3056
3057 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3058 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3059
3060 all_options = ((real_pcre *)re)->options;
3061 if (do_flip) all_options = swap_uint32(all_options);
3062
3063 if (get_options == 0) fprintf(outfile, "No options\n");
3064 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3065 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3066 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3067 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3068 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3069 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3070 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3071 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3072 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3073 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3074 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3075 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3076 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3077 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3078 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3079 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3080 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3081 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3082
3083 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3084
3085 switch (get_options & PCRE_NEWLINE_BITS)
3086 {
3087 case PCRE_NEWLINE_CR:
3088 fprintf(outfile, "Forced newline sequence: CR\n");
3089 break;
3090
3091 case PCRE_NEWLINE_LF:
3092 fprintf(outfile, "Forced newline sequence: LF\n");
3093 break;
3094
3095 case PCRE_NEWLINE_CRLF:
3096 fprintf(outfile, "Forced newline sequence: CRLF\n");
3097 break;
3098
3099 case PCRE_NEWLINE_ANYCRLF:
3100 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3101 break;
3102
3103 case PCRE_NEWLINE_ANY:
3104 fprintf(outfile, "Forced newline sequence: ANY\n");
3105 break;
3106
3107 default:
3108 break;
3109 }
3110
3111 if (first_char == -1)
3112 {
3113 fprintf(outfile, "First char at start or follows newline\n");
3114 }
3115 else if (first_char < 0)
3116 {
3117 fprintf(outfile, "No first char\n");
3118 }
3119 else
3120 {
3121 const char *caseless =
3122 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3123 "" : " (caseless)";
3124
3125 if (PRINTOK(first_char))
3126 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3127 else
3128 {
3129 fprintf(outfile, "First char = ");
3130 pchar(first_char, outfile);
3131 fprintf(outfile, "%s\n", caseless);
3132 }
3133 }
3134
3135 if (need_char < 0)
3136 {
3137 fprintf(outfile, "No need char\n");
3138 }
3139 else
3140 {
3141 const char *caseless =
3142 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3143 "" : " (caseless)";
3144
3145 if (PRINTOK(need_char))
3146 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3147 else
3148 {
3149 fprintf(outfile, "Need char = ");
3150 pchar(need_char, outfile);
3151 fprintf(outfile, "%s\n", caseless);
3152 }
3153 }
3154
3155 /* Don't output study size; at present it is in any case a fixed
3156 value, but it varies, depending on the computer architecture, and
3157 so messes up the test suite. (And with the /F option, it might be
3158 flipped.) If study was forced by an external -s, don't show this
3159 information unless -i or -d was also present. This means that, except
3160 when auto-callouts are involved, the output from runs with and without
3161 -s should be identical. */
3162
3163 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3164 {
3165 if (extra == NULL)
3166 fprintf(outfile, "Study returned NULL\n");
3167 else
3168 {
3169 pcre_uint8 *start_bits = NULL;
3170 int minlength;
3171
3172 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3173 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3174
3175 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3176 {
3177 if (start_bits == NULL)
3178 fprintf(outfile, "No set of starting bytes\n");
3179 else
3180 {
3181 int i;
3182 int c = 24;
3183 fprintf(outfile, "Starting byte set: ");
3184 for (i = 0; i < 256; i++)
3185 {
3186 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3187 {
3188 if (c > 75)
3189 {
3190 fprintf(outfile, "\n ");
3191 c = 2;
3192 }
3193 if (PRINTOK(i) && i != ' ')
3194 {
3195 fprintf(outfile, "%c ", i);
3196 c += 2;
3197 }
3198 else
3199 {
3200 fprintf(outfile, "\\x%02x ", i);
3201 c += 5;
3202 }
3203 }
3204 }
3205 fprintf(outfile, "\n");
3206 }
3207 }
3208 }
3209
3210 /* Show this only if the JIT was set by /S, not by -s. */
3211
3212 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3213 {
3214 int jit;
3215 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3216 {
3217 if (jit)
3218 fprintf(outfile, "JIT study was successful\n");
3219 else
3220 #ifdef SUPPORT_JIT
3221 fprintf(outfile, "JIT study was not successful\n");
3222 #else
3223 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3224 #endif
3225 }
3226 }
3227 }
3228 }
3229
3230 /* If the '>' option was present, we write out the regex to a file, and
3231 that is all. The first 8 bytes of the file are the regex length and then
3232 the study length, in big-endian order. */
3233
3234 if (to_file != NULL)
3235 {
3236 FILE *f = fopen((char *)to_file, "wb");
3237 if (f == NULL)
3238 {
3239 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3240 }
3241 else
3242 {
3243 pcre_uint8 sbuf[8];
3244
3245 if (do_flip) regexflip(re, extra);
3246 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3247 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3248 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3249 sbuf[3] = (pcre_uint8)((true_size) & 255);
3250 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3251 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3252 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3253 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3254
3255 if (fwrite(sbuf, 1, 8, f) < 8 ||
3256 fwrite(re, 1, true_size, f) < true_size)
3257 {
3258 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3259 }
3260 else
3261 {
3262 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3263
3264 /* If there is study data, write it. */
3265
3266 if (extra != NULL)
3267 {
3268 if (fwrite(extra->study_data, 1, true_study_size, f) <
3269 true_study_size)
3270 {
3271 fprintf(outfile, "Write error on %s: %s\n", to_file,
3272 strerror(errno));
3273 }
3274 else fprintf(outfile, "Study data written to %s\n", to_file);
3275 }
3276 }
3277 fclose(f);
3278 }
3279
3280 new_free(re);
3281 if (extra != NULL)
3282 {
3283 PCRE_FREE_STUDY(extra);
3284 }
3285 if (locale_set)
3286 {
3287 new_free((void *)tables);
3288 setlocale(LC_CTYPE, "C");
3289 locale_set = 0;
3290 }
3291 continue; /* With next regex */
3292 }
3293 } /* End of non-POSIX compile */
3294
3295 /* Read data lines and test them */
3296
3297 for (;;)
3298 {
3299 pcre_uint8 *q;
3300 pcre_uint8 *bptr;
3301 int *use_offsets = offsets;
3302 int use_size_offsets = size_offsets;
3303 int callout_data = 0;
3304 int callout_data_set = 0;
3305 int count, c;
3306 int copystrings = 0;
3307 int find_match_limit = default_find_match_limit;
3308 int getstrings = 0;
3309 int getlist = 0;
3310 int gmatched = 0;
3311 int start_offset = 0;
3312 int start_offset_sign = 1;
3313 int g_notempty = 0;
3314 int use_dfa = 0;
3315
3316 *copynames = 0;
3317 *getnames = 0;
3318
3319 cn16ptr = copynames;
3320 gn16ptr = getnames;
3321 cn8ptr = copynames8;
3322 gn8ptr = getnames8;
3323
3324 SET_PCRE_CALLOUT(callout);
3325 first_callout = 1;
3326 last_callout_mark = NULL;
3327 callout_extra = 0;
3328 callout_count = 0;
3329 callout_fail_count = 999999;
3330 callout_fail_id = -1;
3331 show_malloc = 0;
3332 options = 0;
3333
3334 if (extra != NULL) extra->flags &=
3335 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3336
3337 len = 0;
3338 for (;;)
3339 {
3340 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3341 {
3342 if (len > 0) /* Reached EOF without hitting a newline */
3343 {
3344 fprintf(outfile, "\n");
3345 break;
3346 }
3347 done = 1;
3348 goto CONTINUE;
3349 }
3350 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3351 len = (int)strlen((char *)buffer);
3352 if (buffer[len-1] == '\n') break;
3353 }
3354
3355 while (len > 0 && isspace(buffer[len-1])) len--;
3356 buffer[len] = 0;
3357 if (len == 0) break;
3358
3359 p = buffer;
3360 while (isspace(*p)) p++;
3361
3362 bptr = q = dbuffer;
3363 while ((c = *p++) != 0)
3364 {
3365 int i = 0;
3366 int n = 0;
3367
3368 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3369 In non-UTF mode, allow the value of the byte to fall through to later,
3370 where values greater than 127 are turned into UTF-8 when running in
3371 16-bit mode. */
3372
3373 if (c != '\\')
3374 {
3375 if (use_utf)
3376 {
3377 *q++ = c;
3378 continue;
3379 }
3380 }
3381
3382 /* Handle backslash escapes */
3383
3384 else switch ((c = *p++))
3385 {
3386 case 'a': c = 7; break;
3387 case 'b': c = '\b'; break;
3388 case 'e': c = 27; break;
3389 case 'f': c = '\f'; break;
3390 case 'n': c = '\n'; break;
3391 case 'r': c = '\r'; break;
3392 case 't': c = '\t'; break;
3393 case 'v': c = '\v'; break;
3394
3395 case '0': case '1': case '2': case '3':
3396 case '4': case '5': case '6': case '7':
3397 c -= '0';
3398 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3399 c = c * 8 + *p++ - '0';
3400 break;
3401
3402 case 'x':
3403 if (*p == '{')
3404 {
3405 pcre_uint8 *pt = p;
3406 c = 0;
3407
3408 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3409 when isxdigit() is a macro that refers to its argument more than
3410 once. This is banned by the C Standard, but apparently happens in at
3411 least one MacOS environment. */
3412
3413 for (pt++; isxdigit(*pt); pt++)
3414 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3415 if (*pt == '}')
3416 {
3417 p = pt + 1;
3418 break;
3419 }
3420 /* Not correct form for \x{...}; fall through */
3421 }
3422
3423 /* \x without {} always defines just one byte in 8-bit mode. This
3424 allows UTF-8 characters to be constructed byte by byte, and also allows
3425 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3426 Otherwise, pass it down to later code so that it can be turned into
3427 UTF-8 when running in 16-bit mode. */
3428
3429 c = 0;
3430 while (i++ < 2 && isxdigit(*p))
3431 {
3432 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3433 p++;
3434 }
3435 if (use_utf)
3436 {
3437 *q++ = c;
3438 continue;
3439 }
3440 break;
3441
3442 case 0: /* \ followed by EOF allows for an empty line */
3443 p--;
3444 continue;
3445
3446 case '>':
3447 if (*p == '-')
3448 {
3449 start_offset_sign = -1;
3450 p++;
3451 }
3452 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3453 start_offset *= start_offset_sign;
3454 continue;
3455
3456 case 'A': /* Option setting */
3457 options |= PCRE_ANCHORED;
3458 continue;
3459
3460 case 'B':
3461 options |= PCRE_NOTBOL;
3462 continue;
3463
3464 case 'C':
3465 if (isdigit(*p)) /* Set copy string */
3466 {
3467 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3468 copystrings |= 1 << n;
3469 }
3470 else if (isalnum(*p))
3471 {
3472 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3473 }
3474 else if (*p == '+')
3475 {
3476 callout_extra = 1;
3477 p++;
3478 }
3479 else if (*p == '-')
3480 {
3481 SET_PCRE_CALLOUT(NULL);
3482 p++;
3483 }
3484 else if (*p == '!')
3485 {
3486 callout_fail_id = 0;
3487 p++;
3488 while(isdigit(*p))
3489 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3490 callout_fail_count = 0;
3491 if (*p == '!')
3492 {
3493 p++;
3494 while(isdigit(*p))
3495 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3496 }
3497 }
3498 else if (*p == '*')
3499 {
3500 int sign = 1;
3501 callout_data = 0;
3502 if (*(++p) == '-') { sign = -1; p++; }
3503 while(isdigit(*p))
3504 callout_data = callout_data * 10 + *p++ - '0';
3505 callout_data *= sign;
3506 callout_data_set = 1;
3507 }
3508 continue;
3509
3510 #if !defined NODFA
3511 case 'D':
3512 #if !defined NOPOSIX
3513 if (posix || do_posix)
3514 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3515 else
3516 #endif
3517 use_dfa = 1;
3518 continue;
3519 #endif
3520
3521 #if !defined NODFA
3522 case 'F':
3523 options |= PCRE_DFA_SHORTEST;
3524 continue;
3525 #endif
3526
3527 case 'G':
3528 if (isdigit(*p))
3529 {
3530 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3531 getstrings |= 1 << n;
3532 }
3533 else if (isalnum(*p))
3534 {
3535 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3536 }
3537 continue;
3538
3539 case 'J':
3540 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3541 if (extra != NULL
3542 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3543 && extra->executable_jit != NULL)
3544 {
3545 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3546 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3547 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3548 }
3549 continue;
3550
3551 case 'L':
3552 getlist = 1;
3553 continue;
3554
3555 case 'M':
3556 find_match_limit = 1;
3557 continue;
3558
3559 case 'N':
3560 if ((options & PCRE_NOTEMPTY) != 0)
3561 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3562 else
3563 options |= PCRE_NOTEMPTY;
3564 continue;
3565
3566 case 'O':
3567 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3568 if (n > size_offsets_max)
3569 {
3570 size_offsets_max = n;
3571 free(offsets);
3572 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3573 if (offsets == NULL)
3574 {
3575 printf("** Failed to get %d bytes of memory for offsets vector\n",
3576 (int)(size_offsets_max * sizeof(int)));
3577 yield = 1;
3578 goto EXIT;
3579 }
3580 }
3581 use_size_offsets = n;
3582 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3583 continue;
3584
3585 case 'P':
3586 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3587 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3588 continue;
3589
3590 case 'Q':
3591 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3592 if (extra == NULL)
3593 {
3594 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3595 extra->flags = 0;
3596 }
3597 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3598 extra->match_limit_recursion = n;
3599 continue;
3600
3601 case 'q':
3602 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3603 if (extra == NULL)
3604 {
3605 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3606 extra->flags = 0;
3607 }
3608 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3609 extra->match_limit = n;
3610 continue;
3611
3612 #if !defined NODFA
3613 case 'R':
3614 options |= PCRE_DFA_RESTART;
3615 continue;
3616 #endif
3617
3618 case 'S':
3619 show_malloc = 1;
3620 continue;
3621
3622 case 'Y':
3623 options |= PCRE_NO_START_OPTIMIZE;
3624 continue;
3625
3626 case 'Z':
3627 options |= PCRE_NOTEOL;
3628 continue;
3629
3630 case '?':
3631 options |= PCRE_NO_UTF8_CHECK;
3632 continue;
3633
3634 case '<':
3635 {
3636 int x = check_newline(p, outfile);
3637 if (x == 0) goto NEXT_DATA;
3638 options |= x;
3639 while (*p++ != '>');
3640 }
3641 continue;
3642 }
3643
3644 /* We now have a character value in c that may be greater than 255. In
3645 16-bit mode, we always convert characters to UTF-8 so that values greater
3646 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3647 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3648 mode must have come from \x{...} or octal constructs because values from
3649 \x.. get this far only in non-UTF mode. */
3650
3651 if (use_pcre16 || use_utf)
3652 {
3653 pcre_uint8 buff8[8];
3654 int ii, utn;
3655 utn = ord2utf8(c, buff8);
3656 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3657 }
3658 else
3659 {
3660 if (c > 255)
3661 {
3662 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3663 "and UTF-8 mode is not enabled.\n", c);
3664 fprintf(outfile, "** Truncation will probably give the wrong "
3665 "result.\n");
3666 }
3667 *q++ = c;
3668 }
3669 }
3670
3671 /* Reached end of subject string */
3672
3673 *q = 0;
3674 len = (int)(q - dbuffer);
3675
3676 /* Move the data to the end of the buffer so that a read over the end of
3677 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3678 we are using the POSIX interface, we must include the terminating zero. */
3679
3680 #if !defined NOPOSIX
3681 if (posix || do_posix)
3682 {
3683 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3684 bptr += buffer_size - len - 1;
3685 }
3686 else
3687 #endif
3688 {
3689 memmove(bptr + buffer_size - len, bptr, len);
3690 bptr += buffer_size - len;
3691 }
3692
3693 if ((all_use_dfa || use_dfa) && find_match_limit)
3694 {
3695 printf("**Match limit not relevant for DFA matching: ignored\n");
3696 find_match_limit = 0;
3697 }
3698
3699 /* Handle matching via the POSIX interface, which does not
3700 support timing or playing with the match limit or callout data. */
3701
3702 #if !defined NOPOSIX
3703 if (posix || do_posix)
3704 {
3705 int rc;
3706 int eflags = 0;
3707 regmatch_t *pmatch = NULL;
3708 if (use_size_offsets > 0)
3709 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3710 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3711 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3712 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3713
3714 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3715
3716 if (rc != 0)
3717 {
3718 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3719 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3720 }
3721 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3722 != 0)
3723 {
3724 fprintf(outfile, "Matched with REG_NOSUB\n");
3725 }
3726 else
3727 {
3728 size_t i;
3729 for (i = 0; i < (size_t)use_size_offsets; i++)
3730 {
3731 if (pmatch[i].rm_so >= 0)
3732 {
3733 fprintf(outfile, "%2d: ", (int)i);
3734 PCHARSV(dbuffer, pmatch[i].rm_so,
3735 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3736 fprintf(outfile, "\n");
3737 if (do_showcaprest || (i == 0 && do_showrest))
3738 {
3739 fprintf(outfile, "%2d+ ", (int)i);
3740 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3741 outfile);
3742 fprintf(outfile, "\n");
3743 }
3744 }
3745 }
3746 }
3747 free(pmatch);
3748 goto NEXT_DATA;
3749 }
3750
3751 #endif /* !defined NOPOSIX */
3752
3753 /* Handle matching via the native interface - repeats for /g and /G */
3754
3755 #ifdef SUPPORT_PCRE16
3756 if (use_pcre16)
3757 {
3758 len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3759 switch(len)
3760 {
3761 case -1:
3762 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3763 "converted to UTF-16\n");
3764 goto NEXT_DATA;
3765
3766 case -2:
3767 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3768 "cannot be converted to UTF-16\n");
3769 goto NEXT_DATA;
3770
3771 case -3:
3772 fprintf(outfile, "**Failed: character value greater than 0xffff "
3773 "cannot be converted to 16-bit in non-UTF mode\n");
3774 goto NEXT_DATA;
3775
3776 default:
3777 break;
3778 }
3779 bptr = (pcre_uint8 *)buffer16;
3780 }
3781 #endif
3782
3783 for (;; gmatched++) /* Loop for /g or /G */
3784 {
3785 markptr = NULL;
3786
3787 if (timeitm > 0)
3788 {
3789 register int i;
3790 clock_t time_taken;
3791 clock_t start_time = clock();
3792
3793 #if !defined NODFA
3794 if (all_use_dfa || use_dfa)
3795 {
3796 int workspace[1000];
3797 for (i = 0; i < timeitm; i++)
3798 {
3799 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3800 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3801 (sizeof(workspace)/sizeof(int)));
3802 }
3803 }
3804 else
3805 #endif
3806
3807 for (i = 0; i < timeitm; i++)
3808 {
3809 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3810 (options | g_notempty), use_offsets, use_size_offsets);
3811 }
3812 time_taken = clock() - start_time;
3813 fprintf(outfile, "Execute time %.4f milliseconds\n",
3814 (((double)time_taken * 1000.0) / (double)timeitm) /
3815 (double)CLOCKS_PER_SEC);
3816 }
3817
3818 /* If find_match_limit is set, we want to do repeated matches with
3819 varying limits in order to find the minimum value for the match limit and
3820 for the recursion limit. The match limits are relevant only to the normal
3821 running of pcre_exec(), so disable the JIT optimization. This makes it
3822 possible to run the same set of tests with and without JIT externally
3823 requested. */
3824
3825 if (find_match_limit)
3826 {
3827 if (extra == NULL)
3828 {
3829 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3830 extra->flags = 0;
3831 }
3832 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3833
3834 (void)check_match_limit(re, extra, bptr, len, start_offset,
3835 options|g_notempty, use_offsets, use_size_offsets,
3836 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3837 PCRE_ERROR_MATCHLIMIT, "match()");
3838
3839 count = check_match_limit(re, extra, bptr, len, start_offset,
3840 options|g_notempty, use_offsets, use_size_offsets,
3841 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3842 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3843 }
3844
3845 /* If callout_data is set, use the interface with additional data */
3846
3847 else if (callout_data_set)
3848 {
3849 if (extra == NULL)
3850 {
3851 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3852 extra->flags = 0;
3853 }
3854 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3855 extra->callout_data = &callout_data;
3856 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3857 options | g_notempty, use_offsets, use_size_offsets);
3858 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3859 }
3860
3861 /* The normal case is just to do the match once, with the default
3862 value of match_limit. */
3863
3864 #if !defined NODFA
3865 else if (all_use_dfa || use_dfa)
3866 {
3867 int workspace[1000];
3868 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3869 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3870 (sizeof(workspace)/sizeof(int)));
3871 if (count == 0)
3872 {
3873 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3874 count = use_size_offsets/2;
3875 }
3876 }
3877 #endif
3878
3879 else
3880 {
3881 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3882 options | g_notempty, use_offsets, use_size_offsets);
3883 if (count == 0)
3884 {
3885 fprintf(outfile, "Matched, but too many substrings\n");
3886 count = use_size_offsets/3;
3887 }
3888 }
3889
3890 /* Matched */
3891
3892 if (count >= 0)
3893 {
3894 int i, maxcount;
3895 void *cnptr, *gnptr;
3896
3897 #if !defined NODFA
3898 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3899 #endif
3900 maxcount = use_size_offsets/3;
3901
3902 /* This is a check against a lunatic return value. */
3903
3904 if (count > maxcount)
3905 {
3906 fprintf(outfile,
3907 "** PCRE error: returned count %d is too big for offset size %d\n",
3908 count, use_size_offsets);
3909 count = use_size_offsets/3;
3910 if (do_g || do_G)
3911 {
3912 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3913 do_g = do_G = FALSE; /* Break g/G loop */
3914 }
3915 }
3916
3917 /* do_allcaps requests showing of all captures in the pattern, to check
3918 unset ones at the end. */
3919
3920 if (do_allcaps)
3921 {
3922 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3923 goto SKIP_DATA;
3924 count++; /* Allow for full match */
3925 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3926 }
3927
3928 /* Output the captured substrings */
3929
3930 for (i = 0; i < count * 2; i += 2)
3931 {
3932 if (use_offsets[i] < 0)
3933 {
3934 if (use_offsets[i] != -1)
3935 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3936 use_offsets[i], i);
3937 if (use_offsets[i+1] != -1)
3938 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3939 use_offsets[i+1], i+1);
3940 fprintf(outfile, "%2d: <unset>\n", i/2);
3941 }
3942 else
3943 {
3944 fprintf(outfile, "%2d: ", i/2);
3945 PCHARSV(bptr, use_offsets[i],
3946 use_offsets[i+1] - use_offsets[i], outfile);
3947 fprintf(outfile, "\n");
3948 if (do_showcaprest || (i == 0 && do_showrest))
3949 {
3950 fprintf(outfile, "%2d+ ", i/2);
3951 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3952 outfile);
3953 fprintf(outfile, "\n");
3954 }
3955 }
3956 }
3957
3958 if (markptr != NULL)
3959 {
3960 fprintf(outfile, "MK: ");
3961 PCHARSV(markptr, 0, -1, outfile);
3962 fprintf(outfile, "\n");
3963 }
3964
3965 for (i = 0; i < 32; i++)
3966 {
3967 if ((copystrings & (1 << i)) != 0)
3968 {
3969 int rc;
3970 char copybuffer[256];
3971 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3972 copybuffer, sizeof(copybuffer));
3973 if (rc < 0)
3974 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3975 else
3976 {
3977 fprintf(outfile, "%2dC ", i);
3978 PCHARSV(copybuffer, 0, rc, outfile);
3979 fprintf(outfile, " (%d)\n", rc);
3980 }
3981 }
3982 }
3983
3984 cnptr = copynames;
3985 for (;;)
3986 {
3987 int rc;
3988 char copybuffer[256];
3989
3990 if (use_pcre16)
3991 {
3992 if (*(pcre_uint16 *)cnptr == 0) break;
3993 }
3994 else
3995 {
3996 if (*(pcre_uint8 *)cnptr == 0) break;
3997 }
3998
3999 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4000 cnptr, copybuffer, sizeof(copybuffer));
4001
4002 if (rc < 0)
4003 {
4004 fprintf(outfile, "copy substring ");
4005 PCHARSV(cnptr, 0, -1, outfile);
4006 fprintf(outfile, " failed %d\n", rc);
4007 }
4008 else
4009 {
4010 fprintf(outfile, " C ");
4011 PCHARSV(copybuffer, 0, rc, outfile);
4012 fprintf(outfile, " (%d) ", rc);
4013 PCHARSV(cnptr, 0, -1, outfile);
4014 putc('\n', outfile);
4015 }
4016
4017 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4018 }
4019
4020 for (i = 0; i < 32; i++)
4021 {
4022 if ((getstrings & (1 << i)) != 0)
4023 {
4024 int rc;
4025 const char *substring;
4026 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4027 if (rc < 0)
4028 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4029 else
4030 {
4031 fprintf(outfile, "%2dG ", i);
4032 PCHARSV(substring, 0, rc, outfile);
4033 fprintf(outfile, " (%d)\n", rc);
4034 PCRE_FREE_SUBSTRING(substring);
4035 }
4036 }
4037 }
4038
4039 gnptr = getnames;
4040 for (;;)
4041 {
4042 int rc;
4043 const char *substring;
4044
4045 if (use_pcre16)
4046 {
4047 if (*(pcre_uint16 *)gnptr == 0) break;
4048 }
4049 else
4050 {
4051 if (*(pcre_uint8 *)gnptr == 0) break;
4052 }
4053
4054 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4055 gnptr, &substring);
4056 if (rc < 0)
4057 {
4058 fprintf(outfile, "get substring ");
4059 PCHARSV(gnptr, 0, -1, outfile);
4060 fprintf(outfile, " failed %d\n", rc);
4061 }
4062 else
4063 {
4064 fprintf(outfile, " G ");
4065 PCHARSV(substring, 0, rc, outfile);
4066 fprintf(outfile, " (%d) ", rc);
4067 PCHARSV(gnptr, 0, -1, outfile);
4068 PCRE_FREE_SUBSTRING(substring);
4069 putc('\n', outfile);
4070 }
4071
4072 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4073 }
4074
4075 if (getlist)
4076 {
4077 int rc;
4078 const char **stringlist;
4079 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4080 if (rc < 0)
4081 fprintf(outfile, "get substring list failed %d\n", rc);
4082 else
4083 {
4084 for (i = 0; i < count; i++)
4085 {
4086 fprintf(outfile, "%2dL ", i);
4087 PCHARSV(stringlist[i], 0, -1, outfile);
4088 putc('\n', outfile);
4089 }
4090 if (stringlist[i] != NULL)
4091 fprintf(outfile, "string list not terminated by NULL\n");
4092 PCRE_FREE_SUBSTRING_LIST(stringlist);
4093 }
4094 }
4095 }
4096
4097 /* There was a partial match */
4098
4099 else if (count == PCRE_ERROR_PARTIAL)
4100 {
4101 if (markptr == NULL) fprintf(outfile, "Partial match");
4102 else
4103 {
4104 fprintf(outfile, "Partial match, mark=");
4105 PCHARSV(markptr, 0, -1, outfile);
4106 }
4107 if (use_size_offsets > 1)
4108 {
4109 fprintf(outfile, ": ");
4110 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4111 outfile);
4112 }
4113 fprintf(outfile, "\n");
4114 break; /* Out of the /g loop */
4115 }
4116
4117 /* Failed to match. If this is a /g or /G loop and we previously set
4118 g_notempty after a null match, this is not necessarily the end. We want
4119 to advance the start offset, and continue. We won't be at the end of the
4120 string - that was checked before setting g_notempty.
4121
4122 Complication arises in the case when the newline convention is "any",
4123 "crlf", or "anycrlf". If the previous match was at the end of a line
4124 terminated by CRLF, an advance of one character just passes the \r,
4125 whereas we should prefer the longer newline sequence, as does the code in
4126 pcre_exec(). Fudge the offset value to achieve this. We check for a
4127 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4128 find the default.
4129
4130 Otherwise, in the case of UTF-8 matching, the advance must be one
4131 character, not one byte. */
4132
4133 else
4134 {
4135 if (g_notempty != 0)
4136 {
4137 int onechar = 1;
4138 unsigned int obits = ((real_pcre *)re)->options;
4139 use_offsets[0] = start_offset;
4140 if ((obits & PCRE_NEWLINE_BITS) == 0)
4141 {
4142 int d;
4143 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4144 /* Note that these values are always the ASCII ones, even in
4145 EBCDIC environments. CR = 13, NL = 10. */
4146 obits = (d == 13)? PCRE_NEWLINE_CR :
4147 (d == 10)? PCRE_NEWLINE_LF :
4148 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4149 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4150 (d == -1)? PCRE_NEWLINE_ANY : 0;
4151 }
4152 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4153 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4154 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4155 &&
4156 start_offset < len - 1 &&
4157 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4158 (use_pcre16?
4159 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4160 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4161 :
4162 bptr[start_offset] == '\r'
4163 && bptr[start_offset + 1] == '\n')
4164 #elif defined SUPPORT_PCRE16
4165 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4166 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4167 #else
4168 bptr[start_offset] == '\r'
4169 && bptr[start_offset + 1] == '\n'
4170 #endif
4171 )
4172 onechar++;
4173 else if (use_utf)
4174 {
4175 while (start_offset + onechar < len)
4176 {
4177 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4178 onechar++;
4179 }
4180 }
4181 use_offsets[1] = start_offset + onechar;
4182 }
4183 else
4184 {
4185 switch(count)
4186 {
4187 case PCRE_ERROR_NOMATCH:
4188 if (gmatched == 0)
4189 {
4190 if (markptr == NULL)
4191 {
4192 fprintf(outfile, "No match\n");
4193 }
4194 else
4195 {
4196 fprintf(outfile, "No match, mark = ");
4197 PCHARSV(markptr, 0, -1, outfile);
4198 putc('\n', outfile);
4199 }
4200 }
4201 break;
4202
4203 case PCRE_ERROR_BADUTF8:
4204 case PCRE_ERROR_SHORTUTF8:
4205 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4206 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4207 use_pcre16? "16" : "8");
4208 if (use_size_offsets >= 2)
4209 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4210 use_offsets[1]);
4211 fprintf(outfile, "\n");
4212 break;
4213
4214 case PCRE_ERROR_BADUTF8_OFFSET:
4215 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4216 use_pcre16? "16" : "8");
4217 break;
4218
4219 default:
4220 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4221 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4222 else
4223 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4224 break;
4225 }
4226
4227 break; /* Out of the /g loop */
4228 }
4229 }
4230
4231 /* If not /g or /G we are done */
4232
4233 if (!do_g && !do_G) break;
4234
4235 /* If we have matched an empty string, first check to see if we are at
4236 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4237 Perl's /g option does. This turns out to be rather cunning. First we set
4238 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4239 same point. If this fails (picked up above) we advance to the next
4240 character. */
4241
4242 g_notempty = 0;
4243
4244 if (use_offsets[0] == use_offsets[1])
4245 {
4246 if (use_offsets[0] == len) break;
4247 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4248 }
4249
4250 /* For /g, update the start offset, leaving the rest alone */
4251
4252 if (do_g) start_offset = use_offsets[1];
4253
4254 /* For /G, update the pointer and length */
4255
4256 else
4257 {
4258 bptr += use_offsets[1] * CHAR_SIZE;
4259 len -= use_offsets[1];
4260 }
4261 } /* End of loop for /g and /G */
4262
4263 NEXT_DATA: continue;
4264 } /* End of loop for data lines */
4265
4266 CONTINUE:
4267
4268 #if !defined NOPOSIX
4269 if (posix || do_posix) regfree(&preg);
4270 #endif
4271
4272 if (re != NULL) new_free(re);
4273 if (extra != NULL)
4274 {
4275 PCRE_FREE_STUDY(extra);
4276 }
4277 if (locale_set)
4278 {
4279 new_free((void *)tables);
4280 setlocale(LC_CTYPE, "C");
4281 locale_set = 0;
4282 }
4283 if (jit_stack != NULL)
4284 {
4285 PCRE_JIT_STACK_FREE(jit_stack);
4286 jit_stack = NULL;
4287 }
4288 }
4289
4290 if (infile == stdin) fprintf(outfile, "\n");
4291
4292 EXIT:
4293
4294 if (infile != NULL && infile != stdin) fclose(infile);
4295 if (outfile != NULL && outfile != stdout) fclose(outfile);
4296
4297 free(buffer);
4298 free(dbuffer);
4299 free(pbuffer);
4300 free(offsets);
4301
4302 #ifdef SUPPORT_PCRE16
4303 if (buffer16 != NULL) free(buffer16);
4304 #endif
4305
4306 return yield;
4307 }
4308
4309 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5