/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1030 - (show annotations)
Sat Sep 8 15:58:38 2012 UTC (7 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 134802 byte(s)
Fix -C option in pcretest for EBCDIC environments.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
/* Platform-dependent setup. This section defines INPUT_MODE and OUTPUT_MODE,
the fopen() mode strings used for the input and output files:

  Windows:      input "r",  output "wb"
  native z/OS:  input "r",  output "w"
  anything else: input "rb", output "wb"

On Windows it also maps isatty/fileno to the underscore-prefixed names (unless
they are already defined as macros) and, for Borland, maps _setmode to
setmode. On other systems it pulls in the headers needed for setrlimit(). */

#if defined(_WIN32) || defined(WIN32)

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* This is what Windows calls them, I'm told, though in some environments they
seem to be already defined, hence the #ifndefs. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#endif
123
/* PRIV(name) expands to its argument unchanged, so names wrapped in PRIV()
are used in this file without any added prefix. The definition is placed ahead
of the inclusion of pcre_internal.h below. */

#define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138 /* Configure internal macros to 16 bit mode. */
139 #define COMPILE_PCRE16
140 #endif
141
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
/* Prototypes for the pattern-printing functions, which are defined in a
separate file. One is declared for each library width that is supported.

Arguments:
  external_re    the compiled pattern to print
  f              the stream to write to
  print_lengths  flag passed through to the printer
                 (presumably selects whether item lengths are shown)

Note that both prototypes take a "pcre *", including the 16-bit one. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
155
156 /* We need access to some of the data tables that PCRE uses. So as not to have
157 to keep two copies, we include the source file here, changing the names of the
158 external symbols to prevent clashes. */
159
160 #define PCRE_INCLUDED
161
162 #include "pcre_tables.c"
163
164 /* The definition of the macro PRINTABLE, which determines whether to print an
165 output character as-is or as a hex value when showing compiled patterns, is
166 the same as in the printint.src file. We use it here in cases when the locale
167 has not been explicitly changed, so as to get consistent output from systems
168 that differ in their output from isprint() even in the "C" locale. */
169
/* PRINTABLE(c) is the locale-independent test: true for code points 64..254
in an EBCDIC build and 32..126 otherwise. Note that the argument is evaluated
twice, so it must not have side effects. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) uses isprint() once a locale has been set (locale_set non-zero),
and PRINTABLE() otherwise. isprint() is only defined for values that fit in an
unsigned char (or EOF), so anything outside 0..255 is reported as not
printable instead of being handed to isprint(). The argument may be evaluated
more than once. */

#define PRINTOK(c) \
  (locale_set? (((c) >= 0 && (c) <= 255)? isprint(c) : 0) : PRINTABLE(c))
177
/* POSIX wrapper support is disabled in 16-bit-only mode: when only the 16-bit
library is supported, NOPOSIX is forced on unless it was already defined. */

#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
#define NOPOSIX
#endif
182
183 /* It is possible to compile this test program without including support for
184 testing the POSIX interface, though this is not available via the standard
185 Makefile. */
186
187 #if !defined NOPOSIX
188 #include "pcreposix.h"
189 #endif
190
191 /* It is also possible, originally for the benefit of a version that was
192 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
193 NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
194 automatically cut out the UTF support if PCRE is built without it. */
195
/* If the library was built without UTF support (SUPPORT_UTF is not defined),
make sure NOUTF is defined so that the UTF code here is cut out as well. */

#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
201
202 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
203 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
204 only from one place and is handled differently). I couldn't dream up any way of
205 using a single macro to do this in a generic way, because of the many different
206 argument requirements. We know that at least one of SUPPORT_PCRE8 and
207 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
208 use these in the definitions of generic macros.
209
210 **** Special note about the PCHARSxxx macros: the address of the string to be
211 printed is always given as two arguments: a base address followed by an offset.
212 The base address is cast to the correct data size for 8 or 16 bit data; the
213 offset is in units of this size. If the string were given as base+offset in one
214 argument, the casting might be incorrectly applied. */
215
216 #ifdef SUPPORT_PCRE8
217
/* 8-bit variants of the mode-specific call macros. Each one forwards to the
matching pcre_xxx() library function or to a local helper (pchars,
read_capture_name8), casting string arguments to char * where that is what the
8-bit API takes. Macros that produce a value assign it to their leading
argument (lv, p, re, rc, count, extra or n) and are meant to be used as
statements. */

/* Pass the string at base address p plus offset (in 8-bit units) to pchars()
and store the return value in lv. The offset is parenthesized so that an
expression argument is added to the base as a whole. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

/* As PCHARS8, but the return value of pchars() is discarded. */

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* The cn16 argument is accepted only so that the 8-bit and 16-bit forms have
the same argument list; it is not used here. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

/* Length of a zero-terminated 8-bit string, as an int. */

#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE(review): the second parameter (rc) is not referenced in the expansion.
The compiled pattern is taken from a variable called "re" that must be in
scope wherever this macro is used. Left as it is because the call sites are
not visible from here - confirm before changing. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

/* The two JIT stack macros are plain calls; PCRE_JIT_STACK_ALLOC8 is an
expression whose value is whatever pcre_jit_stack_alloc() returns. */

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)
293
294 #endif /* SUPPORT_PCRE8 */
295
296 /* -----------------------------------------------------------*/
297
298 #ifdef SUPPORT_PCRE16
299
/* 16-bit variants of the mode-specific call macros. The test program keeps
its variables in the 8-bit types (pcre *, pcre_extra *, and so on), so each
macro casts its arguments to the pcre16 types on the way in and, where a value
comes back, casts the result to the 8-bit type. Macros that produce a value
assign it to their leading argument and are meant to be used as statements.

Pointer arguments are cast as written: the cast applies to the leading operand
of the argument, so callers should pass a plain pointer expression. Size and
offset arguments are parenthesized before arithmetic is applied to them. */

/* Pass the string at base address p plus offset (in 16-bit units) to
pchars16() and store the return value in lv. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* As PCHARS16, but the return value of pchars16() is discarded. */

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* The cn8 argument is accepted only so that the 8-bit and 16-bit forms have
the same argument list; it is not used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

/* Length of a zero-terminated 16-bit string, as an int. */

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* In the two copy macros the buffer size is halved before it is passed on
(presumably converting a byte count into a count of 16-bit units). The size
argument is parenthesized so that an expression such as a + b is halved as a
whole. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE(review): the second parameter (rc) is not referenced in the expansion.
The compiled pattern is taken from a variable called "re" that must be in
scope wherever this macro is used. It is cast to pcre16 * in the same way as
in the other 16-bit macros. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

/* No cast here: pcre16_printint() is declared in this file as taking a
"pcre *". */

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

/* The two JIT stack macros are plain calls; PCRE_JIT_STACK_ALLOC16 is an
expression whose value is cast to pcre_jit_stack *. */

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382
383 #endif /* SUPPORT_PCRE16 */
384
385
386 /* ----- Both modes are supported; a runtime test is needed, except for
387 pcre_config(), and the JIT stack functions, when it doesn't matter which
388 version is called. ----- */
389
390 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
391
/* Dual-mode forms of the call macros: each one tests use_pcre16 at run time
and expands to the 16-bit or the 8-bit variant accordingly.

The statement-like macros are wrapped in do { ... } while (0) so that each
use, followed by its semicolon, is exactly one statement wherever it appears
(including as the unbraced body of an if with an else). CHAR_SIZE, STRLEN,
PCRE_CONFIG, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are expressions and are
left as such. */

/* Size in bytes of one character unit in the current mode. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

/* The 8-bit function is always used here; no run-time test is made. */

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)
543
544 /* ----- Only 8-bit mode is supported ----- */
545
546 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library available no run-time test is needed: every
generic macro is simply another name for its 8-bit variant, and a character
unit is one byte. */

#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_CONFIG                      pcre_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES                  pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8
573
574 /* ----- Only 16-bit mode is supported ----- */
575
576 #else
/* With only the 16-bit library available no run-time test is needed: every
generic macro is simply another name for its 16-bit variant, and a character
unit is two bytes. */

#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_CONFIG                      pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES                  pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
603 #endif
604
605 /* ----- End of mode-specific function call macros ----- */
606
607
608 /* Other parameters */
609
/* Make sure CLOCKS_PER_SEC exists. If <time.h> did not supply it, fall back
to CLK_TCK when that is defined, and to 100 otherwise. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Dimension of the DFA workspace; only defined when DFA support is compiled
in (NODFA not defined). */

#if !defined NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
625
626 /* Static variables */
627
/* File-scope state shared by the functions of this program. Objects that have
no explicit initializer start out as zero (or NULL), as every static object
does. */

static FILE *outfile;                 /* output stream */
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int jit_was_used;
static int locale_set = 0;            /* tested by PRINTOK() */
static int show_malloc;
static int use_utf;
static size_t gotten_store;
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;
643
644 /* The buffers grow automatically if very long input lines are encountered. */
645
646 static int buffer_size = 50000;
647 static pcre_uint8 *buffer = NULL;
648 static pcre_uint8 *dbuffer = NULL;
649 static pcre_uint8 *pbuffer = NULL;
650
651 /* Another buffer is needed for translation to 16-bit character strings. It
652 will be obtained and extended as required. */
653
#ifdef SUPPORT_PCRE16

/* The 16-bit translation buffer: no storage is held initially. */

static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;

#ifdef SUPPORT_PCRE8

/* We need the table of operator lengths that is used for 16-bit compiling, in
order to swap bytes in a pattern for saving/reloading testing. Luckily, the
data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
appropriately for the 16-bit world. Just as a safety check, make sure that
COMPILE_PCRE16 is *not* set. */

#ifdef COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* Re-express LINK_SIZE for the 16-bit table: a value of 2 becomes 1, and a
value of 3 or 4 becomes 2. Any other value is rejected at compile time. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

/* IMM2_SIZE is likewise redefined, to 1, before OP_LENGTHS is expanded. */

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* The table itself, built by expanding OP_LENGTHS with whatever values of
LINK_SIZE and IMM2_SIZE are in force at this point. */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif /* SUPPORT_PCRE16 */
687
/* Run-time selector between the two libraries, tested by the dual-mode call
macros. If we have 8-bit support it starts out as 0 (8-bit mode); if there is
also 16-bit support, it can be changed by an option. If there is no 8-bit
support, there must be 16-bit support, so it starts out as 1. */

#ifdef SUPPORT_PCRE8
static int use_pcre16 = 0;
#else
static int use_pcre16 = 1;
#endif
697
698 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
699
700 static int jit_study_bits[] =
701 {
702 PCRE_STUDY_JIT_COMPILE,
703 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
704 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
706 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
709 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
710 };
711
712 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
713 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
714
/* Textual explanations for runtime error codes. The position of each entry is
shown alongside it; the table is presumably indexed by the negated error code
(entry 1 is the NOMATCH slot) - confirm at the point of use. A NULL entry
marks a code that has no text here, either because it is handled specially or
because it is never returned by the matching functions. */

static const char *errtexts[] = {
  NULL,  /*  0: no error */
  NULL,  /*  1: NOMATCH is handled specially */
  "NULL argument passed",                                   /*  2 */
  "bad option value",                                       /*  3 */
  "magic number missing",                                   /*  4 */
  "unknown opcode - pattern overwritten?",                  /*  5 */
  "no more memory",                                         /*  6 */
  NULL,  /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                                   /*  8 */
  "callout error code",                                     /*  9 */
  NULL,  /* 10: BADUTF8/16 is handled specially */
  NULL,  /* 11: BADUTF8/16 offset is handled specially */
  NULL,  /* 12: PARTIAL is handled specially */
  "not used - internal error",                              /* 13 */
  "internal error - pattern overwritten?",                  /* 14 */
  "bad count value",                                        /* 15 */
  "item unsupported for DFA matching",                      /* 16 */
  "backreference condition or recursion test not supported for DFA matching",
                                                            /* 17 */
  "match limit not supported for DFA matching",             /* 18 */
  "workspace size exceeded in DFA matching",                /* 19 */
  "too much recursion for DFA matching",                    /* 20 */
  "recursion limit exceeded",                               /* 21 */
  "not used - internal error",                              /* 22 */
  "invalid combination of newline options",                 /* 23 */
  "bad offset value",                                       /* 24 */
  NULL,  /* 25: SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",          /* 26 */
  "JIT stack limit reached",                                /* 27 */
  "pattern compiled in wrong mode: 8-bit/16-bit error",     /* 28 */
  "pattern compiled with other endianness",                 /* 29 */
  "invalid data in workspace for DFA restart"               /* 30 */
};
750
751
752 /*************************************************
753 * Alternate character tables *
754 *************************************************/
755
756 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
757 using the default tables of the library. However, the T option can be used to
758 select alternate sets of tables, for different kinds of testing. Note also that
759 the L (locale) option also adjusts the tables. */
760
761 /* This is the set of tables distributed as default with PCRE. It recognizes
762 only ASCII characters. */
763
764 static const pcre_uint8 tables0[] = {
765
766 /* This table is a lower casing table. */
767
768 0, 1, 2, 3, 4, 5, 6, 7,
769 8, 9, 10, 11, 12, 13, 14, 15,
770 16, 17, 18, 19, 20, 21, 22, 23,
771 24, 25, 26, 27, 28, 29, 30, 31,
772 32, 33, 34, 35, 36, 37, 38, 39,
773 40, 41, 42, 43, 44, 45, 46, 47,
774 48, 49, 50, 51, 52, 53, 54, 55,
775 56, 57, 58, 59, 60, 61, 62, 63,
776 64, 97, 98, 99,100,101,102,103,
777 104,105,106,107,108,109,110,111,
778 112,113,114,115,116,117,118,119,
779 120,121,122, 91, 92, 93, 94, 95,
780 96, 97, 98, 99,100,101,102,103,
781 104,105,106,107,108,109,110,111,
782 112,113,114,115,116,117,118,119,
783 120,121,122,123,124,125,126,127,
784 128,129,130,131,132,133,134,135,
785 136,137,138,139,140,141,142,143,
786 144,145,146,147,148,149,150,151,
787 152,153,154,155,156,157,158,159,
788 160,161,162,163,164,165,166,167,
789 168,169,170,171,172,173,174,175,
790 176,177,178,179,180,181,182,183,
791 184,185,186,187,188,189,190,191,
792 192,193,194,195,196,197,198,199,
793 200,201,202,203,204,205,206,207,
794 208,209,210,211,212,213,214,215,
795 216,217,218,219,220,221,222,223,
796 224,225,226,227,228,229,230,231,
797 232,233,234,235,236,237,238,239,
798 240,241,242,243,244,245,246,247,
799 248,249,250,251,252,253,254,255,
800
801 /* This table is a case flipping table. */
802
803 0, 1, 2, 3, 4, 5, 6, 7,
804 8, 9, 10, 11, 12, 13, 14, 15,
805 16, 17, 18, 19, 20, 21, 22, 23,
806 24, 25, 26, 27, 28, 29, 30, 31,
807 32, 33, 34, 35, 36, 37, 38, 39,
808 40, 41, 42, 43, 44, 45, 46, 47,
809 48, 49, 50, 51, 52, 53, 54, 55,
810 56, 57, 58, 59, 60, 61, 62, 63,
811 64, 97, 98, 99,100,101,102,103,
812 104,105,106,107,108,109,110,111,
813 112,113,114,115,116,117,118,119,
814 120,121,122, 91, 92, 93, 94, 95,
815 96, 65, 66, 67, 68, 69, 70, 71,
816 72, 73, 74, 75, 76, 77, 78, 79,
817 80, 81, 82, 83, 84, 85, 86, 87,
818 88, 89, 90,123,124,125,126,127,
819 128,129,130,131,132,133,134,135,
820 136,137,138,139,140,141,142,143,
821 144,145,146,147,148,149,150,151,
822 152,153,154,155,156,157,158,159,
823 160,161,162,163,164,165,166,167,
824 168,169,170,171,172,173,174,175,
825 176,177,178,179,180,181,182,183,
826 184,185,186,187,188,189,190,191,
827 192,193,194,195,196,197,198,199,
828 200,201,202,203,204,205,206,207,
829 208,209,210,211,212,213,214,215,
830 216,217,218,219,220,221,222,223,
831 224,225,226,227,228,229,230,231,
832 232,233,234,235,236,237,238,239,
833 240,241,242,243,244,245,246,247,
834 248,249,250,251,252,253,254,255,
835
836 /* This table contains bit maps for various character classes. Each map is 32
837 bytes long and the bits run from the least significant end of each byte. The
838 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
839 graph, print, punct, and cntrl. Other classes are built from combinations. */
840
841 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
842 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845
846 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850
851 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
858 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860
861 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865
866 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
867 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
868 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870
871 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
872 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875
876 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
877 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880
881 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
882 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885
886 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890
891 /* This table identifies various classes of character by individual bits:
892 0x01 white space character
893 0x02 letter
894 0x04 decimal digit
895 0x08 hexadecimal digit
896 0x10 alphanumeric or '_'
897 0x80 regular expression metacharacter or binary zero
898 */
899
900 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
901 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
902 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
903 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
904 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
905 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
906 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
907 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
908 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
909 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
910 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
911 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
912 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
913 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
914 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
915 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
924 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
925 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
926 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
927 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
928 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
929 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
930 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
931 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
932
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. */
936
937 static const pcre_uint8 tables1[] = {
938 0,1,2,3,4,5,6,7,
939 8,9,10,11,12,13,14,15,
940 16,17,18,19,20,21,22,23,
941 24,25,26,27,28,29,30,31,
942 32,33,34,35,36,37,38,39,
943 40,41,42,43,44,45,46,47,
944 48,49,50,51,52,53,54,55,
945 56,57,58,59,60,61,62,63,
946 64,97,98,99,100,101,102,103,
947 104,105,106,107,108,109,110,111,
948 112,113,114,115,116,117,118,119,
949 120,121,122,91,92,93,94,95,
950 96,97,98,99,100,101,102,103,
951 104,105,106,107,108,109,110,111,
952 112,113,114,115,116,117,118,119,
953 120,121,122,123,124,125,126,127,
954 128,129,130,131,132,133,134,135,
955 136,137,138,139,140,141,142,143,
956 144,145,146,147,148,149,150,151,
957 152,153,154,155,156,157,158,159,
958 160,161,162,163,164,165,166,167,
959 168,169,170,171,172,173,174,175,
960 176,177,178,179,180,181,182,183,
961 184,185,186,187,188,189,190,191,
962 224,225,226,227,228,229,230,231,
963 232,233,234,235,236,237,238,239,
964 240,241,242,243,244,245,246,215,
965 248,249,250,251,252,253,254,223,
966 224,225,226,227,228,229,230,231,
967 232,233,234,235,236,237,238,239,
968 240,241,242,243,244,245,246,247,
969 248,249,250,251,252,253,254,255,
970 0,1,2,3,4,5,6,7,
971 8,9,10,11,12,13,14,15,
972 16,17,18,19,20,21,22,23,
973 24,25,26,27,28,29,30,31,
974 32,33,34,35,36,37,38,39,
975 40,41,42,43,44,45,46,47,
976 48,49,50,51,52,53,54,55,
977 56,57,58,59,60,61,62,63,
978 64,97,98,99,100,101,102,103,
979 104,105,106,107,108,109,110,111,
980 112,113,114,115,116,117,118,119,
981 120,121,122,91,92,93,94,95,
982 96,65,66,67,68,69,70,71,
983 72,73,74,75,76,77,78,79,
984 80,81,82,83,84,85,86,87,
985 88,89,90,123,124,125,126,127,
986 128,129,130,131,132,133,134,135,
987 136,137,138,139,140,141,142,143,
988 144,145,146,147,148,149,150,151,
989 152,153,154,155,156,157,158,159,
990 160,161,162,163,164,165,166,167,
991 168,169,170,171,172,173,174,175,
992 176,177,178,179,180,181,182,183,
993 184,185,186,187,188,189,190,191,
994 224,225,226,227,228,229,230,231,
995 232,233,234,235,236,237,238,239,
996 240,241,242,243,244,245,246,215,
997 248,249,250,251,252,253,254,223,
998 192,193,194,195,196,197,198,199,
999 200,201,202,203,204,205,206,207,
1000 208,209,210,211,212,213,214,247,
1001 216,217,218,219,220,221,222,255,
1002 0,62,0,0,1,0,0,0,
1003 0,0,0,0,0,0,0,0,
1004 32,0,0,0,1,0,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,255,3,
1007 126,0,0,0,126,0,0,0,
1008 0,0,0,0,0,0,0,0,
1009 0,0,0,0,0,0,0,0,
1010 0,0,0,0,0,0,255,3,
1011 0,0,0,0,0,0,0,0,
1012 0,0,0,0,0,0,12,2,
1013 0,0,0,0,0,0,0,0,
1014 0,0,0,0,0,0,0,0,
1015 254,255,255,7,0,0,0,0,
1016 0,0,0,0,0,0,0,0,
1017 255,255,127,127,0,0,0,0,
1018 0,0,0,0,0,0,0,0,
1019 0,0,0,0,254,255,255,7,
1020 0,0,0,0,0,4,32,4,
1021 0,0,0,128,255,255,127,255,
1022 0,0,0,0,0,0,255,3,
1023 254,255,255,135,254,255,255,7,
1024 0,0,0,0,0,4,44,6,
1025 255,255,127,255,255,255,127,255,
1026 0,0,0,0,254,255,255,255,
1027 255,255,255,255,255,255,255,127,
1028 0,0,0,0,254,255,255,255,
1029 255,255,255,255,255,255,255,255,
1030 0,2,0,0,255,255,255,255,
1031 255,255,255,255,255,255,255,127,
1032 0,0,0,0,255,255,255,255,
1033 255,255,255,255,255,255,255,255,
1034 0,0,0,0,254,255,0,252,
1035 1,0,0,248,1,0,0,120,
1036 0,0,0,0,254,255,255,255,
1037 0,0,128,0,0,0,128,0,
1038 255,255,255,255,0,0,0,0,
1039 0,0,0,0,0,0,0,128,
1040 255,255,255,255,0,0,0,0,
1041 0,0,0,0,0,0,0,0,
1042 128,0,0,0,0,0,0,0,
1043 0,1,1,0,1,1,0,0,
1044 0,0,0,0,0,0,0,0,
1045 0,0,0,0,0,0,0,0,
1046 1,0,0,0,128,0,0,0,
1047 128,128,128,128,0,0,128,0,
1048 28,28,28,28,28,28,28,28,
1049 28,28,0,0,0,0,0,128,
1050 0,26,26,26,26,26,26,18,
1051 18,18,18,18,18,18,18,18,
1052 18,18,18,18,18,18,18,18,
1053 18,18,18,128,128,0,128,16,
1054 0,26,26,26,26,26,26,18,
1055 18,18,18,18,18,18,18,18,
1056 18,18,18,18,18,18,18,18,
1057 18,18,18,128,128,0,0,0,
1058 0,0,0,0,0,1,0,0,
1059 0,0,0,0,0,0,0,0,
1060 0,0,0,0,0,0,0,0,
1061 0,0,0,0,0,0,0,0,
1062 1,0,0,0,0,0,0,0,
1063 0,0,18,0,0,0,0,0,
1064 0,0,20,20,0,18,0,0,
1065 0,20,18,0,0,0,0,0,
1066 18,18,18,18,18,18,18,18,
1067 18,18,18,18,18,18,18,18,
1068 18,18,18,18,18,18,18,0,
1069 18,18,18,18,18,18,18,18,
1070 18,18,18,18,18,18,18,18,
1071 18,18,18,18,18,18,18,18,
1072 18,18,18,18,18,18,18,0,
1073 18,18,18,18,18,18,18,18
1074 };
1075
1076
1077
1078
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror() on old systems (e.g. SunOS4) whose C library
lacks it. The message is looked up in the traditional sys_errlist[] table.

Argument:   n   an error number
Returns:    the message for n, or a fixed "unknown" text when n lies outside
            the range 0 to sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1098
1099
1100
1101 /*************************************************
1102 * Print newline configuration *
1103 *************************************************/
1104
1105 /*
1106 Argument: the return code from PCRE_CONFIG_NEWLINE
1107 Returns: nothing
1108 */
1109
1110 static void
1111 print_newline_config(int rc)
1112 {
1113 const char *s = NULL;
1114 printf(" Newline sequence is ");
1115 switch(rc)
1116 {
1117 case CHAR_CR: s = "CR"; break;
1118 case CHAR_LF: s = "LF"; break;
1119 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1120 case -1: s = "ANY"; break;
1121 case -2: s = "ANYCRLF"; break;
1122
1123 default:
1124 printf("a non-standard value: 0x%04x\n", rc);
1125 return;
1126 }
1127
1128 printf("%s\n", s);
1129 }
1130
1131
1132
1133 /*************************************************
1134 * JIT memory callback *
1135 *************************************************/
1136
1137 static pcre_jit_stack* jit_callback(void *arg)
1138 {
1139 jit_was_used = TRUE;
1140 return (pcre_jit_stack *)arg;
1141 }
1142
1143
1144 #if !defined NOUTF || defined SUPPORT_PCRE16
1145 /*************************************************
1146 * Convert UTF-8 string to value *
1147 *************************************************/
1148
1149 /* This function takes one or more bytes that represents a UTF-8 character,
1150 and returns the value of the character.
1151
1152 Argument:
1153 utf8bytes a pointer to the byte vector
1154 vptr a pointer to an int to receive the value
1155
1156 Returns: > 0 => the number of bytes consumed
1157 -6 to 0 => malformed UTF-8 character at offset = (-return)
1158 */
1159
1160 static int
1161 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1162 {
1163 int c = *utf8bytes++;
1164 int d = c;
1165 int i, j, s;
1166
1167 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1168 {
1169 if ((d & 0x80) == 0) break;
1170 d <<= 1;
1171 }
1172
1173 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1174 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1175
1176 /* i now has a value in the range 1-5 */
1177
1178 s = 6*i;
1179 d = (c & utf8_table3[i]) << s;
1180
1181 for (j = 0; j < i; j++)
1182 {
1183 c = *utf8bytes++;
1184 if ((c & 0xc0) != 0x80) return -(j+1);
1185 s -= 6;
1186 d |= (c & 0x3f) << s;
1187 }
1188
1189 /* Check that encoding was the correct unique one */
1190
1191 for (j = 0; j < utf8_table1_size; j++)
1192 if (d <= utf8_table1[j]) break;
1193 if (j != i) return -(i+1);
1194
1195 /* Valid value */
1196
1197 *vptr = d;
1198 return i+1;
1199 }
1200 #endif /* NOUTF || SUPPORT_PCRE16 */
1201
1202
1203
1204 #if !defined NOUTF || defined SUPPORT_PCRE16
1205 /*************************************************
1206 * Convert character value to UTF-8 *
1207 *************************************************/
1208
1209 /* This function takes an integer value in the range 0 - 0x7fffffff
1210 and encodes it as a UTF-8 character in 0 to 6 bytes.
1211
1212 Arguments:
1213 cvalue the character value
1214 utf8bytes pointer to buffer for result - at least 6 bytes long
1215
1216 Returns: number of characters placed in the buffer
1217 */
1218
1219 static int
1220 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1221 {
1222 register int i, j;
1223 for (i = 0; i < utf8_table1_size; i++)
1224 if (cvalue <= utf8_table1[i]) break;
1225 utf8bytes += i;
1226 for (j = i; j > 0; j--)
1227 {
1228 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1229 cvalue >>= 6;
1230 }
1231 *utf8bytes = utf8_table2[i] | cvalue;
1232 return i + 1;
1233 }
1234 #endif
1235
1236
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Converts a byte string into 16-bit units, leaving the zero-terminated
result in buffer16, which is reallocated when it is too small. Twice the
8-bit size (plus a terminator) is always enough: in non-UTF mode each byte
becomes one unit, and a UTF-8 character of up to 3 bytes becomes one unit
while a 4-byte character becomes two.

This function does not object to surrogate values. That is deliberate; it
makes it possible to construct invalid UTF-16 strings in order to test that
they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are
always in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

/* Grow the output buffer if necessary; the old contents are not needed. */

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is permitted only in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }
else
  {
  /* A non-UTF pattern: each byte is widened to one 16-bit unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1315
1316
1317 /*************************************************
1318 * Read or extend an input line *
1319 *************************************************/
1320
1321 /* Input lines are read into buffer, but both patterns and data lines can be
1322 continued over multiple input lines. In addition, if the buffer fills up, we
1323 want to automatically expand it so as to be able to handle extremely large
1324 lines that are needed for certain stress tests. When the input buffer is
1325 expanded, the other two buffers must also be expanded likewise, and the
1326 contents of pbuffer, which are a copy of the input for callouts, must be
1327 preserved (for when expansion happens for a data line). This is not the most
1328 optimal way of handling this, but hey, this is just a test program!
1329
1330 Arguments:
1331 f the file to read
1332 start where in buffer to start (this *must* be within buffer)
1333 prompt for stdin or readline()
1334
1335 Returns: pointer to the start of new data
1336 could be a copy of start, or could be moved
1337 NULL if no data read and EOF reached
1338 */
1339
1340 static pcre_uint8 *
1341 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1342 {
1343 pcre_uint8 *here = start;
1344
1345 for (;;)
1346 {
1347 size_t rlen = (size_t)(buffer_size - (here - buffer));
1348
1349 if (rlen > 1000)
1350 {
1351 int dlen;
1352
1353 /* If libreadline or libedit support is required, use readline() to read a
1354 line if the input is a terminal. Note that readline() removes the trailing
1355 newline, so we must put it back again, to be compatible with fgets(). */
1356
1357 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1358 if (isatty(fileno(f)))
1359 {
1360 size_t len;
1361 char *s = readline(prompt);
1362 if (s == NULL) return (here == start)? NULL : start;
1363 len = strlen(s);
1364 if (len > 0) add_history(s);
1365 if (len > rlen - 1) len = rlen - 1;
1366 memcpy(here, s, len);
1367 here[len] = '\n';
1368 here[len+1] = 0;
1369 free(s);
1370 }
1371 else
1372 #endif
1373
1374 /* Read the next line by normal means, prompting if the file is stdin. */
1375
1376 {
1377 if (f == stdin) printf("%s", prompt);
1378 if (fgets((char *)here, rlen, f) == NULL)
1379 return (here == start)? NULL : start;
1380 }
1381
1382 dlen = (int)strlen((char *)here);
1383 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1384 here += dlen;
1385 }
1386
1387 else
1388 {
1389 int new_buffer_size = 2*buffer_size;
1390 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1391 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1392 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1393
1394 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1395 {
1396 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1397 exit(1);
1398 }
1399
1400 memcpy(new_buffer, buffer, buffer_size);
1401 memcpy(new_pbuffer, pbuffer, buffer_size);
1402
1403 buffer_size = new_buffer_size;
1404
1405 start = new_buffer + (start - buffer);
1406 here = new_buffer + (here - buffer);
1407
1408 free(buffer);
1409 free(dbuffer);
1410 free(pbuffer);
1411
1412 buffer = new_buffer;
1413 dbuffer = new_dbuffer;
1414 pbuffer = new_pbuffer;
1415 }
1416 }
1417
1418 return NULL; /* Control never gets here */
1419 }
1420
1421
1422
1423 /*************************************************
1424 * Read number from string *
1425 *************************************************/
1426
1427 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1428 around with conditional compilation, just do the job by hand. It is only used
1429 for unpicking arguments, so just keep it simple.
1430
1431 Arguments:
1432 str string to be converted
1433 endptr where to put the end pointer
1434
1435 Returns: the unsigned long
1436 */
1437
1438 static int
1439 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1440 {
1441 int result = 0;
1442 while(*str != 0 && isspace(*str)) str++;
1443 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1444 *endptr = str;
1445 return(result);
1446 }
1447
1448
1449
1450 /*************************************************
1451 * Print one character *
1452 *************************************************/
1453
1454 /* Print a single character either literally, or as a hex escape. */
1455
1456 static int pchar(int c, FILE *f)
1457 {
1458 if (PRINTOK(c))
1459 {
1460 if (f != NULL) fprintf(f, "%c", c);
1461 return 1;
1462 }
1463
1464 if (c < 0x100)
1465 {
1466 if (use_utf)
1467 {
1468 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1469 return 6;
1470 }
1471 else
1472 {
1473 if (f != NULL) fprintf(f, "\\x%02x", c);
1474 return 4;
1475 }
1476 }
1477
1478 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1479 return (c <= 0x000000ff)? 6 :
1480 (c <= 0x00000fff)? 7 :
1481 (c <= 0x0000ffff)? 8 :
1482 (c <= 0x000fffff)? 9 : 10;
1483 }
1484
1485
1486
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Prints a byte string one character at a time via pchar(). When use_utf is
set, UTF-8 sequences are decoded first; a byte that does not start a valid
sequence, or whose sequence would run past the end of the string, is printed
on its own.

Arguments:
  p          the string
  length     number of bytes, or negative to use strlen()
  f          the output file, or NULL to just count

Returns:     the number of characters printed (or that would be printed)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int total = 0;
int ch = 0;
int left = (length < 0)? (int)strlen((char *)p) : length;

while (left > 0)
  {
  int used = 0;      /* Bytes taken by this character; 0 = not yet decided */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &ch);
    if (rc > 0 && rc <= left) used = rc;   /* Mustn't run over the end */
    }
#endif

  if (used == 0)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  left -= used;
  total += pchar(ch, f);
  }

return total;
}
#endif
1525
1526
1527
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Counts the 16-bit units that precede the terminating zero unit.

Argument:   p   the string
Returns:    the number of units, not counting the terminator
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1540
1541
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Prints a 16-bit string one character at a time via pchar(). When use_utf is
set, a high surrogate followed by a low surrogate is combined into a single
character; an unpaired surrogate is printed as it stands.

Arguments:
  p          the string
  length     number of 16-bit units, or negative to use strlen16()
  f          the output file, or NULL to just count

Returns:     the number of characters printed (or that would be printed)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* Low surrogates run from 0xDC00 to 0xDFFF inclusive, so the upper bound
    must include 0xDFFF itself. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1578
1579
1580
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copies a run of alphanumeric characters from p to the store at *pp, adds a
terminating zero, and writes one further zero after it that is not counted
as consumed (presumably an end marker for the collected names - confirm
against the caller). A message is written to outfile if the compiled pattern
has no group with that name.

Arguments:
  p          the text to read the name from
  pp         points to the pointer to where the name is to be stored;
             updated to point just past the stored name's terminator
  re         the compiled pattern

Returns:     pointer to the first character after the name in the input
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1604
1605
1606
#ifdef SUPPORT_PCRE16
/*************************************************
*    Read a capture name (16-bit) and check it   *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric byte from p is
widened into one 16-bit unit at *pp, followed by a terminating zero and one
further zero that is not counted as consumed (presumably an end marker for
the collected names - confirm against the caller). A message is written to
outfile if the compiled pattern has no group with that name.

Arguments:
  p          the 8-bit text to read the name from
  pp         points to the pointer to where the 16-bit name is to be stored;
             updated to point just past the stored name's terminator
  re         the compiled pattern

Returns:     pointer to the first character after the name in the input
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1631
1632
1633
1634 /*************************************************
1635 * Callout function *
1636 *************************************************/
1637
1638 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1639 the match. Yield zero unless more callouts than the fail count, or the callout
1640 data is not zero. */
1641
1642 static int callout(pcre_callout_block *cb)
1643 {
1644 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1645 int i, pre_start, post_start, subject_length;
1646
1647 if (callout_extra)
1648 {
1649 fprintf(f, "Callout %d: last capture = %d\n",
1650 cb->callout_number, cb->capture_last);
1651
1652 for (i = 0; i < cb->capture_top * 2; i += 2)
1653 {
1654 if (cb->offset_vector[i] < 0)
1655 fprintf(f, "%2d: <unset>\n", i/2);
1656 else
1657 {
1658 fprintf(f, "%2d: ", i/2);
1659 PCHARSV(cb->subject, cb->offset_vector[i],
1660 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1661 fprintf(f, "\n");
1662 }
1663 }
1664 }
1665
1666 /* Re-print the subject in canonical form, the first time or if giving full
1667 datails. On subsequent calls in the same match, we use pchars just to find the
1668 printed lengths of the substrings. */
1669
1670 if (f != NULL) fprintf(f, "--->");
1671
1672 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1673 PCHARS(post_start, cb->subject, cb->start_match,
1674 cb->current_position - cb->start_match, f);
1675
1676 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1677
1678 PCHARSV(cb->subject, cb->current_position,
1679 cb->subject_length - cb->current_position, f);
1680
1681 if (f != NULL) fprintf(f, "\n");
1682
1683 /* Always print appropriate indicators, with callout number if not already
1684 shown. For automatic callouts, show the pattern offset. */
1685
1686 if (cb->callout_number == 255)
1687 {
1688 fprintf(outfile, "%+3d ", cb->pattern_position);
1689 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1690 }
1691 else
1692 {
1693 if (callout_extra) fprintf(outfile, " ");
1694 else fprintf(outfile, "%3d ", cb->callout_number);
1695 }
1696
1697 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1698 fprintf(outfile, "^");
1699
1700 if (post_start > 0)
1701 {
1702 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1703 fprintf(outfile, "^");
1704 }
1705
1706 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1707 fprintf(outfile, " ");
1708
1709 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1710 pbuffer + cb->pattern_position);
1711
1712 fprintf(outfile, "\n");
1713 first_callout = 0;
1714
1715 if (cb->mark != last_callout_mark)
1716 {
1717 if (cb->mark == NULL)
1718 fprintf(outfile, "Latest Mark: <unset>\n");
1719 else
1720 {
1721 fprintf(outfile, "Latest Mark: ");
1722 PCHARSV(cb->mark, 0, -1, outfile);
1723 putc('\n', outfile);
1724 }
1725 last_callout_mark = cb->mark;
1726 }
1727
1728 if (cb->callout_data != NULL)
1729 {
1730 int callout_data = *((int *)(cb->callout_data));
1731 if (callout_data != 0)
1732 {
1733 fprintf(outfile, "Callout data = %d\n", callout_data);
1734 return callout_data;
1735 }
1736 }
1737
1738 return (cb->callout_number != callout_fail_id)? 0 :
1739 (++callout_count >= callout_fail_count)? 1 : 0;
1740 }
1741
1742
1743 /*************************************************
1744 * Local malloc functions *
1745 *************************************************/
1746
1747 /* Alternative malloc function, to test functionality and save the size of a
1748 compiled re, which is the first store request that pcre_compile() makes. The
1749 show_malloc variable is set only during matching. */
1750
1751 static void *new_malloc(size_t size)
1752 {
1753 void *block = malloc(size);
1754 gotten_store = size;
1755 if (first_gotten_store == 0) first_gotten_store = size;
1756 if (show_malloc)
1757 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1758 return block;
1759 }
1760
1761 static void new_free(void *block)
1762 {
1763 if (show_malloc)
1764 fprintf(outfile, "free %p\n", block);
1765 free(block);
1766 }
1767
1768 /* For recursion malloc/free, to test stacking calls */
1769
1770 static void *stack_malloc(size_t size)
1771 {
1772 void *block = malloc(size);
1773 if (show_malloc)
1774 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1775 return block;
1776 }
1777
1778 static void stack_free(void *block)
1779 {
1780 if (show_malloc)
1781 fprintf(outfile, "stack_free %p\n", block);
1782 free(block);
1783 }
1784
1785
1786 /*************************************************
1787 * Call pcre_fullinfo() *
1788 *************************************************/
1789
1790 /* Get one piece of information from the pcre_fullinfo() function. When only
1791 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1792 value, but the code is defensive.
1793
1794 Arguments:
1795 re compiled regex
1796 study study data
1797 option PCRE_INFO_xxx option
1798 ptr where to put the data
1799
1800 Returns: 0 when OK, < 0 on error
1801 */
1802
1803 static int
1804 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1805 {
1806 int rc;
1807
1808 if (use_pcre16)
1809 #ifdef SUPPORT_PCRE16
1810 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1811 #else
1812 rc = PCRE_ERROR_BADMODE;
1813 #endif
1814 else
1815 #ifdef SUPPORT_PCRE8
1816 rc = pcre_fullinfo(re, study, option, ptr);
1817 #else
1818 rc = PCRE_ERROR_BADMODE;
1819 #endif
1820
1821 if (rc < 0)
1822 {
1823 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1824 use_pcre16? "16" : "", option);
1825 if (rc == PCRE_ERROR_BADMODE)
1826 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1827 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1828 }
1829
1830 return rc;
1831 }
1832
1833
1834
1835 /*************************************************
1836 * Swap byte functions *
1837 *************************************************/
1838
1839 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1840 value, respectively.
1841
1842 Arguments:
1843 value any number
1844
1845 Returns: the byte swapped value
1846 */
1847
1848 static pcre_uint32
1849 swap_uint32(pcre_uint32 value)
1850 {
1851 return ((value & 0x000000ff) << 24) |
1852 ((value & 0x0000ff00) << 8) |
1853 ((value & 0x00ff0000) >> 8) |
1854 (value >> 24);
1855 }
1856
1857 static pcre_uint16
1858 swap_uint16(pcre_uint16 value)
1859 {
1860 return (value >> 8) | (value << 8);
1861 }
1862
1863
1864
1865 /*************************************************
1866 * Flip bytes in a compiled pattern *
1867 *************************************************/
1868
1869 /* This function is called if the 'F' option was present on a pattern that is
1870 to be written to a file. We flip the bytes of all the integer fields in the
1871 regex data block and the study block. In 16-bit mode this also flips relevant
1872 bytes in the pattern itself. This is to make it possible to test PCRE's
1873 ability to reload byte-flipped patterns, e.g. those compiled on a different
1874 architecture. */
1875
1876 static void
1877 regexflip(pcre *ere, pcre_extra *extra)
1878 {
1879 REAL_PCRE *re = (REAL_PCRE *)ere;
1880 #ifdef SUPPORT_PCRE16
1881 int op;
1882 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1883 int length = re->name_count * re->name_entry_size;
1884 #ifdef SUPPORT_UTF
1885 BOOL utf = (re->options & PCRE_UTF16) != 0;
1886 BOOL utf16_char = FALSE;
1887 #endif /* SUPPORT_UTF */
1888 #endif /* SUPPORT_PCRE16 */
1889
1890 /* Always flip the bytes in the main data block and study blocks. */
1891
1892 re->magic_number = REVERSED_MAGIC_NUMBER;
1893 re->size = swap_uint32(re->size);
1894 re->options = swap_uint32(re->options);
1895 re->flags = swap_uint16(re->flags);
1896 re->top_bracket = swap_uint16(re->top_bracket);
1897 re->top_backref = swap_uint16(re->top_backref);
1898 re->first_char = swap_uint16(re->first_char);
1899 re->req_char = swap_uint16(re->req_char);
1900 re->name_table_offset = swap_uint16(re->name_table_offset);
1901 re->name_entry_size = swap_uint16(re->name_entry_size);
1902 re->name_count = swap_uint16(re->name_count);
1903
1904 if (extra != NULL)
1905 {
1906 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1907 rsd->size = swap_uint32(rsd->size);
1908 rsd->flags = swap_uint32(rsd->flags);
1909 rsd->minlength = swap_uint32(rsd->minlength);
1910 }
1911
1912 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1913 in the name table, if present, and then in the pattern itself. */
1914
1915 #ifdef SUPPORT_PCRE16
1916 if (!use_pcre16) return;
1917
1918 while(TRUE)
1919 {
1920 /* Swap previous characters. */
1921 while (length-- > 0)
1922 {
1923 *ptr = swap_uint16(*ptr);
1924 ptr++;
1925 }
1926 #ifdef SUPPORT_UTF
1927 if (utf16_char)
1928 {
1929 if ((ptr[-1] & 0xfc00) == 0xd800)
1930 {
1931 /* We know that there is only one extra character in UTF-16. */
1932 *ptr = swap_uint16(*ptr);
1933 ptr++;
1934 }
1935 }
1936 utf16_char = FALSE;
1937 #endif /* SUPPORT_UTF */
1938
1939 /* Get next opcode. */
1940
1941 length = 0;
1942 op = *ptr;
1943 *ptr++ = swap_uint16(op);
1944
1945 switch (op)
1946 {
1947 case OP_END:
1948 return;
1949
1950 #ifdef SUPPORT_UTF
1951 case OP_CHAR:
1952 case OP_CHARI:
1953 case OP_NOT:
1954 case OP_NOTI:
1955 case OP_STAR:
1956 case OP_MINSTAR:
1957 case OP_PLUS:
1958 case OP_MINPLUS:
1959 case OP_QUERY:
1960 case OP_MINQUERY:
1961 case OP_UPTO:
1962 case OP_MINUPTO:
1963 case OP_EXACT:
1964 case OP_POSSTAR:
1965 case OP_POSPLUS:
1966 case OP_POSQUERY:
1967 case OP_POSUPTO:
1968 case OP_STARI:
1969 case OP_MINSTARI:
1970 case OP_PLUSI:
1971 case OP_MINPLUSI:
1972 case OP_QUERYI:
1973 case OP_MINQUERYI:
1974 case OP_UPTOI:
1975 case OP_MINUPTOI:
1976 case OP_EXACTI:
1977 case OP_POSSTARI:
1978 case OP_POSPLUSI:
1979 case OP_POSQUERYI:
1980 case OP_POSUPTOI:
1981 case OP_NOTSTAR:
1982 case OP_NOTMINSTAR:
1983 case OP_NOTPLUS:
1984 case OP_NOTMINPLUS:
1985 case OP_NOTQUERY:
1986 case OP_NOTMINQUERY:
1987 case OP_NOTUPTO:
1988 case OP_NOTMINUPTO:
1989 case OP_NOTEXACT:
1990 case OP_NOTPOSSTAR:
1991 case OP_NOTPOSPLUS:
1992 case OP_NOTPOSQUERY:
1993 case OP_NOTPOSUPTO:
1994 case OP_NOTSTARI:
1995 case OP_NOTMINSTARI:
1996 case OP_NOTPLUSI:
1997 case OP_NOTMINPLUSI:
1998 case OP_NOTQUERYI:
1999 case OP_NOTMINQUERYI:
2000 case OP_NOTUPTOI:
2001 case OP_NOTMINUPTOI:
2002 case OP_NOTEXACTI:
2003 case OP_NOTPOSSTARI:
2004 case OP_NOTPOSPLUSI:
2005 case OP_NOTPOSQUERYI:
2006 case OP_NOTPOSUPTOI:
2007 if (utf) utf16_char = TRUE;
2008 #endif
2009 /* Fall through. */
2010
2011 default:
2012 length = OP_lengths16[op] - 1;
2013 break;
2014
2015 case OP_CLASS:
2016 case OP_NCLASS:
2017 /* Skip the character bit map. */
2018 ptr += 32/sizeof(pcre_uint16);
2019 length = 0;
2020 break;
2021
2022 case OP_XCLASS:
2023 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2024 if (LINK_SIZE > 1)
2025 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2026 - (1 + LINK_SIZE + 1));
2027 else
2028 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2029
2030 /* Reverse the size of the XCLASS instance. */
2031 *ptr = swap_uint16(*ptr);
2032 ptr++;
2033 if (LINK_SIZE > 1)
2034 {
2035 *ptr = swap_uint16(*ptr);
2036 ptr++;
2037 }
2038
2039 op = *ptr;
2040 *ptr = swap_uint16(op);
2041 ptr++;
2042 if ((op & XCL_MAP) != 0)
2043 {
2044 /* Skip the character bit map. */
2045 ptr += 32/sizeof(pcre_uint16);
2046 length -= 32/sizeof(pcre_uint16);
2047 }
2048 break;
2049 }
2050 }
2051 /* Control should never reach here in 16 bit mode. */
2052 #endif /* SUPPORT_PCRE16 */
2053 }
2054
2055
2056
2057 /*************************************************
2058 * Check match or recursion limit *
2059 *************************************************/
2060
2061 static int
2062 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2063 int start_offset, int options, int *use_offsets, int use_size_offsets,
2064 int flag, unsigned long int *limit, int errnumber, const char *msg)
2065 {
2066 int count;
2067 int min = 0;
2068 int mid = 64;
2069 int max = -1;
2070
2071 extra->flags |= flag;
2072
2073 for (;;)
2074 {
2075 *limit = mid;
2076
2077 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2078 use_offsets, use_size_offsets);
2079
2080 if (count == errnumber)
2081 {
2082 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2083 min = mid;
2084 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2085 }
2086
2087 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2088 count == PCRE_ERROR_PARTIAL)
2089 {
2090 if (mid == min + 1)
2091 {
2092 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2093 break;
2094 }
2095 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2096 max = mid;
2097 mid = (min + mid)/2;
2098 }
2099 else break; /* Some other error */
2100 }
2101
2102 extra->flags &= ~flag;
2103 return count;
2104 }
2105
2106
2107
2108 /*************************************************
2109 * Case-independent strncmp() function *
2110 *************************************************/
2111
2112 /*
2113 Arguments:
2114 s first string
2115 t second string
2116 n number of characters to compare
2117
2118 Returns: < 0, = 0, or > 0, according to the comparison
2119 */
2120
2121 static int
2122 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2123 {
2124 while (n--)
2125 {
2126 int c = tolower(*s++) - tolower(*t++);
2127 if (c) return c;
2128 }
2129 return 0;
2130 }
2131
2132
2133
2134 /*************************************************
2135 * Check newline indicator *
2136 *************************************************/
2137
2138 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2139 a message and return 0 if there is no match.
2140
2141 Arguments:
2142 p points after the leading '<'
2143 f file for error message
2144
2145 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2146 */
2147
2148 static int
2149 check_newline(pcre_uint8 *p, FILE *f)
2150 {
2151 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2152 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2153 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2154 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2155 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2156 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2157 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2158 fprintf(f, "Unknown newline type at: <%s\n", p);
2159 return 0;
2160 }
2161
2162
2163
2164 /*************************************************
2165 * Usage function *
2166 *************************************************/
2167
/* Write the command line summary to stdout. Which lines appear depends on the
build: the readline sentence, and the -16, -dfa and -p options, are controlled
by SUPPORT_LIBREADLINE/SUPPORT_LIBEDIT, SUPPORT_PCRE16, NODFA and NOPOSIX
respectively. None of the text contains a format directive, so it is written
with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n"
      " linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2218
2219
2220
2221 /*************************************************
2222 * Main Program *
2223 *************************************************/
2224
2225 /* Read lines from named file or stdin and write to named file or stdout; lines
2226 consist of a regular expression, in delimiters and optionally followed by
2227 options, followed by a set of test data, terminated by an empty line. */
2228
2229 int main(int argc, char **argv)
2230 {
2231 FILE *infile = stdin;
2232 const char *version;
2233 int options = 0;
2234 int study_options = 0;
2235 int default_find_match_limit = FALSE;
2236 int op = 1;
2237 int timeit = 0;
2238 int timeitm = 0;
2239 int showinfo = 0;
2240 int showstore = 0;
2241 int force_study = -1;
2242 int force_study_options = 0;
2243 int quiet = 0;
2244 int size_offsets = 45;
2245 int size_offsets_max;
2246 int *offsets = NULL;
2247 int debug = 0;
2248 int done = 0;
2249 int all_use_dfa = 0;
2250 int verify_jit = 0;
2251 int yield = 0;
2252 int stack_size;
2253
2254 #if !defined NOPOSIX
2255 int posix = 0;
2256 #endif
2257 #if !defined NODFA
2258 int *dfa_workspace = NULL;
2259 #endif
2260
2261 pcre_jit_stack *jit_stack = NULL;
2262
2263 /* These vectors store, end-to-end, a list of zero-terminated captured
2264 substring names, each list itself being terminated by an empty name. Assume
2265 that 1024 is plenty long enough for the few names we'll be testing. It is
2266 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2267 for the actual memory, to ensure alignment. */
2268
2269 pcre_uint16 copynames[1024];
2270 pcre_uint16 getnames[1024];
2271
2272 #ifdef SUPPORT_PCRE16
2273 pcre_uint16 *cn16ptr;
2274 pcre_uint16 *gn16ptr;
2275 #endif
2276
2277 #ifdef SUPPORT_PCRE8
2278 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2279 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2280 pcre_uint8 *cn8ptr;
2281 pcre_uint8 *gn8ptr;
2282 #endif
2283
2284 /* Get buffers from malloc() so that valgrind will check their misuse when
2285 debugging. They grow automatically when very long lines are read. The 16-bit
2286 buffer (buffer16) is obtained only if needed. */
2287
2288 buffer = (pcre_uint8 *)malloc(buffer_size);
2289 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2290 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2291
2292 /* The outfile variable is static so that new_malloc can use it. */
2293
2294 outfile = stdout;
2295
2296 /* The following _setmode() stuff is some Windows magic that tells its runtime
2297 library to translate CRLF into a single LF character. At least, that's what
2298 I've been told: never having used Windows I take this all on trust. Originally
2299 it set 0x8000, but then I was advised that _O_BINARY was better. */
2300
2301 #if defined(_WIN32) || defined(WIN32)
2302 _setmode( _fileno( stdout ), _O_BINARY );
2303 #endif
2304
2305 /* Get the version number: both pcre_version() and pcre16_version() give the
2306 same answer. We just need to ensure that we call one that is available. */
2307
2308 #ifdef SUPPORT_PCRE8
2309 version = pcre_version();
2310 #else
2311 version = pcre16_version();
2312 #endif
2313
2314 /* Scan options */
2315
2316 while (argc > 1 && argv[op][0] == '-')
2317 {
2318 pcre_uint8 *endptr;
2319 char *arg = argv[op];
2320
2321 if (strcmp(arg, "-m") == 0) showstore = 1;
2322 else if (strcmp(arg, "-s") == 0) force_study = 0;
2323
2324 else if (strncmp(arg, "-s+", 3) == 0)
2325 {
2326 arg += 3;
2327 if (*arg == '+') { arg++; verify_jit = TRUE; }
2328 force_study = 1;
2329 if (*arg == 0)
2330 force_study_options = jit_study_bits[6];
2331 else if (*arg >= '1' && *arg <= '7')
2332 force_study_options = jit_study_bits[*arg - '1'];
2333 else goto BAD_ARG;
2334 }
2335 else if (strcmp(arg, "-16") == 0)
2336 {
2337 #ifdef SUPPORT_PCRE16
2338 use_pcre16 = 1;
2339 #else
2340 printf("** This version of PCRE was built without 16-bit support\n");
2341 exit(1);
2342 #endif
2343 }
2344 else if (strcmp(arg, "-q") == 0) quiet = 1;
2345 else if (strcmp(arg, "-b") == 0) debug = 1;
2346 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2347 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2348 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2349 #if !defined NODFA
2350 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2351 #endif
2352 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2353 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2354 *endptr == 0))
2355 {
2356 op++;
2357 argc--;
2358 }
2359 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2360 {
2361 int both = arg[2] == 0;
2362 int temp;
2363 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2364 *endptr == 0))
2365 {
2366 timeitm = temp;
2367 op++;
2368 argc--;
2369 }
2370 else timeitm = LOOPREPEAT;
2371 if (both) timeit = timeitm;
2372 }
2373 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2374 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2375 *endptr == 0))
2376 {
2377 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2378 printf("PCRE: -S not supported on this OS\n");
2379 exit(1);
2380 #else
2381 int rc;
2382 struct rlimit rlim;
2383 getrlimit(RLIMIT_STACK, &rlim);
2384 rlim.rlim_cur = stack_size * 1024 * 1024;
2385 rc = setrlimit(RLIMIT_STACK, &rlim);
2386 if (rc != 0)
2387 {
2388 printf("PCRE: setrlimit() failed with error %d\n", rc);
2389 exit(1);
2390 }
2391 op++;
2392 argc--;
2393 #endif
2394 }
2395 #if !defined NOPOSIX
2396 else if (strcmp(arg, "-p") == 0) posix = 1;
2397 #endif
2398 else if (strcmp(arg, "-C") == 0)
2399 {
2400 int rc;
2401 unsigned long int lrc;
2402
2403 if (argc > 2)
2404 {
2405 if (strcmp(argv[op + 1], "linksize") == 0)
2406 {
2407 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2408 printf("%d\n", rc);
2409 yield = rc;
2410 goto EXIT;
2411 }
2412 if (strcmp(argv[op + 1], "pcre8") == 0)
2413 {
2414 #ifdef SUPPORT_PCRE8
2415 printf("1\n");
2416 yield = 1;
2417 #else
2418 printf("0\n");
2419 yield = 0;
2420 #endif
2421 goto EXIT;
2422 }
2423 if (strcmp(argv[op + 1], "pcre16") == 0)
2424 {
2425 #ifdef SUPPORT_PCRE16
2426 printf("1\n");
2427 yield = 1;
2428 #else
2429 printf("0\n");
2430 yield = 0;
2431 #endif
2432 goto EXIT;
2433 }
2434 if (strcmp(argv[op + 1], "utf") == 0)
2435 {
2436 #ifdef SUPPORT_PCRE8
2437 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2438 printf("%d\n", rc);
2439 yield = rc;
2440 #else
2441 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2442 printf("%d\n", rc);
2443 yield = rc;
2444 #endif
2445 goto EXIT;
2446 }
2447 if (strcmp(argv[op + 1], "ucp") == 0)
2448 {
2449 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2450 printf("%d\n", rc);
2451 yield = rc;
2452 goto EXIT;
2453 }
2454 if (strcmp(argv[op + 1], "jit") == 0)
2455 {
2456 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2457 printf("%d\n", rc);
2458 yield = rc;
2459 goto EXIT;
2460 }
2461 if (strcmp(argv[op + 1], "newline") == 0)
2462 {
2463 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2464 print_newline_config(rc);
2465 goto EXIT;
2466 }
2467 printf("Unknown -C option: %s\n", argv[op + 1]);
2468 goto EXIT;
2469 }
2470
2471 printf("PCRE version %s\n", version);
2472 printf("Compiled with\n");
2473
2474 #ifdef EBCDIC
2475 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2476 #endif
2477
2478 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2479 are set, either both UTFs are supported or both are not supported. */
2480
2481 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2482 printf(" 8-bit and 16-bit support\n");
2483 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2484 if (rc)
2485 printf(" UTF-8 and UTF-16 support\n");
2486 else
2487 printf(" No UTF-8 or UTF-16 support\n");
2488 #elif defined SUPPORT_PCRE8
2489 printf(" 8-bit support only\n");
2490 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2491 printf(" %sUTF-8 support\n", rc? "" : "No ");
2492 #else
2493 printf(" 16-bit support only\n");
2494 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2495 printf(" %sUTF-16 support\n", rc? "" : "No ");
2496 #endif
2497
2498 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2499 printf(" %sUnicode properties support\n", rc? "" : "No ");
2500 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2501 if (rc)
2502 {
2503 const char *arch;
2504 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2505 printf(" Just-in-time compiler support: %s\n", arch);
2506 }
2507 else
2508 printf(" No just-in-time compiler support\n");
2509 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2510 print_newline_config(rc);
2511 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2512 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2513 "all Unicode newlines");
2514 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2515 printf(" Internal link size = %d\n", rc);
2516 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2517 printf(" POSIX malloc threshold = %d\n", rc);
2518 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2519 printf(" Default match limit = %ld\n", lrc);
2520 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2521 printf(" Default recursion depth limit = %ld\n", lrc);
2522 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2523 printf(" Match recursion uses %s", rc? "stack" : "heap");
2524 if (showstore)
2525 {
2526 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2527 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2528 }
2529 printf("\n");
2530 goto EXIT;
2531 }
2532 else if (strcmp(arg, "-help") == 0 ||
2533 strcmp(arg, "--help") == 0)
2534 {
2535 usage();
2536 goto EXIT;
2537 }
2538 else
2539 {
2540 BAD_ARG:
2541 printf("** Unknown or malformed option %s\n", arg);
2542 usage();
2543 yield = 1;
2544 goto EXIT;
2545 }
2546 op++;
2547 argc--;
2548 }
2549
2550 /* Get the store for the offsets vector, and remember what it was */
2551
2552 size_offsets_max = size_offsets;
2553 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2554 if (offsets == NULL)
2555 {
2556 printf("** Failed to get %d bytes of memory for offsets vector\n",
2557 (int)(size_offsets_max * sizeof(int)));
2558 yield = 1;
2559 goto EXIT;
2560 }
2561
2562 /* Sort out the input and output files */
2563
2564 if (argc > 1)
2565 {
2566 infile = fopen(argv[op], INPUT_MODE);
2567 if (infile == NULL)
2568 {
2569 printf("** Failed to open %s\n", argv[op]);
2570 yield = 1;
2571 goto EXIT;
2572 }
2573 }
2574
2575 if (argc > 2)
2576 {
2577 outfile = fopen(argv[op+1], OUTPUT_MODE);
2578 if (outfile == NULL)
2579 {
2580 printf("** Failed to open %s\n", argv[op+1]);
2581 yield = 1;
2582 goto EXIT;
2583 }
2584 }
2585
2586 /* Set alternative malloc function */
2587
2588 #ifdef SUPPORT_PCRE8
2589 pcre_malloc = new_malloc;
2590 pcre_free = new_free;
2591 pcre_stack_malloc = stack_malloc;
2592 pcre_stack_free = stack_free;
2593 #endif
2594
2595 #ifdef SUPPORT_PCRE16
2596 pcre16_malloc = new_malloc;
2597 pcre16_free = new_free;
2598 pcre16_stack_malloc = stack_malloc;
2599 pcre16_stack_free = stack_free;
2600 #endif
2601
2602 /* Heading line unless quiet, then prompt for first regex if stdin */
2603
2604 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2605
2606 /* Main loop */
2607
2608 while (!done)
2609 {
2610 pcre *re = NULL;
2611 pcre_extra *extra = NULL;
2612
2613 #if !defined NOPOSIX /* There are still compilers that require no indent */
2614 regex_t preg;
2615 int do_posix = 0;
2616 #endif
2617
2618 const char *error;
2619 pcre_uint8 *markptr;
2620 pcre_uint8 *p, *pp, *ppp;
2621 pcre_uint8 *to_file = NULL;
2622 const pcre_uint8 *tables = NULL;
2623 unsigned long int get_options;
2624 unsigned long int true_size, true_study_size = 0;
2625 size_t size, regex_gotten_store;
2626 int do_allcaps = 0;
2627 int do_mark = 0;
2628 int do_study = 0;
2629 int no_force_study = 0;
2630 int do_debug = debug;
2631 int do_G = 0;
2632 int do_g = 0;
2633 int do_showinfo = showinfo;
2634 int do_showrest = 0;
2635 int do_showcaprest = 0;
2636 int do_flip = 0;
2637 int erroroffset, len, delimiter, poffset;
2638
2639 #if !defined NODFA
2640 int dfa_matched = 0;
2641 #endif
2642
2643 use_utf = 0;
2644 debug_lengths = 1;
2645
2646 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2647 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2648 fflush(outfile);
2649
2650 p = buffer;
2651 while (isspace(*p)) p++;
2652 if (*p == 0) continue;
2653
2654 /* See if the pattern is to be loaded pre-compiled from a file. */
2655
2656 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2657 {
2658 pcre_uint32 magic;
2659 pcre_uint8 sbuf[8];
2660 FILE *f;
2661
2662 p++;
2663 if (*p == '!')
2664 {
2665 do_debug = TRUE;
2666 do_showinfo = TRUE;
2667 p++;
2668 }
2669
2670 pp = p + (int)strlen((char *)p);
2671 while (isspace(pp[-1])) pp--;
2672 *pp = 0;
2673
2674 f = fopen((char *)p, "rb");
2675 if (f == NULL)
2676 {
2677 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2678 continue;
2679 }
2680
2681 first_gotten_store = 0;
2682 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2683
2684 true_size =
2685 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2686 true_study_size =
2687 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2688
2689 re = (pcre *)new_malloc(true_size);
2690 if (re == NULL)
2691 {
2692 printf("** Failed to get %d bytes of memory for pcre object\n",
2693 (int)true_size);
2694 yield = 1;
2695 goto EXIT;
2696 }
2697 regex_gotten_store = first_gotten_store;
2698
2699 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2700
2701 magic = ((REAL_PCRE *)re)->magic_number;
2702 if (magic != MAGIC_NUMBER)
2703 {
2704 if (swap_uint32(magic) == MAGIC_NUMBER)
2705 {
2706 do_flip = 1;
2707 }
2708 else
2709 {
2710 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2711 new_free(re);
2712 fclose(f);
2713 continue;
2714 }
2715 }
2716
2717 /* We hide the byte-invert info for little and big endian tests. */
2718 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2719 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2720
2721 /* Now see if there is any following study data. */
2722
2723 if (true_study_size != 0)
2724 {
2725 pcre_study_data *psd;
2726
2727 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2728 extra->flags = PCRE_EXTRA_STUDY_DATA;
2729
2730 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2731 extra->study_data = psd;
2732
2733 if (fread(psd, 1, true_study_size, f) != true_study_size)
2734 {
2735 FAIL_READ:
2736 fprintf(outfile, "Failed to read data from %s\n", p);
2737 if (extra != NULL)
2738 {
2739 PCRE_FREE_STUDY(extra);
2740 }
2741 new_free(re);
2742 fclose(f);
2743 continue;
2744 }
2745 fprintf(outfile, "Study data loaded from %s\n", p);
2746 do_study = 1; /* To get the data output if requested */
2747 }
2748 else fprintf(outfile, "No study data\n");
2749
2750 /* Flip the necessary bytes. */
2751 if (do_flip)
2752 {
2753 int rc;
2754 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2755 if (rc == PCRE_ERROR_BADMODE)
2756 {
2757 /* Simulate the result of the function call below. */
2758 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2759 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2760 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2761 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2762 new_free(re);
2763 fclose(f);
2764 continue;
2765 }
2766 }
2767
2768 /* Need to know if UTF-8 for printing data strings. */
2769
2770 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2771 {
2772 new_free(re);
2773 fclose(f);
2774 continue;
2775 }
2776 use_utf = (get_options & PCRE_UTF8) != 0;
2777
2778 fclose(f);
2779 goto SHOW_INFO;
2780 }
2781
2782 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2783 the pattern; if it isn't complete, read more. */
2784
2785 delimiter = *p++;
2786
2787 if (isalnum(delimiter) || delimiter == '\\')
2788 {
2789 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2790 goto SKIP_DATA;
2791 }
2792
2793 pp = p;
2794 poffset = (int)(p - buffer);
2795
2796 for(;;)
2797 {
2798 while (*pp != 0)
2799 {
2800 if (*pp == '\\' && pp[1] != 0) pp++;
2801 else if (*pp == delimiter) break;
2802 pp++;
2803 }
2804 if (*pp != 0) break;
2805 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2806 {
2807 fprintf(outfile, "** Unexpected EOF\n");
2808 done = 1;
2809 goto CONTINUE;
2810 }
2811 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2812 }
2813
2814 /* The buffer may have moved while being extended; reset the start of data
2815 pointer to the correct relative point in the buffer. */
2816
2817 p = buffer + poffset;
2818
2819 /* If the first character after the delimiter is backslash, make
2820 the pattern end with backslash. This is purely to provide a way
2821 of testing for the error message when a pattern ends with backslash. */
2822
2823 if (pp[1] == '\\') *pp++ = '\\';
2824
2825 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2826 for callouts. */
2827
2828 *pp++ = 0;
2829 strcpy((char *)pbuffer, (char *)p);
2830
2831 /* Look for options after final delimiter */
2832
2833 options = 0;
2834 study_options = force_study_options;
2835 log_store = showstore; /* default from command line */
2836
2837 while (*pp != 0)
2838 {
2839 switch (*pp++)
2840 {
2841 case 'f': options |= PCRE_FIRSTLINE; break;
2842 case 'g': do_g = 1; break;
2843 case 'i': options |= PCRE_CASELESS; break;
2844 case 'm': options |= PCRE_MULTILINE; break;
2845 case 's': options |= PCRE_DOTALL; break;
2846 case 'x': options |= PCRE_EXTENDED; break;
2847
2848 case '+':
2849 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2850 break;
2851
2852 case '=': do_allcaps = 1; break;
2853 case 'A': options |= PCRE_ANCHORED; break;
2854 case 'B': do_debug = 1; break;
2855 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2856 case 'D': do_debug = do_showinfo = 1; break;
2857 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2858 case 'F': do_flip = 1; break;
2859 case 'G': do_G = 1; break;
2860 case 'I': do_showinfo = 1; break;
2861 case 'J': options |= PCRE_DUPNAMES; break;
2862 case 'K': do_mark = 1; break;
2863 case 'M': log_store = 1; break;
2864 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2865
2866 #if !defined NOPOSIX
2867 case 'P': do_posix = 1; break;
2868 #endif
2869
2870 case 'S':
2871 do_study = 1;
2872 for (;;)
2873 {
2874 switch (*pp++)
2875 {
2876 case 'S':
2877 do_study = 0;
2878 no_force_study = 1;
2879 break;
2880
2881 case '!':
2882 study_options |= PCRE_STUDY_EXTRA_NEEDED;
2883 break;
2884
2885 case '+':
2886 if (*pp == '+')
2887 {
2888 verify_jit = TRUE;
2889 pp++;
2890 }
2891 if (*pp >= '1' && *pp <= '7')
2892 study_options |= jit_study_bits[*pp++ - '1'];
2893 else
2894 study_options |= jit_study_bits[6];
2895 break;
2896
2897 case '-':
2898 study_options &= ~PCRE_STUDY_ALLJIT;
2899 break;
2900
2901 default:
2902 pp--;
2903 goto ENDLOOP;
2904 }
2905 }
2906 ENDLOOP:
2907 break;
2908
2909 case 'U': options |= PCRE_UNGREEDY; break;
2910 case 'W': options |= PCRE_UCP; break;
2911 case 'X': options |= PCRE_EXTRA; break;
2912 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2913 case 'Z': debug_lengths = 0; break;
2914 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2915 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2916
2917 case 'T':
2918 switch (*pp++)
2919 {
2920 case '0': tables = tables0; break;
2921 case '1': tables = tables1; break;
2922
2923 case '\r':
2924 case '\n':
2925 case ' ':
2926 case 0:
2927 fprintf(outfile, "** Missing table number after /T\n");
2928 goto SKIP_DATA;
2929
2930 default:
2931 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2932 goto SKIP_DATA;
2933 }
2934 break;
2935
2936 case 'L':
2937 ppp = pp;
2938 /* The '\r' test here is so that it works on Windows. */
2939 /* The '0' test is just in case this is an unterminated line. */
2940 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2941 *ppp = 0;
2942 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2943 {
2944 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2945 goto SKIP_DATA;
2946 }
2947 locale_set = 1;
2948 tables = PCRE_MAKETABLES;
2949 pp = ppp;
2950 break;
2951
2952 case '>':
2953 to_file = pp;
2954 while (*pp != 0) pp++;
2955 while (isspace(pp[-1])) pp--;
2956 *pp = 0;
2957 break;
2958
2959 case '<':
2960 {
2961 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2962 {
2963 options |= PCRE_JAVASCRIPT_COMPAT;
2964 pp += 3;
2965 }
2966 else
2967 {
2968 int x = check_newline(pp, outfile);
2969 if (x == 0) goto SKIP_DATA;
2970 options |= x;
2971 while (*pp++ != '>');
2972 }
2973 }
2974 break;
2975
2976 case '\r': /* So that it works in Windows */
2977 case '\n':
2978 case ' ':
2979 break;
2980
2981 default:
2982 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2983 goto SKIP_DATA;
2984 }
2985 }
2986
2987 /* Handle compiling via the POSIX interface, which doesn't support the
2988 timing, showing, or debugging options, nor the ability to pass over
2989 local character tables. Neither does it have 16-bit support. */
2990
2991 #if !defined NOPOSIX
2992 if (posix || do_posix)
2993 {
2994 int rc;
2995 int cflags = 0;
2996
2997 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2998 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2999 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3000 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3001 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3002 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3003 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3004
3005 first_gotten_store = 0;
3006 rc = regcomp(&preg, (char *)p, cflags);
3007
3008 /* Compilation failed; go back for another re, skipping to blank line
3009 if non-interactive. */
3010
3011 if (rc != 0)
3012 {
3013 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3014 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3015 goto SKIP_DATA;
3016 }
3017 }
3018
3019 /* Handle compiling via the native interface */
3020
3021 else
3022 #endif /* !defined NOPOSIX */
3023
3024 {
3025 /* In 16-bit mode, convert the input. */
3026
3027 #ifdef SUPPORT_PCRE16
3028 if (use_pcre16)
3029 {
3030 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3031 {
3032 case -1:
3033 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3034 "converted to UTF-16\n");
3035 goto SKIP_DATA;
3036
3037 case -2:
3038 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3039 "cannot be converted to UTF-16\n");
3040 goto SKIP_DATA;
3041
3042 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3043 fprintf(outfile, "**Failed: character value greater than 0xffff "
3044 "cannot be converted to 16-bit in non-UTF mode\n");
3045 goto SKIP_DATA;
3046
3047 default:
3048 break;
3049 }
3050 p = (pcre_uint8 *)buffer16;
3051 }
3052 #endif
3053
3054 /* Compile many times when timing */
3055
3056 if (timeit > 0)
3057 {
3058 register int i;
3059 clock_t time_taken;
3060 clock_t start_time = clock();
3061 for (i = 0; i < timeit; i++)
3062 {
3063 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3064 if (re != NULL) free(re);
3065 }
3066 time_taken = clock() - start_time;
3067 fprintf(outfile, "Compile time %.4f milliseconds\n",
3068 (((double)time_taken * 1000.0) / (double)timeit) /
3069 (double)CLOCKS_PER_SEC);
3070 }
3071
3072 first_gotten_store = 0;
3073 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3074
3075 /* Compilation failed; go back for another re, skipping to blank line
3076 if non-interactive. */
3077
3078 if (re == NULL)
3079 {
3080 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3081 SKIP_DATA:
3082 if (infile != stdin)
3083 {
3084 for (;;)
3085 {
3086 if (extend_inputline(infile, buffer, NULL) == NULL)
3087 {
3088 done = 1;
3089 goto CONTINUE;
3090 }
3091 len = (int)strlen((char *)buffer);
3092 while (len > 0 && isspace(buffer[len-1])) len--;
3093 if (len == 0) break;
3094 }
3095 fprintf(outfile, "\n");
3096 }
3097 goto CONTINUE;
3098 }
3099
3100 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3101 within the regex; check for this so that we know how to process the data
3102 lines. */
3103
3104 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3105 goto SKIP_DATA;
3106 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3107
3108 /* Extract the size for possible writing before possibly flipping it,
3109 and remember the store that was got. */
3110
3111 true_size = ((REAL_PCRE *)re)->size;
3112 regex_gotten_store = first_gotten_store;
3113
3114 /* Output code size information if requested */
3115
3116 if (log_store)
3117 fprintf(outfile, "Memory allocation (code space): %d\n",
3118 (int)(first_gotten_store -
3119 sizeof(REAL_PCRE) -
3120 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3121
3122 /* If -s or /S was present, study the regex to generate additional info to
3123 help with the matching, unless the pattern has the SS option, which
3124 suppresses the effect of /S (used for a few test patterns where studying is
3125 never sensible). */
3126
3127 if (do_study || (force_study >= 0 && !no_force_study))
3128 {
3129 if (timeit > 0)
3130 {
3131 register int i;
3132 clock_t time_taken;
3133 clock_t start_time = clock();
3134 for (i = 0; i < timeit; i++)
3135 {
3136 PCRE_STUDY(extra, re, study_options, &error);
3137 }
3138 time_taken = clock() - start_time;
3139 if (extra != NULL)
3140 {
3141 PCRE_FREE_STUDY(extra);
3142 }
3143 fprintf(outfile, " Study time %.4f milliseconds\n",
3144 (((double)time_taken * 1000.0) / (double)timeit) /
3145 (double)CLOCKS_PER_SEC);
3146 }
3147 PCRE_STUDY(extra, re, study_options, &error);
3148 if (error != NULL)
3149 fprintf(outfile, "Failed to study: %s\n", error);
3150 else if (extra != NULL)
3151 {
3152 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3153 if (log_store)
3154 {
3155 size_t jitsize;
3156 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3157 jitsize != 0)
3158 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3159 }
3160 }
3161 }
3162
3163 /* If /K was present, we set up for handling MARK data. */
3164
3165 if (do_mark)
3166 {
3167 if (extra == NULL)
3168 {
3169 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3170 extra->flags = 0;
3171 }
3172 extra->mark = &markptr;
3173 extra->flags |= PCRE_EXTRA_MARK;
3174 }
3175
3176 /* Extract and display information from the compiled data if required. */
3177
3178 SHOW_INFO:
3179
3180 if (do_debug)
3181 {
3182 fprintf(outfile, "------------------------------------------------------------------\n");
3183 PCRE_PRINTINT(re, outfile, debug_lengths);
3184 }
3185
3186 /* We already have the options in get_options (see above) */
3187
3188 if (do_showinfo)
3189 {
3190 unsigned long int all_options;
3191 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3192 hascrorlf, maxlookbehind;
3193 int nameentrysize, namecount;
3194 const pcre_uint8 *nametable;
3195
3196 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3197 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3198 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3199 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3200 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3201 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3202 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3203 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3204 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3205 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3206 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3207 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3208 != 0)
3209 goto SKIP_DATA;
3210
3211 if (size != regex_gotten_store) fprintf(outfile,
3212 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3213 (int)size, (int)regex_gotten_store);
3214
3215 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3216 if (backrefmax > 0)
3217 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3218
3219 if (namecount > 0)
3220 {
3221 fprintf(outfile, "Named capturing subpatterns:\n");
3222 while (namecount-- > 0)
3223 {
3224 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3225 int imm2_size = use_pcre16 ? 1 : 2;
3226 #else
3227 int imm2_size = IMM2_SIZE;
3228 #endif
3229 int length = (int)STRLEN(nametable + imm2_size);
3230 fprintf(outfile, " ");
3231 PCHARSV(nametable, imm2_size, length, outfile);
3232 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3233 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3234 fprintf(outfile, "%3d\n", use_pcre16?
3235 (int)(((PCRE_SPTR16)nametable)[0])
3236 :((int)nametable[0] << 8) | (int)nametable[1]);
3237 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3238 #else
3239 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3240 #ifdef SUPPORT_PCRE8
3241 nametable += nameentrysize;
3242 #else
3243 nametable += nameentrysize * 2;
3244 #endif
3245 #endif
3246 }
3247 }
3248
3249 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3250 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3251
3252 all_options = ((REAL_PCRE *)re)->options;
3253 if (do_flip) all_options = swap_uint32(all_options);
3254
3255 if (get_options == 0) fprintf(outfile, "No options\n");
3256 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3257 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3258 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3259 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3260 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3261 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3262 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3263 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3264 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3265 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3266 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3267 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3268 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3269 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3270 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3271 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3272 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3273 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3274
3275 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3276
3277 switch (get_options & PCRE_NEWLINE_BITS)
3278 {
3279 case PCRE_NEWLINE_CR:
3280 fprintf(outfile, "Forced newline sequence: CR\n");
3281 break;
3282
3283 case PCRE_NEWLINE_LF:
3284 fprintf(outfile, "Forced newline sequence: LF\n");
3285 break;
3286
3287 case PCRE_NEWLINE_CRLF:
3288 fprintf(outfile, "Forced newline sequence: CRLF\n");
3289 break;
3290
3291 case PCRE_NEWLINE_ANYCRLF:
3292 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3293 break;
3294
3295 case PCRE_NEWLINE_ANY:
3296 fprintf(outfile, "Forced newline sequence: ANY\n");
3297 break;
3298
3299 default:
3300 break;
3301 }
3302
3303 if (first_char == -1)
3304 {
3305 fprintf(outfile, "First char at start or follows newline\n");
3306 }
3307 else if (first_char < 0)
3308 {
3309 fprintf(outfile, "No first char\n");
3310 }
3311 else
3312 {
3313 const char *caseless =
3314 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3315 "" : " (caseless)";
3316
3317 if (PRINTOK(first_char))
3318 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3319 else
3320 {
3321 fprintf(outfile, "First char = ");
3322 pchar(first_char, outfile);
3323 fprintf(outfile, "%s\n", caseless);
3324 }
3325 }
3326
3327 if (need_char < 0)
3328 {
3329 fprintf(outfile, "No need char\n");
3330 }
3331 else
3332 {
3333 const char *caseless =
3334 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3335 "" : " (caseless)";
3336
3337 if (PRINTOK(need_char))
3338 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3339 else
3340 {
3341 fprintf(outfile, "Need char = ");
3342 pchar(need_char, outfile);
3343 fprintf(outfile, "%s\n", caseless);
3344 }
3345 }
3346
3347 if (maxlookbehind > 0)
3348 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3349
3350 /* Don't output study size; at present it is in any case a fixed
3351 value, but it varies, depending on the computer architecture, and
3352 so messes up the test suite. (And with the /F option, it might be
3353 flipped.) If study was forced by an external -s, don't show this
3354 information unless -i or -d was also present. This means that, except
3355 when auto-callouts are involved, the output from runs with and without
3356 -s should be identical. */
3357
3358 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3359 {
3360 if (extra == NULL)
3361 fprintf(outfile, "Study returned NULL\n");
3362 else
3363 {
3364 pcre_uint8 *start_bits = NULL;
3365 int minlength;
3366
3367 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3368 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3369
3370 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3371 {
3372 if (start_bits == NULL)
3373 fprintf(outfile, "No set of starting bytes\n");
3374 else
3375 {
3376 int i;
3377 int c = 24;
3378 fprintf(outfile, "Starting byte set: ");
3379 for (i = 0; i < 256; i++)
3380 {
3381 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3382 {
3383 if (c > 75)
3384 {
3385 fprintf(outfile, "\n ");
3386 c = 2;
3387 }
3388 if (PRINTOK(i) && i != ' ')
3389 {
3390 fprintf(outfile, "%c ", i);
3391 c += 2;
3392 }
3393 else
3394 {
3395 fprintf(outfile, "\\x%02x ", i);
3396 c += 5;
3397 }
3398 }
3399 }
3400 fprintf(outfile, "\n");
3401 }
3402 }
3403 }
3404
3405 /* Show this only if the JIT was set by /S, not by -s. */
3406
3407 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3408 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3409 {
3410 int jit;
3411 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3412 {
3413 if (jit)
3414 fprintf(outfile, "JIT study was successful\n");
3415 else
3416 #ifdef SUPPORT_JIT
3417 fprintf(outfile, "JIT study was not successful\n");
3418 #else
3419 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3420 #endif
3421 }
3422 }
3423 }
3424 }
3425
3426 /* If the '>' option was present, we write out the regex to a file, and
3427 that is all. The first 8 bytes of the file are the regex length and then
3428 the study length, in big-endian order. */
3429
3430 if (to_file != NULL)
3431 {
3432 FILE *f = fopen((char *)to_file, "wb");
3433 if (f == NULL)
3434 {
3435 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3436 }
3437 else
3438 {
3439 pcre_uint8 sbuf[8];
3440
3441 if (do_flip) regexflip(re, extra);
3442 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3443 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3444 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3445 sbuf[3] = (pcre_uint8)((true_size) & 255);
3446 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3447 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3448 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3449 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3450
3451 if (fwrite(sbuf, 1, 8, f) < 8 ||
3452 fwrite(re, 1, true_size, f) < true_size)
3453 {
3454 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3455 }
3456 else
3457 {
3458 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3459
3460 /* If there is study data, write it. */
3461
3462 if (extra != NULL)
3463 {
3464 if (fwrite(extra->study_data, 1, true_study_size, f) <
3465 true_study_size)
3466 {
3467 fprintf(outfile, "Write error on %s: %s\n", to_file,
3468 strerror(errno));
3469 }
3470 else fprintf(outfile, "Study data written to %s\n", to_file);
3471 }
3472 }
3473 fclose(f);
3474 }
3475
3476 new_free(re);
3477 if (extra != NULL)
3478 {
3479 PCRE_FREE_STUDY(extra);
3480 }
3481 if (locale_set)
3482 {
3483 new_free((void *)tables);
3484 setlocale(LC_CTYPE, "C");
3485 locale_set = 0;
3486 }
3487 continue; /* With next regex */
3488 }
3489 } /* End of non-POSIX compile */
3490
3491 /* Read data lines and test them */
3492
3493 for (;;)
3494 {
3495 pcre_uint8 *q;
3496 pcre_uint8 *bptr;
3497 int *use_offsets = offsets;
3498 int use_size_offsets = size_offsets;
3499 int callout_data = 0;
3500 int callout_data_set = 0;
3501 int count, c;
3502 int copystrings = 0;
3503 int find_match_limit = default_find_match_limit;
3504 int getstrings = 0;
3505 int getlist = 0;
3506 int gmatched = 0;
3507 int start_offset = 0;
3508 int start_offset_sign = 1;
3509 int g_notempty = 0;
3510 int use_dfa = 0;
3511
3512 *copynames = 0;
3513 *getnames = 0;
3514
3515 #ifdef SUPPORT_PCRE16
3516 cn16ptr = copynames;
3517 gn16ptr = getnames;
3518 #endif
3519 #ifdef SUPPORT_PCRE8
3520 cn8ptr = copynames8;
3521 gn8ptr = getnames8;
3522 #endif
3523
3524 SET_PCRE_CALLOUT(callout);
3525 first_callout = 1;
3526 last_callout_mark = NULL;
3527 callout_extra = 0;
3528 callout_count = 0;
3529 callout_fail_count = 999999;
3530 callout_fail_id = -1;
3531 show_malloc = 0;
3532 options = 0;
3533
3534 if (extra != NULL) extra->flags &=
3535 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3536
3537 len = 0;
3538 for (;;)
3539 {
3540 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3541 {
3542 if (len > 0) /* Reached EOF without hitting a newline */
3543 {
3544 fprintf(outfile, "\n");
3545 break;
3546 }
3547 done = 1;
3548 goto CONTINUE;
3549 }
3550 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3551 len = (int)strlen((char *)buffer);
3552 if (buffer[len-1] == '\n') break;
3553 }
3554
3555 while (len > 0 && isspace(buffer[len-1])) len--;
3556 buffer[len] = 0;
3557 if (len == 0) break;
3558
3559 p = buffer;
3560 while (isspace(*p)) p++;
3561
3562 bptr = q = dbuffer;
3563 while ((c = *p++) != 0)
3564 {
3565 int i = 0;
3566 int n = 0;
3567
3568 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3569 In non-UTF mode, allow the value of the byte to fall through to later,
3570 where values greater than 127 are turned into UTF-8 when running in
3571 16-bit mode. */
3572
3573 if (c != '\\')
3574 {
3575 if (use_utf)
3576 {
3577 *q++ = c;
3578 continue;
3579 }
3580 }
3581
3582 /* Handle backslash escapes */
3583
3584 else switch ((c = *p++))
3585 {
3586 case 'a': c = 7; break;
3587 case 'b': c = '\b'; break;
3588 case 'e': c = 27; break;
3589 case 'f': c = '\f'; break;
3590 case 'n': c = '\n'; break;
3591 case 'r': c = '\r'; break;
3592 case 't': c = '\t'; break;
3593 case 'v': c = '\v'; break;
3594
3595 case '0': case '1': case '2': case '3':
3596 case '4': case '5': case '6': case '7':
3597 c -= '0';
3598 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3599 c = c * 8 + *p++ - '0';
3600 break;
3601
3602 case 'x':
3603 if (*p == '{')
3604 {
3605 pcre_uint8 *pt = p;
3606 c = 0;
3607
3608 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3609 when isxdigit() is a macro that refers to its argument more than
3610 once. This is banned by the C Standard, but apparently happens in at
3611 least one MacOS environment. */
3612
3613 for (pt++; isxdigit(*pt); pt++)
3614 {
3615 if (++i == 9)
3616 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3617 "using only the first eight.\n");
3618 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3619 }
3620 if (*pt == '}')
3621 {
3622 p = pt + 1;
3623 break;
3624 }
3625 /* Not correct form for \x{...}; fall through */
3626 }
3627
3628 /* \x without {} always defines just one byte in 8-bit mode. This
3629 allows UTF-8 characters to be constructed byte by byte, and also allows
3630 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3631 Otherwise, pass it down to later code so that it can be turned into
3632 UTF-8 when running in 16-bit mode. */
3633
3634 c = 0;
3635 while (i++ < 2 && isxdigit(*p))
3636 {
3637 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3638 p++;
3639 }
3640 if (use_utf)
3641 {
3642 *q++ = c;
3643 continue;
3644 }
3645 break;
3646
3647 case 0: /* \ followed by EOF allows for an empty line */
3648 p--;
3649 continue;
3650
3651 case '>':
3652 if (*p == '-')
3653 {
3654 start_offset_sign = -1;
3655 p++;
3656 }
3657 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3658 start_offset *= start_offset_sign;
3659 continue;
3660
3661 case 'A': /* Option setting */
3662 options |= PCRE_ANCHORED;
3663 continue;
3664
3665 case 'B':
3666 options |= PCRE_NOTBOL;
3667 continue;
3668
3669 case 'C':
3670 if (isdigit(*p)) /* Set copy string */
3671 {
3672 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3673 copystrings |= 1 << n;
3674 }
3675 else if (isalnum(*p))
3676 {
3677 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3678 }
3679 else if (*p == '+')
3680 {
3681 callout_extra = 1;
3682 p++;
3683 }
3684 else if (*p == '-')
3685 {
3686 SET_PCRE_CALLOUT(NULL);
3687 p++;
3688 }
3689 else if (*p == '!')
3690 {
3691 callout_fail_id = 0;
3692 p++;
3693 while(isdigit(*p))
3694 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3695 callout_fail_count = 0;
3696 if (*p == '!')
3697 {
3698 p++;
3699 while(isdigit(*p))
3700 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3701 }
3702 }
3703 else if (*p == '*')
3704 {
3705 int sign = 1;
3706 callout_data = 0;
3707 if (*(++p) == '-') { sign = -1; p++; }
3708 while(isdigit(*p))
3709 callout_data = callout_data * 10 + *p++ - '0';
3710 callout_data *= sign;
3711 callout_data_set = 1;
3712 }
3713 continue;
3714
3715 #if !defined NODFA
3716 case 'D':
3717 #if !defined NOPOSIX
3718 if (posix || do_posix)
3719 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3720 else
3721 #endif
3722 use_dfa = 1;
3723 continue;
3724 #endif
3725
3726 #if !defined NODFA
3727 case 'F':
3728 options |= PCRE_DFA_SHORTEST;
3729 continue;
3730 #endif
3731
3732 case 'G':
3733 if (isdigit(*p))
3734 {
3735 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3736 getstrings |= 1 << n;
3737 }
3738 else if (isalnum(*p))
3739 {
3740 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3741 }
3742 continue;
3743
3744 case 'J':
3745 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3746 if (extra != NULL
3747 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3748 && extra->executable_jit != NULL)
3749 {
3750 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3751 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3752 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3753 }
3754 continue;
3755
3756 case 'L':
3757 getlist = 1;
3758 continue;
3759
3760 case 'M':
3761 find_match_limit = 1;
3762 continue;
3763
3764 case 'N':
3765 if ((options & PCRE_NOTEMPTY) != 0)
3766 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3767 else
3768 options |= PCRE_NOTEMPTY;
3769 continue;
3770
3771 case 'O':
3772 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3773 if (n > size_offsets_max)
3774 {
3775 size_offsets_max = n;
3776 free(offsets);
3777 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3778 if (offsets == NULL)
3779 {
3780 printf("** Failed to get %d bytes of memory for offsets vector\n",
3781 (int)(size_offsets_max * sizeof(int)));
3782 yield = 1;
3783 goto EXIT;
3784 }
3785 }
3786 use_size_offsets = n;
3787 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3788 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
3789 continue;
3790
3791 case 'P':
3792 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3793 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3794 continue;
3795
3796 case 'Q':
3797 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3798 if (extra == NULL)
3799 {
3800 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3801 extra->flags = 0;
3802 }
3803 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3804 extra->match_limit_recursion = n;
3805 continue;
3806
3807 case 'q':
3808 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3809 if (extra == NULL)
3810 {
3811 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3812 extra->flags = 0;
3813 }
3814 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3815 extra->match_limit = n;
3816 continue;
3817
3818 #if !defined NODFA
3819 case 'R':
3820 options |= PCRE_DFA_RESTART;
3821 continue;
3822 #endif
3823
3824 case 'S':
3825 show_malloc = 1;
3826 continue;
3827
3828 case 'Y':
3829 options |= PCRE_NO_START_OPTIMIZE;
3830 continue;
3831
3832 case 'Z':
3833 options |= PCRE_NOTEOL;
3834 continue;
3835
3836 case '?':
3837 options |= PCRE_NO_UTF8_CHECK;
3838 continue;
3839
3840 case '<':
3841 {
3842 int x = check_newline(p, outfile);
3843 if (x == 0) goto NEXT_DATA;
3844 options |= x;
3845 while (*p++ != '>');
3846 }
3847 continue;
3848 }
3849
3850 /* We now have a character value in c that may be greater than 255. In
3851 16-bit mode, we always convert characters to UTF-8 so that values greater
3852 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3853 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3854 mode must have come from \x{...} or octal constructs because values from
3855 \x.. get this far only in non-UTF mode. */
3856
3857 #if !defined NOUTF || defined SUPPORT_PCRE16
3858 if (use_pcre16 || use_utf)
3859 {
3860 pcre_uint8 buff8[8];
3861 int ii, utn;
3862 utn = ord2utf8(c, buff8);
3863 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3864 }
3865 else
3866 #endif
3867 {
3868 if (c > 255)
3869 {
3870 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3871 "and UTF-8 mode is not enabled.\n", c);
3872 fprintf(outfile, "** Truncation will probably give the wrong "
3873 "result.\n");
3874 }
3875 *q++ = c;
3876 }
3877 }
3878
3879 /* Reached end of subject string */
3880
3881 *q = 0;
3882 len = (int)(q - dbuffer);
3883
3884 /* Move the data to the end of the buffer so that a read over the end of
3885 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3886 we are using the POSIX interface, we must include the terminating zero. */
3887
3888 #if !defined NOPOSIX
3889 if (posix || do_posix)
3890 {
3891 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3892 bptr += buffer_size - len - 1;
3893 }
3894 else
3895 #endif
3896 {
3897 memmove(bptr + buffer_size - len, bptr, len);
3898 bptr += buffer_size - len;
3899 }
3900
3901 if ((all_use_dfa || use_dfa) && find_match_limit)
3902 {
3903 printf("**Match limit not relevant for DFA matching: ignored\n");
3904 find_match_limit = 0;
3905 }
3906
3907 /* Handle matching via the POSIX interface, which does not
3908 support timing or playing with the match limit or callout data. */
3909
3910 #if !defined NOPOSIX
3911 if (posix || do_posix)
3912 {
3913 int rc;
3914 int eflags = 0;
3915 regmatch_t *pmatch = NULL;
3916 if (use_size_offsets > 0)
3917 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3918 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3919 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3920 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3921
3922 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3923
3924 if (rc != 0)
3925 {
3926 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3927 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3928 }
3929 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3930 != 0)
3931 {
3932 fprintf(outfile, "Matched with REG_NOSUB\n");
3933 }
3934 else
3935 {
3936 size_t i;
3937 for (i = 0; i < (size_t)use_size_offsets; i++)
3938 {
3939 if (pmatch[i].rm_so >= 0)
3940 {
3941 fprintf(outfile, "%2d: ", (int)i);
3942 PCHARSV(dbuffer, pmatch[i].rm_so,
3943 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3944 fprintf(outfile, "\n");
3945 if (do_showcaprest || (i == 0 && do_showrest))
3946 {
3947 fprintf(outfile, "%2d+ ", (int)i);
3948 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3949 outfile);
3950 fprintf(outfile, "\n");
3951 }
3952 }
3953 }
3954 }
3955 free(pmatch);
3956 goto NEXT_DATA;
3957 }
3958
3959 #endif /* !defined NOPOSIX */
3960
3961 /* Handle matching via the native interface - repeats for /g and /G */
3962
3963 #ifdef SUPPORT_PCRE16
3964 if (use_pcre16)
3965 {
3966 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3967 switch(len)
3968 {
3969 case -1:
3970 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3971 "converted to UTF-16\n");
3972 goto NEXT_DATA;
3973
3974 case -2:
3975 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3976 "cannot be converted to UTF-16\n");
3977 goto NEXT_DATA;
3978
3979 case -3:
3980 fprintf(outfile, "**Failed: character value greater than 0xffff "
3981 "cannot be converted to 16-bit in non-UTF mode\n");
3982 goto NEXT_DATA;
3983
3984 default:
3985 break;
3986 }
3987 bptr = (pcre_uint8 *)buffer16;
3988 }
3989 #endif
3990
3991 /* Ensure that there is a JIT callback if we want to verify that JIT was
3992 actually used. If jit_stack == NULL, no stack has yet been assigned. */
3993
3994 if (verify_jit && jit_stack == NULL && extra != NULL)
3995 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3996
3997 for (;; gmatched++) /* Loop for /g or /G */
3998 {
3999 markptr = NULL;
4000 jit_was_used = FALSE;
4001
4002 if (timeitm > 0)
4003 {
4004 register int i;
4005 clock_t time_taken;
4006 clock_t start_time = clock();
4007
4008 #if !defined NODFA
4009 if (all_use_dfa || use_dfa)
4010 {
4011 if ((options & PCRE_DFA_RESTART) != 0)
4012 {
4013 fprintf(outfile, "Timing DFA restarts is not supported\n");
4014 break;
4015 }
4016 if (dfa_workspace == NULL)
4017 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4018 for (i = 0; i < timeitm; i++)
4019 {
4020 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4021 (options | g_notempty), use_offsets, use_size_offsets,
4022 dfa_workspace, DFA_WS_DIMENSION);
4023 }
4024 }
4025 else
4026 #endif
4027
4028 for (i = 0; i < timeitm; i++)
4029 {
4030 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4031 (options | g_notempty), use_offsets, use_size_offsets);
4032 }
4033 time_taken = clock() - start_time;
4034 fprintf(outfile, "Execute time %.4f milliseconds\n",
4035 (((double)time_taken * 1000.0) / (double)timeitm) /
4036 (double)CLOCKS_PER_SEC);
4037 }
4038
4039 /* If find_match_limit is set, we want to do repeated matches with
4040 varying limits in order to find the minimum value for the match limit and
4041 for the recursion limit. The match limits are relevant only to the normal
4042 running of pcre_exec(), so disable the JIT optimization. This makes it
4043 possible to run the same set of tests with and without JIT externally
4044 requested. */
4045
4046 if (find_match_limit)
4047 {
4048 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4049 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4050 extra->flags = 0;
4051
4052 (void)check_match_limit(re, extra, bptr, len, start_offset,
4053 options|g_notempty, use_offsets, use_size_offsets,
4054 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4055 PCRE_ERROR_MATCHLIMIT, "match()");
4056
4057 count = check_match_limit(re, extra, bptr, len, start_offset,
4058 options|g_notempty, use_offsets, use_size_offsets,
4059 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4060 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4061 }
4062
4063 /* If callout_data is set, use the interface with additional data */
4064
4065 else if (callout_data_set)
4066 {
4067 if (extra == NULL)
4068 {
4069 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4070 extra->flags = 0;
4071 }
4072 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4073 extra->callout_data = &callout_data;
4074 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4075 options | g_notempty, use_offsets, use_size_offsets);
4076 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4077 }
4078
4079 /* The normal case is just to do the match once, with the default
4080 value of match_limit. */
4081
4082 #if !defined NODFA
4083 else if (all_use_dfa || use_dfa)
4084 {
4085 if (dfa_workspace == NULL)
4086 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4087 if (dfa_matched++ == 0)
4088 dfa_workspace[0] = -1; /* To catch bad restart */
4089 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4090 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4091 DFA_WS_DIMENSION);
4092 if (count == 0)
4093 {
4094 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4095 count = use_size_offsets/2;
4096 }
4097 }
4098 #endif
4099
4100 else
4101 {
4102 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4103 options | g_notempty, use_offsets, use_size_offsets);
4104 if (count == 0)
4105 {
4106 fprintf(outfile, "Matched, but too many substrings\n");
4107 count = use_size_offsets/3;
4108 }
4109 }
4110
4111 /* Matched */
4112
4113 if (count >= 0)
4114 {
4115 int i, maxcount;
4116 void *cnptr, *gnptr;
4117
4118 #if !defined NODFA
4119 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4120 #endif
4121 maxcount = use_size_offsets/3;
4122
4123 /* This is a check against a lunatic return value. */
4124
4125 if (count > maxcount)
4126 {
4127 fprintf(outfile,
4128 "** PCRE error: returned count %d is too big for offset size %d\n",
4129 count, use_size_offsets);
4130 count = use_size_offsets/3;
4131 if (do_g || do_G)
4132 {
4133 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4134 do_g = do_G = FALSE; /* Break g/G loop */
4135 }
4136 }
4137
4138 /* do_allcaps requests showing of all captures in the pattern, to check
4139 unset ones at the end. */
4140
4141 if (do_allcaps)
4142 {
4143 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4144 goto SKIP_DATA;
4145 count++; /* Allow for full match */
4146 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4147 }
4148
4149 /* Output the captured substrings */
4150
4151 for (i = 0; i < count * 2; i += 2)
4152 {
4153 if (use_offsets[i] < 0)
4154 {
4155 if (use_offsets[i] != -1)
4156 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4157 use_offsets[i], i);
4158 if (use_offsets[i+1] != -1)
4159 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4160 use_offsets[i+1], i+1);
4161 fprintf(outfile, "%2d: <unset>\n", i/2);
4162 }
4163 else
4164 {
4165 fprintf(outfile, "%2d: ", i/2);
4166 PCHARSV(bptr, use_offsets[i],
4167 use_offsets[i+1] - use_offsets[i], outfile);
4168 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4169 fprintf(outfile, "\n");
4170 if (do_showcaprest || (i == 0 && do_showrest))
4171 {
4172 fprintf(outfile, "%2d+ ", i/2);
4173 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4174 outfile);
4175 fprintf(outfile, "\n");
4176 }
4177 }
4178 }
4179
4180 if (markptr != NULL)
4181 {
4182 fprintf(outfile, "MK: ");
4183 PCHARSV(markptr, 0, -1, outfile);
4184 fprintf(outfile, "\n");
4185 }
4186
4187 for (i = 0; i < 32; i++)
4188 {
4189 if ((copystrings & (1 << i)) != 0)
4190 {
4191 int rc;
4192 char copybuffer[256];
4193 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4194 copybuffer, sizeof(copybuffer));
4195 if (rc < 0)
4196 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4197 else
4198 {
4199 fprintf(outfile, "%2dC ", i);
4200 PCHARSV(copybuffer, 0, rc, outfile);
4201 fprintf(outfile, " (%d)\n", rc);
4202 }
4203 }
4204 }
4205
4206 cnptr = copynames;
4207 for (;;)
4208 {
4209 int rc;
4210 char copybuffer[256];
4211
4212 if (use_pcre16)
4213 {
4214 if (*(pcre_uint16 *)cnptr == 0) break;
4215 }
4216 else
4217 {
4218 if (*(pcre_uint8 *)cnptr == 0) break;
4219 }
4220
4221 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4222 cnptr, copybuffer, sizeof(copybuffer));
4223
4224 if (rc < 0)
4225 {
4226 fprintf(outfile, "copy substring ");
4227 PCHARSV(cnptr, 0, -1, outfile);
4228 fprintf(outfile, " failed %d\n", rc);
4229 }
4230 else
4231 {
4232 fprintf(outfile, " C ");
4233 PCHARSV(copybuffer, 0, rc, outfile);
4234 fprintf(outfile, " (%d) ", rc);
4235 PCHARSV(cnptr, 0, -1, outfile);
4236 putc('\n', outfile);
4237 }
4238
4239 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4240 }
4241
4242 for (i = 0; i < 32; i++)
4243 {
4244 if ((getstrings & (1 << i)) != 0)
4245 {
4246 int rc;
4247 const char *substring;
4248 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4249 if (rc < 0)
4250 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4251 else
4252 {
4253 fprintf(outfile, "%2dG ", i);
4254 PCHARSV(substring, 0, rc, outfile);
4255 fprintf(outfile, " (%d)\n", rc);
4256 PCRE_FREE_SUBSTRING(substring);
4257 }
4258 }
4259 }
4260
4261 gnptr = getnames;
4262 for (;;)
4263 {
4264 int rc;
4265 const char *substring;
4266
4267 if (use_pcre16)
4268 {
4269 if (*(pcre_uint16 *)gnptr == 0) break;
4270 }
4271 else
4272 {
4273 if (*(pcre_uint8 *)gnptr == 0) break;
4274 }
4275
4276 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4277 gnptr, &substring);
4278 if (rc < 0)
4279 {
4280 fprintf(outfile, "get substring ");
4281 PCHARSV(gnptr, 0, -1, outfile);
4282 fprintf(outfile, " failed %d\n", rc);
4283 }
4284 else
4285 {
4286 fprintf(outfile, " G ");
4287 PCHARSV(substring, 0, rc, outfile);
4288 fprintf(outfile, " (%d) ", rc);
4289 PCHARSV(gnptr, 0, -1, outfile);
4290 PCRE_FREE_SUBSTRING(substring);
4291 putc('\n', outfile);
4292 }
4293
4294 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4295 }
4296
4297 if (getlist)
4298 {
4299 int rc;
4300 const char **stringlist;
4301 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4302 if (rc < 0)
4303 fprintf(outfile, "get substring list failed %d\n", rc);
4304 else
4305 {
4306 for (i = 0; i < count; i++)
4307 {
4308 fprintf(outfile, "%2dL ", i);
4309 PCHARSV(stringlist[i], 0, -1, outfile);
4310 putc('\n', outfile);
4311 }
4312 if (stringlist[i] != NULL)
4313 fprintf(outfile, "string list not terminated by NULL\n");
4314 PCRE_FREE_SUBSTRING_LIST(stringlist);
4315 }
4316 }
4317 }
4318
4319 /* There was a partial match */
4320
4321 else if (count == PCRE_ERROR_PARTIAL)
4322 {
4323 if (markptr == NULL) fprintf(outfile, "Partial match");
4324 else
4325 {
4326 fprintf(outfile, "Partial match, mark=");
4327 PCHARSV(markptr, 0, -1, outfile);
4328 }
4329 if (use_size_offsets > 1)
4330 {
4331 fprintf(outfile, ": ");
4332 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4333 outfile);
4334 }
4335 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4336 fprintf(outfile, "\n");
4337 break; /* Out of the /g loop */
4338 }
4339
4340 /* Failed to match. If this is a /g or /G loop and we previously set
4341 g_notempty after a null match, this is not necessarily the end. We want
4342 to advance the start offset, and continue. We won't be at the end of the
4343 string - that was checked before setting g_notempty.
4344
4345 Complication arises in the case when the newline convention is "any",
4346 "crlf", or "anycrlf". If the previous match was at the end of a line
4347 terminated by CRLF, an advance of one character just passes the \r,
4348 whereas we should prefer the longer newline sequence, as does the code in
4349 pcre_exec(). Fudge the offset value to achieve this. We check for a
4350 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4351 find the default.
4352
4353 Otherwise, in the case of UTF-8 matching, the advance must be one
4354 character, not one byte. */
4355
4356 else
4357 {
4358 if (g_notempty != 0)
4359 {
4360 int onechar = 1;
4361 unsigned int obits = ((REAL_PCRE *)re)->options;
4362 use_offsets[0] = start_offset;
4363 if ((obits & PCRE_NEWLINE_BITS) == 0)
4364 {
4365 int d;
4366 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4367 /* Note that these values are always the ASCII ones, even in
4368 EBCDIC environments. CR = 13, NL = 10. */
4369 obits = (d == 13)? PCRE_NEWLINE_CR :
4370 (d == 10)? PCRE_NEWLINE_LF :
4371 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4372 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4373 (d == -1)? PCRE_NEWLINE_ANY : 0;
4374 }
4375 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4376 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4377 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4378 &&
4379 start_offset < len - 1 &&
4380 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4381 (use_pcre16?
4382 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4383 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4384 :
4385 bptr[start_offset] == '\r'
4386 && bptr[start_offset + 1] == '\n')
4387 #elif defined SUPPORT_PCRE16
4388 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4389 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4390 #else
4391 bptr[start_offset] == '\r'
4392 && bptr[start_offset + 1] == '\n'
4393 #endif
4394 )
4395 onechar++;
4396 else if (use_utf)
4397 {
4398 while (start_offset + onechar < len)
4399 {
4400 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4401 onechar++;
4402 }
4403 }
4404 use_offsets[1] = start_offset + onechar;
4405 }
4406 else
4407 {
4408 switch(count)
4409 {
4410 case PCRE_ERROR_NOMATCH:
4411 if (gmatched == 0)
4412 {
4413 if (markptr == NULL)
4414 {
4415 fprintf(outfile, "No match");
4416 }
4417 else
4418 {
4419 fprintf(outfile, "No match, mark = ");
4420 PCHARSV(markptr, 0, -1, outfile);
4421 }
4422 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4423 putc('\n', outfile);
4424 }
4425 break;
4426
4427 case PCRE_ERROR_BADUTF8:
4428 case PCRE_ERROR_SHORTUTF8:
4429 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4430 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4431 use_pcre16? "16" : "8");
4432 if (use_size_offsets >= 2)
4433 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4434 use_offsets[1]);
4435 fprintf(outfile, "\n");
4436 break;
4437
4438 case PCRE_ERROR_BADUTF8_OFFSET:
4439 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4440 use_pcre16? "16" : "8");
4441 break;
4442
4443 default:
4444 if (count < 0 &&
4445 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4446 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4447 else
4448 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4449 break;
4450 }
4451
4452 break; /* Out of the /g loop */
4453 }
4454 }
4455
4456 /* If not /g or /G we are done */
4457
4458 if (!do_g && !do_G) break;
4459
4460 /* If we have matched an empty string, first check to see if we are at
4461 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4462 Perl's /g option does. This turns out to be rather cunning. First we set
4463 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4464 same point. If this fails (picked up above) we advance to the next
4465 character. */
4466
4467 g_notempty = 0;
4468
4469 if (use_offsets[0] == use_offsets[1])
4470 {
4471 if (use_offsets[0] == len) break;
4472 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4473 }
4474
4475 /* For /g, update the start offset, leaving the rest alone */
4476
4477 if (do_g) start_offset = use_offsets[1];
4478
4479 /* For /G, update the pointer and length */
4480
4481 else
4482 {
4483 bptr += use_offsets[1] * CHAR_SIZE;
4484 len -= use_offsets[1];
4485 }
4486 } /* End of loop for /g and /G */
4487
4488 NEXT_DATA: continue;
4489 } /* End of loop for data lines */
4490
4491 CONTINUE:
4492
4493 #if !defined NOPOSIX
4494 if (posix || do_posix) regfree(&preg);
4495 #endif
4496
4497 if (re != NULL) new_free(re);
4498 if (extra != NULL)
4499 {
4500 PCRE_FREE_STUDY(extra);
4501 }
4502 if (locale_set)
4503 {
4504 new_free((void *)tables);
4505 setlocale(LC_CTYPE, "C");
4506 locale_set = 0;
4507 }
4508 if (jit_stack != NULL)
4509 {
4510 PCRE_JIT_STACK_FREE(jit_stack);
4511 jit_stack = NULL;
4512 }
4513 }
4514
4515 if (infile == stdin) fprintf(outfile, "\n");
4516
4517 EXIT:
4518
4519 if (infile != NULL && infile != stdin) fclose(infile);
4520 if (outfile != NULL && outfile != stdout) fclose(outfile);
4521
4522 free(buffer);
4523 free(dbuffer);
4524 free(pbuffer);
4525 free(offsets);
4526
4527 #ifdef SUPPORT_PCRE16
4528 if (buffer16 != NULL) free(buffer16);
4529 #endif
4530
4531 #if !defined NODFA
4532 if (dfa_workspace != NULL)
4533 free(dfa_workspace);
4534 #endif
4535
4536 return yield;
4537 }
4538
4539 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5