/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1033 - (show annotations)
Mon Sep 10 11:02:48 2012 UTC (7 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 135261 byte(s)
Error occurred while calculating annotation data.
General spring-clean of EBCDIC-related issues in the code, which had decayed 
over time. Also the documentation. Added one test that can be run in an ASCII
world to do a little testing of EBCDIC-related things. 
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r" /* No "b": it is wrong for input on Windows */
94 #define OUTPUT_MODE "wb" /* Output keeps the "b" */
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif /* __BORLANDC__ */
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb" /* "b" makes no difference on Unix-like systems */
120 #define OUTPUT_MODE "wb"
121 #endif /* NATIVE_ZOS */
122 #endif /* _WIN32 || WIN32 */
123
124 #define PRIV(name) name /* Here PRIV(x) expands to plain x, with no prefix */
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138 /* Only 16-bit support: put the pcre_internal.h macros into 16-bit mode. */
139 #define COMPILE_PCRE16
140 #endif /* SUPPORT_PCRE16 && !SUPPORT_PCRE8 */
141
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
149 #ifdef SUPPORT_PCRE8
150 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif /* SUPPORT_PCRE8 */
152 #ifdef SUPPORT_PCRE16 /* Note: this one is also declared with pcre *, not pcre16 * */
153 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154 #endif /* SUPPORT_PCRE16 */
155
156 /* We need access to some of the data tables that PCRE uses. So as not to have
157 to keep two copies, we include the source file here, changing the names of the
158 external symbols to prevent clashes. */
159
160 #define PCRE_INCLUDED
161
162 #include "pcre_tables.c"
163
164 /* The definition of the macro PRINTABLE, which determines whether to print an
165 output character as-is or as a hex value when showing compiled patterns, is
166 the same as in the printint.src file. We use it here in cases when the locale
167 has not been explicitly changed, so as to get consistent output from systems
168 that differ in their output from isprint() even in the "C" locale. */
169
170 #ifdef EBCDIC
171 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
172 #else
173 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
174 #endif
175 /* isprint() is only defined for unsigned char values, so guard the range. */
176 #define PRINTOK(c) (locale_set? ((unsigned)(c) < 256u && isprint(c)) : PRINTABLE(c))
177
178 /* POSIX interface testing is disabled when only 16-bit support is built. */
179 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
180 #define NOPOSIX
181 #endif /* 16-bit only and NOPOSIX not already defined */
182
183 /* It is possible to compile this test program without including support for
184 testing the POSIX interface, though this is not available via the standard
185 Makefile. */
186
187 #if !defined NOPOSIX
188 #include "pcreposix.h"
189 #endif
190
191 /* It is also possible, originally for the benefit of a version that was
192 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
193 NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
194 automatically cut out the UTF support if PCRE is built without it. */
195
196 #ifndef SUPPORT_UTF
197 #ifndef NOUTF
198 #define NOUTF
199 #endif /* NOUTF */
200 #endif /* SUPPORT_UTF */
201
202 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
203 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
204 only from one place and is handled differently). I couldn't dream up any way of
205 using a single macro to do this in a generic way, because of the many different
206 argument requirements. We know that at least one of SUPPORT_PCRE8 and
207 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
208 use these in the definitions of generic macros.
209
210 **** Special note about the PCHARSxxx macros: the address of the string to be
211 printed is always given as two arguments: a base address followed by an offset.
212 The base address is cast to the correct data size for 8 or 16 bit data; the
213 offset is in units of this size. If the string were given as base+offset in one
214 argument, the casting might be incorrectly applied. */
215
216 #ifdef SUPPORT_PCRE8
217 /* 8-bit wrappers: string arguments are cast to the types the 8-bit API uses. */
218 #define PCHARS8(lv, p, offset, len, f) \
219 lv = pchars((pcre_uint8 *)(p) + (offset), len, f)
220
221 #define PCHARSV8(p, offset, len, f) \
222 (void)pchars((pcre_uint8 *)(p) + (offset), len, f)
223
224 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
225 p = read_capture_name8(p, cn8, re)
226
227 #define STRLEN8(p) ((int)strlen((char *)(p)))
228
229 #define SET_PCRE_CALLOUT8(callout) \
230 pcre_callout = callout
231
232 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
233 pcre_assign_jit_stack(extra, callback, userdata)
234
235 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
236 re = pcre_compile((char *)pat, options, error, erroffset, tables)
237
238 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
239 namesptr, cbuffer, size) \
240 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
241 (char *)namesptr, cbuffer, size)
242
243 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
244 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
245
246 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247 offsets, size_offsets, workspace, size_workspace) \
248 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
249 offsets, size_offsets, workspace, size_workspace)
250
251 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
252 offsets, size_offsets) \
253 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
254 offsets, size_offsets)
255
256 #define PCRE_FREE_STUDY8(extra) \
257 pcre_free_study(extra)
258
259 #define PCRE_FREE_SUBSTRING8(substring) \
260 pcre_free_substring(substring)
261
262 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
263 pcre_free_substring_list(listptr)
264
265 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
266 getnamesptr, subsptr) \
267 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
268 (char *)getnamesptr, subsptr)
269 /* The second argument is the pattern handed to pcre_get_stringnumber(). */
270 #define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
271 n = pcre_get_stringnumber(re, (char *)ptr)
272
273 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
274 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
275
276 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
277 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
278
279 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
280 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
281
282 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
283 pcre_printint(re, outfile, debug_lengths)
284
285 #define PCRE_STUDY8(extra, re, options, error) \
286 extra = pcre_study(re, options, error)
287
288 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
289 pcre_jit_stack_alloc(startsize, maxsize)
290
291 #define PCRE_JIT_STACK_FREE8(stack) \
292 pcre_jit_stack_free(stack)
293
294 #endif /* SUPPORT_PCRE8 */
295
296 /* -----------------------------------------------------------*/
297
298 #ifdef SUPPORT_PCRE16
299 /* 16-bit wrappers: arguments are cast from the 8-bit types to the 16-bit ones. */
300 #define PCHARS16(lv, p, offset, len, f) \
301 lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)
302
303 #define PCHARSV16(p, offset, len, f) \
304 (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)
305
306 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
307 p = read_capture_name16(p, cn16, re)
308
309 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)(p)))
310
311 #define SET_PCRE_CALLOUT16(callout) \
312 pcre16_callout = (int (*)(pcre16_callout_block *))callout
313
314 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315 pcre16_assign_jit_stack((pcre16_extra *)extra, \
316 (pcre16_jit_callback)callback, userdata)
317
318 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320 tables)
321 /* cbuffer sizes arrive in bytes and are halved to give 16-bit units. */
322 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323 namesptr, cbuffer, size) \
324 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)
326
327 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329 (PCRE_UCHAR16 *)cbuffer, (size)/2)
330
331 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332 offsets, size_offsets, workspace, size_workspace) \
333 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335 workspace, size_workspace)
336
337 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338 offsets, size_offsets) \
339 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340 len, start_offset, options, offsets, size_offsets)
341
342 #define PCRE_FREE_STUDY16(extra) \
343 pcre16_free_study((pcre16_extra *)extra)
344
345 #define PCRE_FREE_SUBSTRING16(substring) \
346 pcre16_free_substring((PCRE_SPTR16)substring)
347
348 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350
351 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352 getnamesptr, subsptr) \
353 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355 /* The second argument is the pattern; it is cast like in the other wrappers. */
356 #define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
357 n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
358
359 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361 (PCRE_SPTR16 *)(void*)subsptr)
362
363 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365 (PCRE_SPTR16 **)(void*)listptr)
366
367 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369 tables)
370
371 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372 pcre16_printint(re, outfile, debug_lengths)
373
374 #define PCRE_STUDY16(extra, re, options, error) \
375 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376
377 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379
380 #define PCRE_JIT_STACK_FREE16(stack) \
381 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382
383 #endif /* SUPPORT_PCRE16 */
384
385
386 /* ----- Both modes are supported; a runtime test is needed, except for
387 pcre_config(), and the JIT stack functions, when it doesn't matter which
388 version is called. ----- */
389
390 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
391
392 #define CHAR_SIZE (use_pcre16? 2:1)
393
394 #define PCHARS(lv, p, offset, len, f) \
395 if (use_pcre16) \
396 PCHARS16(lv, p, offset, len, f); \
397 else \
398 PCHARS8(lv, p, offset, len, f)
399
400 #define PCHARSV(p, offset, len, f) \
401 if (use_pcre16) \
402 PCHARSV16(p, offset, len, f); \
403 else \
404 PCHARSV8(p, offset, len, f)
405
406 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
407 if (use_pcre16) \
408 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
409 else \
410 READ_CAPTURE_NAME8(p, cn8, cn16, re)
411
412 #define SET_PCRE_CALLOUT(callout) \
413 if (use_pcre16) \
414 SET_PCRE_CALLOUT16(callout); \
415 else \
416 SET_PCRE_CALLOUT8(callout)
417
418 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
419
420 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
421 if (use_pcre16) \
422 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
423 else \
424 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
425
426 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
427 if (use_pcre16) \
428 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
429 else \
430 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
431
432 #define PCRE_CONFIG pcre_config
433
434 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
435 namesptr, cbuffer, size) \
436 if (use_pcre16) \
437 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
438 namesptr, cbuffer, size); \
439 else \
440 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
441 namesptr, cbuffer, size)
442
443 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
444 if (use_pcre16) \
445 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
446 else \
447 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
448
449 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
450 offsets, size_offsets, workspace, size_workspace) \
451 if (use_pcre16) \
452 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
453 offsets, size_offsets, workspace, size_workspace); \
454 else \
455 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
456 offsets, size_offsets, workspace, size_workspace)
457
458 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
459 offsets, size_offsets) \
460 if (use_pcre16) \
461 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
462 offsets, size_offsets); \
463 else \
464 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
465 offsets, size_offsets)
466
467 #define PCRE_FREE_STUDY(extra) \
468 if (use_pcre16) \
469 PCRE_FREE_STUDY16(extra); \
470 else \
471 PCRE_FREE_STUDY8(extra)
472
473 #define PCRE_FREE_SUBSTRING(substring) \
474 if (use_pcre16) \
475 PCRE_FREE_SUBSTRING16(substring); \
476 else \
477 PCRE_FREE_SUBSTRING8(substring)
478
479 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
480 if (use_pcre16) \
481 PCRE_FREE_SUBSTRING_LIST16(listptr); \
482 else \
483 PCRE_FREE_SUBSTRING_LIST8(listptr)
484
485 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
486 getnamesptr, subsptr) \
487 if (use_pcre16) \
488 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
489 getnamesptr, subsptr); \
490 else \
491 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
492 getnamesptr, subsptr)
493
494 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
495 if (use_pcre16) \
496 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
497 else \
498 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
499
500 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
501 if (use_pcre16) \
502 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
503 else \
504 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
505
506 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
507 if (use_pcre16) \
508 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
509 else \
510 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
511
512 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
513 (use_pcre16 ? \
514 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
515 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
516
517 #define PCRE_JIT_STACK_FREE(stack) \
518 if (use_pcre16) \
519 PCRE_JIT_STACK_FREE16(stack); \
520 else \
521 PCRE_JIT_STACK_FREE8(stack)
522
523 #define PCRE_MAKETABLES \
524 (use_pcre16? pcre16_maketables() : pcre_maketables())
525
526 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
527 if (use_pcre16) \
528 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
529 else \
530 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
531
532 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
533 if (use_pcre16) \
534 PCRE_PRINTINT16(re, outfile, debug_lengths); \
535 else \
536 PCRE_PRINTINT8(re, outfile, debug_lengths)
537
538 #define PCRE_STUDY(extra, re, options, error) \
539 if (use_pcre16) \
540 PCRE_STUDY16(extra, re, options, error); \
541 else \
542 PCRE_STUDY8(extra, re, options, error)
543
544 /* ----- Only 8-bit mode is supported ----- */
545
546 #elif defined SUPPORT_PCRE8
547 #define CHAR_SIZE 1
548 #define PCHARS PCHARS8
549 #define PCHARSV PCHARSV8
550 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
551 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
552 #define STRLEN STRLEN8
553 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
554 #define PCRE_COMPILE PCRE_COMPILE8
555 #define PCRE_CONFIG pcre_config
556 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
557 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
558 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
559 #define PCRE_EXEC PCRE_EXEC8
560 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
561 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
562 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
563 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
564 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
565 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
566 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
567 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
568 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
569 #define PCRE_MAKETABLES pcre_maketables()
570 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
571 #define PCRE_PRINTINT PCRE_PRINTINT8
572 #define PCRE_STUDY PCRE_STUDY8
573
574 /* ----- Only 16-bit mode is supported ----- */
575
576 #else
577 #define CHAR_SIZE 2
578 #define PCHARS PCHARS16
579 #define PCHARSV PCHARSV16
580 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
581 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
582 #define STRLEN STRLEN16
583 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
584 #define PCRE_COMPILE PCRE_COMPILE16
585 #define PCRE_CONFIG pcre16_config
586 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
587 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
588 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
589 #define PCRE_EXEC PCRE_EXEC16
590 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
591 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
592 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
593 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
594 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
595 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
596 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
597 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
598 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
599 #define PCRE_MAKETABLES pcre16_maketables()
600 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
601 #define PCRE_PRINTINT PCRE_PRINTINT16
602 #define PCRE_STUDY PCRE_STUDY16
603 #endif
604
605 /* ----- End of mode-specific function call macros ----- */
606
607
608 /* Other parameters */
609
610 #ifndef CLOCKS_PER_SEC
611 #ifdef CLK_TCK
612 #define CLOCKS_PER_SEC CLK_TCK /* Use CLK_TCK when that is what exists */
613 #else
614 #define CLOCKS_PER_SEC 100 /* Neither macro exists: fall back to 100 */
615 #endif /* CLK_TCK */
616 #endif /* CLOCKS_PER_SEC */
617
618 #if !defined NODFA
619 #define DFA_WS_DIMENSION 1000 /* Presumably the DFA workspace size - TODO confirm */
620 #endif /* !NODFA */
621
622 /* This is the default loop count for timing. */
623
624 #define LOOPREPEAT 500000
625
626 /* Static variables. Those without an initializer start out as zero/NULL. */
627
628 static FILE *outfile;
629 static int log_store = 0;
630 static int callout_count;
631 static int callout_extra;
632 static int callout_fail_count;
633 static int callout_fail_id;
634 static int debug_lengths;
635 static int first_callout;
636 static int jit_was_used;
637 static int locale_set = 0; /* Nonzero makes PRINTOK use isprint() */
638 static int show_malloc;
639 static int use_utf;
640 static size_t gotten_store;
641 static size_t first_gotten_store = 0;
642 static const unsigned char *last_callout_mark = NULL;
643
644 /* The buffers grow automatically if very long input lines are encountered. */
645
646 static int buffer_size = 50000; /* Starting value; see the comment above */
647 static pcre_uint8 *buffer = NULL;
648 static pcre_uint8 *dbuffer = NULL;
649 static pcre_uint8 *pbuffer = NULL;
650
651 /* Another buffer is needed for translation to 16-bit character strings. It
652 will be obtained and extended as required. */
653
654 #ifdef SUPPORT_PCRE16
655 static int buffer16_size = 0; /* Starts empty: no 16-bit buffer exists yet */
656 static pcre_uint16 *buffer16 = NULL;
657
658 #ifdef SUPPORT_PCRE8
659
660 /* We need the table of operator lengths that is used for 16-bit compiling, in
661 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
662 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
663 appropriately for the 16-bit world. Just as a safety check, make sure that
664 COMPILE_PCRE16 is *not* set. */
665
666 #ifdef COMPILE_PCRE16
667 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
668 #endif
669
670 #if LINK_SIZE == 2 /* NOTE(review): looks like bytes -> 16-bit units; confirm */
671 #undef LINK_SIZE
672 #define LINK_SIZE 1
673 #elif LINK_SIZE == 3 || LINK_SIZE == 4
674 #undef LINK_SIZE
675 #define LINK_SIZE 2
676 #else
677 #error LINK_SIZE must be either 2, 3, or 4
678 #endif
679
680 #undef IMM2_SIZE
681 #define IMM2_SIZE 1
682
683 #endif /* SUPPORT_PCRE8 */
684 /* OP_LENGTHS is expanded with whatever LINK_SIZE and IMM2_SIZE are set here. */
685 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
686 #endif /* SUPPORT_PCRE16 */
687
688 /* If we have 8-bit support, default use_pcre16 to false; if there is also
689 16-bit support, it can be changed by an option. If there is no 8-bit support,
690 there must be 16-bit support, so default it to 1. */
691
692 #ifdef SUPPORT_PCRE8
693 static int use_pcre16 = 0; /* 8-bit library is the default */
694 #else
695 static int use_pcre16 = 1; /* No 8-bit support, so 16-bit is the only choice */
696 #endif /* SUPPORT_PCRE8 */
697
698 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
699 /* Seven entries, one per n; presumably indexed by n - '1' (TODO confirm). */
700 static int jit_study_bits[] =
701 {
702 PCRE_STUDY_JIT_COMPILE,
703 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
704 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
706 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
709 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
710 };
711 /* The three JIT study option bits OR-ed together. */
712 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
713 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
714
715 /* Textual explanations for runtime error codes. NULL marks a code that is
716 handled specially or never returned. Presumably indexed by -rc (TODO confirm). */
717 static const char *errtexts[] = {
718 NULL, /* 0 is no error */
719 NULL, /* NOMATCH is handled specially */
720 "NULL argument passed",
721 "bad option value",
722 "magic number missing",
723 "unknown opcode - pattern overwritten?",
724 "no more memory",
725 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
726 "match limit exceeded",
727 "callout error code",
728 NULL, /* BADUTF8/16 is handled specially */
729 NULL, /* BADUTF8/16 offset is handled specially */
730 NULL, /* PARTIAL is handled specially */
731 "not used - internal error",
732 "internal error - pattern overwritten?",
733 "bad count value",
734 "item unsupported for DFA matching",
735 "backreference condition or recursion test not supported for DFA matching",
736 "match limit not supported for DFA matching",
737 "workspace size exceeded in DFA matching",
738 "too much recursion for DFA matching",
739 "recursion limit exceeded",
740 "not used - internal error",
741 "invalid combination of newline options",
742 "bad offset value",
743 NULL, /* SHORTUTF8/16 is handled specially */
744 "nested recursion at the same subject position",
745 "JIT stack limit reached",
746 "pattern compiled in wrong mode: 8-bit/16-bit error",
747 "pattern compiled with other endianness",
748 "invalid data in workspace for DFA restart"
749 };
750
751
752 /*************************************************
753 * Alternate character tables *
754 *************************************************/
755
756 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
757 using the default tables of the library. However, the T option can be used to
758 select alternate sets of tables, for different kinds of testing. Note also that
759 the L (locale) option also adjusts the tables. */
760
761 /* This is the set of tables distributed as default with PCRE. It recognizes
762 only ASCII characters. */
763
764 static const pcre_uint8 tables0[] = {
765
766 /* This table is a lower casing table. */
767
768 0, 1, 2, 3, 4, 5, 6, 7,
769 8, 9, 10, 11, 12, 13, 14, 15,
770 16, 17, 18, 19, 20, 21, 22, 23,
771 24, 25, 26, 27, 28, 29, 30, 31,
772 32, 33, 34, 35, 36, 37, 38, 39,
773 40, 41, 42, 43, 44, 45, 46, 47,
774 48, 49, 50, 51, 52, 53, 54, 55,
775 56, 57, 58, 59, 60, 61, 62, 63,
776 64, 97, 98, 99,100,101,102,103,
777 104,105,106,107,108,109,110,111,
778 112,113,114,115,116,117,118,119,
779 120,121,122, 91, 92, 93, 94, 95,
780 96, 97, 98, 99,100,101,102,103,
781 104,105,106,107,108,109,110,111,
782 112,113,114,115,116,117,118,119,
783 120,121,122,123,124,125,126,127,
784 128,129,130,131,132,133,134,135,
785 136,137,138,139,140,141,142,143,
786 144,145,146,147,148,149,150,151,
787 152,153,154,155,156,157,158,159,
788 160,161,162,163,164,165,166,167,
789 168,169,170,171,172,173,174,175,
790 176,177,178,179,180,181,182,183,
791 184,185,186,187,188,189,190,191,
792 192,193,194,195,196,197,198,199,
793 200,201,202,203,204,205,206,207,
794 208,209,210,211,212,213,214,215,
795 216,217,218,219,220,221,222,223,
796 224,225,226,227,228,229,230,231,
797 232,233,234,235,236,237,238,239,
798 240,241,242,243,244,245,246,247,
799 248,249,250,251,252,253,254,255,
800
801 /* This table is a case flipping table. */
802
803 0, 1, 2, 3, 4, 5, 6, 7,
804 8, 9, 10, 11, 12, 13, 14, 15,
805 16, 17, 18, 19, 20, 21, 22, 23,
806 24, 25, 26, 27, 28, 29, 30, 31,
807 32, 33, 34, 35, 36, 37, 38, 39,
808 40, 41, 42, 43, 44, 45, 46, 47,
809 48, 49, 50, 51, 52, 53, 54, 55,
810 56, 57, 58, 59, 60, 61, 62, 63,
811 64, 97, 98, 99,100,101,102,103,
812 104,105,106,107,108,109,110,111,
813 112,113,114,115,116,117,118,119,
814 120,121,122, 91, 92, 93, 94, 95,
815 96, 65, 66, 67, 68, 69, 70, 71,
816 72, 73, 74, 75, 76, 77, 78, 79,
817 80, 81, 82, 83, 84, 85, 86, 87,
818 88, 89, 90,123,124,125,126,127,
819 128,129,130,131,132,133,134,135,
820 136,137,138,139,140,141,142,143,
821 144,145,146,147,148,149,150,151,
822 152,153,154,155,156,157,158,159,
823 160,161,162,163,164,165,166,167,
824 168,169,170,171,172,173,174,175,
825 176,177,178,179,180,181,182,183,
826 184,185,186,187,188,189,190,191,
827 192,193,194,195,196,197,198,199,
828 200,201,202,203,204,205,206,207,
829 208,209,210,211,212,213,214,215,
830 216,217,218,219,220,221,222,223,
831 224,225,226,227,228,229,230,231,
832 232,233,234,235,236,237,238,239,
833 240,241,242,243,244,245,246,247,
834 248,249,250,251,252,253,254,255,
835
836 /* This table contains bit maps for various character classes. Each map is 32
837 bytes long and the bits run from the least significant end of each byte. The
838 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
839 graph, print, punct, and cntrl. Other classes are built from combinations. */
840
841 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
842 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845
846 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850
851 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
858 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860
861 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865
866 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
867 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
868 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870
871 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
872 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875
876 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
877 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880
881 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
882 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885
886 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890
891 /* This table identifies various classes of character by individual bits:
892 0x01 white space character
893 0x02 letter
894 0x04 decimal digit
895 0x08 hexadecimal digit
896 0x10 alphanumeric or '_'
897 0x80 regular expression metacharacter or binary zero
898 */
899
900 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
901 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
902 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
903 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
904 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
905 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
906 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
907 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
908 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
909 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
910 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
911 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
912 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
913 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
914 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
915 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
924 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
925 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
926 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
927 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
928 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
929 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
930 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
931 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
932
933 /* This is a set of tables that came originally from a Windows user. It seems to
934 be at least an approximation of ISO 8859. In particular, there are characters
935 greater than 128 that are marked as spaces, letters, etc. */
936
937 static const pcre_uint8 tables1[] = {
938 0,1,2,3,4,5,6,7,
939 8,9,10,11,12,13,14,15,
940 16,17,18,19,20,21,22,23,
941 24,25,26,27,28,29,30,31,
942 32,33,34,35,36,37,38,39,
943 40,41,42,43,44,45,46,47,
944 48,49,50,51,52,53,54,55,
945 56,57,58,59,60,61,62,63,
946 64,97,98,99,100,101,102,103,
947 104,105,106,107,108,109,110,111,
948 112,113,114,115,116,117,118,119,
949 120,121,122,91,92,93,94,95,
950 96,97,98,99,100,101,102,103,
951 104,105,106,107,108,109,110,111,
952 112,113,114,115,116,117,118,119,
953 120,121,122,123,124,125,126,127,
954 128,129,130,131,132,133,134,135,
955 136,137,138,139,140,141,142,143,
956 144,145,146,147,148,149,150,151,
957 152,153,154,155,156,157,158,159,
958 160,161,162,163,164,165,166,167,
959 168,169,170,171,172,173,174,175,
960 176,177,178,179,180,181,182,183,
961 184,185,186,187,188,189,190,191,
962 224,225,226,227,228,229,230,231,
963 232,233,234,235,236,237,238,239,
964 240,241,242,243,244,245,246,215,
965 248,249,250,251,252,253,254,223,
966 224,225,226,227,228,229,230,231,
967 232,233,234,235,236,237,238,239,
968 240,241,242,243,244,245,246,247,
969 248,249,250,251,252,253,254,255,
970 0,1,2,3,4,5,6,7,
971 8,9,10,11,12,13,14,15,
972 16,17,18,19,20,21,22,23,
973 24,25,26,27,28,29,30,31,
974 32,33,34,35,36,37,38,39,
975 40,41,42,43,44,45,46,47,
976 48,49,50,51,52,53,54,55,
977 56,57,58,59,60,61,62,63,
978 64,97,98,99,100,101,102,103,
979 104,105,106,107,108,109,110,111,
980 112,113,114,115,116,117,118,119,
981 120,121,122,91,92,93,94,95,
982 96,65,66,67,68,69,70,71,
983 72,73,74,75,76,77,78,79,
984 80,81,82,83,84,85,86,87,
985 88,89,90,123,124,125,126,127,
986 128,129,130,131,132,133,134,135,
987 136,137,138,139,140,141,142,143,
988 144,145,146,147,148,149,150,151,
989 152,153,154,155,156,157,158,159,
990 160,161,162,163,164,165,166,167,
991 168,169,170,171,172,173,174,175,
992 176,177,178,179,180,181,182,183,
993 184,185,186,187,188,189,190,191,
994 224,225,226,227,228,229,230,231,
995 232,233,234,235,236,237,238,239,
996 240,241,242,243,244,245,246,215,
997 248,249,250,251,252,253,254,223,
998 192,193,194,195,196,197,198,199,
999 200,201,202,203,204,205,206,207,
1000 208,209,210,211,212,213,214,247,
1001 216,217,218,219,220,221,222,255,
1002 0,62,0,0,1,0,0,0,
1003 0,0,0,0,0,0,0,0,
1004 32,0,0,0,1,0,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,255,3,
1007 126,0,0,0,126,0,0,0,
1008 0,0,0,0,0,0,0,0,
1009 0,0,0,0,0,0,0,0,
1010 0,0,0,0,0,0,255,3,
1011 0,0,0,0,0,0,0,0,
1012 0,0,0,0,0,0,12,2,
1013 0,0,0,0,0,0,0,0,
1014 0,0,0,0,0,0,0,0,
1015 254,255,255,7,0,0,0,0,
1016 0,0,0,0,0,0,0,0,
1017 255,255,127,127,0,0,0,0,
1018 0,0,0,0,0,0,0,0,
1019 0,0,0,0,254,255,255,7,
1020 0,0,0,0,0,4,32,4,
1021 0,0,0,128,255,255,127,255,
1022 0,0,0,0,0,0,255,3,
1023 254,255,255,135,254,255,255,7,
1024 0,0,0,0,0,4,44,6,
1025 255,255,127,255,255,255,127,255,
1026 0,0,0,0,254,255,255,255,
1027 255,255,255,255,255,255,255,127,
1028 0,0,0,0,254,255,255,255,
1029 255,255,255,255,255,255,255,255,
1030 0,2,0,0,255,255,255,255,
1031 255,255,255,255,255,255,255,127,
1032 0,0,0,0,255,255,255,255,
1033 255,255,255,255,255,255,255,255,
1034 0,0,0,0,254,255,0,252,
1035 1,0,0,248,1,0,0,120,
1036 0,0,0,0,254,255,255,255,
1037 0,0,128,0,0,0,128,0,
1038 255,255,255,255,0,0,0,0,
1039 0,0,0,0,0,0,0,128,
1040 255,255,255,255,0,0,0,0,
1041 0,0,0,0,0,0,0,0,
1042 128,0,0,0,0,0,0,0,
1043 0,1,1,0,1,1,0,0,
1044 0,0,0,0,0,0,0,0,
1045 0,0,0,0,0,0,0,0,
1046 1,0,0,0,128,0,0,0,
1047 128,128,128,128,0,0,128,0,
1048 28,28,28,28,28,28,28,28,
1049 28,28,0,0,0,0,0,128,
1050 0,26,26,26,26,26,26,18,
1051 18,18,18,18,18,18,18,18,
1052 18,18,18,18,18,18,18,18,
1053 18,18,18,128,128,0,128,16,
1054 0,26,26,26,26,26,26,18,
1055 18,18,18,18,18,18,18,18,
1056 18,18,18,18,18,18,18,18,
1057 18,18,18,128,128,0,0,0,
1058 0,0,0,0,0,1,0,0,
1059 0,0,0,0,0,0,0,0,
1060 0,0,0,0,0,0,0,0,
1061 0,0,0,0,0,0,0,0,
1062 1,0,0,0,0,0,0,0,
1063 0,0,18,0,0,0,0,0,
1064 0,0,20,20,0,18,0,0,
1065 0,20,18,0,0,0,0,0,
1066 18,18,18,18,18,18,18,18,
1067 18,18,18,18,18,18,18,18,
1068 18,18,18,18,18,18,18,0,
1069 18,18,18,18,18,18,18,18,
1070 18,18,18,18,18,18,18,18,
1071 18,18,18,18,18,18,18,18,
1072 18,18,18,18,18,18,18,0,
1073 18,18,18,18,18,18,18,18
1074 };
1075
1076
1077
1078
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement strerror() for C libraries that lack one (SunOS4 is an example)
but still export the sys_nerr/sys_errlist pair. An error number that falls
outside the list yields a fixed "unknown" message. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1098
1099
1100
1101 /*************************************************
1102 * Print newline configuration *
1103 *************************************************/
1104
1105 /*
1106 Arguments:
1107 rc the return code from PCRE_CONFIG_NEWLINE
1108 isc TRUE if called from "-C newline"
1109 Returns: nothing
1110 */
1111
1112 static void
1113 print_newline_config(int rc, BOOL isc)
1114 {
1115 const char *s = NULL;
1116 if (!isc) printf(" Newline sequence is ");
1117 switch(rc)
1118 {
1119 case CHAR_CR: s = "CR"; break;
1120 case CHAR_LF: s = "LF"; break;
1121 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1122 case -1: s = "ANY"; break;
1123 case -2: s = "ANYCRLF"; break;
1124
1125 default:
1126 printf("a non-standard value: 0x%04x\n", rc);
1127 return;
1128 }
1129
1130 printf("%s\n", s);
1131 }
1132
1133
1134
1135 /*************************************************
1136 * JIT memory callback *
1137 *************************************************/
1138
1139 static pcre_jit_stack* jit_callback(void *arg)
1140 {
1141 jit_was_used = TRUE;
1142 return (pcre_jit_stack *)arg;
1143 }
1144
1145
1146 #if !defined NOUTF || defined SUPPORT_PCRE16
1147 /*************************************************
1148 * Convert UTF-8 string to value *
1149 *************************************************/
1150
1151 /* This function takes one or more bytes that represents a UTF-8 character,
1152 and returns the value of the character.
1153
1154 Argument:
1155 utf8bytes a pointer to the byte vector
1156 vptr a pointer to an int to receive the value
1157
1158 Returns: > 0 => the number of bytes consumed
1159 -6 to 0 => malformed UTF-8 character at offset = (-return)
1160 */
1161
1162 static int
1163 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1164 {
1165 int c = *utf8bytes++;
1166 int d = c;
1167 int i, j, s;
1168
1169 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1170 {
1171 if ((d & 0x80) == 0) break;
1172 d <<= 1;
1173 }
1174
1175 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1176 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1177
1178 /* i now has a value in the range 1-5 */
1179
1180 s = 6*i;
1181 d = (c & utf8_table3[i]) << s;
1182
1183 for (j = 0; j < i; j++)
1184 {
1185 c = *utf8bytes++;
1186 if ((c & 0xc0) != 0x80) return -(j+1);
1187 s -= 6;
1188 d |= (c & 0x3f) << s;
1189 }
1190
1191 /* Check that encoding was the correct unique one */
1192
1193 for (j = 0; j < utf8_table1_size; j++)
1194 if (d <= utf8_table1[j]) break;
1195 if (j != i) return -(i+1);
1196
1197 /* Valid value */
1198
1199 *vptr = d;
1200 return i+1;
1201 }
1202 #endif /* NOUTF || SUPPORT_PCRE16 */
1203
1204
1205
1206 #if !defined NOUTF || defined SUPPORT_PCRE16
1207 /*************************************************
1208 * Convert character value to UTF-8 *
1209 *************************************************/
1210
1211 /* This function takes an integer value in the range 0 - 0x7fffffff
1212 and encodes it as a UTF-8 character in 0 to 6 bytes.
1213
1214 Arguments:
1215 cvalue the character value
1216 utf8bytes pointer to buffer for result - at least 6 bytes long
1217
1218 Returns: number of characters placed in the buffer
1219 */
1220
1221 static int
1222 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1223 {
1224 register int i, j;
1225 for (i = 0; i < utf8_table1_size; i++)
1226 if (cvalue <= utf8_table1[i]) break;
1227 utf8bytes += i;
1228 for (j = i; j > 0; j--)
1229 {
1230 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1231 cvalue >>= 6;
1232 }
1233 *utf8bytes = utf8_table2[i] | cvalue;
1234 return i + 1;
1235 }
1236 #endif
1237
1238
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
double: a value up to 0xffff takes 1 to 3 bytes in UTF-8 and 2 bytes in UTF-16,
while a higher value takes 4 bytes in UTF-8 and 4 bytes in UTF-16. The result
is always left in buffer16, which is reallocated here when it is too small.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

/* Grow the shared output buffer if this string will not fit. The old
contents are not needed, so a fresh block is obtained. */

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which only makes sense in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte simply widens to one 16-bit item. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1317
1318
1319 /*************************************************
1320 * Read or extend an input line *
1321 *************************************************/
1322
1323 /* Input lines are read into buffer, but both patterns and data lines can be
1324 continued over multiple input lines. In addition, if the buffer fills up, we
1325 want to automatically expand it so as to be able to handle extremely large
1326 lines that are needed for certain stress tests. When the input buffer is
1327 expanded, the other two buffers must also be expanded likewise, and the
1328 contents of pbuffer, which are a copy of the input for callouts, must be
1329 preserved (for when expansion happens for a data line). This is not the most
1330 optimal way of handling this, but hey, this is just a test program!
1331
1332 Arguments:
1333 f the file to read
1334 start where in buffer to start (this *must* be within buffer)
1335 prompt for stdin or readline()
1336
1337 Returns: pointer to the start of new data
1338 could be a copy of start, or could be moved
1339 NULL if no data read and EOF reached
1340 */
1341
1342 static pcre_uint8 *
1343 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1344 {
1345 pcre_uint8 *here = start;
1346
1347 for (;;)
1348 {
1349 size_t rlen = (size_t)(buffer_size - (here - buffer));
1350
1351 if (rlen > 1000)
1352 {
1353 int dlen;
1354
1355 /* If libreadline or libedit support is required, use readline() to read a
1356 line if the input is a terminal. Note that readline() removes the trailing
1357 newline, so we must put it back again, to be compatible with fgets(). */
1358
1359 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1360 if (isatty(fileno(f)))
1361 {
1362 size_t len;
1363 char *s = readline(prompt);
1364 if (s == NULL) return (here == start)? NULL : start;
1365 len = strlen(s);
1366 if (len > 0) add_history(s);
1367 if (len > rlen - 1) len = rlen - 1;
1368 memcpy(here, s, len);
1369 here[len] = '\n';
1370 here[len+1] = 0;
1371 free(s);
1372 }
1373 else
1374 #endif
1375
1376 /* Read the next line by normal means, prompting if the file is stdin. */
1377
1378 {
1379 if (f == stdin) printf("%s", prompt);
1380 if (fgets((char *)here, rlen, f) == NULL)
1381 return (here == start)? NULL : start;
1382 }
1383
1384 dlen = (int)strlen((char *)here);
1385 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1386 here += dlen;
1387 }
1388
1389 else
1390 {
1391 int new_buffer_size = 2*buffer_size;
1392 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1393 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1394 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1395
1396 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1397 {
1398 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1399 exit(1);
1400 }
1401
1402 memcpy(new_buffer, buffer, buffer_size);
1403 memcpy(new_pbuffer, pbuffer, buffer_size);
1404
1405 buffer_size = new_buffer_size;
1406
1407 start = new_buffer + (start - buffer);
1408 here = new_buffer + (here - buffer);
1409
1410 free(buffer);
1411 free(dbuffer);
1412 free(pbuffer);
1413
1414 buffer = new_buffer;
1415 dbuffer = new_dbuffer;
1416 pbuffer = new_pbuffer;
1417 }
1418 }
1419
1420 return NULL; /* Control never gets here */
1421 }
1422
1423
1424
1425 /*************************************************
1426 * Read number from string *
1427 *************************************************/
1428
1429 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1430 around with conditional compilation, just do the job by hand. It is only used
1431 for unpicking arguments, so just keep it simple.
1432
1433 Arguments:
1434 str string to be converted
1435 endptr where to put the end pointer
1436
1437 Returns: the unsigned long
1438 */
1439
1440 static int
1441 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1442 {
1443 int result = 0;
1444 while(*str != 0 && isspace(*str)) str++;
1445 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1446 *endptr = str;
1447 return(result);
1448 }
1449
1450
1451
1452 /*************************************************
1453 * Print one character *
1454 *************************************************/
1455
1456 /* Print a single character either literally, or as a hex escape. */
1457
1458 static int pchar(int c, FILE *f)
1459 {
1460 if (PRINTOK(c))
1461 {
1462 if (f != NULL) fprintf(f, "%c", c);
1463 return 1;
1464 }
1465
1466 if (c < 0x100)
1467 {
1468 if (use_utf)
1469 {
1470 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1471 return 6;
1472 }
1473 else
1474 {
1475 if (f != NULL) fprintf(f, "\\x%02x", c);
1476 return 4;
1477 }
1478 }
1479
1480 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1481 return (c <= 0x000000ff)? 6 :
1482 (c <= 0x00000fff)? 7 :
1483 (c <= 0x0000ffff)? 8 :
1484 (c <= 0x000fffff)? 9 : 10;
1485 }
1486
1487
1488
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Prints a byte string one character at a time via pchar(). When use_utf is
set, a valid UTF-8 sequence that fits inside the remaining length is treated
as a single character; anything else is handled one byte at a time. A negative
length means the string is zero-terminated. Yields the total of the widths
reported by pchar(). If handed a NULL file, just counts without printing. */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int remaining = (length < 0)? (int)strlen((char *)p) : length;
int total = 0;

while (remaining > 0)
  {
  int c = *p;       /* default: take a single byte */
  int used = 1;

#if !defined NOUTF
  if (use_utf)
    {
    int wide = 0;
    int rc = utf82ord(p, &wide);
    if (rc > 0 && rc <= remaining)   /* Mustn't run over the end */
      {
      c = wide;
      used = rc;
      }
    }
#endif

  p += used;
  remaining -= used;
  total += pchar(c, f);
  }

return total;
}
#endif
1527
1528
1529
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit items that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1542
1543
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Prints a string of 16-bit items one character at a time via pchar(). When
use_utf is set, a high surrogate (0xD800-0xDBFF) that is followed by a low
surrogate (0xDC00-0xDFFF) is combined into one character; a surrogate without
a partner is passed through unchanged. A negative length means the string is
zero-terminated. Yields the total of the widths reported by pchar(). If handed
a NULL file, just counts without printing. */

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1580
1581
1582
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copies the run of alphanumeric characters at p into the store at *pp,
follows it with two zero items, and advances *pp to the second of them. If the
compiled pattern has no group of that name, a message is written to outfile.

Arguments:
  p          points to the text to read the name from
  pp         points to the pointer to where the name is to be stored
  re         the compiled pattern

Returns:     pointer to the first character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1606
1607
1608
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric character at p is
widened into the 16-bit store at *pp; the name is followed by two zero items
and *pp is advanced to the second of them. If the compiled pattern has no
group of that name, a message is written to outfile.

Arguments:
  p          points to the 8-bit text to read the name from
  pp         points to the pointer to where the name is to be stored
  re         the compiled pattern

Returns:     pointer to the first character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1633
1634
1635
1636 /*************************************************
1637 * Callout function *
1638 *************************************************/
1639
1640 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1641 the match. Yield zero unless more callouts than the fail count, or the callout
1642 data is not zero. */
1643
1644 static int callout(pcre_callout_block *cb)
1645 {
1646 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1647 int i, pre_start, post_start, subject_length;
1648
1649 if (callout_extra)
1650 {
1651 fprintf(f, "Callout %d: last capture = %d\n",
1652 cb->callout_number, cb->capture_last);
1653
1654 for (i = 0; i < cb->capture_top * 2; i += 2)
1655 {
1656 if (cb->offset_vector[i] < 0)
1657 fprintf(f, "%2d: <unset>\n", i/2);
1658 else
1659 {
1660 fprintf(f, "%2d: ", i/2);
1661 PCHARSV(cb->subject, cb->offset_vector[i],
1662 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1663 fprintf(f, "\n");
1664 }
1665 }
1666 }
1667
1668 /* Re-print the subject in canonical form, the first time or if giving full
1669 datails. On subsequent calls in the same match, we use pchars just to find the
1670 printed lengths of the substrings. */
1671
1672 if (f != NULL) fprintf(f, "--->");
1673
1674 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1675 PCHARS(post_start, cb->subject, cb->start_match,
1676 cb->current_position - cb->start_match, f);
1677
1678 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1679
1680 PCHARSV(cb->subject, cb->current_position,
1681 cb->subject_length - cb->current_position, f);
1682
1683 if (f != NULL) fprintf(f, "\n");
1684
1685 /* Always print appropriate indicators, with callout number if not already
1686 shown. For automatic callouts, show the pattern offset. */
1687
1688 if (cb->callout_number == 255)
1689 {
1690 fprintf(outfile, "%+3d ", cb->pattern_position);
1691 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1692 }
1693 else
1694 {
1695 if (callout_extra) fprintf(outfile, " ");
1696 else fprintf(outfile, "%3d ", cb->callout_number);
1697 }
1698
1699 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1700 fprintf(outfile, "^");
1701
1702 if (post_start > 0)
1703 {
1704 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1705 fprintf(outfile, "^");
1706 }
1707
1708 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1709 fprintf(outfile, " ");
1710
1711 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1712 pbuffer + cb->pattern_position);
1713
1714 fprintf(outfile, "\n");
1715 first_callout = 0;
1716
1717 if (cb->mark != last_callout_mark)
1718 {
1719 if (cb->mark == NULL)
1720 fprintf(outfile, "Latest Mark: <unset>\n");
1721 else
1722 {
1723 fprintf(outfile, "Latest Mark: ");
1724 PCHARSV(cb->mark, 0, -1, outfile);
1725 putc('\n', outfile);
1726 }
1727 last_callout_mark = cb->mark;
1728 }
1729
1730 if (cb->callout_data != NULL)
1731 {
1732 int callout_data = *((int *)(cb->callout_data));
1733 if (callout_data != 0)
1734 {
1735 fprintf(outfile, "Callout data = %d\n", callout_data);
1736 return callout_data;
1737 }
1738 }
1739
1740 return (cb->callout_number != callout_fail_id)? 0 :
1741 (++callout_count >= callout_fail_count)? 1 : 0;
1742 }
1743
1744
1745 /*************************************************
1746 * Local malloc functions *
1747 *************************************************/
1748
1749 /* Alternative malloc function, to test functionality and save the size of a
1750 compiled re, which is the first store request that pcre_compile() makes. The
1751 show_malloc variable is set only during matching. */
1752
1753 static void *new_malloc(size_t size)
1754 {
1755 void *block = malloc(size);
1756 gotten_store = size;
1757 if (first_gotten_store == 0) first_gotten_store = size;
1758 if (show_malloc)
1759 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1760 return block;
1761 }
1762
1763 static void new_free(void *block)
1764 {
1765 if (show_malloc)
1766 fprintf(outfile, "free %p\n", block);
1767 free(block);
1768 }
1769
1770 /* For recursion malloc/free, to test stacking calls */
1771
1772 static void *stack_malloc(size_t size)
1773 {
1774 void *block = malloc(size);
1775 if (show_malloc)
1776 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1777 return block;
1778 }
1779
1780 static void stack_free(void *block)
1781 {
1782 if (show_malloc)
1783 fprintf(outfile, "stack_free %p\n", block);
1784 free(block);
1785 }
1786
1787
1788 /*************************************************
1789 * Call pcre_fullinfo() *
1790 *************************************************/
1791
1792 /* Get one piece of information from the pcre_fullinfo() function. When only
1793 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1794 value, but the code is defensive.
1795
1796 Arguments:
1797 re compiled regex
1798 study study data
1799 option PCRE_INFO_xxx option
1800 ptr where to put the data
1801
1802 Returns: 0 when OK, < 0 on error
1803 */
1804
1805 static int
1806 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1807 {
1808 int rc;
1809
1810 if (use_pcre16)
1811 #ifdef SUPPORT_PCRE16
1812 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1813 #else
1814 rc = PCRE_ERROR_BADMODE;
1815 #endif
1816 else
1817 #ifdef SUPPORT_PCRE8
1818 rc = pcre_fullinfo(re, study, option, ptr);
1819 #else
1820 rc = PCRE_ERROR_BADMODE;
1821 #endif
1822
1823 if (rc < 0)
1824 {
1825 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1826 use_pcre16? "16" : "", option);
1827 if (rc == PCRE_ERROR_BADMODE)
1828 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1829 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1830 }
1831
1832 return rc;
1833 }
1834
1835
1836
1837 /*************************************************
1838 * Swap byte functions *
1839 *************************************************/
1840
1841 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1842 value, respectively.
1843
1844 Arguments:
1845 value any number
1846
1847 Returns: the byte swapped value
1848 */
1849
1850 static pcre_uint32
1851 swap_uint32(pcre_uint32 value)
1852 {
1853 return ((value & 0x000000ff) << 24) |
1854 ((value & 0x0000ff00) << 8) |
1855 ((value & 0x00ff0000) >> 8) |
1856 (value >> 24);
1857 }
1858
1859 static pcre_uint16
1860 swap_uint16(pcre_uint16 value)
1861 {
1862 return (value >> 8) | (value << 8);
1863 }
1864
1865
1866
1867 /*************************************************
1868 * Flip bytes in a compiled pattern *
1869 *************************************************/
1870
1871 /* This function is called if the 'F' option was present on a pattern that is
1872 to be written to a file. We flip the bytes of all the integer fields in the
1873 regex data block and the study block. In 16-bit mode this also flips relevant
1874 bytes in the pattern itself. This is to make it possible to test PCRE's
1875 ability to reload byte-flipped patterns, e.g. those compiled on a different
1876 architecture. */
1877
1878 static void
1879 regexflip(pcre *ere, pcre_extra *extra)
1880 {
1881 REAL_PCRE *re = (REAL_PCRE *)ere;
1882 #ifdef SUPPORT_PCRE16
1883 int op;
1884 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1885 int length = re->name_count * re->name_entry_size;
1886 #ifdef SUPPORT_UTF
1887 BOOL utf = (re->options & PCRE_UTF16) != 0;
1888 BOOL utf16_char = FALSE;
1889 #endif /* SUPPORT_UTF */
1890 #endif /* SUPPORT_PCRE16 */
1891
1892 /* Always flip the bytes in the main data block and study blocks. */
1893
1894 re->magic_number = REVERSED_MAGIC_NUMBER;
1895 re->size = swap_uint32(re->size);
1896 re->options = swap_uint32(re->options);
1897 re->flags = swap_uint16(re->flags);
1898 re->top_bracket = swap_uint16(re->top_bracket);
1899 re->top_backref = swap_uint16(re->top_backref);
1900 re->first_char = swap_uint16(re->first_char);
1901 re->req_char = swap_uint16(re->req_char);
1902 re->name_table_offset = swap_uint16(re->name_table_offset);
1903 re->name_entry_size = swap_uint16(re->name_entry_size);
1904 re->name_count = swap_uint16(re->name_count);
1905
1906 if (extra != NULL)
1907 {
1908 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1909 rsd->size = swap_uint32(rsd->size);
1910 rsd->flags = swap_uint32(rsd->flags);
1911 rsd->minlength = swap_uint32(rsd->minlength);
1912 }
1913
1914 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1915 in the name table, if present, and then in the pattern itself. */
1916
1917 #ifdef SUPPORT_PCRE16
1918 if (!use_pcre16) return;
1919
1920 while(TRUE)
1921 {
1922 /* Swap previous characters. */
1923 while (length-- > 0)
1924 {
1925 *ptr = swap_uint16(*ptr);
1926 ptr++;
1927 }
1928 #ifdef SUPPORT_UTF
1929 if (utf16_char)
1930 {
1931 if ((ptr[-1] & 0xfc00) == 0xd800)
1932 {
1933 /* We know that there is only one extra character in UTF-16. */
1934 *ptr = swap_uint16(*ptr);
1935 ptr++;
1936 }
1937 }
1938 utf16_char = FALSE;
1939 #endif /* SUPPORT_UTF */
1940
1941 /* Get next opcode. */
1942
1943 length = 0;
1944 op = *ptr;
1945 *ptr++ = swap_uint16(op);
1946
1947 switch (op)
1948 {
1949 case OP_END:
1950 return;
1951
1952 #ifdef SUPPORT_UTF
1953 case OP_CHAR:
1954 case OP_CHARI:
1955 case OP_NOT:
1956 case OP_NOTI:
1957 case OP_STAR:
1958 case OP_MINSTAR:
1959 case OP_PLUS:
1960 case OP_MINPLUS:
1961 case OP_QUERY:
1962 case OP_MINQUERY:
1963 case OP_UPTO:
1964 case OP_MINUPTO:
1965 case OP_EXACT:
1966 case OP_POSSTAR:
1967 case OP_POSPLUS:
1968 case OP_POSQUERY:
1969 case OP_POSUPTO:
1970 case OP_STARI:
1971 case OP_MINSTARI:
1972 case OP_PLUSI:
1973 case OP_MINPLUSI:
1974 case OP_QUERYI:
1975 case OP_MINQUERYI:
1976 case OP_UPTOI:
1977 case OP_MINUPTOI:
1978 case OP_EXACTI:
1979 case OP_POSSTARI:
1980 case OP_POSPLUSI:
1981 case OP_POSQUERYI:
1982 case OP_POSUPTOI:
1983 case OP_NOTSTAR:
1984 case OP_NOTMINSTAR:
1985 case OP_NOTPLUS:
1986 case OP_NOTMINPLUS:
1987 case OP_NOTQUERY:
1988 case OP_NOTMINQUERY:
1989 case OP_NOTUPTO:
1990 case OP_NOTMINUPTO:
1991 case OP_NOTEXACT:
1992 case OP_NOTPOSSTAR:
1993 case OP_NOTPOSPLUS:
1994 case OP_NOTPOSQUERY:
1995 case OP_NOTPOSUPTO:
1996 case OP_NOTSTARI:
1997 case OP_NOTMINSTARI:
1998 case OP_NOTPLUSI:
1999 case OP_NOTMINPLUSI:
2000 case OP_NOTQUERYI:
2001 case OP_NOTMINQUERYI:
2002 case OP_NOTUPTOI:
2003 case OP_NOTMINUPTOI:
2004 case OP_NOTEXACTI:
2005 case OP_NOTPOSSTARI:
2006 case OP_NOTPOSPLUSI:
2007 case OP_NOTPOSQUERYI:
2008 case OP_NOTPOSUPTOI:
2009 if (utf) utf16_char = TRUE;
2010 #endif
2011 /* Fall through. */
2012
2013 default:
2014 length = OP_lengths16[op] - 1;
2015 break;
2016
2017 case OP_CLASS:
2018 case OP_NCLASS:
2019 /* Skip the character bit map. */
2020 ptr += 32/sizeof(pcre_uint16);
2021 length = 0;
2022 break;
2023
2024 case OP_XCLASS:
2025 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2026 if (LINK_SIZE > 1)
2027 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2028 - (1 + LINK_SIZE + 1));
2029 else
2030 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2031
2032 /* Reverse the size of the XCLASS instance. */
2033 *ptr = swap_uint16(*ptr);
2034 ptr++;
2035 if (LINK_SIZE > 1)
2036 {
2037 *ptr = swap_uint16(*ptr);
2038 ptr++;
2039 }
2040
2041 op = *ptr;
2042 *ptr = swap_uint16(op);
2043 ptr++;
2044 if ((op & XCL_MAP) != 0)
2045 {
2046 /* Skip the character bit map. */
2047 ptr += 32/sizeof(pcre_uint16);
2048 length -= 32/sizeof(pcre_uint16);
2049 }
2050 break;
2051 }
2052 }
2053 /* Control should never reach here in 16 bit mode. */
2054 #endif /* SUPPORT_PCRE16 */
2055 }
2056
2057
2058
2059 /*************************************************
2060 * Check match or recursion limit *
2061 *************************************************/
2062
2063 static int
2064 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2065 int start_offset, int options, int *use_offsets, int use_size_offsets,
2066 int flag, unsigned long int *limit, int errnumber, const char *msg)
2067 {
2068 int count;
2069 int min = 0;
2070 int mid = 64;
2071 int max = -1;
2072
2073 extra->flags |= flag;
2074
2075 for (;;)
2076 {
2077 *limit = mid;
2078
2079 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2080 use_offsets, use_size_offsets);
2081
2082 if (count == errnumber)
2083 {
2084 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2085 min = mid;
2086 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2087 }
2088
2089 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2090 count == PCRE_ERROR_PARTIAL)
2091 {
2092 if (mid == min + 1)
2093 {
2094 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2095 break;
2096 }
2097 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2098 max = mid;
2099 mid = (min + mid)/2;
2100 }
2101 else break; /* Some other error */
2102 }
2103
2104 extra->flags &= ~flag;
2105 return count;
2106 }
2107
2108
2109
2110 /*************************************************
2111 * Case-independent strncmp() function *
2112 *************************************************/
2113
2114 /*
2115 Arguments:
2116 s first string
2117 t second string
2118 n number of characters to compare
2119
2120 Returns: < 0, = 0, or > 0, according to the comparison
2121 */
2122
2123 static int
2124 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2125 {
2126 while (n--)
2127 {
2128 int c = tolower(*s++) - tolower(*t++);
2129 if (c) return c;
2130 }
2131 return 0;
2132 }
2133
2134
2135
2136 /*************************************************
2137 * Check newline indicator *
2138 *************************************************/
2139
2140 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2141 a message and return 0 if there is no match.
2142
2143 Arguments:
2144 p points after the leading '<'
2145 f file for error message
2146
2147 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2148 */
2149
2150 static int
2151 check_newline(pcre_uint8 *p, FILE *f)
2152 {
2153 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2154 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2155 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2156 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2157 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2158 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2159 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2160 fprintf(f, "Unknown newline type at: <%s\n", p);
2161 return 0;
2162 }
2163
2164
2165
2166 /*************************************************
2167 * Usage function *
2168 *************************************************/
2169
/* Writes the command-line help text to stdout. Which lines appear depends on
the build configuration (readline/libedit, 16-bit, DFA and POSIX support).
The list of -C arguments includes "ebcdic" and "ebcdic-nl", which the -C
option handling also accepts. */

static void
usage(void)
{
  printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
  printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  printf("If input is a terminal, readline() is used to read from it.\n");
#else
  printf("This version of pcretest is not linked with readline().\n");
#endif
  printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
  printf(" -16 use the 16-bit library\n");
#endif
  printf(" -b show compiled code\n");
  printf(" -C show PCRE compile-time options and exit\n");
  printf(" -C arg show a specific compile-time option\n");
  printf(" and exit with its value. The arg can be:\n");
  printf(" linksize internal link size [2, 3, 4]\n");
  printf(" pcre8 8 bit library support enabled [0, 1]\n");
  printf(" pcre16 16 bit library support enabled [0, 1]\n");
  printf(" utf Unicode Transformation Format supported [0, 1]\n");
  printf(" ucp Unicode Properties supported [0, 1]\n");
  printf(" jit Just-in-time compiler supported [0, 1]\n");
  printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
  printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
  printf(" ebcdic-nl code for LF (hex) if compiled for EBCDIC, else 0\n");
  printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
  printf(" -dfa force DFA matching for all subjects\n");
#endif
  printf(" -help show usage information\n");
  printf(" -i show information about compiled patterns\n"
         " -M find MATCH_LIMIT minimum for each subject\n"
         " -m output memory used information\n"
         " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
  printf(" -p use POSIX interface\n");
#endif
  printf(" -q quiet: do not output PCRE version number at start\n");
  printf(" -S <n> set stack size to <n> megabytes\n");
  printf(" -s force each pattern to be studied at basic level\n"
         " -s+ force each pattern to be studied, using JIT if available\n"
         " -s++ ditto, verifying when JIT was actually used\n"
         " -s+n force each pattern to be studied, using JIT if available,\n"
         " where 1 <= n <= 7 selects JIT options\n"
         " -s++n ditto, verifying when JIT was actually used\n"
         " -t time compilation and execution\n");
  printf(" -t <n> time compilation and execution, repeating <n> times\n");
  printf(" -tm time execution (matching) only\n");
  printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2220
2221
2222
2223 /*************************************************
2224 * Main Program *
2225 *************************************************/
2226
2227 /* Read lines from named file or stdin and write to named file or stdout; lines
2228 consist of a regular expression, in delimiters and optionally followed by
2229 options, followed by a set of test data, terminated by an empty line. */
2230
2231 int main(int argc, char **argv)
2232 {
2233 FILE *infile = stdin;
2234 const char *version;
2235 int options = 0;
2236 int study_options = 0;
2237 int default_find_match_limit = FALSE;
2238 int op = 1;
2239 int timeit = 0;
2240 int timeitm = 0;
2241 int showinfo = 0;
2242 int showstore = 0;
2243 int force_study = -1;
2244 int force_study_options = 0;
2245 int quiet = 0;
2246 int size_offsets = 45;
2247 int size_offsets_max;
2248 int *offsets = NULL;
2249 int debug = 0;
2250 int done = 0;
2251 int all_use_dfa = 0;
2252 int verify_jit = 0;
2253 int yield = 0;
2254 int stack_size;
2255
2256 #if !defined NOPOSIX
2257 int posix = 0;
2258 #endif
2259 #if !defined NODFA
2260 int *dfa_workspace = NULL;
2261 #endif
2262
2263 pcre_jit_stack *jit_stack = NULL;
2264
2265 /* These vectors store, end-to-end, a list of zero-terminated captured
2266 substring names, each list itself being terminated by an empty name. Assume
2267 that 1024 is plenty long enough for the few names we'll be testing. It is
2268 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2269 for the actual memory, to ensure alignment. */
2270
2271 pcre_uint16 copynames[1024];
2272 pcre_uint16 getnames[1024];
2273
2274 #ifdef SUPPORT_PCRE16
2275 pcre_uint16 *cn16ptr;
2276 pcre_uint16 *gn16ptr;
2277 #endif
2278
2279 #ifdef SUPPORT_PCRE8
2280 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2281 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2282 pcre_uint8 *cn8ptr;
2283 pcre_uint8 *gn8ptr;
2284 #endif
2285
2286 /* Get buffers from malloc() so that valgrind will check their misuse when
2287 debugging. They grow automatically when very long lines are read. The 16-bit
2288 buffer (buffer16) is obtained only if needed. */
2289
2290 buffer = (pcre_uint8 *)malloc(buffer_size);
2291 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2292 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2293
2294 /* The outfile variable is static so that new_malloc can use it. */
2295
2296 outfile = stdout;
2297
2298 /* The following _setmode() stuff is some Windows magic that tells its runtime
2299 library to translate CRLF into a single LF character. At least, that's what
2300 I've been told: never having used Windows I take this all on trust. Originally
2301 it set 0x8000, but then I was advised that _O_BINARY was better. */
2302
2303 #if defined(_WIN32) || defined(WIN32)
2304 _setmode( _fileno( stdout ), _O_BINARY );
2305 #endif
2306
2307 /* Get the version number: both pcre_version() and pcre16_version() give the
2308 same answer. We just need to ensure that we call one that is available. */
2309
2310 #ifdef SUPPORT_PCRE8
2311 version = pcre_version();
2312 #else
2313 version = pcre16_version();
2314 #endif
2315
2316 /* Scan options */
2317
2318 while (argc > 1 && argv[op][0] == '-')
2319 {
2320 pcre_uint8 *endptr;
2321 char *arg = argv[op];
2322
2323 if (strcmp(arg, "-m") == 0) showstore = 1;
2324 else if (strcmp(arg, "-s") == 0) force_study = 0;
2325
2326 else if (strncmp(arg, "-s+", 3) == 0)
2327 {
2328 arg += 3;
2329 if (*arg == '+') { arg++; verify_jit = TRUE; }
2330 force_study = 1;
2331 if (*arg == 0)
2332 force_study_options = jit_study_bits[6];
2333 else if (*arg >= '1' && *arg <= '7')
2334 force_study_options = jit_study_bits[*arg - '1'];
2335 else goto BAD_ARG;
2336 }
2337 else if (strcmp(arg, "-16") == 0)
2338 {
2339 #ifdef SUPPORT_PCRE16
2340 use_pcre16 = 1;
2341 #else
2342 printf("** This version of PCRE was built without 16-bit support\n");
2343 exit(1);
2344 #endif
2345 }
2346 else if (strcmp(arg, "-q") == 0) quiet = 1;
2347 else if (strcmp(arg, "-b") == 0) debug = 1;
2348 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2349 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2350 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2351 #if !defined NODFA
2352 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2353 #endif
2354 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2355 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2356 *endptr == 0))
2357 {
2358 op++;
2359 argc--;
2360 }
2361 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2362 {
2363 int both = arg[2] == 0;
2364 int temp;
2365 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2366 *endptr == 0))
2367 {
2368 timeitm = temp;
2369 op++;
2370 argc--;
2371 }
2372 else timeitm = LOOPREPEAT;
2373 if (both) timeit = timeitm;
2374 }
2375 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2376 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2377 *endptr == 0))
2378 {
2379 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2380 printf("PCRE: -S not supported on this OS\n");
2381 exit(1);
2382 #else
2383 int rc;
2384 struct rlimit rlim;
2385 getrlimit(RLIMIT_STACK, &rlim);
2386 rlim.rlim_cur = stack_size * 1024 * 1024;
2387 rc = setrlimit(RLIMIT_STACK, &rlim);
2388 if (rc != 0)
2389 {
2390 printf("PCRE: setrlimit() failed with error %d\n", rc);
2391 exit(1);
2392 }
2393 op++;
2394 argc--;
2395 #endif
2396 }
2397 #if !defined NOPOSIX
2398 else if (strcmp(arg, "-p") == 0) posix = 1;
2399 #endif
2400 else if (strcmp(arg, "-C") == 0)
2401 {
2402 int rc;
2403 unsigned long int lrc;
2404
2405 if (argc > 2)
2406 {
2407 if (strcmp(argv[op + 1], "linksize") == 0)
2408 {
2409 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2410 printf("%d\n", rc);
2411 yield = rc;
2412 }
2413 else if (strcmp(argv[op + 1], "pcre8") == 0)
2414 {
2415 #ifdef SUPPORT_PCRE8
2416 printf("1\n");
2417 yield = 1;
2418 #else
2419 printf("0\n");
2420 yield = 0;
2421 #endif
2422 }
2423 else if (strcmp(argv[op + 1], "pcre16") == 0)
2424 {
2425 #ifdef SUPPORT_PCRE16
2426 printf("1\n");
2427 yield = 1;
2428 #else
2429 printf("0\n");
2430 yield = 0;
2431 #endif
2432 }
2433 else if (strcmp(argv[op + 1], "utf") == 0)
2434 {
2435 #ifdef SUPPORT_PCRE8
2436 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2437 printf("%d\n", rc);
2438 yield = rc;
2439 #else
2440 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2441 printf("%d\n", rc);
2442 yield = rc;
2443 #endif
2444 }
2445 else if (strcmp(argv[op + 1], "ucp") == 0)
2446 {
2447 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2448 printf("%d\n", rc);
2449 yield = rc;
2450 }
2451 else if (strcmp(argv[op + 1], "jit") == 0)
2452 {
2453 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2454 printf("%d\n", rc);
2455 yield = rc;
2456 }
2457 else if (strcmp(argv[op + 1], "newline") == 0)
2458 {
2459 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2460 print_newline_config(rc, TRUE);
2461 }
2462 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2463 {
2464 #ifdef EBCDIC
2465 printf("1\n");
2466 yield = 1;
2467 #else
2468 printf("0\n");
2469 #endif
2470 }
2471 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2472 {
2473 #ifdef EBCDIC
2474 printf("0x%02x\n", CHAR_LF);
2475 #else
2476 printf("0\n");
2477 #endif
2478 }
2479 else
2480 {
2481 printf("Unknown -C option: %s\n", argv[op + 1]);
2482 }
2483 goto EXIT;
2484 }
2485
2486 /* No argument for -C: output all configuration information. */
2487
2488 printf("PCRE version %s\n", version);
2489 printf("Compiled with\n");
2490
2491 #ifdef EBCDIC
2492 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2493 #endif
2494
2495 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2496 are set, either both UTFs are supported or both are not supported. */
2497
2498 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2499 printf(" 8-bit and 16-bit support\n");
2500 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2501 if (rc)
2502 printf(" UTF-8 and UTF-16 support\n");
2503 else
2504 printf(" No UTF-8 or UTF-16 support\n");
2505 #elif defined SUPPORT_PCRE8
2506 printf(" 8-bit support only\n");
2507 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2508 printf(" %sUTF-8 support\n", rc? "" : "No ");
2509 #else
2510 printf(" 16-bit support only\n");
2511 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2512 printf(" %sUTF-16 support\n", rc? "" : "No ");
2513 #endif
2514
2515 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2516 printf(" %sUnicode properties support\n", rc? "" : "No ");
2517 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2518 if (rc)
2519 {
2520 const char *arch;
2521 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2522 printf(" Just-in-time compiler support: %s\n", arch);
2523 }
2524 else
2525 printf(" No just-in-time compiler support\n");
2526 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2527 print_newline_config(rc, FALSE);
2528 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2529 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2530 "all Unicode newlines");
2531 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2532 printf(" Internal link size = %d\n", rc);
2533 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2534 printf(" POSIX malloc threshold = %d\n", rc);
2535 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2536 printf(" Default match limit = %ld\n", lrc);
2537 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2538 printf(" Default recursion depth limit = %ld\n", lrc);
2539 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2540 printf(" Match recursion uses %s", rc? "stack" : "heap");
2541 if (showstore)
2542 {
2543 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2544 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2545 }
2546 printf("\n");
2547 goto EXIT;
2548 }
2549 else if (strcmp(arg, "-help") == 0 ||
2550 strcmp(arg, "--help") == 0)
2551 {
2552 usage();
2553 goto EXIT;
2554 }
2555 else
2556 {
2557 BAD_ARG:
2558 printf("** Unknown or malformed option %s\n", arg);
2559 usage();
2560 yield = 1;
2561 goto EXIT;
2562 }
2563 op++;
2564 argc--;
2565 }
2566
2567 /* Get the store for the offsets vector, and remember what it was */
2568
2569 size_offsets_max = size_offsets;
2570 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2571 if (offsets == NULL)
2572 {
2573 printf("** Failed to get %d bytes of memory for offsets vector\n",
2574 (int)(size_offsets_max * sizeof(int)));
2575 yield = 1;
2576 goto EXIT;
2577 }
2578
2579 /* Sort out the input and output files */
2580
2581 if (argc > 1)
2582 {
2583 infile = fopen(argv[op], INPUT_MODE);
2584 if (infile == NULL)
2585 {
2586 printf("** Failed to open %s\n", argv[op]);
2587 yield = 1;
2588 goto EXIT;
2589 }
2590 }
2591
2592 if (argc > 2)
2593 {
2594 outfile = fopen(argv[op+1], OUTPUT_MODE);
2595 if (outfile == NULL)
2596 {
2597 printf("** Failed to open %s\n", argv[op+1]);
2598 yield = 1;
2599 goto EXIT;
2600 }
2601 }
2602
2603 /* Set alternative malloc function */
2604
2605 #ifdef SUPPORT_PCRE8
2606 pcre_malloc = new_malloc;
2607 pcre_free = new_free;
2608 pcre_stack_malloc = stack_malloc;
2609 pcre_stack_free = stack_free;
2610 #endif
2611
2612 #ifdef SUPPORT_PCRE16
2613 pcre16_malloc = new_malloc;
2614 pcre16_free = new_free;
2615 pcre16_stack_malloc = stack_malloc;
2616 pcre16_stack_free = stack_free;
2617 #endif
2618
2619 /* Heading line unless quiet, then prompt for first regex if stdin */
2620
2621 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2622
2623 /* Main loop */
2624
2625 while (!done)
2626 {
2627 pcre *re = NULL;
2628 pcre_extra *extra = NULL;
2629
2630 #if !defined NOPOSIX /* There are still compilers that require no indent */
2631 regex_t preg;
2632 int do_posix = 0;
2633 #endif
2634
2635 const char *error;
2636 pcre_uint8 *markptr;
2637 pcre_uint8 *p, *pp, *ppp;
2638 pcre_uint8 *to_file = NULL;
2639 const pcre_uint8 *tables = NULL;
2640 unsigned long int get_options;
2641 unsigned long int true_size, true_study_size = 0;
2642 size_t size, regex_gotten_store;
2643 int do_allcaps = 0;
2644 int do_mark = 0;
2645 int do_study = 0;
2646 int no_force_study = 0;
2647 int do_debug = debug;
2648 int do_G = 0;
2649 int do_g = 0;
2650 int do_showinfo = showinfo;
2651 int do_showrest = 0;
2652 int do_showcaprest = 0;
2653 int do_flip = 0;
2654 int erroroffset, len, delimiter, poffset;
2655
2656 #if !defined NODFA
2657 int dfa_matched = 0;
2658 #endif
2659
2660 use_utf = 0;
2661 debug_lengths = 1;
2662
2663 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2664 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2665 fflush(outfile);
2666
2667 p = buffer;
2668 while (isspace(*p)) p++;
2669 if (*p == 0) continue;
2670
2671 /* See if the pattern is to be loaded pre-compiled from a file. */
2672
2673 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2674 {
2675 pcre_uint32 magic;
2676 pcre_uint8 sbuf[8];
2677 FILE *f;
2678
2679 p++;
2680 if (*p == '!')
2681 {
2682 do_debug = TRUE;
2683 do_showinfo = TRUE;
2684 p++;
2685 }
2686
2687 pp = p + (int)strlen((char *)p);
2688 while (isspace(pp[-1])) pp--;
2689 *pp = 0;
2690
2691 f = fopen((char *)p, "rb");
2692 if (f == NULL)
2693 {
2694 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2695 continue;
2696 }
2697
2698 first_gotten_store = 0;
2699 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2700
2701 true_size =
2702 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2703 true_study_size =
2704 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2705
2706 re = (pcre *)new_malloc(true_size);
2707 if (re == NULL)
2708 {
2709 printf("** Failed to get %d bytes of memory for pcre object\n",
2710 (int)true_size);
2711 yield = 1;
2712 goto EXIT;
2713 }
2714 regex_gotten_store = first_gotten_store;
2715
2716 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2717
2718 magic = ((REAL_PCRE *)re)->magic_number;
2719 if (magic != MAGIC_NUMBER)
2720 {
2721 if (swap_uint32(magic) == MAGIC_NUMBER)
2722 {
2723 do_flip = 1;
2724 }
2725 else
2726 {
2727 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2728 new_free(re);
2729 fclose(f);
2730 continue;
2731 }
2732 }
2733
2734 /* We hide the byte-invert info for little and big endian tests. */
2735 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2736 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2737
2738 /* Now see if there is any following study data. */
2739
2740 if (true_study_size != 0)
2741 {
2742 pcre_study_data *psd;
2743
2744 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2745 extra->flags = PCRE_EXTRA_STUDY_DATA;
2746
2747 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2748 extra->study_data = psd;
2749
2750 if (fread(psd, 1, true_study_size, f) != true_study_size)
2751 {
2752 FAIL_READ:
2753 fprintf(outfile, "Failed to read data from %s\n", p);
2754 if (extra != NULL)
2755 {
2756 PCRE_FREE_STUDY(extra);
2757 }
2758 new_free(re);
2759 fclose(f);
2760 continue;
2761 }
2762 fprintf(outfile, "Study data loaded from %s\n", p);
2763 do_study = 1; /* To get the data output if requested */
2764 }
2765 else fprintf(outfile, "No study data\n");
2766
2767 /* Flip the necessary bytes. */
2768 if (do_flip)
2769 {
2770 int rc;
2771 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2772 if (rc == PCRE_ERROR_BADMODE)
2773 {
2774 /* Simulate the result of the function call below. */
2775 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2776 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2777 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2778 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2779 new_free(re);
2780 fclose(f);
2781 continue;
2782 }
2783 }
2784
2785 /* Need to know if UTF-8 for printing data strings. */
2786
2787 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2788 {
2789 new_free(re);
2790 fclose(f);
2791 continue;
2792 }
2793 use_utf = (get_options & PCRE_UTF8) != 0;
2794
2795 fclose(f);
2796 goto SHOW_INFO;
2797 }
2798
2799 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2800 the pattern; if it isn't complete, read more. */
2801
2802 delimiter = *p++;
2803
2804 if (isalnum(delimiter) || delimiter == '\\')
2805 {
2806 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2807 goto SKIP_DATA;
2808 }
2809
2810 pp = p;
2811 poffset = (int)(p - buffer);
2812
2813 for(;;)
2814 {
2815 while (*pp != 0)
2816 {
2817 if (*pp == '\\' && pp[1] != 0) pp++;
2818 else if (*pp == delimiter) break;
2819 pp++;
2820 }
2821 if (*pp != 0) break;
2822 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2823 {
2824 fprintf(outfile, "** Unexpected EOF\n");
2825 done = 1;
2826 goto CONTINUE;
2827 }
2828 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2829 }
2830
2831 /* The buffer may have moved while being extended; reset the start of data
2832 pointer to the correct relative point in the buffer. */
2833
2834 p = buffer + poffset;
2835
2836 /* If the first character after the delimiter is backslash, make
2837 the pattern end with backslash. This is purely to provide a way
2838 of testing for the error message when a pattern ends with backslash. */
2839
2840 if (pp[1] == '\\') *pp++ = '\\';
2841
2842 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2843 for callouts. */
2844
2845 *pp++ = 0;
2846 strcpy((char *)pbuffer, (char *)p);
2847
2848 /* Look for options after final delimiter */
2849
2850 options = 0;
2851 study_options = force_study_options;
2852 log_store = showstore; /* default from command line */
2853
2854 while (*pp != 0)
2855 {
2856 switch (*pp++)
2857 {
2858 case 'f': options |= PCRE_FIRSTLINE; break;
2859 case 'g': do_g = 1; break;
2860 case 'i': options |= PCRE_CASELESS; break;
2861 case 'm': options |= PCRE_MULTILINE; break;
2862 case 's': options |= PCRE_DOTALL; break;
2863 case 'x': options |= PCRE_EXTENDED; break;
2864
2865 case '+':
2866 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2867 break;
2868
2869 case '=': do_allcaps = 1; break;
2870 case 'A': options |= PCRE_ANCHORED; break;
2871 case 'B': do_debug = 1; break;
2872 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2873 case 'D': do_debug = do_showinfo = 1; break;
2874 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2875 case 'F': do_flip = 1; break;
2876 case 'G': do_G = 1; break;
2877 case 'I': do_showinfo = 1; break;
2878 case 'J': options |= PCRE_DUPNAMES; break;
2879 case 'K': do_mark = 1; break;
2880 case 'M': log_store = 1; break;
2881 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2882
2883 #if !defined NOPOSIX
2884 case 'P': do_posix = 1; break;
2885 #endif
2886
2887 case 'S':
2888 do_study = 1;
2889 for (;;)
2890 {
2891 switch (*pp++)
2892 {
2893 case 'S':
2894 do_study = 0;
2895 no_force_study = 1;
2896 break;
2897
2898 case '!':
2899 study_options |= PCRE_STUDY_EXTRA_NEEDED;
2900 break;
2901
2902 case '+':
2903 if (*pp == '+')
2904 {
2905 verify_jit = TRUE;
2906 pp++;
2907 }
2908 if (*pp >= '1' && *pp <= '7')
2909 study_options |= jit_study_bits[*pp++ - '1'];
2910 else
2911 study_options |= jit_study_bits[6];
2912 break;
2913
2914 case '-':
2915 study_options &= ~PCRE_STUDY_ALLJIT;
2916 break;
2917
2918 default:
2919 pp--;
2920 goto ENDLOOP;
2921 }
2922 }
2923 ENDLOOP:
2924 break;
2925
2926 case 'U': options |= PCRE_UNGREEDY; break;
2927 case 'W': options |= PCRE_UCP; break;
2928 case 'X': options |= PCRE_EXTRA; break;
2929 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2930 case 'Z': debug_lengths = 0; break;
2931 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2932 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2933
2934 case 'T':
2935 switch (*pp++)
2936 {
2937 case '0': tables = tables0; break;
2938 case '1': tables = tables1; break;
2939
2940 case '\r':
2941 case '\n':
2942 case ' ':
2943 case 0:
2944 fprintf(outfile, "** Missing table number after /T\n");
2945 goto SKIP_DATA;
2946
2947 default:
2948 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2949 goto SKIP_DATA;
2950 }
2951 break;
2952
2953 case 'L':
2954 ppp = pp;
2955 /* The '\r' test here is so that it works on Windows. */
2956 /* The '0' test is just in case this is an unterminated line. */
2957 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2958 *ppp = 0;
2959 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2960 {
2961 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2962 goto SKIP_DATA;
2963 }
2964 locale_set = 1;
2965 tables = PCRE_MAKETABLES;
2966 pp = ppp;
2967 break;
2968
2969 case '>':
2970 to_file = pp;
2971 while (*pp != 0) pp++;
2972 while (isspace(pp[-1])) pp--;
2973 *pp = 0;
2974 break;
2975
2976 case '<':
2977 {
2978 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2979 {
2980 options |= PCRE_JAVASCRIPT_COMPAT;
2981 pp += 3;
2982 }
2983 else
2984 {
2985 int x = check_newline(pp, outfile);
2986 if (x == 0) goto SKIP_DATA;
2987 options |= x;
2988 while (*pp++ != '>');
2989 }
2990 }
2991 break;
2992
2993 case '\r': /* So that it works in Windows */
2994 case '\n':
2995 case ' ':
2996 break;
2997
2998 default:
2999 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3000 goto SKIP_DATA;
3001 }
3002 }
3003
3004 /* Handle compiling via the POSIX interface, which doesn't support the
3005 timing, showing, or debugging options, nor the ability to pass over
3006 local character tables. Neither does it have 16-bit support. */
3007
3008 #if !defined NOPOSIX
3009 if (posix || do_posix)
3010 {
3011 int rc;
3012 int cflags = 0;
3013
3014 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3015 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3016 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3017 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3018 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3019 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3020 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3021
3022 first_gotten_store = 0;
3023 rc = regcomp(&preg, (char *)p, cflags);
3024
3025 /* Compilation failed; go back for another re, skipping to blank line
3026 if non-interactive. */
3027
3028 if (rc != 0)
3029 {
3030 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3031 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3032 goto SKIP_DATA;
3033 }
3034 }
3035
3036 /* Handle compiling via the native interface */
3037
3038 else
3039 #endif /* !defined NOPOSIX */
3040
3041 {
3042 /* In 16-bit mode, convert the input. */
3043
3044 #ifdef SUPPORT_PCRE16
3045 if (use_pcre16)
3046 {
3047 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3048 {
3049 case -1:
3050 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3051 "converted to UTF-16\n");
3052 goto SKIP_DATA;
3053
3054 case -2:
3055 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3056 "cannot be converted to UTF-16\n");
3057 goto SKIP_DATA;
3058
3059 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3060 fprintf(outfile, "**Failed: character value greater than 0xffff "
3061 "cannot be converted to 16-bit in non-UTF mode\n");
3062 goto SKIP_DATA;
3063
3064 default:
3065 break;
3066 }
3067 p = (pcre_uint8 *)buffer16;
3068 }
3069 #endif
3070
3071 /* Compile many times when timing */
3072
3073 if (timeit > 0)
3074 {
3075 register int i;
3076 clock_t time_taken;
3077 clock_t start_time = clock();
3078 for (i = 0; i < timeit; i++)
3079 {
3080 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3081 if (re != NULL) free(re);
3082 }
3083 time_taken = clock() - start_time;
3084 fprintf(outfile, "Compile time %.4f milliseconds\n",
3085 (((double)time_taken * 1000.0) / (double)timeit) /
3086 (double)CLOCKS_PER_SEC);
3087 }
3088
3089 first_gotten_store = 0;
3090 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3091
3092 /* Compilation failed; go back for another re, skipping to blank line
3093 if non-interactive. */
3094
3095 if (re == NULL)
3096 {
3097 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3098 SKIP_DATA:
3099 if (infile != stdin)
3100 {
3101 for (;;)
3102 {
3103 if (extend_inputline(infile, buffer, NULL) == NULL)
3104 {
3105 done = 1;
3106 goto CONTINUE;
3107 }
3108 len = (int)strlen((char *)buffer);
3109 while (len > 0 && isspace(buffer[len-1])) len--;
3110 if (len == 0) break;
3111 }
3112 fprintf(outfile, "\n");
3113 }
3114 goto CONTINUE;
3115 }
3116
3117 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3118 within the regex; check for this so that we know how to process the data
3119 lines. */
3120
3121 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3122 goto SKIP_DATA;
3123 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3124
3125 /* Extract the size for possible writing before possibly flipping it,
3126 and remember the store that was got. */
3127
3128 true_size = ((REAL_PCRE *)re)->size;
3129 regex_gotten_store = first_gotten_store;
3130
3131 /* Output code size information if requested */
3132
3133 if (log_store)
3134 fprintf(outfile, "Memory allocation (code space): %d\n",
3135 (int)(first_gotten_store -
3136 sizeof(REAL_PCRE) -
3137 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3138
3139 /* If -s or /S was present, study the regex to generate additional info to
3140 help with the matching, unless the pattern has the SS option, which
3141 suppresses the effect of /S (used for a few test patterns where studying is
3142 never sensible). */
3143
3144 if (do_study || (force_study >= 0 && !no_force_study))
3145 {
3146 if (timeit > 0)
3147 {
3148 register int i;
3149 clock_t time_taken;
3150 clock_t start_time = clock();
3151 for (i = 0; i < timeit; i++)
3152 {
3153 PCRE_STUDY(extra, re, study_options, &error);
3154 }
3155 time_taken = clock() - start_time;
3156 if (extra != NULL)
3157 {
3158 PCRE_FREE_STUDY(extra);
3159 }
3160 fprintf(outfile, " Study time %.4f milliseconds\n",
3161 (((double)time_taken * 1000.0) / (double)timeit) /
3162 (double)CLOCKS_PER_SEC);
3163 }
3164 PCRE_STUDY(extra, re, study_options, &error);
3165 if (error != NULL)
3166 fprintf(outfile, "Failed to study: %s\n", error);
3167 else if (extra != NULL)
3168 {
3169 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3170 if (log_store)
3171 {
3172 size_t jitsize;
3173 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3174 jitsize != 0)
3175 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3176 }
3177 }
3178 }
3179
3180 /* If /K was present, we set up for handling MARK data. */
3181
3182 if (do_mark)
3183 {
3184 if (extra == NULL)
3185 {
3186 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3187 extra->flags = 0;
3188 }
3189 extra->mark = &markptr;
3190 extra->flags |= PCRE_EXTRA_MARK;
3191 }
3192
3193 /* Extract and display information from the compiled data if required. */
3194
3195 SHOW_INFO:
3196
3197 if (do_debug)
3198 {
3199 fprintf(outfile, "------------------------------------------------------------------\n");
3200 PCRE_PRINTINT(re, outfile, debug_lengths);
3201 }
3202
3203 /* We already have the options in get_options (see above) */
3204
3205 if (do_showinfo)
3206 {
3207 unsigned long int all_options;
3208 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3209 hascrorlf, maxlookbehind;
3210 int nameentrysize, namecount;
3211 const pcre_uint8 *nametable;
3212
3213 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3214 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3215 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3216 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3217 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3218 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3219 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3220 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3221 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3222 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3223 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3224 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3225 != 0)
3226 goto SKIP_DATA;
3227
3228 if (size != regex_gotten_store) fprintf(outfile,
3229 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3230 (int)size, (int)regex_gotten_store);
3231
3232 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3233 if (backrefmax > 0)
3234 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3235
3236 if (namecount > 0)
3237 {
3238 fprintf(outfile, "Named capturing subpatterns:\n");
3239 while (namecount-- > 0)
3240 {
3241 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3242 int imm2_size = use_pcre16 ? 1 : 2;
3243 #else
3244 int imm2_size = IMM2_SIZE;
3245 #endif
3246 int length = (int)STRLEN(nametable + imm2_size);
3247 fprintf(outfile, " ");
3248 PCHARSV(nametable, imm2_size, length, outfile);
3249 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3250 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3251 fprintf(outfile, "%3d\n", use_pcre16?
3252 (int)(((PCRE_SPTR16)nametable)[0])
3253 :((int)nametable[0] << 8) | (int)nametable[1]);
3254 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3255 #else
3256 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3257 #ifdef SUPPORT_PCRE8
3258 nametable += nameentrysize;
3259 #else
3260 nametable += nameentrysize * 2;
3261 #endif
3262 #endif
3263 }
3264 }
3265
3266 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3267 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3268
3269 all_options = ((REAL_PCRE *)re)->options;
3270 if (do_flip) all_options = swap_uint32(all_options);
3271
3272 if (get_options == 0) fprintf(outfile, "No options\n");
3273 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3274 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3275 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3276 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3277 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3278 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3279 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3280 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3281 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3282 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3283 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3284 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3285 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3286 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3287 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3288 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3289 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3290 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3291
3292 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3293
3294 switch (get_options & PCRE_NEWLINE_BITS)
3295 {
3296 case PCRE_NEWLINE_CR:
3297 fprintf(outfile, "Forced newline sequence: CR\n");
3298 break;
3299
3300 case PCRE_NEWLINE_LF:
3301 fprintf(outfile, "Forced newline sequence: LF\n");
3302 break;
3303
3304 case PCRE_NEWLINE_CRLF:
3305 fprintf(outfile, "Forced newline sequence: CRLF\n");
3306 break;
3307
3308 case PCRE_NEWLINE_ANYCRLF:
3309 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3310 break;
3311
3312 case PCRE_NEWLINE_ANY:
3313 fprintf(outfile, "Forced newline sequence: ANY\n");
3314 break;
3315
3316 default:
3317 break;
3318 }
3319
3320 if (first_char == -1)
3321 {
3322 fprintf(outfile, "First char at start or follows newline\n");
3323 }
3324 else if (first_char < 0)
3325 {
3326 fprintf(outfile, "No first char\n");
3327 }
3328 else
3329 {
3330 const char *caseless =
3331 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3332 "" : " (caseless)";
3333
3334 if (PRINTOK(first_char))
3335 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3336 else
3337 {
3338 fprintf(outfile, "First char = ");
3339 pchar(first_char, outfile);
3340 fprintf(outfile, "%s\n", caseless);
3341 }
3342 }
3343
3344 if (need_char < 0)
3345 {
3346 fprintf(outfile, "No need char\n");
3347 }
3348 else
3349 {
3350 const char *caseless =
3351 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3352 "" : " (caseless)";
3353
3354 if (PRINTOK(need_char))
3355 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3356 else
3357 {
3358 fprintf(outfile, "Need char = ");
3359 pchar(need_char, outfile);
3360 fprintf(outfile, "%s\n", caseless);
3361 }
3362 }
3363
3364 if (maxlookbehind > 0)
3365 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3366
3367 /* Don't output study size; at present it is in any case a fixed
3368 value, but it varies, depending on the computer architecture, and
3369 so messes up the test suite. (And with the /F option, it might be
3370 flipped.) If study was forced by an external -s, don't show this
3371 information unless -i or -d was also present. This means that, except
3372 when auto-callouts are involved, the output from runs with and without
3373 -s should be identical. */
3374
3375 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3376 {
3377 if (extra == NULL)
3378 fprintf(outfile, "Study returned NULL\n");
3379 else
3380 {
3381 pcre_uint8 *start_bits = NULL;
3382 int minlength;
3383
3384 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3385 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3386
3387 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3388 {
3389 if (start_bits == NULL)
3390 fprintf(outfile, "No set of starting bytes\n");
3391 else
3392 {
3393 int i;
3394 int c = 24;
3395 fprintf(outfile, "Starting byte set: ");
3396 for (i = 0; i < 256; i++)
3397 {
3398 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3399 {
3400 if (c > 75)
3401 {
3402 fprintf(outfile, "\n ");
3403 c = 2;
3404 }
3405 if (PRINTOK(i) && i != ' ')
3406 {
3407 fprintf(outfile, "%c ", i);
3408 c += 2;
3409 }
3410 else
3411 {
3412 fprintf(outfile, "\\x%02x ", i);
3413 c += 5;
3414 }
3415 }
3416 }
3417 fprintf(outfile, "\n");
3418 }
3419 }
3420 }
3421
3422 /* Show this only if the JIT was set by /S, not by -s. */
3423
3424 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3425 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3426 {
3427 int jit;
3428 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3429 {
3430 if (jit)
3431 fprintf(outfile, "JIT study was successful\n");
3432 else
3433 #ifdef SUPPORT_JIT
3434 fprintf(outfile, "JIT study was not successful\n");
3435 #else
3436 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3437 #endif
3438 }
3439 }
3440 }
3441 }
3442
3443 /* If the '>' option was present, we write out the regex to a file, and
3444 that is all. The first 8 bytes of the file are the regex length and then
3445 the study length, in big-endian order. */
3446
3447 if (to_file != NULL)
3448 {
3449 FILE *f = fopen((char *)to_file, "wb");
3450 if (f == NULL)
3451 {
3452 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3453 }
3454 else
3455 {
3456 pcre_uint8 sbuf[8];
3457
3458 if (do_flip) regexflip(re, extra);
3459 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3460 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3461 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3462 sbuf[3] = (pcre_uint8)((true_size) & 255);
3463 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3464 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3465 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3466 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3467
3468 if (fwrite(sbuf, 1, 8, f) < 8 ||
3469 fwrite(re, 1, true_size, f) < true_size)
3470 {
3471 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3472 }
3473 else
3474 {
3475 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3476
3477 /* If there is study data, write it. */
3478
3479 if (extra != NULL)
3480 {
3481 if (fwrite(extra->study_data, 1, true_study_size, f) <
3482 true_study_size)
3483 {
3484 fprintf(outfile, "Write error on %s: %s\n", to_file,
3485 strerror(errno));
3486 }
3487 else fprintf(outfile, "Study data written to %s\n", to_file);
3488 }
3489 }
3490 fclose(f);
3491 }
3492
3493 new_free(re);
3494 if (extra != NULL)
3495 {
3496 PCRE_FREE_STUDY(extra);
3497 }
3498 if (locale_set)
3499 {
3500 new_free((void *)tables);
3501 setlocale(LC_CTYPE, "C");
3502 locale_set = 0;
3503 }
3504 continue; /* With next regex */
3505 }
3506 } /* End of non-POSIX compile */
3507
3508 /* Read data lines and test them */
3509
3510 for (;;)
3511 {
3512 pcre_uint8 *q;
3513 pcre_uint8 *bptr;
3514 int *use_offsets = offsets;
3515 int use_size_offsets = size_offsets;
3516 int callout_data = 0;
3517 int callout_data_set = 0;
3518 int count, c;
3519 int copystrings = 0;
3520 int find_match_limit = default_find_match_limit;
3521 int getstrings = 0;
3522 int getlist = 0;
3523 int gmatched = 0;
3524 int start_offset = 0;
3525 int start_offset_sign = 1;
3526 int g_notempty = 0;
3527 int use_dfa = 0;
3528
3529 *copynames = 0;
3530 *getnames = 0;
3531
3532 #ifdef SUPPORT_PCRE16
3533 cn16ptr = copynames;
3534 gn16ptr = getnames;
3535 #endif
3536 #ifdef SUPPORT_PCRE8
3537 cn8ptr = copynames8;
3538 gn8ptr = getnames8;
3539 #endif
3540
3541 SET_PCRE_CALLOUT(callout);
3542 first_callout = 1;
3543 last_callout_mark = NULL;
3544 callout_extra = 0;
3545 callout_count = 0;
3546 callout_fail_count = 999999;
3547 callout_fail_id = -1;
3548 show_malloc = 0;
3549 options = 0;
3550
3551 if (extra != NULL) extra->flags &=
3552 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3553
3554 len = 0;
3555 for (;;)
3556 {
3557 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3558 {
3559 if (len > 0) /* Reached EOF without hitting a newline */
3560 {
3561 fprintf(outfile, "\n");
3562 break;
3563 }
3564 done = 1;
3565 goto CONTINUE;
3566 }
3567 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3568 len = (int)strlen((char *)buffer);
3569 if (buffer[len-1] == '\n') break;
3570 }
3571
3572 while (len > 0 && isspace(buffer[len-1])) len--;
3573 buffer[len] = 0;
3574 if (len == 0) break;
3575
3576 p = buffer;
3577 while (isspace(*p)) p++;
3578
3579 bptr = q = dbuffer;
3580 while ((c = *p++) != 0)
3581 {
3582 int i = 0;
3583 int n = 0;
3584
3585 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3586 In non-UTF mode, allow the value of the byte to fall through to later,
3587 where values greater than 127 are turned into UTF-8 when running in
3588 16-bit mode. */
3589
3590 if (c != '\\')
3591 {
3592 if (use_utf)
3593 {
3594 *q++ = c;
3595 continue;
3596 }
3597 }
3598
3599 /* Handle backslash escapes */
3600
3601 else switch ((c = *p++))
3602 {
3603 case 'a': c = 7; break;
3604 case 'b': c = '\b'; break;
3605 case 'e': c = 27; break;
3606 case 'f': c = '\f'; break;
3607 case 'n': c = '\n'; break;
3608 case 'r': c = '\r'; break;
3609 case 't': c = '\t'; break;
3610 case 'v': c = '\v'; break;
3611
3612 case '0': case '1': case '2': case '3':
3613 case '4': case '5': case '6': case '7':
3614 c -= '0';
3615 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3616 c = c * 8 + *p++ - '0';
3617 break;
3618
3619 case 'x':
3620 if (*p == '{')
3621 {
3622 pcre_uint8 *pt = p;
3623 c = 0;
3624
3625 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3626 when isxdigit() is a macro that refers to its argument more than
3627 once. This is banned by the C Standard, but apparently happens in at
3628 least one MacOS environment. */
3629
3630 for (pt++; isxdigit(*pt); pt++)
3631 {
3632 if (++i == 9)
3633 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3634 "using only the first eight.\n");
3635 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3636 }
3637 if (*pt == '}')
3638 {
3639 p = pt + 1;
3640 break;
3641 }
3642 /* Not correct form for \x{...}; fall through */
3643 }
3644
3645 /* \x without {} always defines just one byte in 8-bit mode. This
3646 allows UTF-8 characters to be constructed byte by byte, and also allows
3647 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3648 Otherwise, pass it down to later code so that it can be turned into
3649 UTF-8 when running in 16-bit mode. */
3650
3651 c = 0;
3652 while (i++ < 2 && isxdigit(*p))
3653 {
3654 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3655 p++;
3656 }
3657 if (use_utf)
3658 {
3659 *q++ = c;
3660 continue;
3661 }
3662 break;
3663
3664 case 0: /* \ followed by EOF allows for an empty line */
3665 p--;
3666 continue;
3667
3668 case '>':
3669 if (*p == '-')
3670 {
3671 start_offset_sign = -1;
3672 p++;
3673 }
3674 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3675 start_offset *= start_offset_sign;
3676 continue;
3677
3678 case 'A': /* Option setting */
3679 options |= PCRE_ANCHORED;
3680 continue;
3681
3682 case 'B':
3683 options |= PCRE_NOTBOL;
3684 continue;
3685
3686 case 'C':
3687 if (isdigit(*p)) /* Set copy string */
3688 {
3689 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3690 copystrings |= 1 << n;
3691 }
3692 else if (isalnum(*p))
3693 {
3694 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3695 }
3696 else if (*p == '+')
3697 {
3698 callout_extra = 1;
3699 p++;
3700 }
3701 else if (*p == '-')
3702 {
3703 SET_PCRE_CALLOUT(NULL);
3704 p++;
3705 }
3706 else if (*p == '!')
3707 {
3708 callout_fail_id = 0;
3709 p++;
3710 while(isdigit(*p))
3711 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3712 callout_fail_count = 0;
3713 if (*p == '!')
3714 {
3715 p++;
3716 while(isdigit(*p))
3717 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3718 }
3719 }
3720 else if (*p == '*')
3721 {
3722 int sign = 1;
3723 callout_data = 0;
3724 if (*(++p) == '-') { sign = -1; p++; }
3725 while(isdigit(*p))
3726 callout_data = callout_data * 10 + *p++ - '0';
3727 callout_data *= sign;
3728 callout_data_set = 1;
3729 }
3730 continue;
3731
3732 #if !defined NODFA
3733 case 'D':
3734 #if !defined NOPOSIX
3735 if (posix || do_posix)
3736 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3737 else
3738 #endif
3739 use_dfa = 1;
3740 continue;
3741 #endif
3742
3743 #if !defined NODFA
3744 case 'F':
3745 options |= PCRE_DFA_SHORTEST;
3746 continue;
3747 #endif
3748
3749 case 'G':
3750 if (isdigit(*p))
3751 {
3752 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3753 getstrings |= 1 << n;
3754 }
3755 else if (isalnum(*p))
3756 {
3757 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3758 }
3759 continue;
3760
3761 case 'J':
3762 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3763 if (extra != NULL
3764 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3765 && extra->executable_jit != NULL)
3766 {
3767 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3768 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3769 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3770 }
3771 continue;
3772
3773 case 'L':
3774 getlist = 1;
3775 continue;
3776
3777 case 'M':
3778 find_match_limit = 1;
3779 continue;
3780
3781 case 'N':
3782 if ((options & PCRE_NOTEMPTY) != 0)
3783 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3784 else
3785 options |= PCRE_NOTEMPTY;
3786 continue;
3787
3788 case 'O':
3789 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3790 if (n > size_offsets_max)
3791 {
3792 size_offsets_max = n;
3793 free(offsets);
3794 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3795 if (offsets == NULL)
3796 {
3797 printf("** Failed to get %d bytes of memory for offsets vector\n",
3798 (int)(size_offsets_max * sizeof(int)));
3799 yield = 1;
3800 goto EXIT;
3801 }
3802 }
3803 use_size_offsets = n;
3804 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3805 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
3806 continue;
3807
3808 case 'P':
3809 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3810 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3811 continue;
3812
3813 case 'Q':
3814 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3815 if (extra == NULL)
3816 {
3817 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3818 extra->flags = 0;
3819 }
3820 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3821 extra->match_limit_recursion = n;
3822 continue;
3823
3824 case 'q':
3825 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3826 if (extra == NULL)
3827 {
3828 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3829 extra->flags = 0;
3830 }
3831 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3832 extra->match_limit = n;
3833 continue;
3834
3835 #if !defined NODFA
3836 case 'R':
3837 options |= PCRE_DFA_RESTART;
3838 continue;
3839 #endif
3840
3841 case 'S':
3842 show_malloc = 1;
3843 continue;
3844
3845 case 'Y':
3846 options |= PCRE_NO_START_OPTIMIZE;
3847 continue;
3848
3849 case 'Z':
3850 options |= PCRE_NOTEOL;
3851 continue;
3852
3853 case '?':
3854 options |= PCRE_NO_UTF8_CHECK;
3855 continue;
3856
3857 case '<':
3858 {
3859 int x = check_newline(p, outfile);
3860 if (x == 0) goto NEXT_DATA;
3861 options |= x;
3862 while (*p++ != '>');
3863 }
3864 continue;
3865 }
3866
3867 /* We now have a character value in c that may be greater than 255. In
3868 16-bit mode, we always convert characters to UTF-8 so that values greater
3869 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3870 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3871 mode must have come from \x{...} or octal constructs because values from
3872 \x.. get this far only in non-UTF mode. */
3873
3874 #if !defined NOUTF || defined SUPPORT_PCRE16
3875 if (use_pcre16 || use_utf)
3876 {
3877 pcre_uint8 buff8[8];
3878 int ii, utn;
3879 utn = ord2utf8(c, buff8);
3880 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3881 }
3882 else
3883 #endif
3884 {
3885 if (c > 255)
3886 {
3887 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3888 "and UTF-8 mode is not enabled.\n", c);
3889 fprintf(outfile, "** Truncation will probably give the wrong "
3890 "result.\n");
3891 }
3892 *q++ = c;
3893 }
3894 }
3895
3896 /* Reached end of subject string */
3897
3898 *q = 0;
3899 len = (int)(q - dbuffer);
3900
3901 /* Move the data to the end of the buffer so that a read over the end of
3902 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3903 we are using the POSIX interface, we must include the terminating zero. */
3904
3905 #if !defined NOPOSIX
3906 if (posix || do_posix)
3907 {
3908 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3909 bptr += buffer_size - len - 1;
3910 }
3911 else
3912 #endif
3913 {
3914 memmove(bptr + buffer_size - len, bptr, len);
3915 bptr += buffer_size - len;
3916 }
3917
3918 if ((all_use_dfa || use_dfa) && find_match_limit)
3919 {
3920 printf("**Match limit not relevant for DFA matching: ignored\n");
3921 find_match_limit = 0;
3922 }
3923
3924 /* Handle matching via the POSIX interface, which does not
3925 support timing or playing with the match limit or callout data. */
3926
3927 #if !defined NOPOSIX
3928 if (posix || do_posix)
3929 {
3930 int rc;
3931 int eflags = 0;
3932 regmatch_t *pmatch = NULL;
3933 if (use_size_offsets > 0)
3934 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3935 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3936 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3937 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3938
3939 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3940
3941 if (rc != 0)
3942 {
3943 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3944 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3945 }
3946 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3947 != 0)
3948 {
3949 fprintf(outfile, "Matched with REG_NOSUB\n");
3950 }
3951 else
3952 {
3953 size_t i;
3954 for (i = 0; i < (size_t)use_size_offsets; i++)
3955 {
3956 if (pmatch[i].rm_so >= 0)
3957 {
3958 fprintf(outfile, "%2d: ", (int)i);
3959 PCHARSV(dbuffer, pmatch[i].rm_so,
3960 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3961 fprintf(outfile, "\n");
3962 if (do_showcaprest || (i == 0 && do_showrest))
3963 {
3964 fprintf(outfile, "%2d+ ", (int)i);
3965 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3966 outfile);
3967 fprintf(outfile, "\n");
3968 }
3969 }
3970 }
3971 }
3972 free(pmatch);
3973 goto NEXT_DATA;
3974 }
3975
3976 #endif /* !defined NOPOSIX */
3977
3978 /* Handle matching via the native interface - repeats for /g and /G */
3979
3980 #ifdef SUPPORT_PCRE16
3981 if (use_pcre16)
3982 {
3983 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3984 switch(len)
3985 {
3986 case -1:
3987 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3988 "converted to UTF-16\n");
3989 goto NEXT_DATA;
3990
3991 case -2:
3992 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3993 "cannot be converted to UTF-16\n");
3994 goto NEXT_DATA;
3995
3996 case -3:
3997 fprintf(outfile, "**Failed: character value greater than 0xffff "
3998 "cannot be converted to 16-bit in non-UTF mode\n");
3999 goto NEXT_DATA;
4000
4001 default:
4002 break;
4003 }
4004 bptr = (pcre_uint8 *)buffer16;
4005 }
4006 #endif
4007
4008 /* Ensure that there is a JIT callback if we want to verify that JIT was
4009 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4010
4011 if (verify_jit && jit_stack == NULL && extra != NULL)
4012 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4013
4014 for (;; gmatched++) /* Loop for /g or /G */
4015 {
4016 markptr = NULL;
4017 jit_was_used = FALSE;
4018
4019 if (timeitm > 0)
4020 {
4021 register int i;
4022 clock_t time_taken;
4023 clock_t start_time = clock();
4024
4025 #if !defined NODFA
4026 if (all_use_dfa || use_dfa)
4027 {
4028 if ((options & PCRE_DFA_RESTART) != 0)
4029 {
4030 fprintf(outfile, "Timing DFA restarts is not supported\n");
4031 break;
4032 }
4033 if (dfa_workspace == NULL)
4034 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4035 for (i = 0; i < timeitm; i++)
4036 {
4037 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4038 (options | g_notempty), use_offsets, use_size_offsets,
4039 dfa_workspace, DFA_WS_DIMENSION);
4040 }
4041 }
4042 else
4043 #endif
4044
4045 for (i = 0; i < timeitm; i++)
4046 {
4047 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4048 (options | g_notempty), use_offsets, use_size_offsets);
4049 }
4050 time_taken = clock() - start_time;
4051 fprintf(outfile, "Execute time %.4f milliseconds\n",
4052 (((double)time_taken * 1000.0) / (double)timeitm) /
4053 (double)CLOCKS_PER_SEC);
4054 }
4055
4056 /* If find_match_limit is set, we want to do repeated matches with
4057 varying limits in order to find the minimum value for the match limit and
4058 for the recursion limit. The match limits are relevant only to the normal
4059 running of pcre_exec(), so disable the JIT optimization. This makes it
4060 possible to run the same set of tests with and without JIT externally
4061 requested. */
4062
4063 if (find_match_limit)
4064 {
4065 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4066 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4067 extra->flags = 0;
4068
4069 (void)check_match_limit(re, extra, bptr, len, start_offset,
4070 options|g_notempty, use_offsets, use_size_offsets,
4071 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4072 PCRE_ERROR_MATCHLIMIT, "match()");
4073
4074 count = check_match_limit(re, extra, bptr, len, start_offset,
4075 options|g_notempty, use_offsets, use_size_offsets,
4076 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4077 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4078 }
4079
4080 /* If callout_data is set, use the interface with additional data */
4081
4082 else if (callout_data_set)
4083 {
4084 if (extra == NULL)
4085 {
4086 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4087 extra->flags = 0;
4088 }
4089 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4090 extra->callout_data = &callout_data;
4091 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4092 options | g_notempty, use_offsets, use_size_offsets);
4093 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4094 }
4095
4096 /* The normal case is just to do the match once, with the default
4097 value of match_limit. */
4098
4099 #if !defined NODFA
4100 else if (all_use_dfa || use_dfa)
4101 {
4102 if (dfa_workspace == NULL)
4103 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4104 if (dfa_matched++ == 0)
4105 dfa_workspace[0] = -1; /* To catch bad restart */
4106 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4107 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4108 DFA_WS_DIMENSION);
4109 if (count == 0)
4110 {
4111 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4112 count = use_size_offsets/2;
4113 }
4114 }
4115 #endif
4116
4117 else
4118 {
4119 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4120 options | g_notempty, use_offsets, use_size_offsets);
4121 if (count == 0)
4122 {
4123 fprintf(outfile, "Matched, but too many substrings\n");
4124 count = use_size_offsets/3;
4125 }
4126 }
4127
4128 /* Matched */
4129
4130 if (count >= 0)
4131 {
4132 int i, maxcount;
4133 void *cnptr, *gnptr;
4134
4135 #if !defined NODFA
4136 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4137 #endif
4138 maxcount = use_size_offsets/3;
4139
4140 /* This is a check against a lunatic return value. */
4141
4142 if (count > maxcount)
4143 {
4144 fprintf(outfile,
4145 "** PCRE error: returned count %d is too big for offset size %d\n",
4146 count, use_size_offsets);
4147 count = use_size_offsets/3;
4148 if (do_g || do_G)
4149 {
4150 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4151 do_g = do_G = FALSE; /* Break g/G loop */
4152 }
4153 }
4154
4155 /* do_allcaps requests showing of all captures in the pattern, to check
4156 unset ones at the end. */
4157
4158 if (do_allcaps)
4159 {
4160 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4161 goto SKIP_DATA;
4162 count++; /* Allow for full match */
4163 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4164 }
4165
4166 /* Output the captured substrings */
4167
4168 for (i = 0; i < count * 2; i += 2)
4169 {
4170 if (use_offsets[i] < 0)
4171 {
4172 if (use_offsets[i] != -1)
4173 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4174 use_offsets[i], i);
4175 if (use_offsets[i+1] != -1)
4176 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4177 use_offsets[i+1], i+1);
4178 fprintf(outfile, "%2d: <unset>\n", i/2);
4179 }
4180 else
4181 {
4182 fprintf(outfile, "%2d: ", i/2);
4183 PCHARSV(bptr, use_offsets[i],
4184 use_offsets[i+1] - use_offsets[i], outfile);
4185 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4186 fprintf(outfile, "\n");
4187 if (do_showcaprest || (i == 0 && do_showrest))
4188 {
4189 fprintf(outfile, "%2d+ ", i/2);
4190 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4191 outfile);
4192 fprintf(outfile, "\n");
4193 }
4194 }
4195 }
4196
4197 if (markptr != NULL)
4198 {
4199 fprintf(outfile, "MK: ");
4200 PCHARSV(markptr, 0, -1, outfile);
4201 fprintf(outfile, "\n");
4202 }
4203
4204 for (i = 0; i < 32; i++)
4205 {
4206 if ((copystrings & (1 << i)) != 0)
4207 {
4208 int rc;
4209 char copybuffer[256];
4210 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4211 copybuffer, sizeof(copybuffer));
4212 if (rc < 0)
4213 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4214 else
4215 {
4216 fprintf(outfile, "%2dC ", i);
4217 PCHARSV(copybuffer, 0, rc, outfile);
4218 fprintf(outfile, " (%d)\n", rc);
4219 }
4220 }
4221 }
4222
4223 cnptr = copynames;
4224 for (;;)
4225 {
4226 int rc;
4227 char copybuffer[256];
4228
4229 if (use_pcre16)
4230 {
4231 if (*(pcre_uint16 *)cnptr == 0) break;
4232 }
4233 else
4234 {
4235 if (*(pcre_uint8 *)cnptr == 0) break;
4236 }
4237
4238 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4239 cnptr, copybuffer, sizeof(copybuffer));
4240
4241 if (rc < 0)
4242 {
4243 fprintf(outfile, "copy substring ");
4244 PCHARSV(cnptr, 0, -1, outfile);
4245 fprintf(outfile, " failed %d\n", rc);
4246 }
4247 else
4248 {
4249 fprintf(outfile, " C ");
4250 PCHARSV(copybuffer, 0, rc, outfile);
4251 fprintf(outfile, " (%d) ", rc);
4252 PCHARSV(cnptr, 0, -1, outfile);
4253 putc('\n', outfile);
4254 }
4255
4256 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4257 }
4258
4259 for (i = 0; i < 32; i++)
4260 {
4261 if ((getstrings & (1 << i)) != 0)
4262 {
4263 int rc;
4264 const char *substring;
4265 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4266 if (rc < 0)
4267 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4268 else
4269 {
4270 fprintf(outfile, "%2dG ", i);
4271 PCHARSV(substring, 0, rc, outfile);
4272 fprintf(outfile, " (%d)\n", rc);
4273 PCRE_FREE_SUBSTRING(substring);
4274 }
4275 }
4276 }
4277
4278 gnptr = getnames;
4279 for (;;)
4280 {
4281 int rc;
4282 const char *substring;
4283
4284 if (use_pcre16)
4285 {
4286 if (*(pcre_uint16 *)gnptr == 0) break;
4287 }
4288 else
4289 {
4290 if (*(pcre_uint8 *)gnptr == 0) break;
4291 }
4292
4293 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4294 gnptr, &substring);
4295 if (rc < 0)
4296 {
4297 fprintf(outfile, "get substring ");
4298 PCHARSV(gnptr, 0, -1, outfile);
4299 fprintf(outfile, " failed %d\n", rc);
4300 }
4301 else
4302 {
4303 fprintf(outfile, " G ");
4304 PCHARSV(substring, 0, rc, outfile);
4305 fprintf(outfile, " (%d) ", rc);
4306 PCHARSV(gnptr, 0, -1, outfile);
4307 PCRE_FREE_SUBSTRING(substring);
4308 putc('\n', outfile);
4309 }
4310
4311 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4312 }
4313
4314 if (getlist)
4315 {
4316 int rc;
4317 const char **stringlist;
4318 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4319 if (rc < 0)
4320 fprintf(outfile, "get substring list failed %d\n", rc);
4321 else
4322 {
4323 for (i = 0; i < count; i++)
4324 {
4325 fprintf(outfile, "%2dL ", i);
4326 PCHARSV(stringlist[i], 0, -1, outfile);
4327 putc('\n', outfile);
4328 }
4329 if (stringlist[i] != NULL)
4330 fprintf(outfile, "string list not terminated by NULL\n");
4331 PCRE_FREE_SUBSTRING_LIST(stringlist);
4332 }
4333 }
4334 }
4335
4336 /* There was a partial match */
4337
4338 else if (count == PCRE_ERROR_PARTIAL)
4339 {
4340 if (markptr == NULL) fprintf(outfile, "Partial match");
4341 else
4342 {
4343 fprintf(outfile, "Partial match, mark=");
4344 PCHARSV(markptr, 0, -1, outfile);
4345 }
4346 if (use_size_offsets > 1)
4347 {
4348 fprintf(outfile, ": ");
4349 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4350 outfile);
4351 }
4352 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4353 fprintf(outfile, "\n");
4354 break; /* Out of the /g loop */
4355 }
4356
4357 /* Failed to match. If this is a /g or /G loop and we previously set
4358 g_notempty after a null match, this is not necessarily the end. We want
4359 to advance the start offset, and continue. We won't be at the end of the
4360 string - that was checked before setting g_notempty.
4361
4362 Complication arises in the case when the newline convention is "any",
4363 "crlf", or "anycrlf". If the previous match was at the end of a line
4364 terminated by CRLF, an advance of one character just passes the \r,
4365 whereas we should prefer the longer newline sequence, as does the code in
4366 pcre_exec(). Fudge the offset value to achieve this. We check for a
4367 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4368 find the default.
4369
4370 Otherwise, in the case of UTF-8 matching, the advance must be one
4371 character, not one byte. */
4372
4373 else
4374 {
4375 if (g_notempty != 0)
4376 {
4377 int onechar = 1;
4378 unsigned int obits = ((REAL_PCRE *)re)->options;
4379 use_offsets[0] = start_offset;
4380 if ((obits & PCRE_NEWLINE_BITS) == 0)
4381 {
4382 int d;
4383 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4384 /* Note that these values are always the ASCII ones, even in
4385 EBCDIC environments. CR = 13, NL = 10. */
4386 obits = (d == 13)? PCRE_NEWLINE_CR :
4387 (d == 10)? PCRE_NEWLINE_LF :
4388 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4389 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4390 (d == -1)? PCRE_NEWLINE_ANY : 0;
4391 }
4392 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4393 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4394 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4395 &&
4396 start_offset < len - 1 &&
4397 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4398 (use_pcre16?
4399 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4400 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4401 :
4402 bptr[start_offset] == '\r'
4403 && bptr[start_offset + 1] == '\n')
4404 #elif defined SUPPORT_PCRE16
4405 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4406 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4407 #else
4408 bptr[start_offset] == '\r'
4409 && bptr[start_offset + 1] == '\n'
4410 #endif
4411 )
4412 onechar++;
4413 else if (use_utf)
4414 {
4415 while (start_offset + onechar < len)
4416 {
4417 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4418 onechar++;
4419 }
4420 }
4421 use_offsets[1] = start_offset + onechar;
4422 }
4423 else
4424 {
4425 switch(count)
4426 {
4427 case PCRE_ERROR_NOMATCH:
4428 if (gmatched == 0)
4429 {
4430 if (markptr == NULL)
4431 {
4432 fprintf(outfile, "No match");
4433 }
4434 else
4435 {
4436 fprintf(outfile, "No match, mark = ");
4437 PCHARSV(markptr, 0, -1, outfile);
4438 }
4439 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4440 putc('\n', outfile);
4441 }
4442 break;
4443
4444 case PCRE_ERROR_BADUTF8:
4445 case PCRE_ERROR_SHORTUTF8:
4446 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4447 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4448 use_pcre16? "16" : "8");
4449 if (use_size_offsets >= 2)
4450 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4451 use_offsets[1]);
4452 fprintf(outfile, "\n");
4453 break;
4454
4455 case PCRE_ERROR_BADUTF8_OFFSET:
4456 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4457 use_pcre16? "16" : "8");
4458 break;
4459
4460 default:
4461 if (count < 0 &&
4462 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4463 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4464 else
4465 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4466 break;
4467 }
4468
4469 break; /* Out of the /g loop */
4470 }
4471 }
4472
4473 /* If not /g or /G we are done */
4474
4475 if (!do_g && !do_G) break;
4476
4477 /* If we have matched an empty string, first check to see if we are at
4478 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4479 Perl's /g option does. This turns out to be rather cunning. First we set
4480 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4481 same point. If this fails (picked up above) we advance to the next
4482 character. */
4483
4484 g_notempty = 0;
4485
4486 if (use_offsets[0] == use_offsets[1])
4487 {
4488 if (use_offsets[0] == len) break;
4489 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4490 }
4491
4492 /* For /g, update the start offset, leaving the rest alone */
4493
4494 if (do_g) start_offset = use_offsets[1];
4495
4496 /* For /G, update the pointer and length */
4497
4498 else
4499 {
4500 bptr += use_offsets[1] * CHAR_SIZE;
4501 len -= use_offsets[1];
4502 }
4503 } /* End of loop for /g and /G */
4504
4505 NEXT_DATA: continue;
4506 } /* End of loop for data lines */
4507
4508 CONTINUE:
4509
4510 #if !defined NOPOSIX
4511 if (posix || do_posix) regfree(&preg);
4512 #endif
4513
4514 if (re != NULL) new_free(re);
4515 if (extra != NULL)
4516 {
4517 PCRE_FREE_STUDY(extra);
4518 }
4519 if (locale_set)
4520 {
4521 new_free((void *)tables);
4522 setlocale(LC_CTYPE, "C");
4523 locale_set = 0;
4524 }
4525 if (jit_stack != NULL)
4526 {
4527 PCRE_JIT_STACK_FREE(jit_stack);
4528 jit_stack = NULL;
4529 }
4530 }
4531
4532 if (infile == stdin) fprintf(outfile, "\n");
4533
4534 EXIT:
4535
4536 if (infile != NULL && infile != stdin) fclose(infile);
4537 if (outfile != NULL && outfile != stdout) fclose(outfile);
4538
4539 free(buffer);
4540 free(dbuffer);
4541 free(pbuffer);
4542 free(offsets);
4543
4544 #ifdef SUPPORT_PCRE16
4545 if (buffer16 != NULL) free(buffer16);
4546 #endif
4547
4548 #if !defined NODFA
4549 if (dfa_workspace != NULL)
4550 free(dfa_workspace);
4551 #endif
4552
4553 return yield;
4554 }
4555
4556 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5