/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 963 - (show annotations)
Sat Apr 21 18:06:31 2012 UTC (7 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 133750 byte(s)
Fix ovector overrun when backreferences need temporary memory and the highest 
block is not used.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #define INPUT_MODE "rb"
116 #define OUTPUT_MODE "wb"
117 #endif
118
119 #define PRIV(name) name
120
121 /* We have to include pcre_internal.h because we need the internal info for
122 displaying the results of pcre_study() and we also need to know about the
123 internal macros, structures, and other internal data values; pcretest has
124 "inside information" compared to a program that strictly follows the PCRE API.
125
126 Although pcre_internal.h does itself include pcre.h, we explicitly include it
127 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
128 appropriately for an application, not for building PCRE. */
129
130 #include "pcre.h"
131
132 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133 /* Configure internal macros to 16 bit mode. */
134 #define COMPILE_PCRE16
135 #endif
136
137 #include "pcre_internal.h"
138
139 /* The pcre_printint() function, which prints the internal form of a compiled
140 regex, is held in a separate file so that (a) it can be compiled in either
141 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142 when that is compiled in debug mode. */
143
144 #ifdef SUPPORT_PCRE8
145 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146 #endif
147 #ifdef SUPPORT_PCRE16
148 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149 #endif
150
151 /* We need access to some of the data tables that PCRE uses. So as not to have
152 to keep two copies, we include the source file here, changing the names of the
153 external symbols to prevent clashes. */
154
155 #define PCRE_INCLUDED
156
157 #include "pcre_tables.c"
158
159 /* The definition of the macro PRINTABLE, which determines whether to print an
160 output character as-is or as a hex value when showing compiled patterns, is
161 the same as in the printint.src file. We use it here in cases when the locale
162 has not been explicitly changed, so as to get consistent output from systems
163 that differ in their output from isprint() even in the "C" locale. */
164
165 #ifdef EBCDIC
166 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167 #else
168 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169 #endif
170
171 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
172
173 /* Posix support is disabled in 16 bit only mode. */
174 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175 #define NOPOSIX
176 #endif
177
178 /* It is possible to compile this test program without including support for
179 testing the POSIX interface, though this is not available via the standard
180 Makefile. */
181
182 #if !defined NOPOSIX
183 #include "pcreposix.h"
184 #endif
185
186 /* It is also possible, originally for the benefit of a version that was
187 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
189 automatically cut out the UTF support if PCRE is built without it. */
190
191 #ifndef SUPPORT_UTF
192 #ifndef NOUTF
193 #define NOUTF
194 #endif
195 #endif
196
197 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199 only from one place and is handled differently). I couldn't dream up any way of
200 using a single macro to do this in a generic way, because of the many different
201 argument requirements. We know that at least one of SUPPORT_PCRE8 and
202 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203 use these in the definitions of generic macros.
204
205 **** Special note about the PCHARSxxx macros: the address of the string to be
206 printed is always given as two arguments: a base address followed by an offset.
207 The base address is cast to the correct data size for 8 or 16 bit data; the
208 offset is in units of this size. If the string were given as base+offset in one
209 argument, the casting might be incorrectly applied. */
210
211 #ifdef SUPPORT_PCRE8
212
/* Print an 8-bit string via pchars() and store the function's result in `lv`.
The string address is given as a base pointer `p` plus an `offset`; only the
base is cast to pcre_uint8 *, so the offset counts 8-bit units. `offset` is
parenthesized so that any expression (for example a conditional) can be passed
without it being split by the pointer addition. */
#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)
215
/* As PCHARS8, but the result of pchars() is discarded. `offset` is
parenthesized so that an arbitrary expression can be supplied safely. */
#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)
218
219 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220 p = read_capture_name8(p, cn8, re)
221
222 #define STRLEN8(p) ((int)strlen((char *)p))
223
224 #define SET_PCRE_CALLOUT8(callout) \
225 pcre_callout = callout
226
227 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228 pcre_assign_jit_stack(extra, callback, userdata)
229
230 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231 re = pcre_compile((char *)pat, options, error, erroffset, tables)
232
233 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234 namesptr, cbuffer, size) \
235 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236 (char *)namesptr, cbuffer, size)
237
238 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240
241 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242 offsets, size_offsets, workspace, size_workspace) \
243 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244 offsets, size_offsets, workspace, size_workspace)
245
246 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247 offsets, size_offsets) \
248 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249 offsets, size_offsets)
250
251 #define PCRE_FREE_STUDY8(extra) \
252 pcre_free_study(extra)
253
254 #define PCRE_FREE_SUBSTRING8(substring) \
255 pcre_free_substring(substring)
256
257 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258 pcre_free_substring_list(listptr)
259
260 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261 getnamesptr, subsptr) \
262 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263 (char *)getnamesptr, subsptr)
264
/* Call pcre_get_stringnumber() for the name `ptr` (cast to char *) and assign
the result to `n`.
NOTE(review): the `rc` parameter is never used in the expansion. The compiled
pattern is taken from an identifier named `re`, which must be in scope wherever
the macro is invoked - confirm against the call sites. */
265 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
266 n = pcre_get_stringnumber(re, (char *)ptr)
267
268 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270
271 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273
274 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276
277 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278 pcre_printint(re, outfile, debug_lengths)
279
280 #define PCRE_STUDY8(extra, re, options, error) \
281 extra = pcre_study(re, options, error)
282
283 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284 pcre_jit_stack_alloc(startsize, maxsize)
285
286 #define PCRE_JIT_STACK_FREE8(stack) \
287 pcre_jit_stack_free(stack)
288
289 #endif /* SUPPORT_PCRE8 */
290
291 /* -----------------------------------------------------------*/
292
293 #ifdef SUPPORT_PCRE16
294
/* Print a 16-bit string via pchars16() and store the function's result in
`lv`. The string address is given as a base pointer `p` plus an `offset`; only
the base is cast to PCRE_SPTR16, so the offset counts 16-bit units. `offset` is
parenthesized so that any expression can be passed without it being split by
the pointer addition. */
#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)
297
/* As PCHARS16, but the result of pchars16() is discarded. `offset` is
parenthesized so that an arbitrary expression can be supplied safely. */
#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)
300
301 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302 p = read_capture_name16(p, cn16, re)
303
304 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305
306 #define SET_PCRE_CALLOUT16(callout) \
307 pcre16_callout = (int (*)(pcre16_callout_block *))callout
308
309 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310 pcre16_assign_jit_stack((pcre16_extra *)extra, \
311 (pcre16_jit_callback)callback, userdata)
312
313 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315 tables)
316
/* 16-bit form of the named-substring copy: calls pcre16_copy_named_substring()
and assigns its result to `rc`. The buffer is cast to PCRE_UCHAR16 * and the
given `size` is halved before being passed on (presumably `size` is a byte
count - confirm against the callers). `size` is parenthesized so that an
argument such as `a + b` is halved as a whole rather than as `a + b/2`. */
#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
  namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)
321
/* 16-bit form of the numbered-substring copy: calls pcre16_copy_substring()
and assigns its result to `rc`. The buffer is cast to PCRE_UCHAR16 * and the
given `size` is halved before being passed on (presumably `size` is a byte
count - confirm against the callers). `size` is parenthesized so that an
argument such as `a + b` is halved as a whole rather than as `a + b/2`. */
#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)
325
326 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327 offsets, size_offsets, workspace, size_workspace) \
328 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330 workspace, size_workspace)
331
332 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333 offsets, size_offsets) \
334 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335 len, start_offset, options, offsets, size_offsets)
336
337 #define PCRE_FREE_STUDY16(extra) \
338 pcre16_free_study((pcre16_extra *)extra)
339
340 #define PCRE_FREE_SUBSTRING16(substring) \
341 pcre16_free_substring((PCRE_SPTR16)substring)
342
343 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345
346 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347 getnamesptr, subsptr) \
348 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350
/* Call pcre16_get_stringnumber() for the name `ptr` (cast to PCRE_SPTR16) and
assign the result to `n`.
NOTE(review): the `rc` parameter is never used in the expansion. The compiled
pattern is taken from an identifier named `re`, which must be in scope wherever
the macro is invoked, and it is passed without the (pcre16 *) cast that the
other 16-bit macros in this file apply to `re` - confirm against the call
sites. */
351 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
352 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
353
354 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356 (PCRE_SPTR16 *)(void*)subsptr)
357
358 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360 (PCRE_SPTR16 **)(void*)listptr)
361
362 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364 tables)
365
366 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367 pcre16_printint(re, outfile, debug_lengths)
368
369 #define PCRE_STUDY16(extra, re, options, error) \
370 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371
372 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374
375 #define PCRE_JIT_STACK_FREE16(stack) \
376 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377
378 #endif /* SUPPORT_PCRE16 */
379
380
381 /* ----- Both modes are supported; a runtime test is needed, except for
382 pcre_config(), and the JIT stack functions, when it doesn't matter which
383 version is called. ----- */
384
385 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386
387 #define CHAR_SIZE (use_pcre16? 2:1)
388
389 #define PCHARS(lv, p, offset, len, f) \
390 if (use_pcre16) \
391 PCHARS16(lv, p, offset, len, f); \
392 else \
393 PCHARS8(lv, p, offset, len, f)
394
395 #define PCHARSV(p, offset, len, f) \
396 if (use_pcre16) \
397 PCHARSV16(p, offset, len, f); \
398 else \
399 PCHARSV8(p, offset, len, f)
400
401 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
402 if (use_pcre16) \
403 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
404 else \
405 READ_CAPTURE_NAME8(p, cn8, cn16, re)
406
407 #define SET_PCRE_CALLOUT(callout) \
408 if (use_pcre16) \
409 SET_PCRE_CALLOUT16(callout); \
410 else \
411 SET_PCRE_CALLOUT8(callout)
412
413 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
414
415 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
416 if (use_pcre16) \
417 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
418 else \
419 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
420
421 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
422 if (use_pcre16) \
423 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
424 else \
425 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
426
427 #define PCRE_CONFIG pcre_config
428
429 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
430 namesptr, cbuffer, size) \
431 if (use_pcre16) \
432 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
433 namesptr, cbuffer, size); \
434 else \
435 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
436 namesptr, cbuffer, size)
437
438 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
439 if (use_pcre16) \
440 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
441 else \
442 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
443
444 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
445 offsets, size_offsets, workspace, size_workspace) \
446 if (use_pcre16) \
447 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
448 offsets, size_offsets, workspace, size_workspace); \
449 else \
450 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
451 offsets, size_offsets, workspace, size_workspace)
452
453 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
454 offsets, size_offsets) \
455 if (use_pcre16) \
456 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
457 offsets, size_offsets); \
458 else \
459 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
460 offsets, size_offsets)
461
462 #define PCRE_FREE_STUDY(extra) \
463 if (use_pcre16) \
464 PCRE_FREE_STUDY16(extra); \
465 else \
466 PCRE_FREE_STUDY8(extra)
467
468 #define PCRE_FREE_SUBSTRING(substring) \
469 if (use_pcre16) \
470 PCRE_FREE_SUBSTRING16(substring); \
471 else \
472 PCRE_FREE_SUBSTRING8(substring)
473
474 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
475 if (use_pcre16) \
476 PCRE_FREE_SUBSTRING_LIST16(listptr); \
477 else \
478 PCRE_FREE_SUBSTRING_LIST8(listptr)
479
480 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
481 getnamesptr, subsptr) \
482 if (use_pcre16) \
483 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
484 getnamesptr, subsptr); \
485 else \
486 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
487 getnamesptr, subsptr)
488
489 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
490 if (use_pcre16) \
491 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
492 else \
493 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
494
495 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
496 if (use_pcre16) \
497 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
498 else \
499 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
500
501 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
502 if (use_pcre16) \
503 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
504 else \
505 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
506
507 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
508 (use_pcre16 ? \
509 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
510 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
511
512 #define PCRE_JIT_STACK_FREE(stack) \
513 if (use_pcre16) \
514 PCRE_JIT_STACK_FREE16(stack); \
515 else \
516 PCRE_JIT_STACK_FREE8(stack)
517
518 #define PCRE_MAKETABLES \
519 (use_pcre16? pcre16_maketables() : pcre_maketables())
520
521 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
522 if (use_pcre16) \
523 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
524 else \
525 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
526
527 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
528 if (use_pcre16) \
529 PCRE_PRINTINT16(re, outfile, debug_lengths); \
530 else \
531 PCRE_PRINTINT8(re, outfile, debug_lengths)
532
533 #define PCRE_STUDY(extra, re, options, error) \
534 if (use_pcre16) \
535 PCRE_STUDY16(extra, re, options, error); \
536 else \
537 PCRE_STUDY8(extra, re, options, error)
538
539 /* ----- Only 8-bit mode is supported ----- */
540
541 #elif defined SUPPORT_PCRE8
542 #define CHAR_SIZE 1
543 #define PCHARS PCHARS8
544 #define PCHARSV PCHARSV8
545 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
546 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
547 #define STRLEN STRLEN8
548 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
549 #define PCRE_COMPILE PCRE_COMPILE8
550 #define PCRE_CONFIG pcre_config
551 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
553 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
554 #define PCRE_EXEC PCRE_EXEC8
555 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
556 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
557 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
558 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
559 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
560 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
561 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
562 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
563 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
564 #define PCRE_MAKETABLES pcre_maketables()
565 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566 #define PCRE_PRINTINT PCRE_PRINTINT8
567 #define PCRE_STUDY PCRE_STUDY8
568
569 /* ----- Only 16-bit mode is supported ----- */
570
571 #else
572 #define CHAR_SIZE 2
573 #define PCHARS PCHARS16
574 #define PCHARSV PCHARSV16
575 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
576 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
577 #define STRLEN STRLEN16
578 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
579 #define PCRE_COMPILE PCRE_COMPILE16
580 #define PCRE_CONFIG pcre16_config
581 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
583 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
584 #define PCRE_EXEC PCRE_EXEC16
585 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
586 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
587 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
588 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
589 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
590 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
591 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
592 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
593 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
594 #define PCRE_MAKETABLES pcre16_maketables()
595 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596 #define PCRE_PRINTINT PCRE_PRINTINT16
597 #define PCRE_STUDY PCRE_STUDY16
598 #endif
599
600 /* ----- End of mode-specific function call macros ----- */
601
602
603 /* Other parameters */
604
605 #ifndef CLOCKS_PER_SEC
606 #ifdef CLK_TCK
607 #define CLOCKS_PER_SEC CLK_TCK
608 #else
609 #define CLOCKS_PER_SEC 100
610 #endif
611 #endif
612
613 #if !defined NODFA
614 #define DFA_WS_DIMENSION 1000
615 #endif
616
617 /* This is the default loop count for timing. */
618
619 #define LOOPREPEAT 500000
620
621 /* Static variables */
622
623 static FILE *outfile;
624 static int log_store = 0;
625 static int callout_count;
626 static int callout_extra;
627 static int callout_fail_count;
628 static int callout_fail_id;
629 static int debug_lengths;
630 static int first_callout;
631 static int jit_was_used;
632 static int locale_set = 0;
633 static int show_malloc;
634 static int use_utf;
635 static size_t gotten_store;
636 static size_t first_gotten_store = 0;
637 static const unsigned char *last_callout_mark = NULL;
638
639 /* The buffers grow automatically if very long input lines are encountered. */
640
641 static int buffer_size = 50000;
642 static pcre_uint8 *buffer = NULL;
643 static pcre_uint8 *dbuffer = NULL;
644 static pcre_uint8 *pbuffer = NULL;
645
646 /* Another buffer is needed for translation to 16-bit character strings. It
647 will be obtained and extended as required. */
648
649 #ifdef SUPPORT_PCRE16
650 static int buffer16_size = 0;
651 static pcre_uint16 *buffer16 = NULL;
652
653 #ifdef SUPPORT_PCRE8
654
655 /* We need the table of operator lengths that is used for 16-bit compiling, in
656 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658 appropriately for the 16-bit world. Just as a safety check, make sure that
659 COMPILE_PCRE16 is *not* set. */
660
661 #ifdef COMPILE_PCRE16
662 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663 #endif
664
665 #if LINK_SIZE == 2
666 #undef LINK_SIZE
667 #define LINK_SIZE 1
668 #elif LINK_SIZE == 3 || LINK_SIZE == 4
669 #undef LINK_SIZE
670 #define LINK_SIZE 2
671 #else
672 #error LINK_SIZE must be either 2, 3, or 4
673 #endif
674
675 #undef IMM2_SIZE
676 #define IMM2_SIZE 1
677
678 #endif /* SUPPORT_PCRE8 */
679
680 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681 #endif /* SUPPORT_PCRE16 */
682
683 /* If we have 8-bit support, default use_pcre16 to false; if there is also
684 16-bit support, it can be changed by an option. If there is no 8-bit support,
685 there must be 16-bit support, so default it to 1. */
686
687 #ifdef SUPPORT_PCRE8
688 static int use_pcre16 = 0;
689 #else
690 static int use_pcre16 = 1;
691 #endif
692
693 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694
695 static int jit_study_bits[] =
696 {
697 PCRE_STUDY_JIT_COMPILE,
698 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
704 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705 };
706
707 /* Textual explanations for runtime error codes */
708
709 static const char *errtexts[] = {
710 NULL, /* 0 is no error */
711 NULL, /* NOMATCH is handled specially */
712 "NULL argument passed",
713 "bad option value",
714 "magic number missing",
715 "unknown opcode - pattern overwritten?",
716 "no more memory",
717 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
718 "match limit exceeded",
719 "callout error code",
720 NULL, /* BADUTF8/16 is handled specially */
721 NULL, /* BADUTF8/16 offset is handled specially */
722 NULL, /* PARTIAL is handled specially */
723 "not used - internal error",
724 "internal error - pattern overwritten?",
725 "bad count value",
726 "item unsupported for DFA matching",
727 "backreference condition or recursion test not supported for DFA matching",
728 "match limit not supported for DFA matching",
729 "workspace size exceeded in DFA matching",
730 "too much recursion for DFA matching",
731 "recursion limit exceeded",
732 "not used - internal error",
733 "invalid combination of newline options",
734 "bad offset value",
735 NULL, /* SHORTUTF8/16 is handled specially */
736 "nested recursion at the same subject position",
737 "JIT stack limit reached",
738 "pattern compiled in wrong mode: 8-bit/16-bit error",
739 "pattern compiled with other endianness",
740 "invalid data in workspace for DFA restart"
741 };
742
743
744 /*************************************************
745 * Alternate character tables *
746 *************************************************/
747
748 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
749 using the default tables of the library. However, the T option can be used to
750 select alternate sets of tables, for different kinds of testing. Note also that
751 the L (locale) option also adjusts the tables. */
752
753 /* This is the set of tables distributed as default with PCRE. It recognizes
754 only ASCII characters. */
755
756 static const pcre_uint8 tables0[] = {
757
758 /* This table is a lower casing table. */
759
760 0, 1, 2, 3, 4, 5, 6, 7,
761 8, 9, 10, 11, 12, 13, 14, 15,
762 16, 17, 18, 19, 20, 21, 22, 23,
763 24, 25, 26, 27, 28, 29, 30, 31,
764 32, 33, 34, 35, 36, 37, 38, 39,
765 40, 41, 42, 43, 44, 45, 46, 47,
766 48, 49, 50, 51, 52, 53, 54, 55,
767 56, 57, 58, 59, 60, 61, 62, 63,
768 64, 97, 98, 99,100,101,102,103,
769 104,105,106,107,108,109,110,111,
770 112,113,114,115,116,117,118,119,
771 120,121,122, 91, 92, 93, 94, 95,
772 96, 97, 98, 99,100,101,102,103,
773 104,105,106,107,108,109,110,111,
774 112,113,114,115,116,117,118,119,
775 120,121,122,123,124,125,126,127,
776 128,129,130,131,132,133,134,135,
777 136,137,138,139,140,141,142,143,
778 144,145,146,147,148,149,150,151,
779 152,153,154,155,156,157,158,159,
780 160,161,162,163,164,165,166,167,
781 168,169,170,171,172,173,174,175,
782 176,177,178,179,180,181,182,183,
783 184,185,186,187,188,189,190,191,
784 192,193,194,195,196,197,198,199,
785 200,201,202,203,204,205,206,207,
786 208,209,210,211,212,213,214,215,
787 216,217,218,219,220,221,222,223,
788 224,225,226,227,228,229,230,231,
789 232,233,234,235,236,237,238,239,
790 240,241,242,243,244,245,246,247,
791 248,249,250,251,252,253,254,255,
792
793 /* This table is a case flipping table. */
794
795 0, 1, 2, 3, 4, 5, 6, 7,
796 8, 9, 10, 11, 12, 13, 14, 15,
797 16, 17, 18, 19, 20, 21, 22, 23,
798 24, 25, 26, 27, 28, 29, 30, 31,
799 32, 33, 34, 35, 36, 37, 38, 39,
800 40, 41, 42, 43, 44, 45, 46, 47,
801 48, 49, 50, 51, 52, 53, 54, 55,
802 56, 57, 58, 59, 60, 61, 62, 63,
803 64, 97, 98, 99,100,101,102,103,
804 104,105,106,107,108,109,110,111,
805 112,113,114,115,116,117,118,119,
806 120,121,122, 91, 92, 93, 94, 95,
807 96, 65, 66, 67, 68, 69, 70, 71,
808 72, 73, 74, 75, 76, 77, 78, 79,
809 80, 81, 82, 83, 84, 85, 86, 87,
810 88, 89, 90,123,124,125,126,127,
811 128,129,130,131,132,133,134,135,
812 136,137,138,139,140,141,142,143,
813 144,145,146,147,148,149,150,151,
814 152,153,154,155,156,157,158,159,
815 160,161,162,163,164,165,166,167,
816 168,169,170,171,172,173,174,175,
817 176,177,178,179,180,181,182,183,
818 184,185,186,187,188,189,190,191,
819 192,193,194,195,196,197,198,199,
820 200,201,202,203,204,205,206,207,
821 208,209,210,211,212,213,214,215,
822 216,217,218,219,220,221,222,223,
823 224,225,226,227,228,229,230,231,
824 232,233,234,235,236,237,238,239,
825 240,241,242,243,244,245,246,247,
826 248,249,250,251,252,253,254,255,
827
828 /* This table contains bit maps for various character classes. Each map is 32
829 bytes long and the bits run from the least significant end of each byte. The
830 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
831 graph, print, punct, and cntrl. Other classes are built from combinations. */
832
833 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837
838 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
839 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842
843 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857
858 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
859 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862
863 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
864 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
865 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
867
868 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
869 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
870 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
872
873 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
874 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
875 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
877
878 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
882
883 /* This table identifies various classes of character by individual bits:
884 0x01 white space character
885 0x02 letter
886 0x04 decimal digit
887 0x08 hexadecimal digit
888 0x10 alphanumeric or '_'
889 0x80 regular expression metacharacter or binary zero
890 */
891
892 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
893 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
894 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
895 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
896 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
897 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
898 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
899 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
900 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
901 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
902 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
903 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
904 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
905 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
906 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
907 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
908 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
909 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
910 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
911 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
912 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
913 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
914 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
915 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
924
925 /* This is a set of tables that came originally from a Windows user. It seems to
926 be at least an approximation of ISO 8859. In particular, there are characters
927 greater than 128 that are marked as spaces, letters, etc. */
928
929 static const pcre_uint8 tables1[] = { /* four sub-tables stored back to back: 256 + 256 + 320 + 256 bytes */
930 0,1,2,3,4,5,6,7, /* bytes 0-255: lower-casing table (65-90 map to 97-122; 192-222 map to 224-254, except 215 which maps to itself) */
931 8,9,10,11,12,13,14,15,
932 16,17,18,19,20,21,22,23,
933 24,25,26,27,28,29,30,31,
934 32,33,34,35,36,37,38,39,
935 40,41,42,43,44,45,46,47,
936 48,49,50,51,52,53,54,55,
937 56,57,58,59,60,61,62,63,
938 64,97,98,99,100,101,102,103,
939 104,105,106,107,108,109,110,111,
940 112,113,114,115,116,117,118,119,
941 120,121,122,91,92,93,94,95,
942 96,97,98,99,100,101,102,103,
943 104,105,106,107,108,109,110,111,
944 112,113,114,115,116,117,118,119,
945 120,121,122,123,124,125,126,127,
946 128,129,130,131,132,133,134,135,
947 136,137,138,139,140,141,142,143,
948 144,145,146,147,148,149,150,151,
949 152,153,154,155,156,157,158,159,
950 160,161,162,163,164,165,166,167,
951 168,169,170,171,172,173,174,175,
952 176,177,178,179,180,181,182,183,
953 184,185,186,187,188,189,190,191,
954 224,225,226,227,228,229,230,231,
955 232,233,234,235,236,237,238,239,
956 240,241,242,243,244,245,246,215,
957 248,249,250,251,252,253,254,223,
958 224,225,226,227,228,229,230,231,
959 232,233,234,235,236,237,238,239,
960 240,241,242,243,244,245,246,247,
961 248,249,250,251,252,253,254,255,
962 0,1,2,3,4,5,6,7, /* bytes 256-511: case-flipping table (65-90 <-> 97-122, 192-222 <-> 224-254; 215, 223, 247 and 255 map to themselves) */
963 8,9,10,11,12,13,14,15,
964 16,17,18,19,20,21,22,23,
965 24,25,26,27,28,29,30,31,
966 32,33,34,35,36,37,38,39,
967 40,41,42,43,44,45,46,47,
968 48,49,50,51,52,53,54,55,
969 56,57,58,59,60,61,62,63,
970 64,97,98,99,100,101,102,103,
971 104,105,106,107,108,109,110,111,
972 112,113,114,115,116,117,118,119,
973 120,121,122,91,92,93,94,95,
974 96,65,66,67,68,69,70,71,
975 72,73,74,75,76,77,78,79,
976 80,81,82,83,84,85,86,87,
977 88,89,90,123,124,125,126,127,
978 128,129,130,131,132,133,134,135,
979 136,137,138,139,140,141,142,143,
980 144,145,146,147,148,149,150,151,
981 152,153,154,155,156,157,158,159,
982 160,161,162,163,164,165,166,167,
983 168,169,170,171,172,173,174,175,
984 176,177,178,179,180,181,182,183,
985 184,185,186,187,188,189,190,191,
986 224,225,226,227,228,229,230,231,
987 232,233,234,235,236,237,238,239,
988 240,241,242,243,244,245,246,215,
989 248,249,250,251,252,253,254,223,
990 192,193,194,195,196,197,198,199,
991 200,201,202,203,204,205,206,207,
992 208,209,210,211,212,213,214,247,
993 216,217,218,219,220,221,222,255,
994 0,62,0,0,1,0,0,0, /* bytes 512-831: ten 32-byte character-class bit maps; presumably in the same order as the default tables above (space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl) - TODO confirm */
995 0,0,0,0,0,0,0,0,
996 32,0,0,0,1,0,0,0,
997 0,0,0,0,0,0,0,0,
998 0,0,0,0,0,0,255,3,
999 126,0,0,0,126,0,0,0,
1000 0,0,0,0,0,0,0,0,
1001 0,0,0,0,0,0,0,0,
1002 0,0,0,0,0,0,255,3,
1003 0,0,0,0,0,0,0,0,
1004 0,0,0,0,0,0,12,2,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,0,0,
1007 254,255,255,7,0,0,0,0,
1008 0,0,0,0,0,0,0,0,
1009 255,255,127,127,0,0,0,0,
1010 0,0,0,0,0,0,0,0,
1011 0,0,0,0,254,255,255,7,
1012 0,0,0,0,0,4,32,4,
1013 0,0,0,128,255,255,127,255,
1014 0,0,0,0,0,0,255,3,
1015 254,255,255,135,254,255,255,7,
1016 0,0,0,0,0,4,44,6,
1017 255,255,127,255,255,255,127,255,
1018 0,0,0,0,254,255,255,255,
1019 255,255,255,255,255,255,255,127,
1020 0,0,0,0,254,255,255,255,
1021 255,255,255,255,255,255,255,255,
1022 0,2,0,0,255,255,255,255,
1023 255,255,255,255,255,255,255,127,
1024 0,0,0,0,255,255,255,255,
1025 255,255,255,255,255,255,255,255,
1026 0,0,0,0,254,255,0,252,
1027 1,0,0,248,1,0,0,120,
1028 0,0,0,0,254,255,255,255,
1029 0,0,128,0,0,0,128,0,
1030 255,255,255,255,0,0,0,0,
1031 0,0,0,0,0,0,0,128,
1032 255,255,255,255,0,0,0,0,
1033 0,0,0,0,0,0,0,0,
1034 128,0,0,0,0,0,0,0, /* bytes 832-1087: one flag byte per character; presumably the same bit meanings as the default tables above (1 space, 2 letter, 4 digit, 8 hex digit, 16 word character, 128 metacharacter) - TODO confirm */
1035 0,1,1,0,1,1,0,0,
1036 0,0,0,0,0,0,0,0,
1037 0,0,0,0,0,0,0,0,
1038 1,0,0,0,128,0,0,0,
1039 128,128,128,128,0,0,128,0,
1040 28,28,28,28,28,28,28,28,
1041 28,28,0,0,0,0,0,128,
1042 0,26,26,26,26,26,26,18,
1043 18,18,18,18,18,18,18,18,
1044 18,18,18,18,18,18,18,18,
1045 18,18,18,128,128,0,128,16,
1046 0,26,26,26,26,26,26,18,
1047 18,18,18,18,18,18,18,18,
1048 18,18,18,18,18,18,18,18,
1049 18,18,18,128,128,0,0,0,
1050 0,0,0,0,0,1,0,0,
1051 0,0,0,0,0,0,0,0,
1052 0,0,0,0,0,0,0,0,
1053 0,0,0,0,0,0,0,0,
1054 1,0,0,0,0,0,0,0,
1055 0,0,18,0,0,0,0,0,
1056 0,0,20,20,0,18,0,0,
1057 0,20,18,0,0,0,0,0,
1058 18,18,18,18,18,18,18,18,
1059 18,18,18,18,18,18,18,18,
1060 18,18,18,18,18,18,18,0,
1061 18,18,18,18,18,18,18,18,
1062 18,18,18,18,18,18,18,18,
1063 18,18,18,18,18,18,18,18,
1064 18,18,18,18,18,18,18,0,
1065 18,18,18,18,18,18,18,18
1066 };
1067
1068
1069
1070
1071 #ifndef HAVE_STRERROR
1072 /*************************************************
1073 * Provide strerror() for non-ANSI libraries *
1074 *************************************************/
1075
1076 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1077 in their libraries, but can provide the same facility by this simple
1078 alternative function. */
1079
/* Error-message table and its entry count, expected to be supplied by the
C library on systems where this fallback is compiled. */
extern int sys_nerr;
extern char *sys_errlist[];

/* Return the message text for error number n, or a fixed fallback string when
n lies outside the range 0 .. sys_nerr-1. */
char *
strerror(int n)
{
  const int in_range = (n >= 0 && n < sys_nerr);

  return in_range ? sys_errlist[n] : "unknown error number";
}
1089 #endif /* HAVE_STRERROR */
1090
1091
1092 /*************************************************
1093 * JIT memory callback *
1094 *************************************************/
1095
1096 static pcre_jit_stack* jit_callback(void *arg)
1097 {
1098 jit_was_used = TRUE;
1099 return (pcre_jit_stack *)arg;
1100 }
1101
1102
1103 #if !defined NOUTF || defined SUPPORT_PCRE16
1104 /*************************************************
1105 * Convert UTF-8 string to value *
1106 *************************************************/
1107
1108 /* This function takes one or more bytes that represents a UTF-8 character,
1109 and returns the value of the character.
1110
1111 Argument:
1112 utf8bytes a pointer to the byte vector
1113 vptr a pointer to an int to receive the value
1114
1115 Returns: > 0 => the number of bytes consumed
1116 -6 to 0 => malformed UTF-8 character at offset = (-return)
1117 */
1118
1119 static int
1120 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1121 {
1122 int c = *utf8bytes++;
1123 int d = c;
1124 int i, j, s;
1125
1126 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1127 {
1128 if ((d & 0x80) == 0) break;
1129 d <<= 1;
1130 }
1131
1132 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1133 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1134
1135 /* i now has a value in the range 1-5 */
1136
1137 s = 6*i;
1138 d = (c & utf8_table3[i]) << s;
1139
1140 for (j = 0; j < i; j++)
1141 {
1142 c = *utf8bytes++;
1143 if ((c & 0xc0) != 0x80) return -(j+1);
1144 s -= 6;
1145 d |= (c & 0x3f) << s;
1146 }
1147
1148 /* Check that encoding was the correct unique one */
1149
1150 for (j = 0; j < utf8_table1_size; j++)
1151 if (d <= utf8_table1[j]) break;
1152 if (j != i) return -(i+1);
1153
1154 /* Valid value */
1155
1156 *vptr = d;
1157 return i+1;
1158 }
1159 #endif /* NOUTF || SUPPORT_PCRE16 */
1160
1161
1162
1163 #if !defined NOUTF || defined SUPPORT_PCRE16
1164 /*************************************************
1165 * Convert character value to UTF-8 *
1166 *************************************************/
1167
1168 /* This function takes an integer value in the range 0 - 0x7fffffff
1169 and encodes it as a UTF-8 character in 0 to 6 bytes.
1170
1171 Arguments:
1172 cvalue the character value
1173 utf8bytes pointer to buffer for result - at least 6 bytes long
1174
1175 Returns: number of characters placed in the buffer
1176 */
1177
1178 static int
1179 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1180 {
1181 register int i, j;
1182 for (i = 0; i < utf8_table1_size; i++)
1183 if (cvalue <= utf8_table1[i]) break;
1184 utf8bytes += i;
1185 for (j = i; j > 0; j--)
1186 {
1187 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1188 cvalue >>= 6;
1189 }
1190 *utf8bytes = utf8_table2[i] | cvalue;
1191 return i + 1;
1192 }
1193 #endif
1194
1195
1196 #ifdef SUPPORT_PCRE16
1197 /*************************************************
1198 * Convert a string to 16-bit *
1199 *************************************************/
1200
1201 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1202 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1203 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1204 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1205 result is always left in buffer16.
1206
1207 Note that this function does not object to surrogate values. This is
1208 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1209 for the purpose of testing that they are correctly faulted.
1210
1211 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1212 in UTF-8 so that values greater than 255 can be handled.
1213
1214 Arguments:
1215 data TRUE if converting a data line; FALSE for a regex
1216 p points to a byte string
1217 utf true if UTF-8 (to be converted to UTF-16)
1218 len number of bytes in the string (excluding trailing zero)
1219
1220 Returns: number of 16-bit data items used (excluding trailing zero)
1221 OR -1 if a UTF-8 string is malformed
1222 OR -2 if a value > 0x10ffff is encountered
1223 OR -3 if a value > 0xffff is encountered when not in UTF mode
1224 */
1225
1226 static int
1227 to16(int data, pcre_uint8 *p, int utf, int len)
1228 {
1229 pcre_uint16 *pp;
1230
1231 if (buffer16_size < 2*len + 2)
1232 {
1233 if (buffer16 != NULL) free(buffer16);
1234 buffer16_size = 2*len + 2;
1235 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1236 if (buffer16 == NULL)
1237 {
1238 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1239 exit(1);
1240 }
1241 }
1242
1243 pp = buffer16;
1244
1245 if (!utf && !data)
1246 {
1247 while (len-- > 0) *pp++ = *p++;
1248 }
1249
1250 else
1251 {
1252 int c = 0;
1253 while (len > 0)
1254 {
1255 int chlen = utf82ord(p, &c);
1256 if (chlen <= 0) return -1;
1257 if (c > 0x10ffff) return -2;
1258 p += chlen;
1259 len -= chlen;
1260 if (c < 0x10000) *pp++ = c; else
1261 {
1262 if (!utf) return -3;
1263 c -= 0x10000;
1264 *pp++ = 0xD800 | (c >> 10);
1265 *pp++ = 0xDC00 | (c & 0x3ff);
1266 }
1267 }
1268 }
1269
1270 *pp = 0;
1271 return pp - buffer16;
1272 }
1273 #endif
1274
1275
1276 /*************************************************
1277 * Read or extend an input line *
1278 *************************************************/
1279
1280 /* Input lines are read into buffer, but both patterns and data lines can be
1281 continued over multiple input lines. In addition, if the buffer fills up, we
1282 want to automatically expand it so as to be able to handle extremely large
1283 lines that are needed for certain stress tests. When the input buffer is
1284 expanded, the other two buffers must also be expanded likewise, and the
1285 contents of pbuffer, which are a copy of the input for callouts, must be
1286 preserved (for when expansion happens for a data line). This is not the most
1287 optimal way of handling this, but hey, this is just a test program!
1288
1289 Arguments:
1290 f the file to read
1291 start where in buffer to start (this *must* be within buffer)
1292 prompt for stdin or readline()
1293
1294 Returns: pointer to the start of new data
1295 could be a copy of start, or could be moved
1296 NULL if no data read and EOF reached
1297 */
1298
1299 static pcre_uint8 *
1300 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1301 {
1302 pcre_uint8 *here = start;
1303
1304 for (;;)
1305 {
1306 size_t rlen = (size_t)(buffer_size - (here - buffer));
1307
1308 if (rlen > 1000)
1309 {
1310 int dlen;
1311
1312 /* If libreadline or libedit support is required, use readline() to read a
1313 line if the input is a terminal. Note that readline() removes the trailing
1314 newline, so we must put it back again, to be compatible with fgets(). */
1315
1316 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1317 if (isatty(fileno(f)))
1318 {
1319 size_t len;
1320 char *s = readline(prompt);
1321 if (s == NULL) return (here == start)? NULL : start;
1322 len = strlen(s);
1323 if (len > 0) add_history(s);
1324 if (len > rlen - 1) len = rlen - 1;
1325 memcpy(here, s, len);
1326 here[len] = '\n';
1327 here[len+1] = 0;
1328 free(s);
1329 }
1330 else
1331 #endif
1332
1333 /* Read the next line by normal means, prompting if the file is stdin. */
1334
1335 {
1336 if (f == stdin) printf("%s", prompt);
1337 if (fgets((char *)here, rlen, f) == NULL)
1338 return (here == start)? NULL : start;
1339 }
1340
1341 dlen = (int)strlen((char *)here);
1342 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1343 here += dlen;
1344 }
1345
1346 else
1347 {
1348 int new_buffer_size = 2*buffer_size;
1349 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1350 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1351 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1352
1353 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1354 {
1355 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1356 exit(1);
1357 }
1358
1359 memcpy(new_buffer, buffer, buffer_size);
1360 memcpy(new_pbuffer, pbuffer, buffer_size);
1361
1362 buffer_size = new_buffer_size;
1363
1364 start = new_buffer + (start - buffer);
1365 here = new_buffer + (here - buffer);
1366
1367 free(buffer);
1368 free(dbuffer);
1369 free(pbuffer);
1370
1371 buffer = new_buffer;
1372 dbuffer = new_dbuffer;
1373 pbuffer = new_pbuffer;
1374 }
1375 }
1376
1377 return NULL; /* Control never gets here */
1378 }
1379
1380
1381
1382 /*************************************************
1383 * Read number from string *
1384 *************************************************/
1385
1386 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1387 around with conditional compilation, just do the job by hand. It is only used
1388 for unpicking arguments, so just keep it simple.
1389
1390 Arguments:
1391 str string to be converted
1392 endptr where to put the end pointer
1393
1394 Returns: the unsigned long
1395 */
1396
1397 static int
1398 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1399 {
1400 int result = 0;
1401 while(*str != 0 && isspace(*str)) str++;
1402 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1403 *endptr = str;
1404 return(result);
1405 }
1406
1407
1408
1409 /*************************************************
1410 * Print one character *
1411 *************************************************/
1412
1413 /* Print a single character either literally, or as a hex escape. */
1414
1415 static int pchar(int c, FILE *f)
1416 {
1417 if (PRINTOK(c))
1418 {
1419 if (f != NULL) fprintf(f, "%c", c);
1420 return 1;
1421 }
1422
1423 if (c < 0x100)
1424 {
1425 if (use_utf)
1426 {
1427 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1428 return 6;
1429 }
1430 else
1431 {
1432 if (f != NULL) fprintf(f, "\\x%02x", c);
1433 return 4;
1434 }
1435 }
1436
1437 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1438 return (c <= 0x000000ff)? 6 :
1439 (c <= 0x00000fff)? 7 :
1440 (c <= 0x0000ffff)? 8 :
1441 (c <= 0x000fffff)? 9 : 10;
1442 }
1443
1444
1445
1446 #ifdef SUPPORT_PCRE8
1447 /*************************************************
1448 * Print 8-bit character string *
1449 *************************************************/
1450
1451 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1452 If handed a NULL file, just counts chars without printing. */
1453
1454 static int pchars(pcre_uint8 *p, int length, FILE *f)
1455 {
1456 int c = 0;
1457 int yield = 0;
1458
1459 if (length < 0)
1460 length = strlen((char *)p);
1461
1462 while (length-- > 0)
1463 {
1464 #if !defined NOUTF
1465 if (use_utf)
1466 {
1467 int rc = utf82ord(p, &c);
1468 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1469 {
1470 length -= rc - 1;
1471 p += rc;
1472 yield += pchar(c, f);
1473 continue;
1474 }
1475 }
1476 #endif
1477 c = *p++;
1478 yield += pchar(c, f);
1479 }
1480
1481 return yield;
1482 }
1483 #endif
1484
1485
1486
1487 #ifdef SUPPORT_PCRE16
1488 /*************************************************
1489 * Find length of 0-terminated 16-bit string *
1490 *************************************************/
1491
1492 static int strlen16(PCRE_SPTR16 p)
1493 {
1494 int len = 0;
1495 while (*p++ != 0) len++;
1496 return len;
1497 }
1498 #endif /* SUPPORT_PCRE16 */
1499
1500
1501 #ifdef SUPPORT_PCRE16
1502 /*************************************************
1503 * Print 16-bit character string *
1504 *************************************************/
1505
1506 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1507 If handed a NULL file, just counts chars without printing. */
1508
1509 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1510 {
1511 int yield = 0;
1512
1513 if (length < 0)
1514 length = strlen16(p);
1515
1516 while (length-- > 0)
1517 {
1518 int c = *p++ & 0xffff;
1519 #if !defined NOUTF
1520 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1521 {
1522 int d = *p & 0xffff;
1523 if (d >= 0xDC00 && d < 0xDFFF)
1524 {
1525 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1526 length--;
1527 p++;
1528 }
1529 }
1530 #endif
1531 yield += pchar(c, f);
1532 }
1533
1534 return yield;
1535 }
1536 #endif /* SUPPORT_PCRE16 */
1537
1538
1539
1540 #ifdef SUPPORT_PCRE8
1541 /*************************************************
1542 * Read a capture name (8-bit) and check it *
1543 *************************************************/
1544
1545 static pcre_uint8 *
1546 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1547 {
1548 pcre_uint8 *npp = *pp;
1549 while (isalnum(*p)) *npp++ = *p++;
1550 *npp++ = 0;
1551 *npp = 0;
1552 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1553 {
1554 fprintf(outfile, "no parentheses with name \"");
1555 PCHARSV(*pp, 0, -1, outfile);
1556 fprintf(outfile, "\"\n");
1557 }
1558
1559 *pp = npp;
1560 return p;
1561 }
1562 #endif /* SUPPORT_PCRE8 */
1563
1564
1565
1566 #ifdef SUPPORT_PCRE16
1567 /*************************************************
1568 * Read a capture name (16-bit) and check it *
1569 *************************************************/
1570
1571 /* Note that the text being read is 8-bit. */
1572
1573 static pcre_uint8 *
1574 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1575 {
1576 pcre_uint16 *npp = *pp;
1577 while (isalnum(*p)) *npp++ = *p++;
1578 *npp++ = 0;
1579 *npp = 0;
1580 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1581 {
1582 fprintf(outfile, "no parentheses with name \"");
1583 PCHARSV(*pp, 0, -1, outfile);
1584 fprintf(outfile, "\"\n");
1585 }
1586 *pp = npp;
1587 return p;
1588 }
1589 #endif /* SUPPORT_PCRE16 */
1590
1591
1592
1593 /*************************************************
1594 * Callout function *
1595 *************************************************/
1596
1597 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1598 the match. Yield zero unless more callouts than the fail count, or the callout
1599 data is not zero. */
1600
1601 static int callout(pcre_callout_block *cb)
1602 {
1603 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1604 int i, pre_start, post_start, subject_length;
1605
1606 if (callout_extra)
1607 {
1608 fprintf(f, "Callout %d: last capture = %d\n",
1609 cb->callout_number, cb->capture_last);
1610
1611 for (i = 0; i < cb->capture_top * 2; i += 2)
1612 {
1613 if (cb->offset_vector[i] < 0)
1614 fprintf(f, "%2d: <unset>\n", i/2);
1615 else
1616 {
1617 fprintf(f, "%2d: ", i/2);
1618 PCHARSV(cb->subject, cb->offset_vector[i],
1619 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1620 fprintf(f, "\n");
1621 }
1622 }
1623 }
1624
1625 /* Re-print the subject in canonical form, the first time or if giving full
1626 datails. On subsequent calls in the same match, we use pchars just to find the
1627 printed lengths of the substrings. */
1628
1629 if (f != NULL) fprintf(f, "--->");
1630
1631 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1632 PCHARS(post_start, cb->subject, cb->start_match,
1633 cb->current_position - cb->start_match, f);
1634
1635 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1636
1637 PCHARSV(cb->subject, cb->current_position,
1638 cb->subject_length - cb->current_position, f);
1639
1640 if (f != NULL) fprintf(f, "\n");
1641
1642 /* Always print appropriate indicators, with callout number if not already
1643 shown. For automatic callouts, show the pattern offset. */
1644
1645 if (cb->callout_number == 255)
1646 {
1647 fprintf(outfile, "%+3d ", cb->pattern_position);
1648 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1649 }
1650 else
1651 {
1652 if (callout_extra) fprintf(outfile, " ");
1653 else fprintf(outfile, "%3d ", cb->callout_number);
1654 }
1655
1656 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1657 fprintf(outfile, "^");
1658
1659 if (post_start > 0)
1660 {
1661 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1662 fprintf(outfile, "^");
1663 }
1664
1665 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1666 fprintf(outfile, " ");
1667
1668 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1669 pbuffer + cb->pattern_position);
1670
1671 fprintf(outfile, "\n");
1672 first_callout = 0;
1673
1674 if (cb->mark != last_callout_mark)
1675 {
1676 if (cb->mark == NULL)
1677 fprintf(outfile, "Latest Mark: <unset>\n");
1678 else
1679 {
1680 fprintf(outfile, "Latest Mark: ");
1681 PCHARSV(cb->mark, 0, -1, outfile);
1682 putc('\n', outfile);
1683 }
1684 last_callout_mark = cb->mark;
1685 }
1686
1687 if (cb->callout_data != NULL)
1688 {
1689 int callout_data = *((int *)(cb->callout_data));
1690 if (callout_data != 0)
1691 {
1692 fprintf(outfile, "Callout data = %d\n", callout_data);
1693 return callout_data;
1694 }
1695 }
1696
1697 return (cb->callout_number != callout_fail_id)? 0 :
1698 (++callout_count >= callout_fail_count)? 1 : 0;
1699 }
1700
1701
1702 /*************************************************
1703 * Local malloc functions *
1704 *************************************************/
1705
1706 /* Alternative malloc function, to test functionality and save the size of a
1707 compiled re, which is the first store request that pcre_compile() makes. The
1708 show_malloc variable is set only during matching. */
1709
1710 static void *new_malloc(size_t size)
1711 {
1712 void *block = malloc(size);
1713 gotten_store = size;
1714 if (first_gotten_store == 0) first_gotten_store = size;
1715 if (show_malloc)
1716 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1717 return block;
1718 }
1719
1720 static void new_free(void *block)
1721 {
1722 if (show_malloc)
1723 fprintf(outfile, "free %p\n", block);
1724 free(block);
1725 }
1726
1727 /* For recursion malloc/free, to test stacking calls */
1728
1729 static void *stack_malloc(size_t size)
1730 {
1731 void *block = malloc(size);
1732 if (show_malloc)
1733 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1734 return block;
1735 }
1736
1737 static void stack_free(void *block)
1738 {
1739 if (show_malloc)
1740 fprintf(outfile, "stack_free %p\n", block);
1741 free(block);
1742 }
1743
1744
1745 /*************************************************
1746 * Call pcre_fullinfo() *
1747 *************************************************/
1748
1749 /* Get one piece of information from the pcre_fullinfo() function. When only
1750 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1751 value, but the code is defensive.
1752
1753 Arguments:
1754 re compiled regex
1755 study study data
1756 option PCRE_INFO_xxx option
1757 ptr where to put the data
1758
1759 Returns: 0 when OK, < 0 on error
1760 */
1761
1762 static int
1763 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1764 {
1765 int rc;
1766
1767 if (use_pcre16)
1768 #ifdef SUPPORT_PCRE16
1769 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1770 #else
1771 rc = PCRE_ERROR_BADMODE;
1772 #endif
1773 else
1774 #ifdef SUPPORT_PCRE8
1775 rc = pcre_fullinfo(re, study, option, ptr);
1776 #else
1777 rc = PCRE_ERROR_BADMODE;
1778 #endif
1779
1780 if (rc < 0)
1781 {
1782 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1783 use_pcre16? "16" : "", option);
1784 if (rc == PCRE_ERROR_BADMODE)
1785 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1786 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1787 }
1788
1789 return rc;
1790 }
1791
1792
1793
1794 /*************************************************
1795 * Swap byte functions *
1796 *************************************************/
1797
1798 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1799 value, respectively.
1800
1801 Arguments:
1802 value any number
1803
1804 Returns: the byte swapped value
1805 */
1806
1807 static pcre_uint32
1808 swap_uint32(pcre_uint32 value)
1809 {
1810 return ((value & 0x000000ff) << 24) |
1811 ((value & 0x0000ff00) << 8) |
1812 ((value & 0x00ff0000) >> 8) |
1813 (value >> 24);
1814 }
1815
1816 static pcre_uint16
1817 swap_uint16(pcre_uint16 value)
1818 {
1819 return (value >> 8) | (value << 8);
1820 }
1821
1822
1823
1824 /*************************************************
1825 * Flip bytes in a compiled pattern *
1826 *************************************************/
1827
1828 /* This function is called if the 'F' option was present on a pattern that is
1829 to be written to a file. We flip the bytes of all the integer fields in the
1830 regex data block and the study block. In 16-bit mode this also flips relevant
1831 bytes in the pattern itself. This is to make it possible to test PCRE's
1832 ability to reload byte-flipped patterns, e.g. those compiled on a different
1833 architecture. */
1834
1835 static void
1836 regexflip(pcre *ere, pcre_extra *extra)
1837 {
1838 REAL_PCRE *re = (REAL_PCRE *)ere;
1839 #ifdef SUPPORT_PCRE16
1840 int op;
1841 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1842 int length = re->name_count * re->name_entry_size;
1843 #ifdef SUPPORT_UTF
1844 BOOL utf = (re->options & PCRE_UTF16) != 0;
1845 BOOL utf16_char = FALSE;
1846 #endif /* SUPPORT_UTF */
1847 #endif /* SUPPORT_PCRE16 */
1848
1849 /* Always flip the bytes in the main data block and study blocks. */
1850
1851 re->magic_number = REVERSED_MAGIC_NUMBER;
1852 re->size = swap_uint32(re->size);
1853 re->options = swap_uint32(re->options);
1854 re->flags = swap_uint16(re->flags);
1855 re->top_bracket = swap_uint16(re->top_bracket);
1856 re->top_backref = swap_uint16(re->top_backref);
1857 re->first_char = swap_uint16(re->first_char);
1858 re->req_char = swap_uint16(re->req_char);
1859 re->name_table_offset = swap_uint16(re->name_table_offset);
1860 re->name_entry_size = swap_uint16(re->name_entry_size);
1861 re->name_count = swap_uint16(re->name_count);
1862
1863 if (extra != NULL)
1864 {
1865 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1866 rsd->size = swap_uint32(rsd->size);
1867 rsd->flags = swap_uint32(rsd->flags);
1868 rsd->minlength = swap_uint32(rsd->minlength);
1869 }
1870
1871 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1872 in the name table, if present, and then in the pattern itself. */
1873
1874 #ifdef SUPPORT_PCRE16
1875 if (!use_pcre16) return;
1876
1877 while(TRUE)
1878 {
1879 /* Swap previous characters. */
1880 while (length-- > 0)
1881 {
1882 *ptr = swap_uint16(*ptr);
1883 ptr++;
1884 }
1885 #ifdef SUPPORT_UTF
1886 if (utf16_char)
1887 {
1888 if ((ptr[-1] & 0xfc00) == 0xd800)
1889 {
1890 /* We know that there is only one extra character in UTF-16. */
1891 *ptr = swap_uint16(*ptr);
1892 ptr++;
1893 }
1894 }
1895 utf16_char = FALSE;
1896 #endif /* SUPPORT_UTF */
1897
1898 /* Get next opcode. */
1899
1900 length = 0;
1901 op = *ptr;
1902 *ptr++ = swap_uint16(op);
1903
1904 switch (op)
1905 {
1906 case OP_END:
1907 return;
1908
1909 #ifdef SUPPORT_UTF
1910 case OP_CHAR:
1911 case OP_CHARI:
1912 case OP_NOT:
1913 case OP_NOTI:
1914 case OP_STAR:
1915 case OP_MINSTAR:
1916 case OP_PLUS:
1917 case OP_MINPLUS:
1918 case OP_QUERY:
1919 case OP_MINQUERY:
1920 case OP_UPTO:
1921 case OP_MINUPTO:
1922 case OP_EXACT:
1923 case OP_POSSTAR:
1924 case OP_POSPLUS:
1925 case OP_POSQUERY:
1926 case OP_POSUPTO:
1927 case OP_STARI:
1928 case OP_MINSTARI:
1929 case OP_PLUSI:
1930 case OP_MINPLUSI:
1931 case OP_QUERYI:
1932 case OP_MINQUERYI:
1933 case OP_UPTOI:
1934 case OP_MINUPTOI:
1935 case OP_EXACTI:
1936 case OP_POSSTARI:
1937 case OP_POSPLUSI:
1938 case OP_POSQUERYI:
1939 case OP_POSUPTOI:
1940 case OP_NOTSTAR:
1941 case OP_NOTMINSTAR:
1942 case OP_NOTPLUS:
1943 case OP_NOTMINPLUS:
1944 case OP_NOTQUERY:
1945 case OP_NOTMINQUERY:
1946 case OP_NOTUPTO:
1947 case OP_NOTMINUPTO:
1948 case OP_NOTEXACT:
1949 case OP_NOTPOSSTAR:
1950 case OP_NOTPOSPLUS:
1951 case OP_NOTPOSQUERY:
1952 case OP_NOTPOSUPTO:
1953 case OP_NOTSTARI:
1954 case OP_NOTMINSTARI:
1955 case OP_NOTPLUSI:
1956 case OP_NOTMINPLUSI:
1957 case OP_NOTQUERYI:
1958 case OP_NOTMINQUERYI:
1959 case OP_NOTUPTOI:
1960 case OP_NOTMINUPTOI:
1961 case OP_NOTEXACTI:
1962 case OP_NOTPOSSTARI:
1963 case OP_NOTPOSPLUSI:
1964 case OP_NOTPOSQUERYI:
1965 case OP_NOTPOSUPTOI:
1966 if (utf) utf16_char = TRUE;
1967 #endif
1968 /* Fall through. */
1969
1970 default:
1971 length = OP_lengths16[op] - 1;
1972 break;
1973
1974 case OP_CLASS:
1975 case OP_NCLASS:
1976 /* Skip the character bit map. */
1977 ptr += 32/sizeof(pcre_uint16);
1978 length = 0;
1979 break;
1980
1981 case OP_XCLASS:
1982 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1983 if (LINK_SIZE > 1)
1984 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1985 - (1 + LINK_SIZE + 1));
1986 else
1987 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1988
1989 /* Reverse the size of the XCLASS instance. */
1990 *ptr = swap_uint16(*ptr);
1991 ptr++;
1992 if (LINK_SIZE > 1)
1993 {
1994 *ptr = swap_uint16(*ptr);
1995 ptr++;
1996 }
1997
1998 op = *ptr;
1999 *ptr = swap_uint16(op);
2000 ptr++;
2001 if ((op & XCL_MAP) != 0)
2002 {
2003 /* Skip the character bit map. */
2004 ptr += 32/sizeof(pcre_uint16);
2005 length -= 32/sizeof(pcre_uint16);
2006 }
2007 break;
2008 }
2009 }
2010 /* Control should never reach here in 16 bit mode. */
2011 #endif /* SUPPORT_PCRE16 */
2012 }
2013
2014
2015
2016 /*************************************************
2017 * Check match or recursion limit *
2018 *************************************************/
2019
2020 static int
2021 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2022 int start_offset, int options, int *use_offsets, int use_size_offsets,
2023 int flag, unsigned long int *limit, int errnumber, const char *msg)
2024 {
2025 int count;
2026 int min = 0;
2027 int mid = 64;
2028 int max = -1;
2029
2030 extra->flags |= flag;
2031
2032 for (;;)
2033 {
2034 *limit = mid;
2035
2036 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2037 use_offsets, use_size_offsets);
2038
2039 if (count == errnumber)
2040 {
2041 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2042 min = mid;
2043 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2044 }
2045
2046 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2047 count == PCRE_ERROR_PARTIAL)
2048 {
2049 if (mid == min + 1)
2050 {
2051 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2052 break;
2053 }
2054 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2055 max = mid;
2056 mid = (min + mid)/2;
2057 }
2058 else break; /* Some other error */
2059 }
2060
2061 extra->flags &= ~flag;
2062 return count;
2063 }
2064
2065
2066
2067 /*************************************************
2068 * Case-independent strncmp() function *
2069 *************************************************/
2070
2071 /*
2072 Arguments:
2073 s first string
2074 t second string
2075 n number of characters to compare
2076
2077 Returns: < 0, = 0, or > 0, according to the comparison
2078 */
2079
2080 static int
2081 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2082 {
2083 while (n--)
2084 {
2085 int c = tolower(*s++) - tolower(*t++);
2086 if (c) return c;
2087 }
2088 return 0;
2089 }
2090
2091
2092
2093 /*************************************************
2094 * Check newline indicator *
2095 *************************************************/
2096
2097 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2098 a message and return 0 if there is no match.
2099
2100 Arguments:
2101 p points after the leading '<'
2102 f file for error message
2103
2104 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2105 */
2106
2107 static int
2108 check_newline(pcre_uint8 *p, FILE *f)
2109 {
2110 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2111 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2112 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2113 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2114 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2115 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2116 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2117 fprintf(f, "Unknown newline type at: <%s\n", p);
2118 return 0;
2119 }
2120
2121
2122
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes the command-line help text to stdout. Lines describing features
that are controlled by build-time macros (readline/libedit, the 16-bit
library, DFA matching, the POSIX interface) are selected by the preprocessor,
so the text reflects how this binary was built. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif

  fputs(" -b show compiled code\n"
        " -C show PCRE compile-time options and exit\n"
        " -C arg show a specific compile-time option\n"
        " and exit with its value. The arg can be:\n"
        " linksize internal link size [2, 3, 4]\n"
        " pcre8 8 bit library support enabled [0, 1]\n"
        " pcre16 16 bit library support enabled [0, 1]\n"
        " utf Unicode Transformation Format supported [0, 1]\n"
        " ucp Unicode Properties supported [0, 1]\n"
        " jit Just-in-time compiler supported [0, 1]\n"
        " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -s++ ditto, verifying when JIT was actually used\n"
        " -s+n force each pattern to be studied, using JIT if available,\n"
        " where 1 <= n <= 7 selects JIT options\n"
        " -s++n ditto, verifying when JIT was actually used\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
2177
2178
2179
2180 /*************************************************
2181 * Main Program *
2182 *************************************************/
2183
2184 /* Read lines from named file or stdin and write to named file or stdout; lines
2185 consist of a regular expression, in delimiters and optionally followed by
2186 options, followed by a set of test data, terminated by an empty line. */
2187
2188 int main(int argc, char **argv)
2189 {
2190 FILE *infile = stdin;
2191 const char *version;
2192 int options = 0;
2193 int study_options = 0;
2194 int default_find_match_limit = FALSE;
2195 int op = 1;
2196 int timeit = 0;
2197 int timeitm = 0;
2198 int showinfo = 0;
2199 int showstore = 0;
2200 int force_study = -1;
2201 int force_study_options = 0;
2202 int quiet = 0;
2203 int size_offsets = 45;
2204 int size_offsets_max;
2205 int *offsets = NULL;
2206 int debug = 0;
2207 int done = 0;
2208 int all_use_dfa = 0;
2209 int verify_jit = 0;
2210 int yield = 0;
2211 int stack_size;
2212
2213 #if !defined NOPOSIX
2214 int posix = 0;
2215 #endif
2216 #if !defined NODFA
2217 int *dfa_workspace = NULL;
2218 #endif
2219
2220 pcre_jit_stack *jit_stack = NULL;
2221
2222 /* These vectors store, end-to-end, a list of zero-terminated captured
2223 substring names, each list itself being terminated by an empty name. Assume
2224 that 1024 is plenty long enough for the few names we'll be testing. It is
2225 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2226 for the actual memory, to ensure alignment. */
2227
2228 pcre_uint16 copynames[1024];
2229 pcre_uint16 getnames[1024];
2230
2231 #ifdef SUPPORT_PCRE16
2232 pcre_uint16 *cn16ptr;
2233 pcre_uint16 *gn16ptr;
2234 #endif
2235
2236 #ifdef SUPPORT_PCRE8
2237 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2238 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2239 pcre_uint8 *cn8ptr;
2240 pcre_uint8 *gn8ptr;
2241 #endif
2242
2243 /* Get buffers from malloc() so that valgrind will check their misuse when
2244 debugging. They grow automatically when very long lines are read. The 16-bit
2245 buffer (buffer16) is obtained only if needed. */
2246
2247 buffer = (pcre_uint8 *)malloc(buffer_size);
2248 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2249 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2250
2251 /* The outfile variable is static so that new_malloc can use it. */
2252
2253 outfile = stdout;
2254
2255 /* The following _setmode() stuff is some Windows magic that tells its runtime
2256 library to translate CRLF into a single LF character. At least, that's what
2257 I've been told: never having used Windows I take this all on trust. Originally
2258 it set 0x8000, but then I was advised that _O_BINARY was better. */
2259
2260 #if defined(_WIN32) || defined(WIN32)
2261 _setmode( _fileno( stdout ), _O_BINARY );
2262 #endif
2263
2264 /* Get the version number: both pcre_version() and pcre16_version() give the
2265 same answer. We just need to ensure that we call one that is available. */
2266
2267 #ifdef SUPPORT_PCRE8
2268 version = pcre_version();
2269 #else
2270 version = pcre16_version();
2271 #endif
2272
2273 /* Scan options */
2274
2275 while (argc > 1 && argv[op][0] == '-')
2276 {
2277 pcre_uint8 *endptr;
2278 char *arg = argv[op];
2279
2280 if (strcmp(arg, "-m") == 0) showstore = 1;
2281 else if (strcmp(arg, "-s") == 0) force_study = 0;
2282
2283 else if (strncmp(arg, "-s+", 3) == 0)
2284 {
2285 arg += 3;
2286 if (*arg == '+') { arg++; verify_jit = TRUE; }
2287 force_study = 1;
2288 if (*arg == 0)
2289 force_study_options = jit_study_bits[6];
2290 else if (*arg >= '1' && *arg <= '7')
2291 force_study_options = jit_study_bits[*arg - '1'];
2292 else goto BAD_ARG;
2293 }
2294 else if (strcmp(arg, "-16") == 0)
2295 {
2296 #ifdef SUPPORT_PCRE16
2297 use_pcre16 = 1;
2298 #else
2299 printf("** This version of PCRE was built without 16-bit support\n");
2300 exit(1);
2301 #endif
2302 }
2303 else if (strcmp(arg, "-q") == 0) quiet = 1;
2304 else if (strcmp(arg, "-b") == 0) debug = 1;
2305 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2306 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2307 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2308 #if !defined NODFA
2309 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2310 #endif
2311 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2312 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2313 *endptr == 0))
2314 {
2315 op++;
2316 argc--;
2317 }
2318 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2319 {
2320 int both = arg[2] == 0;
2321 int temp;
2322 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2323 *endptr == 0))
2324 {
2325 timeitm = temp;
2326 op++;
2327 argc--;
2328 }
2329 else timeitm = LOOPREPEAT;
2330 if (both) timeit = timeitm;
2331 }
2332 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2333 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2334 *endptr == 0))
2335 {
2336 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2337 printf("PCRE: -S not supported on this OS\n");
2338 exit(1);
2339 #else
2340 int rc;
2341 struct rlimit rlim;
2342 getrlimit(RLIMIT_STACK, &rlim);
2343 rlim.rlim_cur = stack_size * 1024 * 1024;
2344 rc = setrlimit(RLIMIT_STACK, &rlim);
2345 if (rc != 0)
2346 {
2347 printf("PCRE: setrlimit() failed with error %d\n", rc);
2348 exit(1);
2349 }
2350 op++;
2351 argc--;
2352 #endif
2353 }
2354 #if !defined NOPOSIX
2355 else if (strcmp(arg, "-p") == 0) posix = 1;
2356 #endif
2357 else if (strcmp(arg, "-C") == 0)
2358 {
2359 int rc;
2360 unsigned long int lrc;
2361
2362 if (argc > 2)
2363 {
2364 if (strcmp(argv[op + 1], "linksize") == 0)
2365 {
2366 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2367 printf("%d\n", rc);
2368 yield = rc;
2369 goto EXIT;
2370 }
2371 if (strcmp(argv[op + 1], "pcre8") == 0)
2372 {
2373 #ifdef SUPPORT_PCRE8
2374 printf("1\n");
2375 yield = 1;
2376 #else
2377 printf("0\n");
2378 yield = 0;
2379 #endif
2380 goto EXIT;
2381 }
2382 if (strcmp(argv[op + 1], "pcre16") == 0)
2383 {
2384 #ifdef SUPPORT_PCRE16
2385 printf("1\n");
2386 yield = 1;
2387 #else
2388 printf("0\n");
2389 yield = 0;
2390 #endif
2391 goto EXIT;
2392 }
2393 if (strcmp(argv[op + 1], "utf") == 0)
2394 {
2395 #ifdef SUPPORT_PCRE8
2396 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2397 printf("%d\n", rc);
2398 yield = rc;
2399 #else
2400 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2401 printf("%d\n", rc);
2402 yield = rc;
2403 #endif
2404 goto EXIT;
2405 }
2406 if (strcmp(argv[op + 1], "ucp") == 0)
2407 {
2408 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409 printf("%d\n", rc);
2410 yield = rc;
2411 goto EXIT;
2412 }
2413 if (strcmp(argv[op + 1], "jit") == 0)
2414 {
2415 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2416 printf("%d\n", rc);
2417 yield = rc;
2418 goto EXIT;
2419 }
2420 if (strcmp(argv[op + 1], "newline") == 0)
2421 {
2422 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2423 /* Note that these values are always the ASCII values, even
2424 in EBCDIC environments. CR is 13 and NL is 10. */
2425 printf("%s\n", (rc == 13)? "CR" :
2426 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2427 (rc == -2)? "ANYCRLF" :
2428 (rc == -1)? "ANY" : "???");
2429 goto EXIT;
2430 }
2431 printf("Unknown -C option: %s\n", argv[op + 1]);
2432 goto EXIT;
2433 }
2434
2435 printf("PCRE version %s\n", version);
2436 printf("Compiled with\n");
2437
2438 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2439 are set, either both UTFs are supported or both are not supported. */
2440
2441 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2442 printf(" 8-bit and 16-bit support\n");
2443 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2444 if (rc)
2445 printf(" UTF-8 and UTF-16 support\n");
2446 else
2447 printf(" No UTF-8 or UTF-16 support\n");
2448 #elif defined SUPPORT_PCRE8
2449 printf(" 8-bit support only\n");
2450 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2451 printf(" %sUTF-8 support\n", rc? "" : "No ");
2452 #else
2453 printf(" 16-bit support only\n");
2454 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2455 printf(" %sUTF-16 support\n", rc? "" : "No ");
2456 #endif
2457
2458 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2459 printf(" %sUnicode properties support\n", rc? "" : "No ");
2460 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2461 if (rc)
2462 {
2463 const char *arch;
2464 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2465 printf(" Just-in-time compiler support: %s\n", arch);
2466 }
2467 else
2468 printf(" No just-in-time compiler support\n");
2469 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2470 /* Note that these values are always the ASCII values, even
2471 in EBCDIC environments. CR is 13 and NL is 10. */
2472 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2473 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2474 (rc == -2)? "ANYCRLF" :
2475 (rc == -1)? "ANY" : "???");
2476 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2477 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2478 "all Unicode newlines");
2479 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2480 printf(" Internal link size = %d\n", rc);
2481 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2482 printf(" POSIX malloc threshold = %d\n", rc);
2483 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2484 printf(" Default match limit = %ld\n", lrc);
2485 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2486 printf(" Default recursion depth limit = %ld\n", lrc);
2487 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2488 printf(" Match recursion uses %s", rc? "stack" : "heap");
2489 if (showstore)
2490 {
2491 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2492 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2493 }
2494 printf("\n");
2495 goto EXIT;
2496 }
2497 else if (strcmp(arg, "-help") == 0 ||
2498 strcmp(arg, "--help") == 0)
2499 {
2500 usage();
2501 goto EXIT;
2502 }
2503 else
2504 {
2505 BAD_ARG:
2506 printf("** Unknown or malformed option %s\n", arg);
2507 usage();
2508 yield = 1;
2509 goto EXIT;
2510 }
2511 op++;
2512 argc--;
2513 }
2514
2515 /* Get the store for the offsets vector, and remember what it was */
2516
2517 size_offsets_max = size_offsets;
2518 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2519 if (offsets == NULL)
2520 {
2521 printf("** Failed to get %d bytes of memory for offsets vector\n",
2522 (int)(size_offsets_max * sizeof(int)));
2523 yield = 1;
2524 goto EXIT;
2525 }
2526
2527 /* Sort out the input and output files */
2528
2529 if (argc > 1)
2530 {
2531 infile = fopen(argv[op], INPUT_MODE);
2532 if (infile == NULL)
2533 {
2534 printf("** Failed to open %s\n", argv[op]);
2535 yield = 1;
2536 goto EXIT;
2537 }
2538 }
2539
2540 if (argc > 2)
2541 {
2542 outfile = fopen(argv[op+1], OUTPUT_MODE);
2543 if (outfile == NULL)
2544 {
2545 printf("** Failed to open %s\n", argv[op+1]);
2546 yield = 1;
2547 goto EXIT;
2548 }
2549 }
2550
2551 /* Set alternative malloc function */
2552
2553 #ifdef SUPPORT_PCRE8
2554 pcre_malloc = new_malloc;
2555 pcre_free = new_free;
2556 pcre_stack_malloc = stack_malloc;
2557 pcre_stack_free = stack_free;
2558 #endif
2559
2560 #ifdef SUPPORT_PCRE16
2561 pcre16_malloc = new_malloc;
2562 pcre16_free = new_free;
2563 pcre16_stack_malloc = stack_malloc;
2564 pcre16_stack_free = stack_free;
2565 #endif
2566
2567 /* Heading line unless quiet, then prompt for first regex if stdin */
2568
2569 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2570
2571 /* Main loop */
2572
2573 while (!done)
2574 {
2575 pcre *re = NULL;
2576 pcre_extra *extra = NULL;
2577
2578 #if !defined NOPOSIX /* There are still compilers that require no indent */
2579 regex_t preg;
2580 int do_posix = 0;
2581 #endif
2582
2583 const char *error;
2584 pcre_uint8 *markptr;
2585 pcre_uint8 *p, *pp, *ppp;
2586 pcre_uint8 *to_file = NULL;
2587 const pcre_uint8 *tables = NULL;
2588 unsigned long int get_options;
2589 unsigned long int true_size, true_study_size = 0;
2590 size_t size, regex_gotten_store;
2591 int do_allcaps = 0;
2592 int do_mark = 0;
2593 int do_study = 0;
2594 int no_force_study = 0;
2595 int do_debug = debug;
2596 int do_G = 0;
2597 int do_g = 0;
2598 int do_showinfo = showinfo;
2599 int do_showrest = 0;
2600 int do_showcaprest = 0;
2601 int do_flip = 0;
2602 int erroroffset, len, delimiter, poffset;
2603
2604 #if !defined NODFA
2605 int dfa_matched = 0;
2606 #endif
2607
2608 use_utf = 0;
2609 debug_lengths = 1;
2610
2611 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2612 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2613 fflush(outfile);
2614
2615 p = buffer;
2616 while (isspace(*p)) p++;
2617 if (*p == 0) continue;
2618
2619 /* See if the pattern is to be loaded pre-compiled from a file. */
2620
2621 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2622 {
2623 pcre_uint32 magic;
2624 pcre_uint8 sbuf[8];
2625 FILE *f;
2626
2627 p++;
2628 if (*p == '!')
2629 {
2630 do_debug = TRUE;
2631 do_showinfo = TRUE;
2632 p++;
2633 }
2634
2635 pp = p + (int)strlen((char *)p);
2636 while (isspace(pp[-1])) pp--;
2637 *pp = 0;
2638
2639 f = fopen((char *)p, "rb");
2640 if (f == NULL)
2641 {
2642 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2643 continue;
2644 }
2645
2646 first_gotten_store = 0;
2647 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2648
2649 true_size =
2650 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2651 true_study_size =
2652 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2653
2654 re = (pcre *)new_malloc(true_size);
2655 regex_gotten_store = first_gotten_store;
2656
2657 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2658
2659 magic = ((REAL_PCRE *)re)->magic_number;
2660 if (magic != MAGIC_NUMBER)
2661 {
2662 if (swap_uint32(magic) == MAGIC_NUMBER)
2663 {
2664 do_flip = 1;
2665 }
2666 else
2667 {
2668 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2669 fclose(f);
2670 continue;
2671 }
2672 }
2673
2674 /* We hide the byte-invert info for little and big endian tests. */
2675 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2676 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2677
2678 /* Now see if there is any following study data. */
2679
2680 if (true_study_size != 0)
2681 {
2682 pcre_study_data *psd;
2683
2684 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2685 extra->flags = PCRE_EXTRA_STUDY_DATA;
2686
2687 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2688 extra->study_data = psd;
2689
2690 if (fread(psd, 1, true_study_size, f) != true_study_size)
2691 {
2692 FAIL_READ:
2693 fprintf(outfile, "Failed to read data from %s\n", p);
2694 if (extra != NULL)
2695 {
2696 PCRE_FREE_STUDY(extra);
2697 }
2698 if (re != NULL) new_free(re);
2699 fclose(f);
2700 continue;
2701 }
2702 fprintf(outfile, "Study data loaded from %s\n", p);
2703 do_study = 1; /* To get the data output if requested */
2704 }
2705 else fprintf(outfile, "No study data\n");
2706
2707 /* Flip the necessary bytes. */
2708 if (do_flip)
2709 {
2710 int rc;
2711 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2712 if (rc == PCRE_ERROR_BADMODE)
2713 {
2714 /* Simulate the result of the function call below. */
2715 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2716 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2717 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2718 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2719 continue;
2720 }
2721 }
2722
2723 /* Need to know if UTF-8 for printing data strings. */
2724
2725 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2726 use_utf = (get_options & PCRE_UTF8) != 0;
2727
2728 fclose(f);
2729 goto SHOW_INFO;
2730 }
2731
2732 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2733 the pattern; if it isn't complete, read more. */
2734
2735 delimiter = *p++;
2736
2737 if (isalnum(delimiter) || delimiter == '\\')
2738 {
2739 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2740 goto SKIP_DATA;
2741 }
2742
2743 pp = p;
2744 poffset = (int)(p - buffer);
2745
2746 for(;;)
2747 {
2748 while (*pp != 0)
2749 {
2750 if (*pp == '\\' && pp[1] != 0) pp++;
2751 else if (*pp == delimiter) break;
2752 pp++;
2753 }
2754 if (*pp != 0) break;
2755 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2756 {
2757 fprintf(outfile, "** Unexpected EOF\n");
2758 done = 1;
2759 goto CONTINUE;
2760 }
2761 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2762 }
2763
2764 /* The buffer may have moved while being extended; reset the start of data
2765 pointer to the correct relative point in the buffer. */
2766
2767 p = buffer + poffset;
2768
2769 /* If the first character after the delimiter is backslash, make
2770 the pattern end with backslash. This is purely to provide a way
2771 of testing for the error message when a pattern ends with backslash. */
2772
2773 if (pp[1] == '\\') *pp++ = '\\';
2774
2775 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2776 for callouts. */
2777
2778 *pp++ = 0;
2779 strcpy((char *)pbuffer, (char *)p);
2780
2781 /* Look for options after final delimiter */
2782
2783 options = 0;
2784 study_options = 0;
2785 log_store = showstore; /* default from command line */
2786
2787 while (*pp != 0)
2788 {
2789 switch (*pp++)
2790 {
2791 case 'f': options |= PCRE_FIRSTLINE; break;
2792 case 'g': do_g = 1; break;
2793 case 'i': options |= PCRE_CASELESS; break;
2794 case 'm': options |= PCRE_MULTILINE; break;
2795 case 's': options |= PCRE_DOTALL; break;
2796 case 'x': options |= PCRE_EXTENDED; break;
2797
2798 case '+':
2799 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2800 break;
2801
2802 case '=': do_allcaps = 1; break;
2803 case 'A': options |= PCRE_ANCHORED; break;
2804 case 'B': do_debug = 1; break;
2805 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2806 case 'D': do_debug = do_showinfo = 1; break;
2807 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2808 case 'F': do_flip = 1; break;
2809 case 'G': do_G = 1; break;
2810 case 'I': do_showinfo = 1; break;
2811 case 'J': options |= PCRE_DUPNAMES; break;
2812 case 'K': do_mark = 1; break;
2813 case 'M': log_store = 1; break;
2814 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2815
2816 #if !defined NOPOSIX
2817 case 'P': do_posix = 1; break;
2818 #endif
2819
2820 case 'S':
2821 if (do_study == 0)
2822 {
2823 do_study = 1;
2824 if (*pp == '+')
2825 {
2826 if (*(++pp) == '+')
2827 {
2828 verify_jit = TRUE;
2829 pp++;
2830 }
2831 if (*pp >= '1' && *pp <= '7')
2832 study_options |= jit_study_bits[*pp++ - '1'];
2833 else
2834 study_options |= jit_study_bits[6];
2835 }
2836 }
2837 else
2838 {
2839 do_study = 0;
2840 no_force_study = 1;
2841 }
2842 break;
2843
2844 case 'U': options |= PCRE_UNGREEDY; break;
2845 case 'W': options |= PCRE_UCP; break;
2846 case 'X': options |= PCRE_EXTRA; break;
2847 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2848 case 'Z': debug_lengths = 0; break;
2849 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2850 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2851
2852 case 'T':
2853 switch (*pp++)
2854 {
2855 case '0': tables = tables0; break;
2856 case '1': tables = tables1; break;
2857
2858 case '\r':
2859 case '\n':
2860 case ' ':
2861 case 0:
2862 fprintf(outfile, "** Missing table number after /T\n");
2863 goto SKIP_DATA;
2864
2865 default:
2866 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2867 goto SKIP_DATA;
2868 }
2869 break;
2870
2871 case 'L':
2872 ppp = pp;
2873 /* The '\r' test here is so that it works on Windows. */
2874 /* The '0' test is just in case this is an unterminated line. */
2875 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2876 *ppp = 0;
2877 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2878 {
2879 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2880 goto SKIP_DATA;
2881 }
2882 locale_set = 1;
2883 tables = PCRE_MAKETABLES;
2884 pp = ppp;
2885 break;
2886
2887 case '>':
2888 to_file = pp;
2889 while (*pp != 0) pp++;
2890 while (isspace(pp[-1])) pp--;
2891 *pp = 0;
2892 break;
2893
2894 case '<':
2895 {
2896 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2897 {
2898 options |= PCRE_JAVASCRIPT_COMPAT;
2899 pp += 3;
2900 }
2901 else
2902 {
2903 int x = check_newline(pp, outfile);
2904 if (x == 0) goto SKIP_DATA;
2905 options |= x;
2906 while (*pp++ != '>');
2907 }
2908 }
2909 break;
2910
2911 case '\r': /* So that it works in Windows */
2912 case '\n':
2913 case ' ':
2914 break;
2915
2916 default:
2917 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2918 goto SKIP_DATA;
2919 }
2920 }
2921
2922 /* Handle compiling via the POSIX interface, which doesn't support the
2923 timing, showing, or debugging options, nor the ability to pass over
2924 local character tables. Neither does it have 16-bit support. */
2925
2926 #if !defined NOPOSIX
2927 if (posix || do_posix)
2928 {
2929 int rc;
2930 int cflags = 0;
2931
2932 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2933 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2934 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2935 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2936 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2937 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2938 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2939
2940 first_gotten_store = 0;
2941 rc = regcomp(&preg, (char *)p, cflags);
2942
2943 /* Compilation failed; go back for another re, skipping to blank line
2944 if non-interactive. */
2945
2946 if (rc != 0)
2947 {
2948 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2949 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2950 goto SKIP_DATA;
2951 }
2952 }
2953
2954 /* Handle compiling via the native interface */
2955
2956 else
2957 #endif /* !defined NOPOSIX */
2958
2959 {
2960 /* In 16-bit mode, convert the input. */
2961
2962 #ifdef SUPPORT_PCRE16
2963 if (use_pcre16)
2964 {
2965 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2966 {
2967 case -1:
2968 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2969 "converted to UTF-16\n");
2970 goto SKIP_DATA;
2971
2972 case -2:
2973 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2974 "cannot be converted to UTF-16\n");
2975 goto SKIP_DATA;
2976
2977 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2978 fprintf(outfile, "**Failed: character value greater than 0xffff "
2979 "cannot be converted to 16-bit in non-UTF mode\n");
2980 goto SKIP_DATA;
2981
2982 default:
2983 break;
2984 }
2985 p = (pcre_uint8 *)buffer16;
2986 }
2987 #endif
2988
2989 /* Compile many times when timing */
2990
2991 if (timeit > 0)
2992 {
2993 register int i;
2994 clock_t time_taken;
2995 clock_t start_time = clock();
2996 for (i = 0; i < timeit; i++)
2997 {
2998 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2999 if (re != NULL) free(re);
3000 }
3001 time_taken = clock() - start_time;
3002 fprintf(outfile, "Compile time %.4f milliseconds\n",
3003 (((double)time_taken * 1000.0) / (double)timeit) /
3004 (double)CLOCKS_PER_SEC);
3005 }
3006
3007 first_gotten_store = 0;
3008 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3009
3010 /* Compilation failed; go back for another re, skipping to blank line
3011 if non-interactive. */
3012
3013 if (re == NULL)
3014 {
3015 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3016 SKIP_DATA:
3017 if (infile != stdin)
3018 {
3019 for (;;)
3020 {
3021 if (extend_inputline(infile, buffer, NULL) == NULL)
3022 {
3023 done = 1;
3024 goto CONTINUE;
3025 }
3026 len = (int)strlen((char *)buffer);
3027 while (len > 0 && isspace(buffer[len-1])) len--;
3028 if (len == 0) break;
3029 }
3030 fprintf(outfile, "\n");
3031 }
3032 goto CONTINUE;
3033 }
3034
3035 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3036 within the regex; check for this so that we know how to process the data
3037 lines. */
3038
3039 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3040 goto SKIP_DATA;
3041 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3042
3043 /* Extract the size for possible writing before possibly flipping it,
3044 and remember the store that was got. */
3045
3046 true_size = ((REAL_PCRE *)re)->size;
3047 regex_gotten_store = first_gotten_store;
3048
3049 /* Output code size information if requested */
3050
3051 if (log_store)
3052 fprintf(outfile, "Memory allocation (code space): %d\n",
3053 (int)(first_gotten_store -
3054 sizeof(REAL_PCRE) -
3055 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3056
3057 /* If -s or /S was present, study the regex to generate additional info to
3058 help with the matching, unless the pattern has the SS option, which
3059 suppresses the effect of /S (used for a few test patterns where studying is
3060 never sensible). */
3061
3062 if (do_study || (force_study >= 0 && !no_force_study))
3063 {
3064 if (timeit > 0)
3065 {
3066 register int i;
3067 clock_t time_taken;
3068 clock_t start_time = clock();
3069 for (i = 0; i < timeit; i++)
3070 {
3071 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3072 }
3073 time_taken = clock() - start_time;
3074 if (extra != NULL)
3075 {
3076 PCRE_FREE_STUDY(extra);
3077 }
3078 fprintf(outfile, " Study time %.4f milliseconds\n",
3079 (((double)time_taken * 1000.0) / (double)timeit) /
3080 (double)CLOCKS_PER_SEC);
3081 }
3082 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3083 if (error != NULL)
3084 fprintf(outfile, "Failed to study: %s\n", error);
3085 else if (extra != NULL)
3086 {
3087 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3088 if (log_store)
3089 {
3090 size_t jitsize;
3091 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3092 jitsize != 0)
3093 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3094 }
3095 }
3096 }
3097
3098 /* If /K was present, we set up for handling MARK data. */
3099
3100 if (do_mark)
3101 {
3102 if (extra == NULL)
3103 {
3104 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3105 extra->flags = 0;
3106 }
3107 extra->mark = &markptr;
3108 extra->flags |= PCRE_EXTRA_MARK;
3109 }
3110
3111 /* Extract and display information from the compiled data if required. */
3112
3113 SHOW_INFO:
3114
3115 if (do_debug)
3116 {
3117 fprintf(outfile, "------------------------------------------------------------------\n");
3118 PCRE_PRINTINT(re, outfile, debug_lengths);
3119 }
3120
3121 /* We already have the options in get_options (see above) */
3122
3123 if (do_showinfo)
3124 {
3125 unsigned long int all_options;
3126 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3127 hascrorlf, maxlookbehind;
3128 int nameentrysize, namecount;
3129 const pcre_uint8 *nametable;
3130
3131 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3132 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3133 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3134 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3135 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3136 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3137 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3138 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3139 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3140 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3141 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3142 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3143 != 0)
3144 goto SKIP_DATA;
3145
3146 if (size != regex_gotten_store) fprintf(outfile,
3147 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3148 (int)size, (int)regex_gotten_store);
3149
3150 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3151 if (backrefmax > 0)
3152 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3153
3154 if (namecount > 0)
3155 {
3156 fprintf(outfile, "Named capturing subpatterns:\n");
3157 while (namecount-- > 0)
3158 {
3159 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3160 int imm2_size = use_pcre16 ? 1 : 2;
3161 #else
3162 int imm2_size = IMM2_SIZE;
3163 #endif
3164 int length = (int)STRLEN(nametable + imm2_size);
3165 fprintf(outfile, " ");
3166 PCHARSV(nametable, imm2_size, length, outfile);
3167 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3168 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3169 fprintf(outfile, "%3d\n", use_pcre16?
3170 (int)(((PCRE_SPTR16)nametable)[0])
3171 :((int)nametable[0] << 8) | (int)nametable[1]);
3172 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3173 #else
3174 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3175 #ifdef SUPPORT_PCRE8
3176 nametable += nameentrysize;
3177 #else
3178 nametable += nameentrysize * 2;
3179 #endif
3180 #endif
3181 }
3182 }
3183
3184 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3185 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3186
3187 all_options = ((REAL_PCRE *)re)->options;
3188 if (do_flip) all_options = swap_uint32(all_options);
3189
3190 if (get_options == 0) fprintf(outfile, "No options\n");
3191 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3192 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3193 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3194 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3195 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3196 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3197 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3198 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3199 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3200 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3201 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3202 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3203 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3204 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3205 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3206 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3207 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3208 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3209
3210 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3211
3212 switch (get_options & PCRE_NEWLINE_BITS)
3213 {
3214 case PCRE_NEWLINE_CR:
3215 fprintf(outfile, "Forced newline sequence: CR\n");
3216 break;
3217
3218 case PCRE_NEWLINE_LF:
3219 fprintf(outfile, "Forced newline sequence: LF\n");
3220 break;
3221
3222 case PCRE_NEWLINE_CRLF:
3223 fprintf(outfile, "Forced newline sequence: CRLF\n");
3224 break;
3225
3226 case PCRE_NEWLINE_ANYCRLF:
3227 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3228 break;
3229
3230 case PCRE_NEWLINE_ANY:
3231 fprintf(outfile, "Forced newline sequence: ANY\n");
3232 break;
3233
3234 default:
3235 break;
3236 }
3237
3238 if (first_char == -1)
3239 {
3240 fprintf(outfile, "First char at start or follows newline\n");
3241 }
3242 else if (first_char < 0)
3243 {
3244 fprintf(outfile, "No first char\n");
3245 }
3246 else
3247 {
3248 const char *caseless =
3249 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3250 "" : " (caseless)";
3251
3252 if (PRINTOK(first_char))
3253 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3254 else
3255 {
3256 fprintf(outfile, "First char = ");
3257 pchar(first_char, outfile);
3258 fprintf(outfile, "%s\n", caseless);
3259 }
3260 }
3261
3262 if (need_char < 0)
3263 {
3264 fprintf(outfile, "No need char\n");
3265 }
3266 else
3267 {
3268 const char *caseless =
3269 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3270 "" : " (caseless)";
3271
3272 if (PRINTOK(need_char))
3273 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3274 else
3275 {
3276 fprintf(outfile, "Need char = ");
3277 pchar(need_char, outfile);
3278 fprintf(outfile, "%s\n", caseless);
3279 }
3280 }
3281
3282 if (maxlookbehind > 0)
3283 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3284
3285 /* Don't output study size; at present it is in any case a fixed
3286 value, but it varies, depending on the computer architecture, and
3287 so messes up the test suite. (And with the /F option, it might be
3288 flipped.) If study was forced by an external -s, don't show this
3289 information unless -i or -d was also present. This means that, except
3290 when auto-callouts are involved, the output from runs with and without
3291 -s should be identical. */
3292
3293 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3294 {
3295 if (extra == NULL)
3296 fprintf(outfile, "Study returned NULL\n");
3297 else
3298 {
3299 pcre_uint8 *start_bits = NULL;
3300 int minlength;
3301
3302 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3303 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3304
3305 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3306 {
3307 if (start_bits == NULL)
3308 fprintf(outfile, "No set of starting bytes\n");
3309 else
3310 {
3311 int i;
3312 int c = 24;
3313 fprintf(outfile, "Starting byte set: ");
3314 for (i = 0; i < 256; i++)
3315 {
3316 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3317 {
3318 if (c > 75)
3319 {
3320 fprintf(outfile, "\n ");
3321 c = 2;
3322 }
3323 if (PRINTOK(i) && i != ' ')
3324 {
3325 fprintf(outfile, "%c ", i);
3326 c += 2;
3327 }
3328 else
3329 {
3330 fprintf(outfile, "\\x%02x ", i);
3331 c += 5;
3332 }
3333 }
3334 }
3335 fprintf(outfile, "\n");
3336 }
3337 }
3338 }
3339
3340 /* Show this only if the JIT was set by /S, not by -s. */
3341
3342 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3343 {
3344 int jit;
3345 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3346 {
3347 if (jit)
3348 fprintf(outfile, "JIT study was successful\n");
3349 else
3350 #ifdef SUPPORT_JIT
3351 fprintf(outfile, "JIT study was not successful\n");
3352 #else
3353 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3354 #endif
3355 }
3356 }
3357 }
3358 }
3359
3360 /* If the '>' option was present, we write out the regex to a file, and
3361 that is all. The first 8 bytes of the file are the regex length and then
3362 the study length, in big-endian order. */
3363
3364 if (to_file != NULL)
3365 {
3366 FILE *f = fopen((char *)to_file, "wb");
3367 if (f == NULL)
3368 {
3369 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3370 }
3371 else
3372 {
3373 pcre_uint8 sbuf[8];
3374
3375 if (do_flip) regexflip(re, extra);
3376 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3377 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3378 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3379 sbuf[3] = (pcre_uint8)((true_size) & 255);
3380 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3381 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3382 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3383 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3384
3385 if (fwrite(sbuf, 1, 8, f) < 8 ||
3386 fwrite(re, 1, true_size, f) < true_size)
3387 {
3388 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3389 }
3390 else
3391 {
3392 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3393
3394 /* If there is study data, write it. */
3395
3396 if (extra != NULL)
3397 {
3398 if (fwrite(extra->study_data, 1, true_study_size, f) <
3399 true_study_size)
3400 {
3401 fprintf(outfile, "Write error on %s: %s\n", to_file,
3402 strerror(errno));
3403 }
3404 else fprintf(outfile, "Study data written to %s\n", to_file);
3405 }
3406 }
3407 fclose(f);
3408 }
3409
3410 new_free(re);
3411 if (extra != NULL)
3412 {
3413 PCRE_FREE_STUDY(extra);
3414 }
3415 if (locale_set)
3416 {
3417 new_free((void *)tables);
3418 setlocale(LC_CTYPE, "C");
3419 locale_set = 0;
3420 }
3421 continue; /* With next regex */
3422 }
3423 } /* End of non-POSIX compile */
3424
3425 /* Read data lines and test them */
3426
3427 for (;;)
3428 {
3429 pcre_uint8 *q;
3430 pcre_uint8 *bptr;
3431 int *use_offsets = offsets;
3432 int use_size_offsets = size_offsets;
3433 int callout_data = 0;
3434 int callout_data_set = 0;
3435 int count, c;
3436 int copystrings = 0;
3437 int find_match_limit = default_find_match_limit;
3438 int getstrings = 0;
3439 int getlist = 0;
3440 int gmatched = 0;
3441 int start_offset = 0;
3442 int start_offset_sign = 1;
3443 int g_notempty = 0;
3444 int use_dfa = 0;
3445
3446 *copynames = 0;
3447 *getnames = 0;
3448
3449 #ifdef SUPPORT_PCRE16
3450 cn16ptr = copynames;
3451 gn16ptr = getnames;
3452 #endif
3453 #ifdef SUPPORT_PCRE8
3454 cn8ptr = copynames8;
3455 gn8ptr = getnames8;
3456 #endif
3457
3458 SET_PCRE_CALLOUT(callout);
3459 first_callout = 1;
3460 last_callout_mark = NULL;
3461 callout_extra = 0;
3462 callout_count = 0;
3463 callout_fail_count = 999999;
3464 callout_fail_id = -1;
3465 show_malloc = 0;
3466 options = 0;
3467
3468 if (extra != NULL) extra->flags &=
3469 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3470
3471 len = 0;
3472 for (;;)
3473 {
3474 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3475 {
3476 if (len > 0) /* Reached EOF without hitting a newline */
3477 {
3478 fprintf(outfile, "\n");
3479 break;
3480 }
3481 done = 1;
3482 goto CONTINUE;
3483 }
3484 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3485 len = (int)strlen((char *)buffer);
3486 if (buffer[len-1] == '\n') break;
3487 }
3488
3489 while (len > 0 && isspace(buffer[len-1])) len--;
3490 buffer[len] = 0;
3491 if (len == 0) break;
3492
3493 p = buffer;
3494 while (isspace(*p)) p++;
3495
3496 bptr = q = dbuffer;
3497 while ((c = *p++) != 0)
3498 {
3499 int i = 0;
3500 int n = 0;
3501
3502 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3503 In non-UTF mode, allow the value of the byte to fall through to later,
3504 where values greater than 127 are turned into UTF-8 when running in
3505 16-bit mode. */
3506
3507 if (c != '\\')
3508 {
3509 if (use_utf)
3510 {
3511 *q++ = c;
3512 continue;
3513 }
3514 }
3515
3516 /* Handle backslash escapes */
3517
3518 else switch ((c = *p++))
3519 {
3520 case 'a': c = 7; break;
3521 case 'b': c = '\b'; break;
3522 case 'e': c = 27; break;
3523 case 'f': c = '\f'; break;
3524 case 'n': c = '\n'; break;
3525 case 'r': c = '\r'; break;
3526 case 't': c = '\t'; break;
3527 case 'v': c = '\v'; break;
3528
3529 case '0': case '1': case '2': case '3':
3530 case '4': case '5': case '6': case '7':
3531 c -= '0';
3532 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3533 c = c * 8 + *p++ - '0';
3534 break;
3535
3536 case 'x':
3537 if (*p == '{')
3538 {
3539 pcre_uint8 *pt = p;
3540 c = 0;
3541
3542 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3543 when isxdigit() is a macro that refers to its argument more than
3544 once. This is banned by the C Standard, but apparently happens in at
3545 least one MacOS environment. */
3546
3547 for (pt++; isxdigit(*pt); pt++)
3548 {
3549 if (++i == 9)
3550 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3551 "using only the first eight.\n");
3552 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3553 }
3554 if (*pt == '}')
3555 {
3556 p = pt + 1;
3557 break;
3558 }
3559 /* Not correct form for \x{...}; fall through */
3560 }
3561
3562 /* \x without {} always defines just one byte in 8-bit mode. This
3563 allows UTF-8 characters to be constructed byte by byte, and also allows
3564 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3565 Otherwise, pass it down to later code so that it can be turned into
3566 UTF-8 when running in 16-bit mode. */
3567
3568 c = 0;
3569 while (i++ < 2 && isxdigit(*p))
3570 {
3571 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3572 p++;
3573 }
3574 if (use_utf)
3575 {
3576 *q++ = c;
3577 continue;
3578 }
3579 break;
3580
3581 case 0: /* \ followed by EOF allows for an empty line */
3582 p--;
3583 continue;
3584
3585 case '>':
3586 if (*p == '-')
3587 {
3588 start_offset_sign = -1;
3589 p++;
3590 }
3591 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3592 start_offset *= start_offset_sign;
3593 continue;
3594
3595 case 'A': /* Option setting */
3596 options |= PCRE_ANCHORED;
3597 continue;
3598
3599 case 'B':
3600 options |= PCRE_NOTBOL;
3601 continue;
3602
3603 case 'C':
3604 if (isdigit(*p)) /* Set copy string */
3605 {
3606 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3607 copystrings |= 1 << n;
3608 }
3609 else if (isalnum(*p))
3610 {
3611 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3612 }
3613 else if (*p == '+')
3614 {
3615 callout_extra = 1;
3616 p++;
3617 }
3618 else if (*p == '-')
3619 {
3620 SET_PCRE_CALLOUT(NULL);
3621 p++;
3622 }
3623 else if (*p == '!')
3624 {
3625 callout_fail_id = 0;
3626 p++;
3627 while(isdigit(*p))
3628 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3629 callout_fail_count = 0;
3630 if (*p == '!')
3631 {
3632 p++;
3633 while(isdigit(*p))
3634 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3635 }
3636 }
3637 else if (*p == '*')
3638 {
3639 int sign = 1;
3640 callout_data = 0;
3641 if (*(++p) == '-') { sign = -1; p++; }
3642 while(isdigit(*p))
3643 callout_data = callout_data * 10 + *p++ - '0';
3644 callout_data *= sign;
3645 callout_data_set = 1;
3646 }
3647 continue;
3648
3649 #if !defined NODFA
3650 case 'D':
3651 #if !defined NOPOSIX
3652 if (posix || do_posix)
3653 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3654 else
3655 #endif
3656 use_dfa = 1;
3657 continue;
3658 #endif
3659
3660 #if !defined NODFA
3661 case 'F':
3662 options |= PCRE_DFA_SHORTEST;
3663 continue;
3664 #endif
3665
3666 case 'G':
3667 if (isdigit(*p))
3668 {
3669 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3670 getstrings |= 1 << n;
3671 }
3672 else if (isalnum(*p))
3673 {
3674 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3675 }
3676 continue;
3677
3678 case 'J':
3679 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3680 if (extra != NULL
3681 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3682 && extra->executable_jit != NULL)
3683 {
3684 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3685 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3686 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3687 }
3688 continue;
3689
3690 case 'L':
3691 getlist = 1;
3692 continue;
3693
3694 case 'M':
3695 find_match_limit = 1;
3696 continue;
3697
3698 case 'N':
3699 if ((options & PCRE_NOTEMPTY) != 0)
3700 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3701 else
3702 options |= PCRE_NOTEMPTY;
3703 continue;
3704
3705 case 'O':
3706 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3707 if (n > size_offsets_max)
3708 {
3709 size_offsets_max = n;
3710 free(offsets);
3711 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3712 if (offsets == NULL)
3713 {
3714 printf("** Failed to get %d bytes of memory for offsets vector\n",
3715 (int)(size_offsets_max * sizeof(int)));
3716 yield = 1;
3717 goto EXIT;
3718 }
3719 }
3720 use_size_offsets = n;
3721 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3722 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
3723 continue;
3724
3725 case 'P':
3726 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3727 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3728 continue;
3729
3730 case 'Q':
3731 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3732 if (extra == NULL)
3733 {
3734 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3735 extra->flags = 0;
3736 }
3737 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3738 extra->match_limit_recursion = n;
3739 continue;
3740
3741 case 'q':
3742 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3743 if (extra == NULL)
3744 {
3745 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3746 extra->flags = 0;
3747 }
3748 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3749 extra->match_limit = n;
3750 continue;
3751
3752 #if !defined NODFA
3753 case 'R':
3754 options |= PCRE_DFA_RESTART;
3755 continue;
3756 #endif
3757
3758 case 'S':
3759 show_malloc = 1;
3760 continue;
3761
3762 case 'Y':
3763 options |= PCRE_NO_START_OPTIMIZE;
3764 continue;
3765
3766 case 'Z':
3767 options |= PCRE_NOTEOL;
3768 continue;
3769
3770 case '?':
3771 options |= PCRE_NO_UTF8_CHECK;
3772 continue;
3773
3774 case '<':
3775 {
3776 int x = check_newline(p, outfile);
3777 if (x == 0) goto NEXT_DATA;
3778 options |= x;
3779 while (*p++ != '>');
3780 }
3781 continue;
3782 }
3783
3784 /* We now have a character value in c that may be greater than 255. In
3785 16-bit mode, we always convert characters to UTF-8 so that values greater
3786 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3787 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3788 mode must have come from \x{...} or octal constructs because values from
3789 \x.. get this far only in non-UTF mode. */
3790
3791 #if !defined NOUTF || defined SUPPORT_PCRE16
3792 if (use_pcre16 || use_utf)
3793 {
3794 pcre_uint8 buff8[8];
3795 int ii, utn;
3796 utn = ord2utf8(c, buff8);
3797 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3798 }
3799 else
3800 #endif
3801 {
3802 if (c > 255)
3803 {
3804 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3805 "and UTF-8 mode is not enabled.\n", c);
3806 fprintf(outfile, "** Truncation will probably give the wrong "
3807 "result.\n");
3808 }
3809 *q++ = c;
3810 }
3811 }
3812
3813 /* Reached end of subject string */
3814
3815 *q = 0;
3816 len = (int)(q - dbuffer);
3817
3818 /* Move the data to the end of the buffer so that a read over the end of
3819 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3820 we are using the POSIX interface, we must include the terminating zero. */
3821
3822 #if !defined NOPOSIX
3823 if (posix || do_posix)
3824 {
3825 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3826 bptr += buffer_size - len - 1;
3827 }
3828 else
3829 #endif
3830 {
3831 memmove(bptr + buffer_size - len, bptr, len);
3832 bptr += buffer_size - len;
3833 }
3834
3835 if ((all_use_dfa || use_dfa) && find_match_limit)
3836 {
3837 printf("**Match limit not relevant for DFA matching: ignored\n");
3838 find_match_limit = 0;
3839 }
3840
3841 /* Handle matching via the POSIX interface, which does not
3842 support timing or playing with the match limit or callout data. */
3843
3844 #if !defined NOPOSIX
3845 if (posix || do_posix)
3846 {
3847 int rc;
3848 int eflags = 0;
3849 regmatch_t *pmatch = NULL;
3850 if (use_size_offsets > 0)
3851 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3852 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3853 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3854 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3855
3856 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3857
3858 if (rc != 0)
3859 {
3860 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3861 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3862 }
3863 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3864 != 0)
3865 {
3866 fprintf(outfile, "Matched with REG_NOSUB\n");
3867 }
3868 else
3869 {
3870 size_t i;
3871 for (i = 0; i < (size_t)use_size_offsets; i++)
3872 {
3873 if (pmatch[i].rm_so >= 0)
3874 {
3875 fprintf(outfile, "%2d: ", (int)i);
3876 PCHARSV(dbuffer, pmatch[i].rm_so,
3877 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3878 fprintf(outfile, "\n");
3879 if (do_showcaprest || (i == 0 && do_showrest))
3880 {
3881 fprintf(outfile, "%2d+ ", (int)i);
3882 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3883 outfile);
3884 fprintf(outfile, "\n");
3885 }
3886 }
3887 }
3888 }
3889 free(pmatch);
3890 goto NEXT_DATA;
3891 }
3892
3893 #endif /* !defined NOPOSIX */
3894
3895 /* Handle matching via the native interface - repeats for /g and /G */
3896
3897 #ifdef SUPPORT_PCRE16
3898 if (use_pcre16)
3899 {
3900 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3901 switch(len)
3902 {
3903 case -1:
3904 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3905 "converted to UTF-16\n");
3906 goto NEXT_DATA;
3907
3908 case -2:
3909 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3910 "cannot be converted to UTF-16\n");
3911 goto NEXT_DATA;
3912
3913 case -3:
3914 fprintf(outfile, "**Failed: character value greater than 0xffff "
3915 "cannot be converted to 16-bit in non-UTF mode\n");
3916 goto NEXT_DATA;
3917
3918 default:
3919 break;
3920 }
3921 bptr = (pcre_uint8 *)buffer16;
3922 }
3923 #endif
3924
3925 /* Ensure that there is a JIT callback if we want to verify that JIT was
3926 actually used. If jit_stack == NULL, no stack has yet been assigned. */
3927
3928 if (verify_jit && jit_stack == NULL && extra != NULL)
3929 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3930
3931 for (;; gmatched++) /* Loop for /g or /G */
3932 {
3933 markptr = NULL;
3934 jit_was_used = FALSE;
3935
3936 if (timeitm > 0)
3937 {
3938 register int i;
3939 clock_t time_taken;
3940 clock_t start_time = clock();
3941
3942 #if !defined NODFA
3943 if (all_use_dfa || use_dfa)
3944 {
3945 if ((options & PCRE_DFA_RESTART) != 0)
3946 {
3947 fprintf(outfile, "Timing DFA restarts is not supported\n");
3948 break;
3949 }
3950 if (dfa_workspace == NULL)
3951 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3952 for (i = 0; i < timeitm; i++)
3953 {
3954 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3955 (options | g_notempty), use_offsets, use_size_offsets,
3956 dfa_workspace, DFA_WS_DIMENSION);
3957 }
3958 }
3959 else
3960 #endif
3961
3962 for (i = 0; i < timeitm; i++)
3963 {
3964 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3965 (options | g_notempty), use_offsets, use_size_offsets);
3966 }
3967 time_taken = clock() - start_time;
3968 fprintf(outfile, "Execute time %.4f milliseconds\n",
3969 (((double)time_taken * 1000.0) / (double)timeitm) /
3970 (double)CLOCKS_PER_SEC);
3971 }
3972
3973 /* If find_match_limit is set, we want to do repeated matches with
3974 varying limits in order to find the minimum value for the match limit and
3975 for the recursion limit. The match limits are relevant only to the normal
3976 running of pcre_exec(), so disable the JIT optimization. This makes it
3977 possible to run the same set of tests with and without JIT externally
3978 requested. */
3979
3980 if (find_match_limit)
3981 {
3982 if (extra == NULL)
3983 {
3984 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3985 extra->flags = 0;
3986 }
3987 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3988
3989 (void)check_match_limit(re, extra, bptr, len, start_offset,
3990 options|g_notempty, use_offsets, use_size_offsets,
3991 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3992 PCRE_ERROR_MATCHLIMIT, "match()");
3993
3994 count = check_match_limit(re, extra, bptr, len, start_offset,
3995 options|g_notempty, use_offsets, use_size_offsets,
3996 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3997 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3998 }
3999
4000 /* If callout_data is set, use the interface with additional data */
4001
4002 else if (callout_data_set)
4003 {
4004 if (extra == NULL)
4005 {
4006 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4007 extra->flags = 0;
4008 }
4009 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4010 extra->callout_data = &callout_data;
4011 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4012 options | g_notempty, use_offsets, use_size_offsets);
4013 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4014 }
4015
4016 /* The normal case is just to do the match once, with the default
4017 value of match_limit. */
4018
4019 #if !defined NODFA
4020 else if (all_use_dfa || use_dfa)
4021 {
4022 if (dfa_workspace == NULL)
4023 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4024 if (dfa_matched++ == 0)
4025 dfa_workspace[0] = -1; /* To catch bad restart */
4026 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4027 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4028 DFA_WS_DIMENSION);
4029 if (count == 0)
4030 {
4031 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4032 count = use_size_offsets/2;
4033 }
4034 }
4035 #endif
4036
4037 else
4038 {
4039 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4040 options | g_notempty, use_offsets, use_size_offsets);
4041 if (count == 0)
4042 {
4043 fprintf(outfile, "Matched, but too many substrings\n");
4044 count = use_size_offsets/3;
4045 }
4046 }
4047
4048 /* Matched */
4049
4050 if (count >= 0)
4051 {
4052 int i, maxcount;
4053 void *cnptr, *gnptr;
4054
4055 #if !defined NODFA
4056 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4057 #endif
4058 maxcount = use_size_offsets/3;
4059
4060 /* This is a check against a lunatic return value. */
4061
4062 if (count > maxcount)
4063 {
4064 fprintf(outfile,
4065 "** PCRE error: returned count %d is too big for offset size %d\n",
4066 count, use_size_offsets);
4067 count = use_size_offsets/3;
4068 if (do_g || do_G)
4069 {
4070 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4071 do_g = do_G = FALSE; /* Break g/G loop */
4072 }
4073 }
4074
4075 /* do_allcaps requests showing of all captures in the pattern, to check
4076 unset ones at the end. */
4077
4078 if (do_allcaps)
4079 {
4080 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4081 goto SKIP_DATA;
4082 count++; /* Allow for full match */
4083 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4084 }
4085
4086 /* Output the captured substrings */
4087
4088 for (i = 0; i < count * 2; i += 2)
4089 {
4090 if (use_offsets[i] < 0)
4091 {
4092 if (use_offsets[i] != -1)
4093 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4094 use_offsets[i], i);
4095 if (use_offsets[i+1] != -1)
4096 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4097 use_offsets[i+1], i+1);
4098 fprintf(outfile, "%2d: <unset>\n", i/2);
4099 }
4100 else
4101 {
4102 fprintf(outfile, "%2d: ", i/2);
4103 PCHARSV(bptr, use_offsets[i],
4104 use_offsets[i+1] - use_offsets[i], outfile);
4105 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4106 fprintf(outfile, "\n");
4107 if (do_showcaprest || (i == 0 && do_showrest))
4108 {
4109 fprintf(outfile, "%2d+ ", i/2);
4110 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4111 outfile);
4112 fprintf(outfile, "\n");
4113 }
4114 }
4115 }
4116
4117 if (markptr != NULL)
4118 {
4119 fprintf(outfile, "MK: ");
4120 PCHARSV(markptr, 0, -1, outfile);
4121 fprintf(outfile, "\n");
4122 }
4123
4124 for (i = 0; i < 32; i++)
4125 {
4126 if ((copystrings & (1 << i)) != 0)
4127 {
4128 int rc;
4129 char copybuffer[256];
4130 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4131 copybuffer, sizeof(copybuffer));
4132 if (rc < 0)
4133 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4134 else
4135 {
4136 fprintf(outfile, "%2dC ", i);
4137 PCHARSV(copybuffer, 0, rc, outfile);
4138 fprintf(outfile, " (%d)\n", rc);
4139 }
4140 }
4141 }
4142
4143 cnptr = copynames;
4144 for (;;)
4145 {
4146 int rc;
4147 char copybuffer[256];
4148
4149 if (use_pcre16)
4150 {
4151 if (*(pcre_uint16 *)cnptr == 0) break;
4152 }
4153 else
4154 {
4155 if (*(pcre_uint8 *)cnptr == 0) break;
4156 }
4157
4158 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4159 cnptr, copybuffer, sizeof(copybuffer));
4160
4161 if (rc < 0)
4162 {
4163 fprintf(outfile, "copy substring ");
4164 PCHARSV(cnptr, 0, -1, outfile);
4165 fprintf(outfile, " failed %d\n", rc);
4166 }
4167 else
4168 {
4169 fprintf(outfile, " C ");
4170 PCHARSV(copybuffer, 0, rc, outfile);
4171 fprintf(outfile, " (%d) ", rc);
4172 PCHARSV(cnptr, 0, -1, outfile);
4173 putc('\n', outfile);
4174 }
4175
4176 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4177 }
4178
4179 for (i = 0; i < 32; i++)
4180 {
4181 if ((getstrings & (1 << i)) != 0)
4182 {
4183 int rc;
4184 const char *substring;
4185 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4186 if (rc < 0)
4187 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4188 else
4189 {
4190 fprintf(outfile, "%2dG ", i);
4191 PCHARSV(substring, 0, rc, outfile);
4192 fprintf(outfile, " (%d)\n", rc);
4193 PCRE_FREE_SUBSTRING(substring);
4194 }
4195 }
4196 }
4197
4198 gnptr = getnames;
4199 for (;;)
4200 {
4201 int rc;
4202 const char *substring;
4203
4204 if (use_pcre16)
4205 {
4206 if (*(pcre_uint16 *)gnptr == 0) break;
4207 }
4208 else
4209 {
4210 if (*(pcre_uint8 *)gnptr == 0) break;
4211 }
4212
4213 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4214 gnptr, &substring);
4215 if (rc < 0)
4216 {
4217 fprintf(outfile, "get substring ");
4218 PCHARSV(gnptr, 0, -1, outfile);
4219 fprintf(outfile, " failed %d\n", rc);
4220 }
4221 else
4222 {
4223 fprintf(outfile, " G ");
4224 PCHARSV(substring, 0, rc, outfile);
4225 fprintf(outfile, " (%d) ", rc);
4226 PCHARSV(gnptr, 0, -1, outfile);
4227 PCRE_FREE_SUBSTRING(substring);
4228 putc('\n', outfile);
4229 }
4230
4231 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4232 }
4233
4234 if (getlist)
4235 {
4236 int rc;
4237 const char **stringlist;
4238 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4239 if (rc < 0)
4240 fprintf(outfile, "get substring list failed %d\n", rc);
4241 else
4242 {
4243 for (i = 0; i < count; i++)
4244 {
4245 fprintf(outfile, "%2dL ", i);
4246 PCHARSV(stringlist[i], 0, -1, outfile);
4247 putc('\n', outfile);
4248 }
4249 if (stringlist[i] != NULL)
4250 fprintf(outfile, "string list not terminated by NULL\n");
4251 PCRE_FREE_SUBSTRING_LIST(stringlist);
4252 }
4253 }
4254 }
4255
4256 /* There was a partial match */
4257
4258 else if (count == PCRE_ERROR_PARTIAL)
4259 {
4260 if (markptr == NULL) fprintf(outfile, "Partial match");
4261 else
4262 {
4263 fprintf(outfile, "Partial match, mark=");
4264 PCHARSV(markptr, 0, -1, outfile);
4265 }
4266 if (use_size_offsets > 1)
4267 {
4268 fprintf(outfile, ": ");
4269 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4270 outfile);
4271 }
4272 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4273 fprintf(outfile, "\n");
4274 break; /* Out of the /g loop */
4275 }
4276
4277 /* Failed to match. If this is a /g or /G loop and we previously set
4278 g_notempty after a null match, this is not necessarily the end. We want
4279 to advance the start offset, and continue. We won't be at the end of the
4280 string - that was checked before setting g_notempty.
4281
4282 Complication arises in the case when the newline convention is "any",
4283 "crlf", or "anycrlf". If the previous match was at the end of a line
4284 terminated by CRLF, an advance of one character just passes the \r,
4285 whereas we should prefer the longer newline sequence, as does the code in
4286 pcre_exec(). Fudge the offset value to achieve this. We check for a
4287 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4288 find the default.
4289
4290 Otherwise, in the case of UTF-8 matching, the advance must be one
4291 character, not one byte. */
4292
4293 else
4294 {
4295 if (g_notempty != 0)
4296 {
4297 int onechar = 1;
4298 unsigned int obits = ((REAL_PCRE *)re)->options;
4299 use_offsets[0] = start_offset;
4300 if ((obits & PCRE_NEWLINE_BITS) == 0)
4301 {
4302 int d;
4303 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4304 /* Note that these values are always the ASCII ones, even in
4305 EBCDIC environments. CR = 13, NL = 10. */
4306 obits = (d == 13)? PCRE_NEWLINE_CR :
4307 (d == 10)? PCRE_NEWLINE_LF :
4308 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4309 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4310 (d == -1)? PCRE_NEWLINE_ANY : 0;
4311 }
4312 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4313 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4314 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4315 &&
4316 start_offset < len - 1 &&
4317 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4318 (use_pcre16?
4319 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4320 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4321 :
4322 bptr[start_offset] == '\r'
4323 && bptr[start_offset + 1] == '\n')
4324 #elif defined SUPPORT_PCRE16
4325 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4326 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4327 #else
4328 bptr[start_offset] == '\r'
4329 && bptr[start_offset + 1] == '\n'
4330 #endif
4331 )
4332 onechar++;
4333 else if (use_utf)
4334 {
4335 while (start_offset + onechar < len)
4336 {
4337 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4338 onechar++;
4339 }
4340 }
4341 use_offsets[1] = start_offset + onechar;
4342 }
4343 else
4344 {
4345 switch(count)
4346 {
4347 case PCRE_ERROR_NOMATCH:
4348 if (gmatched == 0)
4349 {
4350 if (markptr == NULL)
4351 {
4352 fprintf(outfile, "No match");
4353 }
4354 else
4355 {
4356 fprintf(outfile, "No match, mark = ");
4357 PCHARSV(markptr, 0, -1, outfile);
4358 }
4359 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4360 putc('\n', outfile);
4361 }
4362 break;
4363
4364 case PCRE_ERROR_BADUTF8:
4365 case PCRE_ERROR_SHORTUTF8:
4366 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4367 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4368 use_pcre16? "16" : "8");
4369 if (use_size_offsets >= 2)
4370 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4371 use_offsets[1]);
4372 fprintf(outfile, "\n");
4373 break;
4374
4375 case PCRE_ERROR_BADUTF8_OFFSET:
4376 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4377 use_pcre16? "16" : "8");
4378 break;
4379
4380 default:
4381 if (count < 0 &&
4382 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4383 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4384 else
4385 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4386 break;
4387 }
4388
4389 break; /* Out of the /g loop */
4390 }
4391 }
4392
4393 /* If not /g or /G we are done */
4394
4395 if (!do_g && !do_G) break;
4396
4397 /* If we have matched an empty string, first check to see if we are at
4398 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4399 Perl's /g option does. This turns out to be rather cunning. First we set
4400 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4401 same point. If this fails (picked up above) we advance to the next
4402 character. */
4403
4404 g_notempty = 0;
4405
4406 if (use_offsets[0] == use_offsets[1])
4407 {
4408 if (use_offsets[0] == len) break;
4409 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4410 }
4411
4412 /* For /g, update the start offset, leaving the rest alone */
4413
4414 if (do_g) start_offset = use_offsets[1];
4415
4416 /* For /G, update the pointer and length */
4417
4418 else
4419 {
4420 bptr += use_offsets[1] * CHAR_SIZE;
4421 len -= use_offsets[1];
4422 }
4423 } /* End of loop for /g and /G */
4424
4425 NEXT_DATA: continue;
4426 } /* End of loop for data lines */
4427
4428 CONTINUE:
4429
4430 #if !defined NOPOSIX
4431 if (posix || do_posix) regfree(&preg);
4432 #endif
4433
4434 if (re != NULL) new_free(re);
4435 if (extra != NULL)
4436 {
4437 PCRE_FREE_STUDY(extra);
4438 }
4439 if (locale_set)
4440 {
4441 new_free((void *)tables);
4442 setlocale(LC_CTYPE, "C");
4443 locale_set = 0;
4444 }
4445 if (jit_stack != NULL)
4446 {
4447 PCRE_JIT_STACK_FREE(jit_stack);
4448 jit_stack = NULL;
4449 }
4450 }
4451
4452 if (infile == stdin) fprintf(outfile, "\n");
4453
4454 EXIT:
4455
4456 if (infile != NULL && infile != stdin) fclose(infile);
4457 if (outfile != NULL && outfile != stdout) fclose(outfile);
4458
4459 free(buffer);
4460 free(dbuffer);
4461 free(pbuffer);
4462 free(offsets);
4463
4464 #ifdef SUPPORT_PCRE16
4465 if (buffer16 != NULL) free(buffer16);
4466 #endif
4467
4468 return yield;
4469 }
4470
4471 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5