/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1022 - (show annotations)
Tue Aug 28 12:28:15 2012 UTC (7 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 134466 byte(s)
Add support for PCRE_STUDY_EXTRA_NEEDED.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #define INPUT_MODE "rb"
116 #define OUTPUT_MODE "wb"
117 #endif
118
119 #define PRIV(name) name
120
121 /* We have to include pcre_internal.h because we need the internal info for
122 displaying the results of pcre_study() and we also need to know about the
123 internal macros, structures, and other internal data values; pcretest has
124 "inside information" compared to a program that strictly follows the PCRE API.
125
126 Although pcre_internal.h does itself include pcre.h, we explicitly include it
127 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
128 appropriately for an application, not for building PCRE. */
129
130 #include "pcre.h"
131
132 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133 /* Configure internal macros to 16 bit mode. */
134 #define COMPILE_PCRE16
135 #endif
136
137 #include "pcre_internal.h"
138
139 /* The pcre_printint() function, which prints the internal form of a compiled
140 regex, is held in a separate file so that (a) it can be compiled in either
141 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142 when that is compiled in debug mode. */
143
144 #ifdef SUPPORT_PCRE8
145 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146 #endif
147 #ifdef SUPPORT_PCRE16
148 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149 #endif
150
151 /* We need access to some of the data tables that PCRE uses. So as not to have
152 to keep two copies, we include the source file here, changing the names of the
153 external symbols to prevent clashes. */
154
155 #define PCRE_INCLUDED
156
157 #include "pcre_tables.c"
158
159 /* The definition of the macro PRINTABLE, which determines whether to print an
160 output character as-is or as a hex value when showing compiled patterns, is
161 the same as in the printint.src file. We use it here in cases when the locale
162 has not been explicitly changed, so as to get consistent output from systems
163 that differ in their output from isprint() even in the "C" locale. */
164
165 #ifdef EBCDIC
166 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167 #else
168 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169 #endif
170
171 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c)) /* isprint() is consulted only when locale_set is non-zero; otherwise the fixed PRINTABLE range decides */
172
173 /* Posix support is disabled in 16 bit only mode. */
174 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175 #define NOPOSIX
176 #endif
177
178 /* It is possible to compile this test program without including support for
179 testing the POSIX interface, though this is not available via the standard
180 Makefile. */
181
182 #if !defined NOPOSIX
183 #include "pcreposix.h"
184 #endif
185
186 /* It is also possible, originally for the benefit of a version that was
187 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188 NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
189 automatically cut out the UTF support if PCRE is built without it. */
190
191 #ifndef SUPPORT_UTF
192 #ifndef NOUTF
193 #define NOUTF
194 #endif
195 #endif
196
197 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199 only from one place and is handled differently). I couldn't dream up any way of
200 using a single macro to do this in a generic way, because of the many different
201 argument requirements. We know that at least one of SUPPORT_PCRE8 and
202 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203 use these in the definitions of generic macros.
204
205 **** Special note about the PCHARSxxx macros: the address of the string to be
206 printed is always given as two arguments: a base address followed by an offset.
207 The base address is cast to the correct data size for 8 or 16 bit data; the
208 offset is in units of this size. If the string were given as base+offset in one
209 argument, the casting might be incorrectly applied. */
210
211 #ifdef SUPPORT_PCRE8
212
213 #define PCHARS8(lv, p, offset, len, f) \
214 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
215
216 #define PCHARSV8(p, offset, len, f) \
217 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
218
219 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220 p = read_capture_name8(p, cn8, re)
221
222 #define STRLEN8(p) ((int)strlen((char *)p))
223
224 #define SET_PCRE_CALLOUT8(callout) \
225 pcre_callout = callout
226
227 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228 pcre_assign_jit_stack(extra, callback, userdata)
229
230 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231 re = pcre_compile((char *)pat, options, error, erroffset, tables)
232
233 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234 namesptr, cbuffer, size) \
235 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236 (char *)namesptr, cbuffer, size)
237
238 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240
241 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242 offsets, size_offsets, workspace, size_workspace) \
243 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244 offsets, size_offsets, workspace, size_workspace)
245
246 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247 offsets, size_offsets) \
248 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249 offsets, size_offsets)
250
251 #define PCRE_FREE_STUDY8(extra) \
252 pcre_free_study(extra)
253
254 #define PCRE_FREE_SUBSTRING8(substring) \
255 pcre_free_substring(substring)
256
257 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258 pcre_free_substring_list(listptr)
259
260 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261 getnamesptr, subsptr) \
262 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263 (char *)getnamesptr, subsptr)
264
265 #define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
266 n = pcre_get_stringnumber(re, (char *)ptr) /* n receives the result; re is the macro's own second argument rather than a variable captured from the caller's scope */
267
268 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270
271 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273
274 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276
277 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278 pcre_printint(re, outfile, debug_lengths)
279
280 #define PCRE_STUDY8(extra, re, options, error) \
281 extra = pcre_study(re, options, error)
282
283 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284 pcre_jit_stack_alloc(startsize, maxsize)
285
286 #define PCRE_JIT_STACK_FREE8(stack) \
287 pcre_jit_stack_free(stack)
288
289 #endif /* SUPPORT_PCRE8 */
290
291 /* -----------------------------------------------------------*/
292
293 #ifdef SUPPORT_PCRE16
294
295 #define PCHARS16(lv, p, offset, len, f) \
296 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
297
298 #define PCHARSV16(p, offset, len, f) \
299 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
300
301 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302 p = read_capture_name16(p, cn16, re)
303
304 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305
306 #define SET_PCRE_CALLOUT16(callout) \
307 pcre16_callout = (int (*)(pcre16_callout_block *))callout
308
309 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310 pcre16_assign_jit_stack((pcre16_extra *)extra, \
311 (pcre16_jit_callback)callback, userdata)
312
313 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315 tables)
316
317 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
318 namesptr, cbuffer, size) \
319 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
320 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2) /* rc receives the result; (size) is parenthesized so that an expression argument is halved as a whole before being passed with the PCRE_UCHAR16 buffer */
321
322 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
323 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
324 (PCRE_UCHAR16 *)cbuffer, (size)/2) /* rc receives the result; (size) is parenthesized so that an expression argument is halved as a whole before being passed with the PCRE_UCHAR16 buffer */
325
326 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327 offsets, size_offsets, workspace, size_workspace) \
328 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330 workspace, size_workspace)
331
332 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333 offsets, size_offsets) \
334 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335 len, start_offset, options, offsets, size_offsets)
336
337 #define PCRE_FREE_STUDY16(extra) \
338 pcre16_free_study((pcre16_extra *)extra)
339
340 #define PCRE_FREE_SUBSTRING16(substring) \
341 pcre16_free_substring((PCRE_SPTR16)substring)
342
343 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345
346 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347 getnamesptr, subsptr) \
348 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350
351 #define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
352 n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr) /* n receives the result; re is the macro's own second argument (cast like the other 16-bit wrappers) rather than a variable captured from the caller's scope */
353
354 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356 (PCRE_SPTR16 *)(void*)subsptr)
357
358 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360 (PCRE_SPTR16 **)(void*)listptr)
361
362 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364 tables)
365
366 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367 pcre16_printint(re, outfile, debug_lengths)
368
369 #define PCRE_STUDY16(extra, re, options, error) \
370 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371
372 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374
375 #define PCRE_JIT_STACK_FREE16(stack) \
376 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377
378 #endif /* SUPPORT_PCRE16 */
379
380
381 /* ----- Both modes are supported; a runtime test is needed, except for
382 pcre_config(), and the JIT stack functions, when it doesn't matter which
383 version is called. ----- */
384
385 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386
387 #define CHAR_SIZE (use_pcre16? 2:1)
388
389 #define PCHARS(lv, p, offset, len, f) \
390 if (use_pcre16) \
391 PCHARS16(lv, p, offset, len, f); \
392 else \
393 PCHARS8(lv, p, offset, len, f)
394
395 #define PCHARSV(p, offset, len, f) \
396 if (use_pcre16) \
397 PCHARSV16(p, offset, len, f); \
398 else \
399 PCHARSV8(p, offset, len, f)
400
401 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
402 if (use_pcre16) \
403 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
404 else \
405 READ_CAPTURE_NAME8(p, cn8, cn16, re)
406
407 #define SET_PCRE_CALLOUT(callout) \
408 if (use_pcre16) \
409 SET_PCRE_CALLOUT16(callout); \
410 else \
411 SET_PCRE_CALLOUT8(callout)
412
413 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
414
415 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
416 if (use_pcre16) \
417 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
418 else \
419 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
420
421 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
422 if (use_pcre16) \
423 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
424 else \
425 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
426
427 #define PCRE_CONFIG pcre_config
428
429 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
430 namesptr, cbuffer, size) \
431 if (use_pcre16) \
432 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
433 namesptr, cbuffer, size); \
434 else \
435 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
436 namesptr, cbuffer, size)
437
438 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
439 if (use_pcre16) \
440 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
441 else \
442 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
443
444 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
445 offsets, size_offsets, workspace, size_workspace) \
446 if (use_pcre16) \
447 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
448 offsets, size_offsets, workspace, size_workspace); \
449 else \
450 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
451 offsets, size_offsets, workspace, size_workspace)
452
453 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
454 offsets, size_offsets) \
455 if (use_pcre16) \
456 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
457 offsets, size_offsets); \
458 else \
459 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
460 offsets, size_offsets)
461
462 #define PCRE_FREE_STUDY(extra) \
463 if (use_pcre16) \
464 PCRE_FREE_STUDY16(extra); \
465 else \
466 PCRE_FREE_STUDY8(extra)
467
468 #define PCRE_FREE_SUBSTRING(substring) \
469 if (use_pcre16) \
470 PCRE_FREE_SUBSTRING16(substring); \
471 else \
472 PCRE_FREE_SUBSTRING8(substring)
473
474 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
475 if (use_pcre16) \
476 PCRE_FREE_SUBSTRING_LIST16(listptr); \
477 else \
478 PCRE_FREE_SUBSTRING_LIST8(listptr)
479
480 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
481 getnamesptr, subsptr) \
482 if (use_pcre16) \
483 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
484 getnamesptr, subsptr); \
485 else \
486 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
487 getnamesptr, subsptr)
488
489 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
490 if (use_pcre16) \
491 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
492 else \
493 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
494
495 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
496 if (use_pcre16) \
497 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
498 else \
499 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
500
501 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
502 if (use_pcre16) \
503 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
504 else \
505 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
506
507 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
508 (use_pcre16 ? \
509 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
510 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
511
512 #define PCRE_JIT_STACK_FREE(stack) \
513 if (use_pcre16) \
514 PCRE_JIT_STACK_FREE16(stack); \
515 else \
516 PCRE_JIT_STACK_FREE8(stack)
517
518 #define PCRE_MAKETABLES \
519 (use_pcre16? pcre16_maketables() : pcre_maketables())
520
521 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
522 if (use_pcre16) \
523 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
524 else \
525 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
526
527 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
528 if (use_pcre16) \
529 PCRE_PRINTINT16(re, outfile, debug_lengths); \
530 else \
531 PCRE_PRINTINT8(re, outfile, debug_lengths)
532
533 #define PCRE_STUDY(extra, re, options, error) \
534 if (use_pcre16) \
535 PCRE_STUDY16(extra, re, options, error); \
536 else \
537 PCRE_STUDY8(extra, re, options, error)
538
539 /* ----- Only 8-bit mode is supported ----- */
540
541 #elif defined SUPPORT_PCRE8
542 #define CHAR_SIZE 1
543 #define PCHARS PCHARS8
544 #define PCHARSV PCHARSV8
545 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
546 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
547 #define STRLEN STRLEN8
548 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
549 #define PCRE_COMPILE PCRE_COMPILE8
550 #define PCRE_CONFIG pcre_config
551 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
553 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
554 #define PCRE_EXEC PCRE_EXEC8
555 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
556 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
557 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
558 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
559 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
560 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
561 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
562 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
563 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
564 #define PCRE_MAKETABLES pcre_maketables()
565 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566 #define PCRE_PRINTINT PCRE_PRINTINT8
567 #define PCRE_STUDY PCRE_STUDY8
568
569 /* ----- Only 16-bit mode is supported ----- */
570
571 #else
572 #define CHAR_SIZE 2
573 #define PCHARS PCHARS16
574 #define PCHARSV PCHARSV16
575 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
576 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
577 #define STRLEN STRLEN16
578 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
579 #define PCRE_COMPILE PCRE_COMPILE16
580 #define PCRE_CONFIG pcre16_config
581 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
583 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
584 #define PCRE_EXEC PCRE_EXEC16
585 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
586 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
587 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
588 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
589 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
590 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
591 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
592 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
593 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
594 #define PCRE_MAKETABLES pcre16_maketables()
595 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596 #define PCRE_PRINTINT PCRE_PRINTINT16
597 #define PCRE_STUDY PCRE_STUDY16
598 #endif
599
600 /* ----- End of mode-specific function call macros ----- */
601
602
603 /* Other parameters */
604
605 #ifndef CLOCKS_PER_SEC
606 #ifdef CLK_TCK
607 #define CLOCKS_PER_SEC CLK_TCK
608 #else
609 #define CLOCKS_PER_SEC 100
610 #endif
611 #endif
612
613 #if !defined NODFA
614 #define DFA_WS_DIMENSION 1000
615 #endif
616
617 /* This is the default loop count for timing. */
618
619 #define LOOPREPEAT 500000
620
621 /* Static variables */
622
623 static FILE *outfile;
624 static int log_store = 0;
625 static int callout_count;
626 static int callout_extra;
627 static int callout_fail_count;
628 static int callout_fail_id;
629 static int debug_lengths;
630 static int first_callout;
631 static int jit_was_used;
632 static int locale_set = 0;
633 static int show_malloc;
634 static int use_utf;
635 static size_t gotten_store;
636 static size_t first_gotten_store = 0;
637 static const unsigned char *last_callout_mark = NULL;
638
639 /* The buffers grow automatically if very long input lines are encountered. */
640
641 static int buffer_size = 50000;
642 static pcre_uint8 *buffer = NULL;
643 static pcre_uint8 *dbuffer = NULL;
644 static pcre_uint8 *pbuffer = NULL;
645
646 /* Another buffer is needed for translation to 16-bit character strings. It
647 will be obtained and extended as required. */
648
649 #ifdef SUPPORT_PCRE16
650 static int buffer16_size = 0;
651 static pcre_uint16 *buffer16 = NULL;
652
653 #ifdef SUPPORT_PCRE8
654
655 /* We need the table of operator lengths that is used for 16-bit compiling, in
656 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658 appropriately for the 16-bit world. Just as a safety check, make sure that
659 COMPILE_PCRE16 is *not* set. */
660
661 #ifdef COMPILE_PCRE16
662 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663 #endif
664
665 #if LINK_SIZE == 2
666 #undef LINK_SIZE
667 #define LINK_SIZE 1
668 #elif LINK_SIZE == 3 || LINK_SIZE == 4
669 #undef LINK_SIZE
670 #define LINK_SIZE 2
671 #else
672 #error LINK_SIZE must be either 2, 3, or 4
673 #endif
674
675 #undef IMM2_SIZE
676 #define IMM2_SIZE 1
677
678 #endif /* SUPPORT_PCRE8 */
679
680 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681 #endif /* SUPPORT_PCRE16 */
682
683 /* If we have 8-bit support, default use_pcre16 to false; if there is also
684 16-bit support, it can be changed by an option. If there is no 8-bit support,
685 there must be 16-bit support, so default it to 1. */
686
687 #ifdef SUPPORT_PCRE8
688 static int use_pcre16 = 0;
689 #else
690 static int use_pcre16 = 1;
691 #endif
692
693 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694
695 static int jit_study_bits[] = /* seven flag combinations, one per digit n; presumably indexed by n - 1 (confirm at the lookup site) */
696 {
697 PCRE_STUDY_JIT_COMPILE,
698 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699 PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701 PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703 PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
704 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705 };
706
707 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
708 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
709
710 /* Textual explanations for runtime error codes */
711
712 static const char *errtexts[] = { /* NULL entries have no generic text here; presumably indexed by the negated PCRE_ERROR_xxx code (confirm at the lookup site) */
713 NULL, /* 0 is no error */
714 NULL, /* NOMATCH is handled specially */
715 "NULL argument passed", /* [2] */
716 "bad option value", /* [3] */
717 "magic number missing", /* [4] */
718 "unknown opcode - pattern overwritten?", /* [5] */
719 "no more memory", /* [6] */
720 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
721 "match limit exceeded", /* [8] */
722 "callout error code", /* [9] */
723 NULL, /* BADUTF8/16 is handled specially */
724 NULL, /* BADUTF8/16 offset is handled specially */
725 NULL, /* PARTIAL is handled specially */
726 "not used - internal error", /* [13] */
727 "internal error - pattern overwritten?", /* [14] */
728 "bad count value", /* [15] */
729 "item unsupported for DFA matching", /* [16] */
730 "backreference condition or recursion test not supported for DFA matching", /* [17] */
731 "match limit not supported for DFA matching", /* [18] */
732 "workspace size exceeded in DFA matching", /* [19] */
733 "too much recursion for DFA matching", /* [20] */
734 "recursion limit exceeded", /* [21] */
735 "not used - internal error", /* [22] */
736 "invalid combination of newline options", /* [23] */
737 "bad offset value", /* [24] */
738 NULL, /* SHORTUTF8/16 is handled specially */
739 "nested recursion at the same subject position", /* [26] */
740 "JIT stack limit reached", /* [27] */
741 "pattern compiled in wrong mode: 8-bit/16-bit error", /* [28] */
742 "pattern compiled with other endianness", /* [29] */
743 "invalid data in workspace for DFA restart" /* [30] */
744 };
745
746
747 /*************************************************
748 * Alternate character tables *
749 *************************************************/
750
751 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
752 using the default tables of the library. However, the T option can be used to
753 select alternate sets of tables, for different kinds of testing. Note also that
754 the L (locale) option also adjusts the tables. */
755
756 /* This is the set of tables distributed as default with PCRE. It recognizes
757 only ASCII characters. */
758
759 static const pcre_uint8 tables0[] = {
760
761 /* This table is a lower casing table. */
762
763 0, 1, 2, 3, 4, 5, 6, 7,
764 8, 9, 10, 11, 12, 13, 14, 15,
765 16, 17, 18, 19, 20, 21, 22, 23,
766 24, 25, 26, 27, 28, 29, 30, 31,
767 32, 33, 34, 35, 36, 37, 38, 39,
768 40, 41, 42, 43, 44, 45, 46, 47,
769 48, 49, 50, 51, 52, 53, 54, 55,
770 56, 57, 58, 59, 60, 61, 62, 63,
771 64, 97, 98, 99,100,101,102,103,
772 104,105,106,107,108,109,110,111,
773 112,113,114,115,116,117,118,119,
774 120,121,122, 91, 92, 93, 94, 95,
775 96, 97, 98, 99,100,101,102,103,
776 104,105,106,107,108,109,110,111,
777 112,113,114,115,116,117,118,119,
778 120,121,122,123,124,125,126,127,
779 128,129,130,131,132,133,134,135,
780 136,137,138,139,140,141,142,143,
781 144,145,146,147,148,149,150,151,
782 152,153,154,155,156,157,158,159,
783 160,161,162,163,164,165,166,167,
784 168,169,170,171,172,173,174,175,
785 176,177,178,179,180,181,182,183,
786 184,185,186,187,188,189,190,191,
787 192,193,194,195,196,197,198,199,
788 200,201,202,203,204,205,206,207,
789 208,209,210,211,212,213,214,215,
790 216,217,218,219,220,221,222,223,
791 224,225,226,227,228,229,230,231,
792 232,233,234,235,236,237,238,239,
793 240,241,242,243,244,245,246,247,
794 248,249,250,251,252,253,254,255,
795
796 /* This table is a case flipping table. */
797
798 0, 1, 2, 3, 4, 5, 6, 7,
799 8, 9, 10, 11, 12, 13, 14, 15,
800 16, 17, 18, 19, 20, 21, 22, 23,
801 24, 25, 26, 27, 28, 29, 30, 31,
802 32, 33, 34, 35, 36, 37, 38, 39,
803 40, 41, 42, 43, 44, 45, 46, 47,
804 48, 49, 50, 51, 52, 53, 54, 55,
805 56, 57, 58, 59, 60, 61, 62, 63,
806 64, 97, 98, 99,100,101,102,103,
807 104,105,106,107,108,109,110,111,
808 112,113,114,115,116,117,118,119,
809 120,121,122, 91, 92, 93, 94, 95,
810 96, 65, 66, 67, 68, 69, 70, 71,
811 72, 73, 74, 75, 76, 77, 78, 79,
812 80, 81, 82, 83, 84, 85, 86, 87,
813 88, 89, 90,123,124,125,126,127,
814 128,129,130,131,132,133,134,135,
815 136,137,138,139,140,141,142,143,
816 144,145,146,147,148,149,150,151,
817 152,153,154,155,156,157,158,159,
818 160,161,162,163,164,165,166,167,
819 168,169,170,171,172,173,174,175,
820 176,177,178,179,180,181,182,183,
821 184,185,186,187,188,189,190,191,
822 192,193,194,195,196,197,198,199,
823 200,201,202,203,204,205,206,207,
824 208,209,210,211,212,213,214,215,
825 216,217,218,219,220,221,222,223,
826 224,225,226,227,228,229,230,231,
827 232,233,234,235,236,237,238,239,
828 240,241,242,243,244,245,246,247,
829 248,249,250,251,252,253,254,255,
830
831 /* This table contains bit maps for various character classes. Each map is 32
832 bytes long and the bits run from the least significant end of each byte. The
833 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
834 graph, print, punct, and cntrl. Other classes are built from combinations. */
835
836 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
837 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840
841 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
842 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845
846 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
858 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860
861 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
862 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865
866 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
867 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
868 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870
871 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
872 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875
876 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
877 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880
881 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885
886 /* This table identifies various classes of character by individual bits:
887 0x01 white space character
888 0x02 letter
889 0x04 decimal digit
890 0x08 hexadecimal digit
891 0x10 alphanumeric or '_'
892 0x80 regular expression metacharacter or binary zero
893 */
894
895 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
896 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
897 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
898 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
899 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
900 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
901 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
902 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
903 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
904 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
905 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
906 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
907 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
908 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
909 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
910 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
911 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
912 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
913 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
914 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
915 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
924 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
925 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
926 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
927
928 /* This is a set of tables that came originally from a Windows user. It seems to
929 be at least an approximation of ISO 8859. In particular, there are characters
930 greater than 128 that are marked as spaces, letters, etc. */
931
/* Alternative character tables: 1088 bytes laid out as four consecutive
sections (256 + 256 + 320 + 256 bytes), in the same order as the sections of
the table that precedes this one in the file. Values are written in decimal. */
932 static const pcre_uint8 tables1[] = {
/* Section 1 (256 bytes): lower-casing table. 65-90 map to 97-122, and 192-214
and 216-222 map to 224-246 and 248-254; every other value maps to itself. */
933 0,1,2,3,4,5,6,7,
934 8,9,10,11,12,13,14,15,
935 16,17,18,19,20,21,22,23,
936 24,25,26,27,28,29,30,31,
937 32,33,34,35,36,37,38,39,
938 40,41,42,43,44,45,46,47,
939 48,49,50,51,52,53,54,55,
940 56,57,58,59,60,61,62,63,
941 64,97,98,99,100,101,102,103,
942 104,105,106,107,108,109,110,111,
943 112,113,114,115,116,117,118,119,
944 120,121,122,91,92,93,94,95,
945 96,97,98,99,100,101,102,103,
946 104,105,106,107,108,109,110,111,
947 112,113,114,115,116,117,118,119,
948 120,121,122,123,124,125,126,127,
949 128,129,130,131,132,133,134,135,
950 136,137,138,139,140,141,142,143,
951 144,145,146,147,148,149,150,151,
952 152,153,154,155,156,157,158,159,
953 160,161,162,163,164,165,166,167,
954 168,169,170,171,172,173,174,175,
955 176,177,178,179,180,181,182,183,
956 184,185,186,187,188,189,190,191,
957 224,225,226,227,228,229,230,231,
958 232,233,234,235,236,237,238,239,
959 240,241,242,243,244,245,246,215,
960 248,249,250,251,252,253,254,223,
961 224,225,226,227,228,229,230,231,
962 232,233,234,235,236,237,238,239,
963 240,241,242,243,244,245,246,247,
964 248,249,250,251,252,253,254,255,
/* Section 2 (256 bytes): case-flipping table. The ranges that section 1 folds
are swapped in both directions here (65-90 <-> 97-122, 192-214 <-> 224-246,
216-222 <-> 248-254); 215, 223, 247 and 255 map to themselves. */
965 0,1,2,3,4,5,6,7,
966 8,9,10,11,12,13,14,15,
967 16,17,18,19,20,21,22,23,
968 24,25,26,27,28,29,30,31,
969 32,33,34,35,36,37,38,39,
970 40,41,42,43,44,45,46,47,
971 48,49,50,51,52,53,54,55,
972 56,57,58,59,60,61,62,63,
973 64,97,98,99,100,101,102,103,
974 104,105,106,107,108,109,110,111,
975 112,113,114,115,116,117,118,119,
976 120,121,122,91,92,93,94,95,
977 96,65,66,67,68,69,70,71,
978 72,73,74,75,76,77,78,79,
979 80,81,82,83,84,85,86,87,
980 88,89,90,123,124,125,126,127,
981 128,129,130,131,132,133,134,135,
982 136,137,138,139,140,141,142,143,
983 144,145,146,147,148,149,150,151,
984 152,153,154,155,156,157,158,159,
985 160,161,162,163,164,165,166,167,
986 168,169,170,171,172,173,174,175,
987 176,177,178,179,180,181,182,183,
988 184,185,186,187,188,189,190,191,
989 224,225,226,227,228,229,230,231,
990 232,233,234,235,236,237,238,239,
991 240,241,242,243,244,245,246,215,
992 248,249,250,251,252,253,254,223,
993 192,193,194,195,196,197,198,199,
994 200,201,202,203,204,205,206,207,
995 208,209,210,211,212,213,214,247,
996 216,217,218,219,220,221,222,255,
/* Section 3 (320 bytes): ten 32-byte character class bit maps, four source
lines per map. NOTE(review): presumably in the same class order as the bit map
section of the preceding table (space, xdigit, digit, upper, lower, word,
graph, print, punct, cntrl) - confirm. Unlike that table, several maps here
have bits set for values above 127. */
997 0,62,0,0,1,0,0,0,
998 0,0,0,0,0,0,0,0,
999 32,0,0,0,1,0,0,0,
1000 0,0,0,0,0,0,0,0,
1001 0,0,0,0,0,0,255,3,
1002 126,0,0,0,126,0,0,0,
1003 0,0,0,0,0,0,0,0,
1004 0,0,0,0,0,0,0,0,
1005 0,0,0,0,0,0,255,3,
1006 0,0,0,0,0,0,0,0,
1007 0,0,0,0,0,0,12,2,
1008 0,0,0,0,0,0,0,0,
1009 0,0,0,0,0,0,0,0,
1010 254,255,255,7,0,0,0,0,
1011 0,0,0,0,0,0,0,0,
1012 255,255,127,127,0,0,0,0,
1013 0,0,0,0,0,0,0,0,
1014 0,0,0,0,254,255,255,7,
1015 0,0,0,0,0,4,32,4,
1016 0,0,0,128,255,255,127,255,
1017 0,0,0,0,0,0,255,3,
1018 254,255,255,135,254,255,255,7,
1019 0,0,0,0,0,4,44,6,
1020 255,255,127,255,255,255,127,255,
1021 0,0,0,0,254,255,255,255,
1022 255,255,255,255,255,255,255,127,
1023 0,0,0,0,254,255,255,255,
1024 255,255,255,255,255,255,255,255,
1025 0,2,0,0,255,255,255,255,
1026 255,255,255,255,255,255,255,127,
1027 0,0,0,0,255,255,255,255,
1028 255,255,255,255,255,255,255,255,
1029 0,0,0,0,254,255,0,252,
1030 1,0,0,248,1,0,0,120,
1031 0,0,0,0,254,255,255,255,
1032 0,0,128,0,0,0,128,0,
1033 255,255,255,255,0,0,0,0,
1034 0,0,0,0,0,0,0,128,
1035 255,255,255,255,0,0,0,0,
1036 0,0,0,0,0,0,0,0,
/* Section 4 (256 bytes): per-character type flags, one byte per character
value. NOTE(review): the bit meanings are presumably those documented for the
corresponding section of the preceding table (0x01 space, 0x02 letter,
0x04 digit, 0x08 hex digit, 0x10 alphanumeric or '_', 0x80 metacharacter or
binary zero) - confirm. In decimal: 28 = 0x1c, 26 = 0x1a, 18 = 0x12,
128 = 0x80. */
1037 128,0,0,0,0,0,0,0,
1038 0,1,1,0,1,1,0,0,
1039 0,0,0,0,0,0,0,0,
1040 0,0,0,0,0,0,0,0,
1041 1,0,0,0,128,0,0,0,
1042 128,128,128,128,0,0,128,0,
1043 28,28,28,28,28,28,28,28,
1044 28,28,0,0,0,0,0,128,
1045 0,26,26,26,26,26,26,18,
1046 18,18,18,18,18,18,18,18,
1047 18,18,18,18,18,18,18,18,
1048 18,18,18,128,128,0,128,16,
1049 0,26,26,26,26,26,26,18,
1050 18,18,18,18,18,18,18,18,
1051 18,18,18,18,18,18,18,18,
1052 18,18,18,128,128,0,0,0,
1053 0,0,0,0,0,1,0,0,
1054 0,0,0,0,0,0,0,0,
1055 0,0,0,0,0,0,0,0,
1056 0,0,0,0,0,0,0,0,
1057 1,0,0,0,0,0,0,0,
1058 0,0,18,0,0,0,0,0,
1059 0,0,20,20,0,18,0,0,
1060 0,20,18,0,0,0,0,0,
1061 18,18,18,18,18,18,18,18,
1062 18,18,18,18,18,18,18,18,
1063 18,18,18,18,18,18,18,0,
1064 18,18,18,18,18,18,18,18,
1065 18,18,18,18,18,18,18,18,
1066 18,18,18,18,18,18,18,18,
1067 18,18,18,18,18,18,18,0,
1068 18,18,18,18,18,18,18,18
1069 };
1070
1071
1072
1073
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (e.g. SunOS4).
The message is looked up in the system's sys_errlist[] table, which holds
sys_nerr entries.

Argument:
  n         the error number

Returns:    the table entry for n, or a fixed "unknown" text when n lies
            outside the range 0 .. sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1093
1094
1095 /*************************************************
1096 * JIT memory callback *
1097 *************************************************/
1098
1099 static pcre_jit_stack* jit_callback(void *arg)
1100 {
1101 jit_was_used = TRUE;
1102 return (pcre_jit_stack *)arg;
1103 }
1104
1105
1106 #if !defined NOUTF || defined SUPPORT_PCRE16
1107 /*************************************************
1108 * Convert UTF-8 string to value *
1109 *************************************************/
1110
1111 /* This function takes one or more bytes that represents a UTF-8 character,
1112 and returns the value of the character.
1113
1114 Argument:
1115 utf8bytes a pointer to the byte vector
1116 vptr a pointer to an int to receive the value
1117
1118 Returns: > 0 => the number of bytes consumed
1119 -6 to 0 => malformed UTF-8 character at offset = (-return)
1120 */
1121
1122 static int
1123 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1124 {
1125 int c = *utf8bytes++;
1126 int d = c;
1127 int i, j, s;
1128
1129 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1130 {
1131 if ((d & 0x80) == 0) break;
1132 d <<= 1;
1133 }
1134
1135 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1136 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1137
1138 /* i now has a value in the range 1-5 */
1139
1140 s = 6*i;
1141 d = (c & utf8_table3[i]) << s;
1142
1143 for (j = 0; j < i; j++)
1144 {
1145 c = *utf8bytes++;
1146 if ((c & 0xc0) != 0x80) return -(j+1);
1147 s -= 6;
1148 d |= (c & 0x3f) << s;
1149 }
1150
1151 /* Check that encoding was the correct unique one */
1152
1153 for (j = 0; j < utf8_table1_size; j++)
1154 if (d <= utf8_table1[j]) break;
1155 if (j != i) return -(i+1);
1156
1157 /* Valid value */
1158
1159 *vptr = d;
1160 return i+1;
1161 }
1162 #endif /* NOUTF || SUPPORT_PCRE16 */
1163
1164
1165
1166 #if !defined NOUTF || defined SUPPORT_PCRE16
1167 /*************************************************
1168 * Convert character value to UTF-8 *
1169 *************************************************/
1170
1171 /* This function takes an integer value in the range 0 - 0x7fffffff
1172 and encodes it as a UTF-8 character in 0 to 6 bytes.
1173
1174 Arguments:
1175 cvalue the character value
1176 utf8bytes pointer to buffer for result - at least 6 bytes long
1177
1178 Returns: number of characters placed in the buffer
1179 */
1180
1181 static int
1182 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1183 {
1184 register int i, j;
1185 for (i = 0; i < utf8_table1_size; i++)
1186 if (cvalue <= utf8_table1[i]) break;
1187 utf8bytes += i;
1188 for (j = i; j > 0; j--)
1189 {
1190 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1191 cvalue >>= 6;
1192 }
1193 *utf8bytes = utf8_table2[i] | cvalue;
1194 return i + 1;
1195 }
1196 #endif
1197
1198
1199 #ifdef SUPPORT_PCRE16
1200 /*************************************************
1201 * Convert a string to 16-bit *
1202 *************************************************/
1203
1204 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1205 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1206 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1207 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1208 result is always left in buffer16.
1209
1210 Note that this function does not object to surrogate values. This is
1211 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1212 for the purpose of testing that they are correctly faulted.
1213
1214 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1215 in UTF-8 so that values greater than 255 can be handled.
1216
1217 Arguments:
1218 data TRUE if converting a data line; FALSE for a regex
1219 p points to a byte string
1220 utf true if UTF-8 (to be converted to UTF-16)
1221 len number of bytes in the string (excluding trailing zero)
1222
1223 Returns: number of 16-bit data items used (excluding trailing zero)
1224 OR -1 if a UTF-8 string is malformed
1225 OR -2 if a value > 0x10ffff is encountered
1226 OR -3 if a value > 0xffff is encountered when not in UTF mode
1227 */
1228
1229 static int
1230 to16(int data, pcre_uint8 *p, int utf, int len)
1231 {
1232 pcre_uint16 *pp;
1233
1234 if (buffer16_size < 2*len + 2)
1235 {
1236 if (buffer16 != NULL) free(buffer16);
1237 buffer16_size = 2*len + 2;
1238 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1239 if (buffer16 == NULL)
1240 {
1241 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1242 exit(1);
1243 }
1244 }
1245
1246 pp = buffer16;
1247
1248 if (!utf && !data)
1249 {
1250 while (len-- > 0) *pp++ = *p++;
1251 }
1252
1253 else
1254 {
1255 int c = 0;
1256 while (len > 0)
1257 {
1258 int chlen = utf82ord(p, &c);
1259 if (chlen <= 0) return -1;
1260 if (c > 0x10ffff) return -2;
1261 p += chlen;
1262 len -= chlen;
1263 if (c < 0x10000) *pp++ = c; else
1264 {
1265 if (!utf) return -3;
1266 c -= 0x10000;
1267 *pp++ = 0xD800 | (c >> 10);
1268 *pp++ = 0xDC00 | (c & 0x3ff);
1269 }
1270 }
1271 }
1272
1273 *pp = 0;
1274 return pp - buffer16;
1275 }
1276 #endif
1277
1278
1279 /*************************************************
1280 * Read or extend an input line *
1281 *************************************************/
1282
1283 /* Input lines are read into buffer, but both patterns and data lines can be
1284 continued over multiple input lines. In addition, if the buffer fills up, we
1285 want to automatically expand it so as to be able to handle extremely large
1286 lines that are needed for certain stress tests. When the input buffer is
1287 expanded, the other two buffers must also be expanded likewise, and the
1288 contents of pbuffer, which are a copy of the input for callouts, must be
1289 preserved (for when expansion happens for a data line). This is not the most
1290 optimal way of handling this, but hey, this is just a test program!
1291
1292 Arguments:
1293 f the file to read
1294 start where in buffer to start (this *must* be within buffer)
1295 prompt for stdin or readline()
1296
1297 Returns: pointer to the start of new data
1298 could be a copy of start, or could be moved
1299 NULL if no data read and EOF reached
1300 */
1301
1302 static pcre_uint8 *
1303 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1304 {
1305 pcre_uint8 *here = start;
1306
1307 for (;;)
1308 {
1309 size_t rlen = (size_t)(buffer_size - (here - buffer));
1310
1311 if (rlen > 1000)
1312 {
1313 int dlen;
1314
1315 /* If libreadline or libedit support is required, use readline() to read a
1316 line if the input is a terminal. Note that readline() removes the trailing
1317 newline, so we must put it back again, to be compatible with fgets(). */
1318
1319 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1320 if (isatty(fileno(f)))
1321 {
1322 size_t len;
1323 char *s = readline(prompt);
1324 if (s == NULL) return (here == start)? NULL : start;
1325 len = strlen(s);
1326 if (len > 0) add_history(s);
1327 if (len > rlen - 1) len = rlen - 1;
1328 memcpy(here, s, len);
1329 here[len] = '\n';
1330 here[len+1] = 0;
1331 free(s);
1332 }
1333 else
1334 #endif
1335
1336 /* Read the next line by normal means, prompting if the file is stdin. */
1337
1338 {
1339 if (f == stdin) printf("%s", prompt);
1340 if (fgets((char *)here, rlen, f) == NULL)
1341 return (here == start)? NULL : start;
1342 }
1343
1344 dlen = (int)strlen((char *)here);
1345 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1346 here += dlen;
1347 }
1348
1349 else
1350 {
1351 int new_buffer_size = 2*buffer_size;
1352 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1353 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1354 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1355
1356 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1357 {
1358 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1359 exit(1);
1360 }
1361
1362 memcpy(new_buffer, buffer, buffer_size);
1363 memcpy(new_pbuffer, pbuffer, buffer_size);
1364
1365 buffer_size = new_buffer_size;
1366
1367 start = new_buffer + (start - buffer);
1368 here = new_buffer + (here - buffer);
1369
1370 free(buffer);
1371 free(dbuffer);
1372 free(pbuffer);
1373
1374 buffer = new_buffer;
1375 dbuffer = new_dbuffer;
1376 pbuffer = new_pbuffer;
1377 }
1378 }
1379
1380 return NULL; /* Control never gets here */
1381 }
1382
1383
1384
1385 /*************************************************
1386 * Read number from string *
1387 *************************************************/
1388
1389 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1390 around with conditional compilation, just do the job by hand. It is only used
1391 for unpicking arguments, so just keep it simple.
1392
1393 Arguments:
1394 str string to be converted
1395 endptr where to put the end pointer
1396
1397 Returns: the unsigned long
1398 */
1399
1400 static int
1401 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1402 {
1403 int result = 0;
1404 while(*str != 0 && isspace(*str)) str++;
1405 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1406 *endptr = str;
1407 return(result);
1408 }
1409
1410
1411
1412 /*************************************************
1413 * Print one character *
1414 *************************************************/
1415
1416 /* Print a single character either literally, or as a hex escape. */
1417
1418 static int pchar(int c, FILE *f)
1419 {
1420 if (PRINTOK(c))
1421 {
1422 if (f != NULL) fprintf(f, "%c", c);
1423 return 1;
1424 }
1425
1426 if (c < 0x100)
1427 {
1428 if (use_utf)
1429 {
1430 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1431 return 6;
1432 }
1433 else
1434 {
1435 if (f != NULL) fprintf(f, "\\x%02x", c);
1436 return 4;
1437 }
1438 }
1439
1440 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1441 return (c <= 0x000000ff)? 6 :
1442 (c <= 0x00000fff)? 7 :
1443 (c <= 0x0000ffff)? 8 :
1444 (c <= 0x000fffff)? 9 : 10;
1445 }
1446
1447
1448
1449 #ifdef SUPPORT_PCRE8
1450 /*************************************************
1451 * Print 8-bit character string *
1452 *************************************************/
1453
1454 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1455 If handed a NULL file, just counts chars without printing. */
1456
1457 static int pchars(pcre_uint8 *p, int length, FILE *f)
1458 {
1459 int c = 0;
1460 int yield = 0;
1461
1462 if (length < 0)
1463 length = strlen((char *)p);
1464
1465 while (length-- > 0)
1466 {
1467 #if !defined NOUTF
1468 if (use_utf)
1469 {
1470 int rc = utf82ord(p, &c);
1471 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1472 {
1473 length -= rc - 1;
1474 p += rc;
1475 yield += pchar(c, f);
1476 continue;
1477 }
1478 }
1479 #endif
1480 c = *p++;
1481 yield += pchar(c, f);
1482 }
1483
1484 return yield;
1485 }
1486 #endif
1487
1488
1489
1490 #ifdef SUPPORT_PCRE16
1491 /*************************************************
1492 * Find length of 0-terminated 16-bit string *
1493 *************************************************/
1494
1495 static int strlen16(PCRE_SPTR16 p)
1496 {
1497 int len = 0;
1498 while (*p++ != 0) len++;
1499 return len;
1500 }
1501 #endif /* SUPPORT_PCRE16 */
1502
1503
1504 #ifdef SUPPORT_PCRE16
1505 /*************************************************
1506 * Print 16-bit character string *
1507 *************************************************/
1508
1509 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1510 If handed a NULL file, just counts chars without printing. */
1511
1512 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1513 {
1514 int yield = 0;
1515
1516 if (length < 0)
1517 length = strlen16(p);
1518
1519 while (length-- > 0)
1520 {
1521 int c = *p++ & 0xffff;
1522 #if !defined NOUTF
1523 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1524 {
1525 int d = *p & 0xffff;
1526 if (d >= 0xDC00 && d < 0xDFFF)
1527 {
1528 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1529 length--;
1530 p++;
1531 }
1532 }
1533 #endif
1534 yield += pchar(c, f);
1535 }
1536
1537 return yield;
1538 }
1539 #endif /* SUPPORT_PCRE16 */
1540
1541
1542
1543 #ifdef SUPPORT_PCRE8
1544 /*************************************************
1545 * Read a capture name (8-bit) and check it *
1546 *************************************************/
1547
1548 static pcre_uint8 *
1549 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1550 {
1551 pcre_uint8 *npp = *pp;
1552 while (isalnum(*p)) *npp++ = *p++;
1553 *npp++ = 0;
1554 *npp = 0;
1555 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1556 {
1557 fprintf(outfile, "no parentheses with name \"");
1558 PCHARSV(*pp, 0, -1, outfile);
1559 fprintf(outfile, "\"\n");
1560 }
1561
1562 *pp = npp;
1563 return p;
1564 }
1565 #endif /* SUPPORT_PCRE8 */
1566
1567
1568
1569 #ifdef SUPPORT_PCRE16
1570 /*************************************************
1571 * Read a capture name (16-bit) and check it *
1572 *************************************************/
1573
1574 /* Note that the text being read is 8-bit. */
1575
1576 static pcre_uint8 *
1577 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1578 {
1579 pcre_uint16 *npp = *pp;
1580 while (isalnum(*p)) *npp++ = *p++;
1581 *npp++ = 0;
1582 *npp = 0;
1583 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1584 {
1585 fprintf(outfile, "no parentheses with name \"");
1586 PCHARSV(*pp, 0, -1, outfile);
1587 fprintf(outfile, "\"\n");
1588 }
1589 *pp = npp;
1590 return p;
1591 }
1592 #endif /* SUPPORT_PCRE16 */
1593
1594
1595
1596 /*************************************************
1597 * Callout function *
1598 *************************************************/
1599
1600 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1601 the match. Yield zero unless more callouts than the fail count, or the callout
1602 data is not zero. */
1603
/* Prints the current match position for a callout and decides its result.
Argument: cb is the callout block passed by the matcher.
Returns: the non-zero callout data value if one was supplied; otherwise 1 when
this callout's number equals callout_fail_id and the incremented callout_count
has reached callout_fail_count; otherwise 0. */
1604 static int callout(pcre_callout_block *cb)
1605 {
/* f is outfile on the first callout of a match or when extra detail is
requested, otherwise NULL. With a NULL file the printing helpers are expected
to measure only - NOTE(review): confirm against the PCHARS/PCHARSV macros,
which are defined outside this block. */
1606 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1607 int i, pre_start, post_start, subject_length;
1608
/* Extra detail: list every capture slot below capture_top, showing <unset>
for slots whose start offset is negative. */
1609 if (callout_extra)
1610 {
1611 fprintf(f, "Callout %d: last capture = %d\n",
1612 cb->callout_number, cb->capture_last);
1613
1614 for (i = 0; i < cb->capture_top * 2; i += 2)
1615 {
1616 if (cb->offset_vector[i] < 0)
1617 fprintf(f, "%2d: <unset>\n", i/2);
1618 else
1619 {
1620 fprintf(f, "%2d: ", i/2);
1621 PCHARSV(cb->subject, cb->offset_vector[i],
1622 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1623 fprintf(f, "\n");
1624 }
1625 }
1626 }
1627
1628 /* Re-print the subject in canonical form, the first time or if giving full
1629 details. On subsequent calls in the same match, we use pchars just to find the
1630 printed lengths of the substrings. */
1631
1632 if (f != NULL) fprintf(f, "--->");
1633
/* pre_start and post_start are used below as the printed widths of the text
before the match start and of the text matched so far; subject_length as the
printed width of the whole subject (always taken with a NULL file). */
1634 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1635 PCHARS(post_start, cb->subject, cb->start_match,
1636 cb->current_position - cb->start_match, f);
1637
1638 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1639
1640 PCHARSV(cb->subject, cb->current_position,
1641 cb->subject_length - cb->current_position, f);
1642
1643 if (f != NULL) fprintf(f, "\n");
1644
1645 /* Always print appropriate indicators, with callout number if not already
1646 shown. For automatic callouts, show the pattern offset. */
1647
/* Callout number 255 is the one treated as an automatic callout here. */
1648 if (cb->callout_number == 255)
1649 {
1650 fprintf(outfile, "%+3d ", cb->pattern_position);
1651 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1652 }
1653 else
1654 {
1655 if (callout_extra) fprintf(outfile, " ");
1656 else fprintf(outfile, "%3d ", cb->callout_number);
1657 }
1658
/* First caret: pad by the printed width of the text before the match start. */
1659 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1660 fprintf(outfile, "^");
1661
/* Second caret, only when something has been matched so far. */
1662 if (post_start > 0)
1663 {
1664 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1665 fprintf(outfile, "^");
1666 }
1667
/* Pad out past the end of the subject, then show the next pattern item taken
from pbuffer (one character when next_item_length is zero). */
1668 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1669 fprintf(outfile, " ");
1670
1671 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1672 pbuffer + cb->pattern_position);
1673
1674 fprintf(outfile, "\n");
1675 first_callout = 0;
1676
/* Report the mark only when the pointer differs from the one seen at the
previous callout. */
1677 if (cb->mark != last_callout_mark)
1678 {
1679 if (cb->mark == NULL)
1680 fprintf(outfile, "Latest Mark: <unset>\n");
1681 else
1682 {
1683 fprintf(outfile, "Latest Mark: ");
1684 PCHARSV(cb->mark, 0, -1, outfile);
1685 putc('\n', outfile);
1686 }
1687 last_callout_mark = cb->mark;
1688 }
1689
/* Non-zero callout data is printed and returned as the result, taking
precedence over the fail-count logic below. */
1690 if (cb->callout_data != NULL)
1691 {
1692 int callout_data = *((int *)(cb->callout_data));
1693 if (callout_data != 0)
1694 {
1695 fprintf(outfile, "Callout data = %d\n", callout_data);
1696 return callout_data;
1697 }
1698 }
1699
/* callout_count is incremented only for callouts whose number matches
callout_fail_id. */
1700 return (cb->callout_number != callout_fail_id)? 0 :
1701 (++callout_count >= callout_fail_count)? 1 : 0;
1702 }
1703
1704
1705 /*************************************************
1706 * Local malloc functions *
1707 *************************************************/
1708
1709 /* Alternative malloc function, to test functionality and save the size of a
1710 compiled re, which is the first store request that pcre_compile() makes. The
1711 show_malloc variable is set only during matching. */
1712
1713 static void *new_malloc(size_t size)
1714 {
1715 void *block = malloc(size);
1716 gotten_store = size;
1717 if (first_gotten_store == 0) first_gotten_store = size;
1718 if (show_malloc)
1719 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1720 return block;
1721 }
1722
1723 static void new_free(void *block)
1724 {
1725 if (show_malloc)
1726 fprintf(outfile, "free %p\n", block);
1727 free(block);
1728 }
1729
1730 /* For recursion malloc/free, to test stacking calls */
1731
1732 static void *stack_malloc(size_t size)
1733 {
1734 void *block = malloc(size);
1735 if (show_malloc)
1736 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1737 return block;
1738 }
1739
1740 static void stack_free(void *block)
1741 {
1742 if (show_malloc)
1743 fprintf(outfile, "stack_free %p\n", block);
1744 free(block);
1745 }
1746
1747
1748 /*************************************************
1749 * Call pcre_fullinfo() *
1750 *************************************************/
1751
1752 /* Get one piece of information from the pcre_fullinfo() function. When only
1753 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1754 value, but the code is defensive.
1755
1756 Arguments:
1757 re compiled regex
1758 study study data
1759 option PCRE_INFO_xxx option
1760 ptr where to put the data
1761
1762 Returns: 0 when OK, < 0 on error
1763 */
1764
1765 static int
1766 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1767 {
1768 int rc;
1769
1770 if (use_pcre16)
1771 #ifdef SUPPORT_PCRE16
1772 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1773 #else
1774 rc = PCRE_ERROR_BADMODE;
1775 #endif
1776 else
1777 #ifdef SUPPORT_PCRE8
1778 rc = pcre_fullinfo(re, study, option, ptr);
1779 #else
1780 rc = PCRE_ERROR_BADMODE;
1781 #endif
1782
1783 if (rc < 0)
1784 {
1785 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1786 use_pcre16? "16" : "", option);
1787 if (rc == PCRE_ERROR_BADMODE)
1788 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1789 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1790 }
1791
1792 return rc;
1793 }
1794
1795
1796
1797 /*************************************************
1798 * Swap byte functions *
1799 *************************************************/
1800
1801 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1802 value, respectively.
1803
1804 Arguments:
1805 value any number
1806
1807 Returns: the byte swapped value
1808 */
1809
1810 static pcre_uint32
1811 swap_uint32(pcre_uint32 value)
1812 {
1813 return ((value & 0x000000ff) << 24) |
1814 ((value & 0x0000ff00) << 8) |
1815 ((value & 0x00ff0000) >> 8) |
1816 (value >> 24);
1817 }
1818
1819 static pcre_uint16
1820 swap_uint16(pcre_uint16 value)
1821 {
1822 return (value >> 8) | (value << 8);
1823 }
1824
1825
1826
1827 /*************************************************
1828 * Flip bytes in a compiled pattern *
1829 *************************************************/
1830
1831 /* This function is called if the 'F' option was present on a pattern that is
1832 to be written to a file. We flip the bytes of all the integer fields in the
1833 regex data block and the study block. In 16-bit mode this also flips relevant
1834 bytes in the pattern itself. This is to make it possible to test PCRE's
1835 ability to reload byte-flipped patterns, e.g. those compiled on a different
1836 architecture. */
1837
1838 static void
1839 regexflip(pcre *ere, pcre_extra *extra)
1840 {
1841 REAL_PCRE *re = (REAL_PCRE *)ere;
1842 #ifdef SUPPORT_PCRE16
1843 int op;
1844 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1845 int length = re->name_count * re->name_entry_size;
1846 #ifdef SUPPORT_UTF
1847 BOOL utf = (re->options & PCRE_UTF16) != 0;
1848 BOOL utf16_char = FALSE;
1849 #endif /* SUPPORT_UTF */
1850 #endif /* SUPPORT_PCRE16 */
1851
1852 /* Always flip the bytes in the main data block and study blocks. */
1853
1854 re->magic_number = REVERSED_MAGIC_NUMBER;
1855 re->size = swap_uint32(re->size);
1856 re->options = swap_uint32(re->options);
1857 re->flags = swap_uint16(re->flags);
1858 re->top_bracket = swap_uint16(re->top_bracket);
1859 re->top_backref = swap_uint16(re->top_backref);
1860 re->first_char = swap_uint16(re->first_char);
1861 re->req_char = swap_uint16(re->req_char);
1862 re->name_table_offset = swap_uint16(re->name_table_offset);
1863 re->name_entry_size = swap_uint16(re->name_entry_size);
1864 re->name_count = swap_uint16(re->name_count);
1865
1866 if (extra != NULL)
1867 {
1868 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1869 rsd->size = swap_uint32(rsd->size);
1870 rsd->flags = swap_uint32(rsd->flags);
1871 rsd->minlength = swap_uint32(rsd->minlength);
1872 }
1873
1874 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1875 in the name table, if present, and then in the pattern itself. */
1876
1877 #ifdef SUPPORT_PCRE16
1878 if (!use_pcre16) return;
1879
1880 while(TRUE)
1881 {
1882 /* Swap previous characters. */
1883 while (length-- > 0)
1884 {
1885 *ptr = swap_uint16(*ptr);
1886 ptr++;
1887 }
1888 #ifdef SUPPORT_UTF
1889 if (utf16_char)
1890 {
1891 if ((ptr[-1] & 0xfc00) == 0xd800)
1892 {
1893 /* We know that there is only one extra character in UTF-16. */
1894 *ptr = swap_uint16(*ptr);
1895 ptr++;
1896 }
1897 }
1898 utf16_char = FALSE;
1899 #endif /* SUPPORT_UTF */
1900
1901 /* Get next opcode. */
1902
1903 length = 0;
1904 op = *ptr;
1905 *ptr++ = swap_uint16(op);
1906
1907 switch (op)
1908 {
1909 case OP_END:
1910 return;
1911
1912 #ifdef SUPPORT_UTF
1913 case OP_CHAR:
1914 case OP_CHARI:
1915 case OP_NOT:
1916 case OP_NOTI:
1917 case OP_STAR:
1918 case OP_MINSTAR:
1919 case OP_PLUS:
1920 case OP_MINPLUS:
1921 case OP_QUERY:
1922 case OP_MINQUERY:
1923 case OP_UPTO:
1924 case OP_MINUPTO:
1925 case OP_EXACT:
1926 case OP_POSSTAR:
1927 case OP_POSPLUS:
1928 case OP_POSQUERY:
1929 case OP_POSUPTO:
1930 case OP_STARI:
1931 case OP_MINSTARI:
1932 case OP_PLUSI:
1933 case OP_MINPLUSI:
1934 case OP_QUERYI:
1935 case OP_MINQUERYI:
1936 case OP_UPTOI:
1937 case OP_MINUPTOI:
1938 case OP_EXACTI:
1939 case OP_POSSTARI:
1940 case OP_POSPLUSI:
1941 case OP_POSQUERYI:
1942 case OP_POSUPTOI:
1943 case OP_NOTSTAR:
1944 case OP_NOTMINSTAR:
1945 case OP_NOTPLUS:
1946 case OP_NOTMINPLUS:
1947 case OP_NOTQUERY:
1948 case OP_NOTMINQUERY:
1949 case OP_NOTUPTO:
1950 case OP_NOTMINUPTO:
1951 case OP_NOTEXACT:
1952 case OP_NOTPOSSTAR:
1953 case OP_NOTPOSPLUS:
1954 case OP_NOTPOSQUERY:
1955 case OP_NOTPOSUPTO:
1956 case OP_NOTSTARI:
1957 case OP_NOTMINSTARI:
1958 case OP_NOTPLUSI:
1959 case OP_NOTMINPLUSI:
1960 case OP_NOTQUERYI:
1961 case OP_NOTMINQUERYI:
1962 case OP_NOTUPTOI:
1963 case OP_NOTMINUPTOI:
1964 case OP_NOTEXACTI:
1965 case OP_NOTPOSSTARI:
1966 case OP_NOTPOSPLUSI:
1967 case OP_NOTPOSQUERYI:
1968 case OP_NOTPOSUPTOI:
1969 if (utf) utf16_char = TRUE;
1970 #endif
1971 /* Fall through. */
1972
1973 default:
1974 length = OP_lengths16[op] - 1;
1975 break;
1976
1977 case OP_CLASS:
1978 case OP_NCLASS:
1979 /* Skip the character bit map. */
1980 ptr += 32/sizeof(pcre_uint16);
1981 length = 0;
1982 break;
1983
1984 case OP_XCLASS:
1985 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1986 if (LINK_SIZE > 1)
1987 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1988 - (1 + LINK_SIZE + 1));
1989 else
1990 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1991
1992 /* Reverse the size of the XCLASS instance. */
1993 *ptr = swap_uint16(*ptr);
1994 ptr++;
1995 if (LINK_SIZE > 1)
1996 {
1997 *ptr = swap_uint16(*ptr);
1998 ptr++;
1999 }
2000
2001 op = *ptr;
2002 *ptr = swap_uint16(op);
2003 ptr++;
2004 if ((op & XCL_MAP) != 0)
2005 {
2006 /* Skip the character bit map. */
2007 ptr += 32/sizeof(pcre_uint16);
2008 length -= 32/sizeof(pcre_uint16);
2009 }
2010 break;
2011 }
2012 }
2013 /* Control should never reach here in 16 bit mode. */
2014 #endif /* SUPPORT_PCRE16 */
2015 }
2016
2017
2018
2019 /*************************************************
2020 * Check match or recursion limit *
2021 *************************************************/
2022
2023 static int
2024 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2025 int start_offset, int options, int *use_offsets, int use_size_offsets,
2026 int flag, unsigned long int *limit, int errnumber, const char *msg)
2027 {
2028 int count;
2029 int min = 0;
2030 int mid = 64;
2031 int max = -1;
2032
2033 extra->flags |= flag;
2034
2035 for (;;)
2036 {
2037 *limit = mid;
2038
2039 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2040 use_offsets, use_size_offsets);
2041
2042 if (count == errnumber)
2043 {
2044 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2045 min = mid;
2046 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2047 }
2048
2049 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2050 count == PCRE_ERROR_PARTIAL)
2051 {
2052 if (mid == min + 1)
2053 {
2054 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2055 break;
2056 }
2057 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2058 max = mid;
2059 mid = (min + mid)/2;
2060 }
2061 else break; /* Some other error */
2062 }
2063
2064 extra->flags &= ~flag;
2065 return count;
2066 }
2067
2068
2069
2070 /*************************************************
2071 * Case-independent strncmp() function *
2072 *************************************************/
2073
2074 /*
2075 Arguments:
2076 s first string
2077 t second string
2078 n number of characters to compare
2079
2080 Returns: < 0, = 0, or > 0, according to the comparison
2081 */
2082
2083 static int
2084 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2085 {
2086 while (n--)
2087 {
2088 int c = tolower(*s++) - tolower(*t++);
2089 if (c) return c;
2090 }
2091 return 0;
2092 }
2093
2094
2095
2096 /*************************************************
2097 * Check newline indicator *
2098 *************************************************/
2099
2100 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2101 a message and return 0 if there is no match.
2102
2103 Arguments:
2104 p points after the leading '<'
2105 f file for error message
2106
2107 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2108 */
2109
2110 static int
2111 check_newline(pcre_uint8 *p, FILE *f)
2112 {
2113 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2114 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2115 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2116 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2117 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2118 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2119 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2120 fprintf(f, "Unknown newline type at: <%s\n", p);
2121 return 0;
2122 }
2123
2124
2125
2126 /*************************************************
2127 * Usage function *
2128 *************************************************/
2129
/* Write the command-line help text to stdout. Lines describing features that
can be compiled out (readline, the 16-bit library, DFA matching, the POSIX
interface) are selected by the corresponding preprocessor symbols. None of the
text contains a format directive, so it is written with fputs(). */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif

  fputs(" -b show compiled code\n"
        " -C show PCRE compile-time options and exit\n"
        " -C arg show a specific compile-time option\n"
        " and exit with its value. The arg can be:\n", stdout);

  /* The keywords accepted as the argument of -C. */

  fputs(" linksize internal link size [2, 3, 4]\n"
        " pcre8 8 bit library support enabled [0, 1]\n"
        " pcre16 16 bit library support enabled [0, 1]\n"
        " utf Unicode Transformation Format supported [0, 1]\n"
        " ucp Unicode Properties supported [0, 1]\n"
        " jit Just-in-time compiler supported [0, 1]\n"
        " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);

  fputs(" -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n", stdout);

  fputs(" -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -s++ ditto, verifying when JIT was actually used\n"
        " -s+n force each pattern to be studied, using JIT if available,\n"
        " where 1 <= n <= 7 selects JIT options\n"
        " -s++n ditto, verifying when JIT was actually used\n", stdout);

  fputs(" -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
2180
2181
2182
2183 /*************************************************
2184 * Main Program *
2185 *************************************************/
2186
2187 /* Read lines from named file or stdin and write to named file or stdout; lines
2188 consist of a regular expression, in delimiters and optionally followed by
2189 options, followed by a set of test data, terminated by an empty line. */
2190
2191 int main(int argc, char **argv)
2192 {
2193 FILE *infile = stdin;
2194 const char *version;
2195 int options = 0;
2196 int study_options = 0;
2197 int default_find_match_limit = FALSE;
2198 int op = 1;
2199 int timeit = 0;
2200 int timeitm = 0;
2201 int showinfo = 0;
2202 int showstore = 0;
2203 int force_study = -1;
2204 int force_study_options = 0;
2205 int quiet = 0;
2206 int size_offsets = 45;
2207 int size_offsets_max;
2208 int *offsets = NULL;
2209 int debug = 0;
2210 int done = 0;
2211 int all_use_dfa = 0;
2212 int verify_jit = 0;
2213 int yield = 0;
2214 int stack_size;
2215
2216 #if !defined NOPOSIX
2217 int posix = 0;
2218 #endif
2219 #if !defined NODFA
2220 int *dfa_workspace = NULL;
2221 #endif
2222
2223 pcre_jit_stack *jit_stack = NULL;
2224
2225 /* These vectors store, end-to-end, a list of zero-terminated captured
2226 substring names, each list itself being terminated by an empty name. Assume
2227 that 1024 is plenty long enough for the few names we'll be testing. It is
2228 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2229 for the actual memory, to ensure alignment. */
2230
2231 pcre_uint16 copynames[1024];
2232 pcre_uint16 getnames[1024];
2233
2234 #ifdef SUPPORT_PCRE16
2235 pcre_uint16 *cn16ptr;
2236 pcre_uint16 *gn16ptr;
2237 #endif
2238
2239 #ifdef SUPPORT_PCRE8
2240 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2241 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2242 pcre_uint8 *cn8ptr;
2243 pcre_uint8 *gn8ptr;
2244 #endif
2245
2246 /* Get buffers from malloc() so that valgrind will check their misuse when
2247 debugging. They grow automatically when very long lines are read. The 16-bit
2248 buffer (buffer16) is obtained only if needed. */
2249
2250 buffer = (pcre_uint8 *)malloc(buffer_size);
2251 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2252 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2253
2254 /* The outfile variable is static so that new_malloc can use it. */
2255
2256 outfile = stdout;
2257
2258 /* The following _setmode() stuff is some Windows magic that tells its runtime
2259 library to translate CRLF into a single LF character. At least, that's what
2260 I've been told: never having used Windows I take this all on trust. Originally
2261 it set 0x8000, but then I was advised that _O_BINARY was better. */
2262
2263 #if defined(_WIN32) || defined(WIN32)
2264 _setmode( _fileno( stdout ), _O_BINARY );
2265 #endif
2266
2267 /* Get the version number: both pcre_version() and pcre16_version() give the
2268 same answer. We just need to ensure that we call one that is available. */
2269
2270 #ifdef SUPPORT_PCRE8
2271 version = pcre_version();
2272 #else
2273 version = pcre16_version();
2274 #endif
2275
2276 /* Scan options */
2277
2278 while (argc > 1 && argv[op][0] == '-')
2279 {
2280 pcre_uint8 *endptr;
2281 char *arg = argv[op];
2282
2283 if (strcmp(arg, "-m") == 0) showstore = 1;
2284 else if (strcmp(arg, "-s") == 0) force_study = 0;
2285
2286 else if (strncmp(arg, "-s+", 3) == 0)
2287 {
2288 arg += 3;
2289 if (*arg == '+') { arg++; verify_jit = TRUE; }
2290 force_study = 1;
2291 if (*arg == 0)
2292 force_study_options = jit_study_bits[6];
2293 else if (*arg >= '1' && *arg <= '7')
2294 force_study_options = jit_study_bits[*arg - '1'];
2295 else goto BAD_ARG;
2296 }
2297 else if (strcmp(arg, "-16") == 0)
2298 {
2299 #ifdef SUPPORT_PCRE16
2300 use_pcre16 = 1;
2301 #else
2302 printf("** This version of PCRE was built without 16-bit support\n");
2303 exit(1);
2304 #endif
2305 }
2306 else if (strcmp(arg, "-q") == 0) quiet = 1;
2307 else if (strcmp(arg, "-b") == 0) debug = 1;
2308 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2309 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2310 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2311 #if !defined NODFA
2312 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2313 #endif
2314 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2315 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2316 *endptr == 0))
2317 {
2318 op++;
2319 argc--;
2320 }
2321 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2322 {
2323 int both = arg[2] == 0;
2324 int temp;
2325 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2326 *endptr == 0))
2327 {
2328 timeitm = temp;
2329 op++;
2330 argc--;
2331 }
2332 else timeitm = LOOPREPEAT;
2333 if (both) timeit = timeitm;
2334 }
2335 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2336 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2337 *endptr == 0))
2338 {
2339 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2340 printf("PCRE: -S not supported on this OS\n");
2341 exit(1);
2342 #else
2343 int rc;
2344 struct rlimit rlim;
2345 getrlimit(RLIMIT_STACK, &rlim);
2346 rlim.rlim_cur = stack_size * 1024 * 1024;
2347 rc = setrlimit(RLIMIT_STACK, &rlim);
2348 if (rc != 0)
2349 {
2350 printf("PCRE: setrlimit() failed with error %d\n", rc);
2351 exit(1);
2352 }
2353 op++;
2354 argc--;
2355 #endif
2356 }
2357 #if !defined NOPOSIX
2358 else if (strcmp(arg, "-p") == 0) posix = 1;
2359 #endif
2360 else if (strcmp(arg, "-C") == 0)
2361 {
2362 int rc;
2363 unsigned long int lrc;
2364
2365 if (argc > 2)
2366 {
2367 if (strcmp(argv[op + 1], "linksize") == 0)
2368 {
2369 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2370 printf("%d\n", rc);
2371 yield = rc;
2372 goto EXIT;
2373 }
2374 if (strcmp(argv[op + 1], "pcre8") == 0)
2375 {
2376 #ifdef SUPPORT_PCRE8
2377 printf("1\n");
2378 yield = 1;
2379 #else
2380 printf("0\n");
2381 yield = 0;
2382 #endif
2383 goto EXIT;
2384 }
2385 if (strcmp(argv[op + 1], "pcre16") == 0)
2386 {
2387 #ifdef SUPPORT_PCRE16
2388 printf("1\n");
2389 yield = 1;
2390 #else
2391 printf("0\n");
2392 yield = 0;
2393 #endif
2394 goto EXIT;
2395 }
2396 if (strcmp(argv[op + 1], "utf") == 0)
2397 {
2398 #ifdef SUPPORT_PCRE8
2399 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2400 printf("%d\n", rc);
2401 yield = rc;
2402 #else
2403 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2404 printf("%d\n", rc);
2405 yield = rc;
2406 #endif
2407 goto EXIT;
2408 }
2409 if (strcmp(argv[op + 1], "ucp") == 0)
2410 {
2411 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2412 printf("%d\n", rc);
2413 yield = rc;
2414 goto EXIT;
2415 }
2416 if (strcmp(argv[op + 1], "jit") == 0)
2417 {
2418 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2419 printf("%d\n", rc);
2420 yield = rc;
2421 goto EXIT;
2422 }
2423 if (strcmp(argv[op + 1], "newline") == 0)
2424 {
2425 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2426 /* Note that these values are always the ASCII values, even
2427 in EBCDIC environments. CR is 13 and NL is 10. */
2428 printf("%s\n", (rc == 13)? "CR" :
2429 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2430 (rc == -2)? "ANYCRLF" :
2431 (rc == -1)? "ANY" : "???");
2432 goto EXIT;
2433 }
2434 printf("Unknown -C option: %s\n", argv[op + 1]);
2435 goto EXIT;
2436 }
2437
2438 printf("PCRE version %s\n", version);
2439 printf("Compiled with\n");
2440
2441 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2442 are set, either both UTFs are supported or both are not supported. */
2443
2444 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2445 printf(" 8-bit and 16-bit support\n");
2446 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2447 if (rc)
2448 printf(" UTF-8 and UTF-16 support\n");
2449 else
2450 printf(" No UTF-8 or UTF-16 support\n");
2451 #elif defined SUPPORT_PCRE8
2452 printf(" 8-bit support only\n");
2453 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2454 printf(" %sUTF-8 support\n", rc? "" : "No ");
2455 #else
2456 printf(" 16-bit support only\n");
2457 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2458 printf(" %sUTF-16 support\n", rc? "" : "No ");
2459 #endif
2460
2461 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2462 printf(" %sUnicode properties support\n", rc? "" : "No ");
2463 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2464 if (rc)
2465 {
2466 const char *arch;
2467 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2468 printf(" Just-in-time compiler support: %s\n", arch);
2469 }
2470 else
2471 printf(" No just-in-time compiler support\n");
2472 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2473 /* Note that these values are always the ASCII values, even
2474 in EBCDIC environments. CR is 13 and NL is 10. */
2475 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2476 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2477 (rc == -2)? "ANYCRLF" :
2478 (rc == -1)? "ANY" : "???");
2479 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2480 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2481 "all Unicode newlines");
2482 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2483 printf(" Internal link size = %d\n", rc);
2484 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2485 printf(" POSIX malloc threshold = %d\n", rc);
2486 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2487 printf(" Default match limit = %ld\n", lrc);
2488 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2489 printf(" Default recursion depth limit = %ld\n", lrc);
2490 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2491 printf(" Match recursion uses %s", rc? "stack" : "heap");
2492 if (showstore)
2493 {
2494 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2495 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2496 }
2497 printf("\n");
2498 goto EXIT;
2499 }
2500 else if (strcmp(arg, "-help") == 0 ||
2501 strcmp(arg, "--help") == 0)
2502 {
2503 usage();
2504 goto EXIT;
2505 }
2506 else
2507 {
2508 BAD_ARG:
2509 printf("** Unknown or malformed option %s\n", arg);
2510 usage();
2511 yield = 1;
2512 goto EXIT;
2513 }
2514 op++;
2515 argc--;
2516 }
2517
2518 /* Get the store for the offsets vector, and remember what it was */
2519
2520 size_offsets_max = size_offsets;
2521 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2522 if (offsets == NULL)
2523 {
2524 printf("** Failed to get %d bytes of memory for offsets vector\n",
2525 (int)(size_offsets_max * sizeof(int)));
2526 yield = 1;
2527 goto EXIT;
2528 }
2529
2530 /* Sort out the input and output files */
2531
2532 if (argc > 1)
2533 {
2534 infile = fopen(argv[op], INPUT_MODE);
2535 if (infile == NULL)
2536 {
2537 printf("** Failed to open %s\n", argv[op]);
2538 yield = 1;
2539 goto EXIT;
2540 }
2541 }
2542
2543 if (argc > 2)
2544 {
2545 outfile = fopen(argv[op+1], OUTPUT_MODE);
2546 if (outfile == NULL)
2547 {
2548 printf("** Failed to open %s\n", argv[op+1]);
2549 yield = 1;
2550 goto EXIT;
2551 }
2552 }
2553
2554 /* Set alternative malloc function */
2555
2556 #ifdef SUPPORT_PCRE8
2557 pcre_malloc = new_malloc;
2558 pcre_free = new_free;
2559 pcre_stack_malloc = stack_malloc;
2560 pcre_stack_free = stack_free;
2561 #endif
2562
2563 #ifdef SUPPORT_PCRE16
2564 pcre16_malloc = new_malloc;
2565 pcre16_free = new_free;
2566 pcre16_stack_malloc = stack_malloc;
2567 pcre16_stack_free = stack_free;
2568 #endif
2569
2570 /* Heading line unless quiet, then prompt for first regex if stdin */
2571
2572 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2573
2574 /* Main loop */
2575
2576 while (!done)
2577 {
2578 pcre *re = NULL;
2579 pcre_extra *extra = NULL;
2580
2581 #if !defined NOPOSIX /* There are still compilers that require no indent */
2582 regex_t preg;
2583 int do_posix = 0;
2584 #endif
2585
2586 const char *error;
2587 pcre_uint8 *markptr;
2588 pcre_uint8 *p, *pp, *ppp;
2589 pcre_uint8 *to_file = NULL;
2590 const pcre_uint8 *tables = NULL;
2591 unsigned long int get_options;
2592 unsigned long int true_size, true_study_size = 0;
2593 size_t size, regex_gotten_store;
2594 int do_allcaps = 0;
2595 int do_mark = 0;
2596 int do_study = 0;
2597 int no_force_study = 0;
2598 int do_debug = debug;
2599 int do_G = 0;
2600 int do_g = 0;
2601 int do_showinfo = showinfo;
2602 int do_showrest = 0;
2603 int do_showcaprest = 0;
2604 int do_flip = 0;
2605 int erroroffset, len, delimiter, poffset;
2606
2607 #if !defined NODFA
2608 int dfa_matched = 0;
2609 #endif
2610
2611 use_utf = 0;
2612 debug_lengths = 1;
2613
2614 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2615 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2616 fflush(outfile);
2617
2618 p = buffer;
2619 while (isspace(*p)) p++;
2620 if (*p == 0) continue;
2621
2622 /* See if the pattern is to be loaded pre-compiled from a file. */
2623
2624 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2625 {
2626 pcre_uint32 magic;
2627 pcre_uint8 sbuf[8];
2628 FILE *f;
2629
2630 p++;
2631 if (*p == '!')
2632 {
2633 do_debug = TRUE;
2634 do_showinfo = TRUE;
2635 p++;
2636 }
2637
2638 pp = p + (int)strlen((char *)p);
2639 while (isspace(pp[-1])) pp--;
2640 *pp = 0;
2641
2642 f = fopen((char *)p, "rb");
2643 if (f == NULL)
2644 {
2645 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2646 continue;
2647 }
2648
2649 first_gotten_store = 0;
2650 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2651
2652 true_size =
2653 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2654 true_study_size =
2655 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2656
2657 re = (pcre *)new_malloc(true_size);
2658 if (re == NULL)
2659 {
2660 printf("** Failed to get %d bytes of memory for pcre object\n",
2661 (int)true_size);
2662 yield = 1;
2663 goto EXIT;
2664 }
2665 regex_gotten_store = first_gotten_store;
2666
2667 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2668
2669 magic = ((REAL_PCRE *)re)->magic_number;
2670 if (magic != MAGIC_NUMBER)
2671 {
2672 if (swap_uint32(magic) == MAGIC_NUMBER)
2673 {
2674 do_flip = 1;
2675 }
2676 else
2677 {
2678 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2679 new_free(re);
2680 fclose(f);
2681 continue;
2682 }
2683 }
2684
2685 /* We hide the byte-invert info for little and big endian tests. */
2686 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2687 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2688
2689 /* Now see if there is any following study data. */
2690
2691 if (true_study_size != 0)
2692 {
2693 pcre_study_data *psd;
2694
2695 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2696 extra->flags = PCRE_EXTRA_STUDY_DATA;
2697
2698 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2699 extra->study_data = psd;
2700
2701 if (fread(psd, 1, true_study_size, f) != true_study_size)
2702 {
2703 FAIL_READ:
2704 fprintf(outfile, "Failed to read data from %s\n", p);
2705 if (extra != NULL)
2706 {
2707 PCRE_FREE_STUDY(extra);
2708 }
2709 new_free(re);
2710 fclose(f);
2711 continue;
2712 }
2713 fprintf(outfile, "Study data loaded from %s\n", p);
2714 do_study = 1; /* To get the data output if requested */
2715 }
2716 else fprintf(outfile, "No study data\n");
2717
2718 /* Flip the necessary bytes. */
2719 if (do_flip)
2720 {
2721 int rc;
2722 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2723 if (rc == PCRE_ERROR_BADMODE)
2724 {
2725 /* Simulate the result of the function call below. */
2726 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2727 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2728 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2729 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2730 new_free(re);
2731 fclose(f);
2732 continue;
2733 }
2734 }
2735
2736 /* Need to know if UTF-8 for printing data strings. */
2737
2738 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2739 {
2740 new_free(re);
2741 fclose(f);
2742 continue;
2743 }
2744 use_utf = (get_options & PCRE_UTF8) != 0;
2745
2746 fclose(f);
2747 goto SHOW_INFO;
2748 }
2749
2750 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2751 the pattern; if it isn't complete, read more. */
2752
2753 delimiter = *p++;
2754
2755 if (isalnum(delimiter) || delimiter == '\\')
2756 {
2757 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2758 goto SKIP_DATA;
2759 }
2760
2761 pp = p;
2762 poffset = (int)(p - buffer);
2763
2764 for(;;)
2765 {
2766 while (*pp != 0)
2767 {
2768 if (*pp == '\\' && pp[1] != 0) pp++;
2769 else if (*pp == delimiter) break;
2770 pp++;
2771 }
2772 if (*pp != 0) break;
2773 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2774 {
2775 fprintf(outfile, "** Unexpected EOF\n");
2776 done = 1;
2777 goto CONTINUE;
2778 }
2779 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2780 }
2781
2782 /* The buffer may have moved while being extended; reset the start of data
2783 pointer to the correct relative point in the buffer. */
2784
2785 p = buffer + poffset;
2786
2787 /* If the first character after the delimiter is backslash, make
2788 the pattern end with backslash. This is purely to provide a way
2789 of testing for the error message when a pattern ends with backslash. */
2790
2791 if (pp[1] == '\\') *pp++ = '\\';
2792
2793 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2794 for callouts. */
2795
2796 *pp++ = 0;
2797 strcpy((char *)pbuffer, (char *)p);
2798
2799 /* Look for options after final delimiter */
2800
2801 options = 0;
2802 study_options = force_study_options;
2803 log_store = showstore; /* default from command line */
2804
2805 while (*pp != 0)
2806 {
2807 switch (*pp++)
2808 {
2809 case 'f': options |= PCRE_FIRSTLINE; break;
2810 case 'g': do_g = 1; break;
2811 case 'i': options |= PCRE_CASELESS; break;
2812 case 'm': options |= PCRE_MULTILINE; break;
2813 case 's': options |= PCRE_DOTALL; break;
2814 case 'x': options |= PCRE_EXTENDED; break;
2815
2816 case '+':
2817 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2818 break;
2819
2820 case '=': do_allcaps = 1; break;
2821 case 'A': options |= PCRE_ANCHORED; break;
2822 case 'B': do_debug = 1; break;
2823 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2824 case 'D': do_debug = do_showinfo = 1; break;
2825 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2826 case 'F': do_flip = 1; break;
2827 case 'G': do_G = 1; break;
2828 case 'I': do_showinfo = 1; break;
2829 case 'J': options |= PCRE_DUPNAMES; break;
2830 case 'K': do_mark = 1; break;
2831 case 'M': log_store = 1; break;
2832 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2833
2834 #if !defined NOPOSIX
2835 case 'P': do_posix = 1; break;
2836 #endif
2837
2838 case 'S':
2839 do_study = 1;
2840 for (;;)
2841 {
2842 switch (*pp++)
2843 {
2844 case 'S':
2845 do_study = 0;
2846 no_force_study = 1;
2847 break;
2848
2849 case '!':
2850 study_options |= PCRE_STUDY_EXTRA_NEEDED;
2851 break;
2852
2853 case '+':
2854 if (*pp == '+')
2855 {
2856 verify_jit = TRUE;
2857 pp++;
2858 }
2859 if (*pp >= '1' && *pp <= '7')
2860 study_options |= jit_study_bits[*pp++ - '1'];
2861 else
2862 study_options |= jit_study_bits[6];
2863 break;
2864
2865 case '-':
2866 study_options &= ~PCRE_STUDY_ALLJIT;
2867 break;
2868
2869 default:
2870 pp--;
2871 goto ENDLOOP;
2872 }
2873 }
2874 ENDLOOP:
2875 break;
2876
2877 case 'U': options |= PCRE_UNGREEDY; break;
2878 case 'W': options |= PCRE_UCP; break;
2879 case 'X': options |= PCRE_EXTRA; break;
2880 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2881 case 'Z': debug_lengths = 0; break;
2882 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2883 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2884
2885 case 'T':
2886 switch (*pp++)
2887 {
2888 case '0': tables = tables0; break;
2889 case '1': tables = tables1; break;
2890
2891 case '\r':
2892 case '\n':
2893 case ' ':
2894 case 0:
2895 fprintf(outfile, "** Missing table number after /T\n");
2896 goto SKIP_DATA;
2897
2898 default:
2899 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2900 goto SKIP_DATA;
2901 }
2902 break;
2903
2904 case 'L':
2905 ppp = pp;
2906 /* The '\r' test here is so that it works on Windows. */
2907 /* The '0' test is just in case this is an unterminated line. */
2908 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2909 *ppp = 0;
2910 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2911 {
2912 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2913 goto SKIP_DATA;
2914 }
2915 locale_set = 1;
2916 tables = PCRE_MAKETABLES;
2917 pp = ppp;
2918 break;
2919
2920 case '>':
2921 to_file = pp;
2922 while (*pp != 0) pp++;
2923 while (isspace(pp[-1])) pp--;
2924 *pp = 0;
2925 break;
2926
2927 case '<':
2928 {
2929 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2930 {
2931 options |= PCRE_JAVASCRIPT_COMPAT;
2932 pp += 3;
2933 }
2934 else
2935 {
2936 int x = check_newline(pp, outfile);
2937 if (x == 0) goto SKIP_DATA;
2938 options |= x;
2939 while (*pp++ != '>');
2940 }
2941 }
2942 break;
2943
2944 case '\r': /* So that it works in Windows */
2945 case '\n':
2946 case ' ':
2947 break;
2948
2949 default:
2950 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2951 goto SKIP_DATA;
2952 }
2953 }
2954
2955 /* Handle compiling via the POSIX interface, which doesn't support the
2956 timing, showing, or debugging options, nor the ability to pass over
2957 local character tables. Neither does it have 16-bit support. */
2958
2959 #if !defined NOPOSIX
2960 if (posix || do_posix)
2961 {
2962 int rc;
2963 int cflags = 0;
2964
2965 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2966 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2967 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2968 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2969 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2970 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2971 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2972
2973 first_gotten_store = 0;
2974 rc = regcomp(&preg, (char *)p, cflags);
2975
2976 /* Compilation failed; go back for another re, skipping to blank line
2977 if non-interactive. */
2978
2979 if (rc != 0)
2980 {
2981 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2982 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2983 goto SKIP_DATA;
2984 }
2985 }
2986
2987 /* Handle compiling via the native interface */
2988
2989 else
2990 #endif /* !defined NOPOSIX */
2991
2992 {
2993 /* In 16-bit mode, convert the input. */
2994
2995 #ifdef SUPPORT_PCRE16
2996 if (use_pcre16)
2997 {
2998 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2999 {
3000 case -1:
3001 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3002 "converted to UTF-16\n");
3003 goto SKIP_DATA;
3004
3005 case -2:
3006 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3007 "cannot be converted to UTF-16\n");
3008 goto SKIP_DATA;
3009
3010 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3011 fprintf(outfile, "**Failed: character value greater than 0xffff "
3012 "cannot be converted to 16-bit in non-UTF mode\n");
3013 goto SKIP_DATA;
3014
3015 default:
3016 break;
3017 }
3018 p = (pcre_uint8 *)buffer16;
3019 }
3020 #endif
3021
3022 /* Compile many times when timing */
3023
3024 if (timeit > 0)
3025 {
3026 register int i;
3027 clock_t time_taken;
3028 clock_t start_time = clock();
3029 for (i = 0; i < timeit; i++)
3030 {
3031 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3032 if (re != NULL) free(re);
3033 }
3034 time_taken = clock() - start_time;
3035 fprintf(outfile, "Compile time %.4f milliseconds\n",
3036 (((double)time_taken * 1000.0) / (double)timeit) /
3037 (double)CLOCKS_PER_SEC);
3038 }
3039
3040 first_gotten_store = 0;
3041 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3042
3043 /* Compilation failed; go back for another re, skipping to blank line
3044 if non-interactive. */
3045
3046 if (re == NULL)
3047 {
3048 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3049 SKIP_DATA:
3050 if (infile != stdin)
3051 {
3052 for (;;)
3053 {
3054 if (extend_inputline(infile, buffer, NULL) == NULL)
3055 {
3056 done = 1;
3057 goto CONTINUE;
3058 }
3059 len = (int)strlen((char *)buffer);
3060 while (len > 0 && isspace(buffer[len-1])) len--;
3061 if (len == 0) break;
3062 }
3063 fprintf(outfile, "\n");
3064 }
3065 goto CONTINUE;
3066 }
3067
3068 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3069 within the regex; check for this so that we know how to process the data
3070 lines. */
3071
3072 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3073 goto SKIP_DATA;
3074 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3075
3076 /* Extract the size for possible writing before possibly flipping it,
3077 and remember the store that was got. */
3078
3079 true_size = ((REAL_PCRE *)re)->size;
3080 regex_gotten_store = first_gotten_store;
3081
3082 /* Output code size information if requested */
3083
3084 if (log_store)
3085 fprintf(outfile, "Memory allocation (code space): %d\n",
3086 (int)(first_gotten_store -
3087 sizeof(REAL_PCRE) -
3088 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3089
3090 /* If -s or /S was present, study the regex to generate additional info to
3091 help with the matching, unless the pattern has the SS option, which
3092 suppresses the effect of /S (used for a few test patterns where studying is
3093 never sensible). */
3094
3095 if (do_study || (force_study >= 0 && !no_force_study))
3096 {
3097 if (timeit > 0)
3098 {
3099 register int i;
3100 clock_t time_taken;
3101 clock_t start_time = clock();
3102 for (i = 0; i < timeit; i++)
3103 {
3104 PCRE_STUDY(extra, re, study_options, &error);
3105 }
3106 time_taken = clock() - start_time;
3107 if (extra != NULL)
3108 {
3109 PCRE_FREE_STUDY(extra);
3110 }
3111 fprintf(outfile, " Study time %.4f milliseconds\n",
3112 (((double)time_taken * 1000.0) / (double)timeit) /
3113 (double)CLOCKS_PER_SEC);
3114 }
3115 PCRE_STUDY(extra, re, study_options, &error);
3116 if (error != NULL)
3117 fprintf(outfile, "Failed to study: %s\n", error);
3118 else if (extra != NULL)
3119 {
3120 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3121 if (log_store)
3122 {
3123 size_t jitsize;
3124 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3125 jitsize != 0)
3126 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3127 }
3128 }
3129 }
3130
3131 /* If /K was present, we set up for handling MARK data. */
3132
3133 if (do_mark)
3134 {
3135 if (extra == NULL)
3136 {
3137 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3138 extra->flags = 0;
3139 }
3140 extra->mark = &markptr;
3141 extra->flags |= PCRE_EXTRA_MARK;
3142 }
3143
3144 /* Extract and display information from the compiled data if required. */
3145
3146 SHOW_INFO:
3147
3148 if (do_debug)
3149 {
3150 fprintf(outfile, "------------------------------------------------------------------\n");
3151 PCRE_PRINTINT(re, outfile, debug_lengths);
3152 }
3153
3154 /* We already have the options in get_options (see above) */
3155
3156 if (do_showinfo)
3157 {
3158 unsigned long int all_options;
3159 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3160 hascrorlf, maxlookbehind;
3161 int nameentrysize, namecount;
3162 const pcre_uint8 *nametable;
3163
3164 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3165 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3166 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3167 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3168 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3169 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3170 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3171 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3173 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3174 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3175 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3176 != 0)
3177 goto SKIP_DATA;
3178
3179 if (size != regex_gotten_store) fprintf(outfile,
3180 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3181 (int)size, (int)regex_gotten_store);
3182
3183 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3184 if (backrefmax > 0)
3185 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3186
3187 if (namecount > 0)
3188 {
3189 fprintf(outfile, "Named capturing subpatterns:\n");
3190 while (namecount-- > 0)
3191 {
3192 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3193 int imm2_size = use_pcre16 ? 1 : 2;
3194 #else
3195 int imm2_size = IMM2_SIZE;
3196 #endif
3197 int length = (int)STRLEN(nametable + imm2_size);
3198 fprintf(outfile, " ");
3199 PCHARSV(nametable, imm2_size, length, outfile);
3200 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3201 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3202 fprintf(outfile, "%3d\n", use_pcre16?
3203 (int)(((PCRE_SPTR16)nametable)[0])
3204 :((int)nametable[0] << 8) | (int)nametable[1]);
3205 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3206 #else
3207 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3208 #ifdef SUPPORT_PCRE8
3209 nametable += nameentrysize;
3210 #else
3211 nametable += nameentrysize * 2;
3212 #endif
3213 #endif
3214 }
3215 }
3216
3217 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3218 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3219
3220 all_options = ((REAL_PCRE *)re)->options;
3221 if (do_flip) all_options = swap_uint32(all_options);
3222
3223 if (get_options == 0) fprintf(outfile, "No options\n");
3224 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3225 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3226 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3227 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3228 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3229 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3230 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3232 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3233 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3234 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3235 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3236 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3237 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3238 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3239 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3240 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3241 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3242
3243 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3244
3245 switch (get_options & PCRE_NEWLINE_BITS)
3246 {
3247 case PCRE_NEWLINE_CR:
3248 fprintf(outfile, "Forced newline sequence: CR\n");
3249 break;
3250
3251 case PCRE_NEWLINE_LF:
3252 fprintf(outfile, "Forced newline sequence: LF\n");
3253 break;
3254
3255 case PCRE_NEWLINE_CRLF:
3256 fprintf(outfile, "Forced newline sequence: CRLF\n");
3257 break;
3258
3259 case PCRE_NEWLINE_ANYCRLF:
3260 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3261 break;
3262
3263 case PCRE_NEWLINE_ANY:
3264 fprintf(outfile, "Forced newline sequence: ANY\n");
3265 break;
3266
3267 default:
3268 break;
3269 }
3270
3271 if (first_char == -1)
3272 {
3273 fprintf(outfile, "First char at start or follows newline\n");
3274 }
3275 else if (first_char < 0)
3276 {
3277 fprintf(outfile, "No first char\n");
3278 }
3279 else
3280 {
3281 const char *caseless =
3282 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3283 "" : " (caseless)";
3284
3285 if (PRINTOK(first_char))
3286 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3287 else
3288 {
3289 fprintf(outfile, "First char = ");
3290 pchar(first_char, outfile);
3291 fprintf(outfile, "%s\n", caseless);
3292 }
3293 }
3294
3295 if (need_char < 0)
3296 {
3297 fprintf(outfile, "No need char\n");
3298 }
3299 else
3300 {
3301 const char *caseless =
3302 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3303 "" : " (caseless)";
3304
3305 if (PRINTOK(need_char))
3306 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3307 else
3308 {
3309 fprintf(outfile, "Need char = ");
3310 pchar(need_char, outfile);
3311 fprintf(outfile, "%s\n", caseless);
3312 }
3313 }
3314
3315 if (maxlookbehind > 0)
3316 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3317
3318 /* Don't output study size; at present it is in any case a fixed
3319 value, but it varies, depending on the computer architecture, and
3320 so messes up the test suite. (And with the /F option, it might be
3321 flipped.) If study was forced by an external -s, don't show this
3322 information unless -i or -d was also present. This means that, except
3323 when auto-callouts are involved, the output from runs with and without
3324 -s should be identical. */
3325
3326 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3327 {
3328 if (extra == NULL)
3329 fprintf(outfile, "Study returned NULL\n");
3330 else
3331 {
3332 pcre_uint8 *start_bits = NULL;
3333 int minlength;
3334
3335 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3336 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3337
3338 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3339 {
3340 if (start_bits == NULL)
3341 fprintf(outfile, "No set of starting bytes\n");
3342 else
3343 {
3344 int i;
3345 int c = 24;
3346 fprintf(outfile, "Starting byte set: ");
3347 for (i = 0; i < 256; i++)
3348 {
3349 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3350 {
3351 if (c > 75)
3352 {
3353 fprintf(outfile, "\n ");
3354 c = 2;
3355 }
3356 if (PRINTOK(i) && i != ' ')
3357 {
3358 fprintf(outfile, "%c ", i);
3359 c += 2;
3360 }
3361 else
3362 {
3363 fprintf(outfile, "\\x%02x ", i);
3364 c += 5;
3365 }
3366 }
3367 }
3368 fprintf(outfile, "\n");
3369 }
3370 }
3371 }
3372
3373 /* Show this only if the JIT was set by /S, not by -s. */
3374
3375 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3376 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3377 {
3378 int jit;
3379 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3380 {
3381 if (jit)
3382 fprintf(outfile, "JIT study was successful\n");
3383 else
3384 #ifdef SUPPORT_JIT
3385 fprintf(outfile, "JIT study was not successful\n");
3386 #else
3387 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3388 #endif
3389 }
3390 }
3391 }
3392 }
3393
3394 /* If the '>' option was present, we write out the regex to a file, and
3395 that is all. The first 8 bytes of the file are the regex length and then
3396 the study length, in big-endian order. */
3397
3398 if (to_file != NULL)
3399 {
3400 FILE *f = fopen((char *)to_file, "wb");
3401 if (f == NULL)
3402 {
3403 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3404 }
3405 else
3406 {
3407 pcre_uint8 sbuf[8];
3408
3409 if (do_flip) regexflip(re, extra);
3410 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3411 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3412 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3413 sbuf[3] = (pcre_uint8)((true_size) & 255);
3414 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3415 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3416 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3417 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3418
3419 if (fwrite(sbuf, 1, 8, f) < 8 ||
3420 fwrite(re, 1, true_size, f) < true_size)
3421 {
3422 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3423 }
3424 else
3425 {
3426 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3427
3428 /* If there is study data, write it. */
3429
3430 if (extra != NULL)
3431 {
3432 if (fwrite(extra->study_data, 1, true_study_size, f) <
3433 true_study_size)
3434 {
3435 fprintf(outfile, "Write error on %s: %s\n", to_file,
3436 strerror(errno));
3437 }
3438 else fprintf(outfile, "Study data written to %s\n", to_file);
3439 }
3440 }
3441 fclose(f);
3442 }
3443
3444 new_free(re);
3445 if (extra != NULL)
3446 {
3447 PCRE_FREE_STUDY(extra);
3448 }
3449 if (locale_set)
3450 {
3451 new_free((void *)tables);
3452 setlocale(LC_CTYPE, "C");
3453 locale_set = 0;
3454 }
3455 continue; /* With next regex */
3456 }
3457 } /* End of non-POSIX compile */
3458
3459 /* Read data lines and test them */
3460
3461 for (;;)
3462 {
3463 pcre_uint8 *q;
3464 pcre_uint8 *bptr;
3465 int *use_offsets = offsets;
3466 int use_size_offsets = size_offsets;
3467 int callout_data = 0;
3468 int callout_data_set = 0;
3469 int count, c;
3470 int copystrings = 0;
3471 int find_match_limit = default_find_match_limit;
3472 int getstrings = 0;
3473 int getlist = 0;
3474 int gmatched = 0;
3475 int start_offset = 0;
3476 int start_offset_sign = 1;
3477 int g_notempty = 0;
3478 int use_dfa = 0;
3479
3480 *copynames = 0;
3481 *getnames = 0;
3482
3483 #ifdef SUPPORT_PCRE16
3484 cn16ptr = copynames;
3485 gn16ptr = getnames;
3486 #endif
3487 #ifdef SUPPORT_PCRE8
3488 cn8ptr = copynames8;
3489 gn8ptr = getnames8;
3490 #endif
3491
3492 SET_PCRE_CALLOUT(callout);
3493 first_callout = 1;
3494 last_callout_mark = NULL;
3495 callout_extra = 0;
3496 callout_count = 0;
3497 callout_fail_count = 999999;
3498 callout_fail_id = -1;
3499 show_malloc = 0;
3500 options = 0;
3501
3502 if (extra != NULL) extra->flags &=
3503 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3504
3505 len = 0;
3506 for (;;)
3507 {
3508 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3509 {
3510 if (len > 0) /* Reached EOF without hitting a newline */
3511 {
3512 fprintf(outfile, "\n");
3513 break;
3514 }
3515 done = 1;
3516 goto CONTINUE;
3517 }
3518 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3519 len = (int)strlen((char *)buffer);
3520 if (buffer[len-1] == '\n') break;
3521 }
3522
3523 while (len > 0 && isspace(buffer[len-1])) len--;
3524 buffer[len] = 0;
3525 if (len == 0) break;
3526
3527 p = buffer;
3528 while (isspace(*p)) p++;
3529
3530 bptr = q = dbuffer;
3531 while ((c = *p++) != 0)
3532 {
3533 int i = 0;
3534 int n = 0;
3535
3536 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3537 In non-UTF mode, allow the value of the byte to fall through to later,
3538 where values greater than 127 are turned into UTF-8 when running in
3539 16-bit mode. */
3540
3541 if (c != '\\')
3542 {
3543 if (use_utf)
3544 {
3545 *q++ = c;
3546 continue;
3547 }
3548 }
3549
3550 /* Handle backslash escapes */
3551
3552 else switch ((c = *p++))
3553 {
3554 case 'a': c = 7; break;
3555 case 'b': c = '\b'; break;
3556 case 'e': c = 27; break;
3557 case 'f': c = '\f'; break;
3558 case 'n': c = '\n'; break;
3559 case 'r': c = '\r'; break;
3560 case 't': c = '\t'; break;
3561 case 'v': c = '\v'; break;
3562
3563 case '0': case '1': case '2': case '3':
3564 case '4': case '5': case '6': case '7':
3565 c -= '0';
3566 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3567 c = c * 8 + *p++ - '0';
3568 break;
3569
3570 case 'x':
3571 if (*p == '{')
3572 {
3573 pcre_uint8 *pt = p;
3574 c = 0;
3575
3576 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3577 when isxdigit() is a macro that refers to its argument more than
3578 once. This is banned by the C Standard, but apparently happens in at
3579 least one MacOS environment. */
3580
3581 for (pt++; isxdigit(*pt); pt++)
3582 {
3583 if (++i == 9)
3584 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3585 "using only the first eight.\n");
3586 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3587 }
3588 if (*pt == '}')
3589 {
3590 p = pt + 1;
3591 break;
3592 }
3593 /* Not correct form for \x{...}; fall through */
3594 }
3595
3596 /* \x without {} always defines just one byte in 8-bit mode. This
3597 allows UTF-8 characters to be constructed byte by byte, and also allows
3598 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3599 Otherwise, pass it down to later code so that it can be turned into
3600 UTF-8 when running in 16-bit mode. */
3601
3602 c = 0;
3603 while (i++ < 2 && isxdigit(*p))
3604 {
3605 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3606 p++;
3607 }
3608 if (use_utf)
3609 {
3610 *q++ = c;
3611 continue;
3612 }
3613 break;
3614
3615 case 0: /* \ followed by EOF allows for an empty line */
3616 p--;
3617 continue;
3618
3619 case '>':
3620 if (*p == '-')
3621 {
3622 start_offset_sign = -1;
3623 p++;
3624 }
3625 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3626 start_offset *= start_offset_sign;
3627 continue;
3628
3629 case 'A': /* Option setting */
3630 options |= PCRE_ANCHORED;
3631 continue;
3632
3633 case 'B':
3634 options |= PCRE_NOTBOL;
3635 continue;
3636
3637 case 'C':
3638 if (isdigit(*p)) /* Set copy string */
3639 {
3640 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3641 copystrings |= 1 << n;
3642 }
3643 else if (isalnum(*p))
3644 {
3645 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3646 }
3647 else if (*p == '+')
3648 {
3649 callout_extra = 1;
3650 p++;
3651 }
3652 else if (*p == '-')
3653 {
3654 SET_PCRE_CALLOUT(NULL);
3655 p++;
3656 }
3657 else if (*p == '!')
3658 {
3659 callout_fail_id = 0;
3660 p++;
3661 while(isdigit(*p))
3662 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3663 callout_fail_count = 0;
3664 if (*p == '!')
3665 {
3666 p++;
3667 while(isdigit(*p))
3668 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3669 }
3670 }
3671 else if (*p == '*')
3672 {
3673 int sign = 1;
3674 callout_data = 0;
3675 if (*(++p) == '-') { sign = -1; p++; }
3676 while(isdigit(*p))
3677 callout_data = callout_data * 10 + *p++ - '0';
3678 callout_data *= sign;
3679 callout_data_set = 1;
3680 }
3681 continue;
3682
3683 #if !defined NODFA
3684 case 'D':
3685 #if !defined NOPOSIX
3686 if (posix || do_posix)
3687 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3688 else
3689 #endif
3690 use_dfa = 1;
3691 continue;
3692 #endif
3693
3694 #if !defined NODFA
3695 case 'F':
3696 options |= PCRE_DFA_SHORTEST;
3697 continue;
3698 #endif
3699
3700 case 'G':
3701 if (isdigit(*p))
3702 {
3703 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3704 getstrings |= 1 << n;
3705 }
3706 else if (isalnum(*p))
3707 {
3708 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3709 }
3710 continue;
3711
3712 case 'J':
3713 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3714 if (extra != NULL
3715 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3716 && extra->executable_jit != NULL)
3717 {
3718 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3719 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3720 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3721 }
3722 continue;
3723
3724 case 'L':
3725 getlist = 1;
3726 continue;
3727
3728 case 'M':
3729 find_match_limit = 1;
3730 continue;
3731
3732 case 'N':
3733 if ((options & PCRE_NOTEMPTY) != 0)
3734 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3735 else
3736 options |= PCRE_NOTEMPTY;
3737 continue;
3738
3739 case 'O':
3740 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3741 if (n > size_offsets_max)
3742 {
3743 size_offsets_max = n;
3744 free(offsets);
3745 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3746 if (offsets == NULL)
3747 {
3748 printf("** Failed to get %d bytes of memory for offsets vector\n",
3749 (int)(size_offsets_max * sizeof(int)));
3750 yield = 1;
3751 goto EXIT;
3752 }
3753 }
3754 use_size_offsets = n;
3755 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3756 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
3757 continue;
3758
3759 case 'P':
3760 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3761 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3762 continue;
3763
3764 case 'Q':
3765 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3766 if (extra == NULL)
3767 {
3768 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3769 extra->flags = 0;
3770 }
3771 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3772 extra->match_limit_recursion = n;
3773 continue;
3774
3775 case 'q':
3776 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3777 if (extra == NULL)
3778 {
3779 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3780 extra->flags = 0;
3781 }
3782 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3783 extra->match_limit = n;
3784 continue;
3785
3786 #if !defined NODFA
3787 case 'R':
3788 options |= PCRE_DFA_RESTART;
3789 continue;
3790 #endif
3791
3792 case 'S':
3793 show_malloc = 1;
3794 continue;
3795
3796 case 'Y':
3797 options |= PCRE_NO_START_OPTIMIZE;
3798 continue;
3799
3800 case 'Z':
3801 options |= PCRE_NOTEOL;
3802 continue;
3803
3804 case '?':
3805 options |= PCRE_NO_UTF8_CHECK;
3806 continue;
3807
3808 case '<':
3809 {
3810 int x = check_newline(p, outfile);
3811 if (x == 0) goto NEXT_DATA;
3812 options |= x;
3813 while (*p++ != '>');
3814 }
3815 continue;
3816 }
3817
3818 /* We now have a character value in c that may be greater than 255. In
3819 16-bit mode, we always convert characters to UTF-8 so that values greater
3820 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3821 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3822 mode must have come from \x{...} or octal constructs because values from
3823 \x.. get this far only in non-UTF mode. */
3824
3825 #if !defined NOUTF || defined SUPPORT_PCRE16
3826 if (use_pcre16 || use_utf)
3827 {
3828 pcre_uint8 buff8[8];
3829 int ii, utn;
3830 utn = ord2utf8(c, buff8);
3831 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3832 }
3833 else
3834 #endif
3835 {
3836 if (c > 255)
3837 {
3838 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3839 "and UTF-8 mode is not enabled.\n", c);
3840 fprintf(outfile, "** Truncation will probably give the wrong "
3841 "result.\n");
3842 }
3843 *q++ = c;
3844 }
3845 }
3846
3847 /* Reached end of subject string */
3848
3849 *q = 0;
3850 len = (int)(q - dbuffer);
3851
3852 /* Move the data to the end of the buffer so that a read over the end of
3853 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3854 we are using the POSIX interface, we must include the terminating zero. */
3855
3856 #if !defined NOPOSIX
3857 if (posix || do_posix)
3858 {
3859 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3860 bptr += buffer_size - len - 1;
3861 }
3862 else
3863 #endif
3864 {
3865 memmove(bptr + buffer_size - len, bptr, len);
3866 bptr += buffer_size - len;
3867 }
3868
3869 if ((all_use_dfa || use_dfa) && find_match_limit)
3870 {
3871 printf("**Match limit not relevant for DFA matching: ignored\n");
3872 find_match_limit = 0;
3873 }
3874
3875 /* Handle matching via the POSIX interface, which does not
3876 support timing or playing with the match limit or callout data. */
3877
3878 #if !defined NOPOSIX
3879 if (posix || do_posix)
3880 {
3881 int rc;
3882 int eflags = 0;
3883 regmatch_t *pmatch = NULL;
3884 if (use_size_offsets > 0)
3885 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3886 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3887 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3888 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3889
3890 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3891
3892 if (rc != 0)
3893 {
3894 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3895 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3896 }
3897 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3898 != 0)
3899 {
3900 fprintf(outfile, "Matched with REG_NOSUB\n");
3901 }
3902 else
3903 {
3904 size_t i;
3905 for (i = 0; i < (size_t)use_size_offsets; i++)
3906 {
3907 if (pmatch[i].rm_so >= 0)
3908 {
3909 fprintf(outfile, "%2d: ", (int)i);
3910 PCHARSV(dbuffer, pmatch[i].rm_so,
3911 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3912 fprintf(outfile, "\n");
3913 if (do_showcaprest || (i == 0 && do_showrest))
3914 {
3915 fprintf(outfile, "%2d+ ", (int)i);
3916 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3917 outfile);
3918 fprintf(outfile, "\n");
3919 }
3920 }
3921 }
3922 }
3923 free(pmatch);
3924 goto NEXT_DATA;
3925 }
3926
3927 #endif /* !defined NOPOSIX */
3928
3929 /* Handle matching via the native interface - repeats for /g and /G */
3930
3931 #ifdef SUPPORT_PCRE16
3932 if (use_pcre16)
3933 {
3934 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3935 switch(len)
3936 {
3937 case -1:
3938 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3939 "converted to UTF-16\n");
3940 goto NEXT_DATA;
3941
3942 case -2:
3943 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3944 "cannot be converted to UTF-16\n");
3945 goto NEXT_DATA;
3946
3947 case -3:
3948 fprintf(outfile, "**Failed: character value greater than 0xffff "
3949 "cannot be converted to 16-bit in non-UTF mode\n");
3950 goto NEXT_DATA;
3951
3952 default:
3953 break;
3954 }
3955 bptr = (pcre_uint8 *)buffer16;
3956 }
3957 #endif
3958
3959 /* Ensure that there is a JIT callback if we want to verify that JIT was
3960 actually used. If jit_stack == NULL, no stack has yet been assigned. */
3961
3962 if (verify_jit && jit_stack == NULL && extra != NULL)
3963 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3964
3965 for (;; gmatched++) /* Loop for /g or /G */
3966 {
3967 markptr = NULL;
3968 jit_was_used = FALSE;
3969
3970 if (timeitm > 0)
3971 {
3972 register int i;
3973 clock_t time_taken;
3974 clock_t start_time = clock();
3975
3976 #if !defined NODFA
3977 if (all_use_dfa || use_dfa)
3978 {
3979 if ((options & PCRE_DFA_RESTART) != 0)
3980 {
3981 fprintf(outfile, "Timing DFA restarts is not supported\n");
3982 break;
3983 }
3984 if (dfa_workspace == NULL)
3985 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3986 for (i = 0; i < timeitm; i++)
3987 {
3988 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3989 (options | g_notempty), use_offsets, use_size_offsets,
3990 dfa_workspace, DFA_WS_DIMENSION);
3991 }
3992 }
3993 else
3994 #endif
3995
3996 for (i = 0; i < timeitm; i++)
3997 {
3998 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3999 (options | g_notempty), use_offsets, use_size_offsets);
4000 }
4001 time_taken = clock() - start_time;
4002 fprintf(outfile, "Execute time %.4f milliseconds\n",
4003 (((double)time_taken * 1000.0) / (double)timeitm) /
4004 (double)CLOCKS_PER_SEC);
4005 }
4006
4007 /* If find_match_limit is set, we want to do repeated matches with
4008 varying limits in order to find the minimum value for the match limit and
4009 for the recursion limit. The match limits are relevant only to the normal
4010 running of pcre_exec(), so disable the JIT optimization. This makes it
4011 possible to run the same set of tests with and without JIT externally
4012 requested. */
4013
4014 if (find_match_limit)
4015 {
4016 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4017 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4018 extra->flags = 0;
4019
4020 (void)check_match_limit(re, extra, bptr, len, start_offset,
4021 options|g_notempty, use_offsets, use_size_offsets,
4022 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4023 PCRE_ERROR_MATCHLIMIT, "match()");
4024
4025 count = check_match_limit(re, extra, bptr, len, start_offset,
4026 options|g_notempty, use_offsets, use_size_offsets,
4027 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4028 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4029 }
4030
4031 /* If callout_data is set, use the interface with additional data */
4032
4033 else if (callout_data_set)
4034 {
4035 if (extra == NULL)
4036 {
4037 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4038 extra->flags = 0;
4039 }
4040 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4041 extra->callout_data = &callout_data;
4042 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4043 options | g_notempty, use_offsets, use_size_offsets);
4044 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4045 }
4046
4047 /* The normal case is just to do the match once, with the default
4048 value of match_limit. */
4049
4050 #if !defined NODFA
4051 else if (all_use_dfa || use_dfa)
4052 {
4053 if (dfa_workspace == NULL)
4054 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4055 if (dfa_matched++ == 0)
4056 dfa_workspace[0] = -1; /* To catch bad restart */
4057 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4058 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4059 DFA_WS_DIMENSION);
4060 if (count == 0)
4061 {
4062 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4063 count = use_size_offsets/2;
4064 }
4065 }
4066 #endif
4067
4068 else
4069 {
4070 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4071 options | g_notempty, use_offsets, use_size_offsets);
4072 if (count == 0)
4073 {
4074 fprintf(outfile, "Matched, but too many substrings\n");
4075 count = use_size_offsets/3;
4076 }
4077 }
4078
4079 /* Matched */
4080
4081 if (count >= 0)
4082 {
4083 int i, maxcount;
4084 void *cnptr, *gnptr;
4085
4086 #if !defined NODFA
4087 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4088 #endif
4089 maxcount = use_size_offsets/3;
4090
4091 /* This is a check against a lunatic return value. */
4092
4093 if (count > maxcount)
4094 {
4095 fprintf(outfile,
4096 "** PCRE error: returned count %d is too big for offset size %d\n",
4097 count, use_size_offsets);
4098 count = use_size_offsets/3;
4099 if (do_g || do_G)
4100 {
4101 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4102 do_g = do_G = FALSE; /* Break g/G loop */
4103 }
4104 }
4105
4106 /* do_allcaps requests showing of all captures in the pattern, to check
4107 unset ones at the end. */
4108
4109 if (do_allcaps)
4110 {
4111 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4112 goto SKIP_DATA;
4113 count++; /* Allow for full match */
4114 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4115 }
4116
4117 /* Output the captured substrings */
4118
4119 for (i = 0; i < count * 2; i += 2)
4120 {
4121 if (use_offsets[i] < 0)
4122 {
4123 if (use_offsets[i] != -1)
4124 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4125 use_offsets[i], i);
4126 if (use_offsets[i+1] != -1)
4127 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4128 use_offsets[i+1], i+1);
4129 fprintf(outfile, "%2d: <unset>\n", i/2);
4130 }
4131 else
4132 {
4133 fprintf(outfile, "%2d: ", i/2);
4134 PCHARSV(bptr, use_offsets[i],
4135 use_offsets[i+1] - use_offsets[i], outfile);
4136 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4137 fprintf(outfile, "\n");
4138 if (do_showcaprest || (i == 0 && do_showrest))
4139 {
4140 fprintf(outfile, "%2d+ ", i/2);
4141 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4142 outfile);
4143 fprintf(outfile, "\n");
4144 }
4145 }
4146 }
4147
4148 if (markptr != NULL)
4149 {
4150 fprintf(outfile, "MK: ");
4151 PCHARSV(markptr, 0, -1, outfile);
4152 fprintf(outfile, "\n");
4153 }
4154
4155 for (i = 0; i < 32; i++)
4156 {
4157 if ((copystrings & (1 << i)) != 0)
4158 {
4159 int rc;
4160 char copybuffer[256];
4161 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4162 copybuffer, sizeof(copybuffer));
4163 if (rc < 0)
4164 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4165 else
4166 {
4167 fprintf(outfile, "%2dC ", i);
4168 PCHARSV(copybuffer, 0, rc, outfile);
4169 fprintf(outfile, " (%d)\n", rc);
4170 }
4171 }
4172 }
4173
4174 cnptr = copynames;
4175 for (;;)
4176 {
4177 int rc;
4178 char copybuffer[256];
4179
4180 if (use_pcre16)
4181 {
4182 if (*(pcre_uint16 *)cnptr == 0) break;
4183 }
4184 else
4185 {
4186 if (*(pcre_uint8 *)cnptr == 0) break;
4187 }
4188
4189 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4190 cnptr, copybuffer, sizeof(copybuffer));
4191
4192 if (rc < 0)
4193 {
4194 fprintf(outfile, "copy substring ");
4195 PCHARSV(cnptr, 0, -1, outfile);
4196 fprintf(outfile, " failed %d\n", rc);
4197 }
4198 else
4199 {
4200 fprintf(outfile, " C ");
4201 PCHARSV(copybuffer, 0, rc, outfile);
4202 fprintf(outfile, " (%d) ", rc);
4203 PCHARSV(cnptr, 0, -1, outfile);
4204 putc('\n', outfile);
4205 }
4206
4207 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4208 }
4209
4210 for (i = 0; i < 32; i++)
4211 {
4212 if ((getstrings & (1 << i)) != 0)
4213 {
4214 int rc;
4215 const char *substring;
4216 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4217 if (rc < 0)
4218 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4219 else
4220 {
4221 fprintf(outfile, "%2dG ", i);
4222 PCHARSV(substring, 0, rc, outfile);
4223 fprintf(outfile, " (%d)\n", rc);
4224 PCRE_FREE_SUBSTRING(substring);
4225 }
4226 }
4227 }
4228
4229 gnptr = getnames;
4230 for (;;)
4231 {
4232 int rc;
4233 const char *substring;
4234
4235 if (use_pcre16)
4236 {
4237 if (*(pcre_uint16 *)gnptr == 0) break;
4238 }
4239 else
4240 {
4241 if (*(pcre_uint8 *)gnptr == 0) break;
4242 }
4243
4244 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4245 gnptr, &substring);
4246 if (rc < 0)
4247 {
4248 fprintf(outfile, "get substring ");
4249 PCHARSV(gnptr, 0, -1, outfile);
4250 fprintf(outfile, " failed %d\n", rc);
4251 }
4252 else
4253 {
4254 fprintf(outfile, " G ");
4255 PCHARSV(substring, 0, rc, outfile);
4256 fprintf(outfile, " (%d) ", rc);
4257 PCHARSV(gnptr, 0, -1, outfile);
4258 PCRE_FREE_SUBSTRING(substring);
4259 putc('\n', outfile);
4260 }
4261
4262 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4263 }
4264
4265 if (getlist)
4266 {
4267 int rc;
4268 const char **stringlist;
4269 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4270 if (rc < 0)
4271 fprintf(outfile, "get substring list failed %d\n", rc);
4272 else
4273 {
4274 for (i = 0; i < count; i++)
4275 {
4276 fprintf(outfile, "%2dL ", i);
4277 PCHARSV(stringlist[i], 0, -1, outfile);
4278 putc('\n', outfile);
4279 }
4280 if (stringlist[i] != NULL)
4281 fprintf(outfile, "string list not terminated by NULL\n");
4282 PCRE_FREE_SUBSTRING_LIST(stringlist);
4283 }
4284 }
4285 }
4286
4287 /* There was a partial match */
4288
4289 else if (count == PCRE_ERROR_PARTIAL)
4290 {
4291 if (markptr == NULL) fprintf(outfile, "Partial match");
4292 else
4293 {
4294 fprintf(outfile, "Partial match, mark=");
4295 PCHARSV(markptr, 0, -1, outfile);
4296 }
4297 if (use_size_offsets > 1)
4298 {
4299 fprintf(outfile, ": ");
4300 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4301 outfile);
4302 }
4303 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4304 fprintf(outfile, "\n");
4305 break; /* Out of the /g loop */
4306 }
4307
4308 /* Failed to match. If this is a /g or /G loop and we previously set
4309 g_notempty after a null match, this is not necessarily the end. We want
4310 to advance the start offset, and continue. We won't be at the end of the
4311 string - that was checked before setting g_notempty.
4312
4313 Complication arises in the case when the newline convention is "any",
4314 "crlf", or "anycrlf". If the previous match was at the end of a line
4315 terminated by CRLF, an advance of one character just passes the \r,
4316 whereas we should prefer the longer newline sequence, as does the code in
4317 pcre_exec(). Fudge the offset value to achieve this. We check for a
4318 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4319 find the default.
4320
4321 Otherwise, in the case of UTF-8 matching, the advance must be one
4322 character, not one byte. */
4323
4324 else
4325 {
4326 if (g_notempty != 0)
4327 {
4328 int onechar = 1;
4329 unsigned int obits = ((REAL_PCRE *)re)->options;
4330 use_offsets[0] = start_offset;
4331 if ((obits & PCRE_NEWLINE_BITS) == 0)
4332 {
4333 int d;
4334 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4335 /* Note that these values are always the ASCII ones, even in
4336 EBCDIC environments. CR = 13, NL = 10. */
4337 obits = (d == 13)? PCRE_NEWLINE_CR :
4338 (d == 10)? PCRE_NEWLINE_LF :
4339 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4340 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4341 (d == -1)? PCRE_NEWLINE_ANY : 0;
4342 }
4343 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4344 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4345 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4346 &&
4347 start_offset < len - 1 &&
4348 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4349 (use_pcre16?
4350 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4351 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4352 :
4353 bptr[start_offset] == '\r'
4354 && bptr[start_offset + 1] == '\n')
4355 #elif defined SUPPORT_PCRE16
4356 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4357 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4358 #else
4359 bptr[start_offset] == '\r'
4360 && bptr[start_offset + 1] == '\n'
4361 #endif
4362 )
4363 onechar++;
4364 else if (use_utf)
4365 {
4366 while (start_offset + onechar < len)
4367 {
4368 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4369 onechar++;
4370 }
4371 }
4372 use_offsets[1] = start_offset + onechar;
4373 }
4374 else
4375 {
4376 switch(count)
4377 {
4378 case PCRE_ERROR_NOMATCH:
4379 if (gmatched == 0)
4380 {
4381 if (markptr == NULL)
4382 {
4383 fprintf(outfile, "No match");
4384 }
4385 else
4386 {
4387 fprintf(outfile, "No match, mark = ");
4388 PCHARSV(markptr, 0, -1, outfile);
4389 }
4390 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4391 putc('\n', outfile);
4392 }
4393 break;
4394
4395 case PCRE_ERROR_BADUTF8:
4396 case PCRE_ERROR_SHORTUTF8:
4397 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4398 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4399 use_pcre16? "16" : "8");
4400 if (use_size_offsets >= 2)
4401 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4402 use_offsets[1]);
4403 fprintf(outfile, "\n");
4404 break;
4405
4406 case PCRE_ERROR_BADUTF8_OFFSET:
4407 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4408 use_pcre16? "16" : "8");
4409 break;
4410
4411 default:
4412 if (count < 0 &&
4413 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4414 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4415 else
4416 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4417 break;
4418 }
4419
4420 break; /* Out of the /g loop */
4421 }
4422 }
4423
4424 /* If not /g or /G we are done */
4425
4426 if (!do_g && !do_G) break;
4427
4428 /* If we have matched an empty string, first check to see if we are at
4429 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4430 Perl's /g option does. This turns out to be rather cunning. First we set
4431 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4432 same point. If this fails (picked up above) we advance to the next
4433 character. */
4434
4435 g_notempty = 0;
4436
4437 if (use_offsets[0] == use_offsets[1])
4438 {
4439 if (use_offsets[0] == len) break;
4440 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4441 }
4442
4443 /* For /g, update the start offset, leaving the rest alone */
4444
4445 if (do_g) start_offset = use_offsets[1];
4446
4447 /* For /G, update the pointer and length */
4448
4449 else
4450 {
4451 bptr += use_offsets[1] * CHAR_SIZE;
4452 len -= use_offsets[1];
4453 }
4454 } /* End of loop for /g and /G */
4455
4456 NEXT_DATA: continue;
4457 } /* End of loop for data lines */
4458
4459 CONTINUE:
4460
4461 #if !defined NOPOSIX
4462 if (posix || do_posix) regfree(&preg);
4463 #endif
4464
4465 if (re != NULL) new_free(re);
4466 if (extra != NULL)
4467 {
4468 PCRE_FREE_STUDY(extra);
4469 }
4470 if (locale_set)
4471 {
4472 new_free((void *)tables);
4473 setlocale(LC_CTYPE, "C");
4474 locale_set = 0;
4475 }
4476 if (jit_stack != NULL)
4477 {
4478 PCRE_JIT_STACK_FREE(jit_stack);
4479 jit_stack = NULL;
4480 }
4481 }
4482
4483 if (infile == stdin) fprintf(outfile, "\n");
4484
4485 EXIT:
4486
4487 if (infile != NULL && infile != stdin) fclose(infile);
4488 if (outfile != NULL && outfile != stdout) fclose(outfile);
4489
4490 free(buffer);
4491 free(dbuffer);
4492 free(pbuffer);
4493 free(offsets);
4494
4495 #ifdef SUPPORT_PCRE16
4496 if (buffer16 != NULL) free(buffer16);
4497 #endif
4498
4499 #if !defined NODFA
4500 if (dfa_workspace != NULL)
4501 free(dfa_workspace);
4502 #endif
4503
4504 return yield;
4505 }
4506
4507 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5