/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1017 - (show annotations)
Sun Aug 26 16:30:50 2012 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 134022 byte(s)
Error occurred while calculating annotation data.
Tidies to pcretest to ensure freeing memory and closing files.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
/* Platform configuration. Each branch defines INPUT_MODE and OUTPUT_MODE, the
mode strings used when opening the input and output files. The Windows branch
additionally maps isatty and fileno to their underscore-prefixed names when
they are not already defined as macros, and maps _setmode() onto setmode()
for Borland compilers. The non-Windows branch pulls in the headers needed for
setrlimit(). */
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #define INPUT_MODE "rb"
116 #define OUTPUT_MODE "wb"
117 #endif
118
/* PRIV(name) expands to its argument unchanged in this program.
NOTE(review): presumably the PCRE sources included below (pcre_internal.h,
pcre_tables.c) wrap library-private identifiers in PRIV() and the library
build adds a prefix there; defining it as the identity here would then leave
those names unprefixed - confirm against pcre_internal.h. */
119 #define PRIV(name) name
120
121 /* We have to include pcre_internal.h because we need the internal info for
122 displaying the results of pcre_study() and we also need to know about the
123 internal macros, structures, and other internal data values; pcretest has
124 "inside information" compared to a program that strictly follows the PCRE API.
125
126 Although pcre_internal.h does itself include pcre.h, we explicitly include it
127 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
128 appropriately for an application, not for building PCRE. */
129
130 #include "pcre.h"
131
132 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133 /* Configure internal macros to 16 bit mode. */
134 #define COMPILE_PCRE16
135 #endif
136
137 #include "pcre_internal.h"
138
139 /* The pcre_printint() function, which prints the internal form of a compiled
140 regex, is held in a separate file so that (a) it can be compiled in either
141 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142 when that is compiled in debug mode. */
143
/* Prototypes for the pattern-printing functions, one per supported mode. Each
takes the compiled pattern, the stream to print to, and a flag selecting
whether lengths are printed. Note that the 16-bit variant is declared here
with a plain "pcre *" first argument, not a 16-bit-specific pattern type. */
144 #ifdef SUPPORT_PCRE8
145 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146 #endif
147 #ifdef SUPPORT_PCRE16
148 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149 #endif
150
151 /* We need access to some of the data tables that PCRE uses. So as not to have
152 to keep two copies, we include the source file here, changing the names of the
153 external symbols to prevent clashes. */
154
155 #define PCRE_INCLUDED
156
157 #include "pcre_tables.c"
158
159 /* The definition of the macro PRINTABLE, which determines whether to print an
160 output character as-is or as a hex value when showing compiled patterns, is
161 the same as in the printint.src file. We use it here in cases when the locale
162 has not been explicitly changed, so as to get consistent output from systems
163 that differ in their output from isprint() even in the "C" locale. */
164
/* PRINTABLE(c) is non-zero for codes 64..254 in an EBCDIC build and for codes
32..126 otherwise. PRINTOK(c) defers to isprint() once locale_set is non-zero
and uses PRINTABLE() otherwise. Both macros may evaluate their argument more
than once, so the argument must be free of side effects. When locale_set is
non-zero the argument is passed straight to isprint(), so it must be a value
that isprint() accepts (representable as unsigned char, or EOF). */
165 #ifdef EBCDIC
166 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167 #else
168 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169 #endif
170
171 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
172
173 /* Posix support is disabled in 16 bit only mode. */
/* Force NOPOSIX when only the 16-bit library is supported, unless it has
already been defined. */
174 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175 #define NOPOSIX
176 #endif
177
178 /* It is possible to compile this test program without including support for
179 testing the POSIX interface, though this is not available via the standard
180 Makefile. */
181
182 #if !defined NOPOSIX
183 #include "pcreposix.h"
184 #endif
185
186 /* It is also possible, originally for the benefit of a version that was
187 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188 NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
189 automatically cut out the UTF support if PCRE is built without it. */
190
191 #ifndef SUPPORT_UTF
192 #ifndef NOUTF
193 #define NOUTF
194 #endif
195 #endif
196
197 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199 only from one place and is handled differently). I couldn't dream up any way of
200 using a single macro to do this in a generic way, because of the many different
201 argument requirements. We know that at least one of SUPPORT_PCRE8 and
202 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203 use these in the definitions of generic macros.
204
205 **** Special note about the PCHARSxxx macros: the address of the string to be
206 printed is always given as two arguments: a base address followed by an offset.
207 The base address is cast to the correct data size for 8 or 16 bit data; the
208 offset is in units of this size. If the string were given as base+offset in one
209 argument, the casting might be incorrectly applied. */
210
/* 8-bit mode wrappers. Each macro forwards to the 8-bit library function (or
local helper) of the matching name, casting string arguments to char * where
shown. Where the first macro parameter is lv, p, re, rc, count, n or extra,
the macro expands to an assignment to that parameter, so it must be an
lvalue. The expansions carry no trailing semicolon; the caller supplies it. */
211 #ifdef SUPPORT_PCRE8
212
213 #define PCHARS8(lv, p, offset, len, f) \
214 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
215
216 #define PCHARSV8(p, offset, len, f) \
217 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
218
/* cn16 is accepted so the parameter list matches the 16-bit variant; it is
not used in the expansion. */
219 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220 p = read_capture_name8(p, cn8, re)
221
222 #define STRLEN8(p) ((int)strlen((char *)p))
223
224 #define SET_PCRE_CALLOUT8(callout) \
225 pcre_callout = callout
226
227 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228 pcre_assign_jit_stack(extra, callback, userdata)
229
230 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231 re = pcre_compile((char *)pat, options, error, erroffset, tables)
232
233 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234 namesptr, cbuffer, size) \
235 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236 (char *)namesptr, cbuffer, size)
237
238 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240
241 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242 offsets, size_offsets, workspace, size_workspace) \
243 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244 offsets, size_offsets, workspace, size_workspace)
245
246 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247 offsets, size_offsets) \
248 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249 offsets, size_offsets)
250
251 #define PCRE_FREE_STUDY8(extra) \
252 pcre_free_study(extra)
253
254 #define PCRE_FREE_SUBSTRING8(substring) \
255 pcre_free_substring(substring)
256
257 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258 pcre_free_substring_list(listptr)
259
260 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261 getnamesptr, subsptr) \
262 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263 (char *)getnamesptr, subsptr)
264
/* NOTE(review): the second parameter is named rc but the expansion never uses
it; instead it refers to an identifier "re" that must exist in the caller's
scope. This looks like a parameter-name slip (re intended) - confirm against
the call sites before relying on the second argument. */
265 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
266 n = pcre_get_stringnumber(re, (char *)ptr)
267
268 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270
271 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273
274 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276
277 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278 pcre_printint(re, outfile, debug_lengths)
279
280 #define PCRE_STUDY8(extra, re, options, error) \
281 extra = pcre_study(re, options, error)
282
283 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284 pcre_jit_stack_alloc(startsize, maxsize)
285
286 #define PCRE_JIT_STACK_FREE8(stack) \
287 pcre_jit_stack_free(stack)
288
289 #endif /* SUPPORT_PCRE8 */
290
291 /* -----------------------------------------------------------*/
292
/* 16-bit mode wrappers, parallel to the 8-bit set. Each macro forwards to the
pcre16_xxx function (or local 16-bit helper), casting the 8-bit-typed
arguments used throughout this file to the 16-bit types the library expects,
and casting returned pointers back to the 8-bit types (pcre *, pcre_extra *,
pcre_jit_stack *). The expansions carry no trailing semicolon. */
293 #ifdef SUPPORT_PCRE16
294
295 #define PCHARS16(lv, p, offset, len, f) \
296 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
297
298 #define PCHARSV16(p, offset, len, f) \
299 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
300
/* cn8 is accepted so the parameter list matches the 8-bit variant; it is not
used in the expansion. */
301 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302 p = read_capture_name16(p, cn16, re)
303
304 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305
306 #define SET_PCRE_CALLOUT16(callout) \
307 pcre16_callout = (int (*)(pcre16_callout_block *))callout
308
309 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310 pcre16_assign_jit_stack((pcre16_extra *)extra, \
311 (pcre16_jit_callback)callback, userdata)
312
313 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315 tables)
316
/* In the two copy macros the size argument is halved before being passed on.
NOTE(review): presumably callers give the buffer size in bytes and the
library wants it in 16-bit units - confirm. "size" is not parenthesized in
the expansion, so an argument such as a + b would be divided incorrectly. */
317 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
318 namesptr, cbuffer, size) \
319 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
320 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
321
322 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
323 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
324 (PCRE_UCHAR16 *)cbuffer, size/2)
325
326 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327 offsets, size_offsets, workspace, size_workspace) \
328 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330 workspace, size_workspace)
331
332 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333 offsets, size_offsets) \
334 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335 len, start_offset, options, offsets, size_offsets)
336
337 #define PCRE_FREE_STUDY16(extra) \
338 pcre16_free_study((pcre16_extra *)extra)
339
340 #define PCRE_FREE_SUBSTRING16(substring) \
341 pcre16_free_substring((PCRE_SPTR16)substring)
342
343 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345
346 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347 getnamesptr, subsptr) \
348 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350
/* NOTE(review): the second parameter is named rc but the expansion never uses
it; it refers to an identifier "re" taken from the caller's scope, and passes
it without the (pcre16 *) cast that the other wrappers apply. This looks like
a parameter-name slip - confirm against the call sites. */
351 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
352 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
353
354 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356 (PCRE_SPTR16 *)(void*)subsptr)
357
358 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360 (PCRE_SPTR16 **)(void*)listptr)
361
362 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364 tables)
365
366 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367 pcre16_printint(re, outfile, debug_lengths)
368
369 #define PCRE_STUDY16(extra, re, options, error) \
370 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371
372 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374
375 #define PCRE_JIT_STACK_FREE16(stack) \
376 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377
378 #endif /* SUPPORT_PCRE16 */
379
380
381 /* ----- Both modes are supported; a runtime test is needed, except for
382 pcre_config(), and the JIT stack functions, when it doesn't matter which
383 version is called. ----- */
384
/* Generic wrappers for a build that supports both widths. Each one selects the
16-bit or 8-bit wrapper at run time from the use_pcre16 flag.

Two shapes are used. CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and
PCRE_MAKETABLES are conditional expressions and yield a value. All the others
(apart from PCRE_CONFIG) expand to an unbraced "if (use_pcre16) ...; else ..."
statement with no trailing semicolon: they can only be used where a statement
is allowed, and the caller supplies the final semicolon.

PCRE_CONFIG always names the 8-bit pcre_config here.

NOTE(review): the comment preceding this section says the JIT stack functions
need no run-time test, yet PCRE_JIT_STACK_ALLOC and PCRE_JIT_STACK_FREE below
do test use_pcre16 - one of the two is out of date. */
385 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386
387 #define CHAR_SIZE (use_pcre16? 2:1)
388
389 #define PCHARS(lv, p, offset, len, f) \
390 if (use_pcre16) \
391 PCHARS16(lv, p, offset, len, f); \
392 else \
393 PCHARS8(lv, p, offset, len, f)
394
395 #define PCHARSV(p, offset, len, f) \
396 if (use_pcre16) \
397 PCHARSV16(p, offset, len, f); \
398 else \
399 PCHARSV8(p, offset, len, f)
400
401 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
402 if (use_pcre16) \
403 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
404 else \
405 READ_CAPTURE_NAME8(p, cn8, cn16, re)
406
407 #define SET_PCRE_CALLOUT(callout) \
408 if (use_pcre16) \
409 SET_PCRE_CALLOUT16(callout); \
410 else \
411 SET_PCRE_CALLOUT8(callout)
412
413 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
414
415 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
416 if (use_pcre16) \
417 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
418 else \
419 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
420
421 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
422 if (use_pcre16) \
423 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
424 else \
425 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
426
427 #define PCRE_CONFIG pcre_config
428
429 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
430 namesptr, cbuffer, size) \
431 if (use_pcre16) \
432 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
433 namesptr, cbuffer, size); \
434 else \
435 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
436 namesptr, cbuffer, size)
437
438 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
439 if (use_pcre16) \
440 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
441 else \
442 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
443
444 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
445 offsets, size_offsets, workspace, size_workspace) \
446 if (use_pcre16) \
447 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
448 offsets, size_offsets, workspace, size_workspace); \
449 else \
450 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
451 offsets, size_offsets, workspace, size_workspace)
452
453 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
454 offsets, size_offsets) \
455 if (use_pcre16) \
456 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
457 offsets, size_offsets); \
458 else \
459 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
460 offsets, size_offsets)
461
462 #define PCRE_FREE_STUDY(extra) \
463 if (use_pcre16) \
464 PCRE_FREE_STUDY16(extra); \
465 else \
466 PCRE_FREE_STUDY8(extra)
467
468 #define PCRE_FREE_SUBSTRING(substring) \
469 if (use_pcre16) \
470 PCRE_FREE_SUBSTRING16(substring); \
471 else \
472 PCRE_FREE_SUBSTRING8(substring)
473
474 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
475 if (use_pcre16) \
476 PCRE_FREE_SUBSTRING_LIST16(listptr); \
477 else \
478 PCRE_FREE_SUBSTRING_LIST8(listptr)
479
480 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
481 getnamesptr, subsptr) \
482 if (use_pcre16) \
483 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
484 getnamesptr, subsptr); \
485 else \
486 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
487 getnamesptr, subsptr)
488
489 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
490 if (use_pcre16) \
491 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
492 else \
493 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
494
495 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
496 if (use_pcre16) \
497 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
498 else \
499 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
500
501 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
502 if (use_pcre16) \
503 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
504 else \
505 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
506
507 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
508 (use_pcre16 ? \
509 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
510 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
511
512 #define PCRE_JIT_STACK_FREE(stack) \
513 if (use_pcre16) \
514 PCRE_JIT_STACK_FREE16(stack); \
515 else \
516 PCRE_JIT_STACK_FREE8(stack)
517
518 #define PCRE_MAKETABLES \
519 (use_pcre16? pcre16_maketables() : pcre_maketables())
520
521 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
522 if (use_pcre16) \
523 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
524 else \
525 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
526
527 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
528 if (use_pcre16) \
529 PCRE_PRINTINT16(re, outfile, debug_lengths); \
530 else \
531 PCRE_PRINTINT8(re, outfile, debug_lengths)
532
533 #define PCRE_STUDY(extra, re, options, error) \
534 if (use_pcre16) \
535 PCRE_STUDY16(extra, re, options, error); \
536 else \
537 PCRE_STUDY8(extra, re, options, error)
538
539 /* ----- Only 8-bit mode is supported ----- */
540
/* With only 8-bit support each generic name is a direct alias for its 8-bit
wrapper, so no run-time test of use_pcre16 is involved. CHAR_SIZE is the
constant 1, and PCRE_MAKETABLES expands to a call (parentheses included). */
541 #elif defined SUPPORT_PCRE8
542 #define CHAR_SIZE 1
543 #define PCHARS PCHARS8
544 #define PCHARSV PCHARSV8
545 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
546 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
547 #define STRLEN STRLEN8
548 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
549 #define PCRE_COMPILE PCRE_COMPILE8
550 #define PCRE_CONFIG pcre_config
551 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
553 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
554 #define PCRE_EXEC PCRE_EXEC8
555 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
556 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
557 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
558 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
559 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
560 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
561 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
562 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
563 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
564 #define PCRE_MAKETABLES pcre_maketables()
565 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566 #define PCRE_PRINTINT PCRE_PRINTINT8
567 #define PCRE_STUDY PCRE_STUDY8
568
569 /* ----- Only 16-bit mode is supported ----- */
570
/* With only 16-bit support each generic name is a direct alias for its 16-bit
wrapper. CHAR_SIZE is the constant 2, PCRE_CONFIG names pcre16_config, and
PCRE_MAKETABLES expands to a call (parentheses included). The closing #endif
ends the three-way mode selection. */
571 #else
572 #define CHAR_SIZE 2
573 #define PCHARS PCHARS16
574 #define PCHARSV PCHARSV16
575 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
576 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
577 #define STRLEN STRLEN16
578 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
579 #define PCRE_COMPILE PCRE_COMPILE16
580 #define PCRE_CONFIG pcre16_config
581 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
583 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
584 #define PCRE_EXEC PCRE_EXEC16
585 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
586 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
587 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
588 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
589 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
590 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
591 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
592 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
593 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
594 #define PCRE_MAKETABLES pcre16_maketables()
595 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596 #define PCRE_PRINTINT PCRE_PRINTINT16
597 #define PCRE_STUDY PCRE_STUDY16
598 #endif
599
600 /* ----- End of mode-specific function call macros ----- */
601
602
603 /* Other parameters */
604
/* Supply CLOCKS_PER_SEC when the headers included so far have not defined it:
use CLK_TCK if that exists, otherwise fall back to 100. */
605 #ifndef CLOCKS_PER_SEC
606 #ifdef CLK_TCK
607 #define CLOCKS_PER_SEC CLK_TCK
608 #else
609 #define CLOCKS_PER_SEC 100
610 #endif
611 #endif
612
/* DFA_WS_DIMENSION is only defined when the DFA interface is compiled in.
NOTE(review): presumably the number of elements in the workspace passed to
the DFA matcher - confirm at the point of use. */
613 #if !defined NODFA
614 #define DFA_WS_DIMENSION 1000
615 #endif
616
617 /* This is the default loop count for timing. */
618
619 #define LOOPREPEAT 500000
620
621 /* Static variables */
622
/* File-scope state shared by the rest of the program. Only log_store,
locale_set, first_gotten_store and last_callout_mark carry explicit
initializers; the others rely on the zero initialization that C gives objects
with static storage duration. locale_set is the flag tested by PRINTOK() to
choose between isprint() and the fixed PRINTABLE() range.
NOTE(review): the roles of the remaining variables are set by code outside
this section - consult their points of use rather than inferring from names. */
623 static FILE *outfile;
624 static int log_store = 0;
625 static int callout_count;
626 static int callout_extra;
627 static int callout_fail_count;
628 static int callout_fail_id;
629 static int debug_lengths;
630 static int first_callout;
631 static int jit_was_used;
632 static int locale_set = 0;
633 static int show_malloc;
634 static int use_utf;
635 static size_t gotten_store;
636 static size_t first_gotten_store = 0;
637 static const unsigned char *last_callout_mark = NULL;
638
639 /* The buffers grow automatically if very long input lines are encountered. */
640
/* Current buffer size and the three 8-bit buffers, all initially unallocated
(NULL). NOTE(review): presumably all three are kept at buffer_size bytes and
grown together; the allocation code is outside this section - confirm. */
641 static int buffer_size = 50000;
642 static pcre_uint8 *buffer = NULL;
643 static pcre_uint8 *dbuffer = NULL;
644 static pcre_uint8 *pbuffer = NULL;
645
646 /* Another buffer is needed for translation to 16-bit character strings. It
647 will be obtained and extended as required. */
648
/* 16-bit support data: the translation buffer (initially unallocated, size 0)
and OP_lengths16, a table initialized from the OP_LENGTHS macro. When 8-bit
support is also compiled in, LINK_SIZE and IMM2_SIZE are redefined first so
that OP_LENGTHS is expanded with the 16-bit values. */
649 #ifdef SUPPORT_PCRE16
650 static int buffer16_size = 0;
651 static pcre_uint16 *buffer16 = NULL;
652
653 #ifdef SUPPORT_PCRE8
654
655 /* We need the table of operator lengths that is used for 16-bit compiling, in
656 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658 appropriately for the 16-bit world. Just as a safety check, make sure that
659 COMPILE_PCRE16 is *not* set. */
660
661 #ifdef COMPILE_PCRE16
662 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663 #endif
664
/* Rescale LINK_SIZE: 2 becomes 1, and 3 or 4 becomes 2. Any other incoming
value stops the build. */
665 #if LINK_SIZE == 2
666 #undef LINK_SIZE
667 #define LINK_SIZE 1
668 #elif LINK_SIZE == 3 || LINK_SIZE == 4
669 #undef LINK_SIZE
670 #define LINK_SIZE 2
671 #else
672 #error LINK_SIZE must be either 2, 3, or 4
673 #endif
674
/* NOTE(review): IMM2_SIZE is forced to 1 here, presumably because a 2-byte
immediate occupies a single 16-bit unit - confirm against pcre_internal.h. */
675 #undef IMM2_SIZE
676 #define IMM2_SIZE 1
677
678 #endif /* SUPPORT_PCRE8 */
679
680 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681 #endif /* SUPPORT_PCRE16 */
682
683 /* If we have 8-bit support, default use_pcre16 to false; if there is also
684 16-bit support, it can be changed by an option. If there is no 8-bit support,
685 there must be 16-bit support, so default it to 1. */
686
/* Run-time mode flag tested by the generic wrapper macros: 0 selects the
8-bit library, non-zero the 16-bit library. It starts at 0 whenever 8-bit
support is compiled in, and at 1 otherwise. */
687 #ifdef SUPPORT_PCRE8
688 static int use_pcre16 = 0;
689 #else
690 static int use_pcre16 = 1;
691 #endif
692
693 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694
/* Seven entries covering every non-empty combination of the three
PCRE_STUDY_JIT_xxx flags. Treating (index + 1) as a 3-bit mask, bit 0 selects
JIT_COMPILE, bit 1 JIT_PARTIAL_SOFT_COMPILE and bit 2
JIT_PARTIAL_HARD_COMPILE. NOTE(review): presumably indexed with the option
digit minus '1' - confirm at the lookup site. */
695 static int jit_study_bits[] =
696 {
697 PCRE_STUDY_JIT_COMPILE,
698 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
704 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705 };
706
707 /* Textual explanations for runtime error codes */
708
/* Message table with 31 entries (indices 0 to 30). NULL entries mark codes
that have no generic text because, per the inline notes, they are handled
specially or never returned; any lookup must therefore check for NULL before
printing. NOTE(review): the table appears to be indexed by the negated
PCRE_ERROR_xxx value (index 1 being NOMATCH) - confirm against pcre.h and the
lookup site, and keep the order in step with those codes. */
709 static const char *errtexts[] = {
710 NULL, /* 0 is no error */
711 NULL, /* NOMATCH is handled specially */
712 "NULL argument passed",
713 "bad option value",
714 "magic number missing",
715 "unknown opcode - pattern overwritten?",
716 "no more memory",
717 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
718 "match limit exceeded",
719 "callout error code",
720 NULL, /* BADUTF8/16 is handled specially */
721 NULL, /* BADUTF8/16 offset is handled specially */
722 NULL, /* PARTIAL is handled specially */
723 "not used - internal error",
724 "internal error - pattern overwritten?",
725 "bad count value",
726 "item unsupported for DFA matching",
727 "backreference condition or recursion test not supported for DFA matching",
728 "match limit not supported for DFA matching",
729 "workspace size exceeded in DFA matching",
730 "too much recursion for DFA matching",
731 "recursion limit exceeded",
732 "not used - internal error",
733 "invalid combination of newline options",
734 "bad offset value",
735 NULL, /* SHORTUTF8/16 is handled specially */
736 "nested recursion at the same subject position",
737 "JIT stack limit reached",
738 "pattern compiled in wrong mode: 8-bit/16-bit error",
739 "pattern compiled with other endianness",
740 "invalid data in workspace for DFA restart"
741 };
742
743
744 /*************************************************
745 * Alternate character tables *
746 *************************************************/
747
748 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
749 using the default tables of the library. However, the T option can be used to
750 select alternate sets of tables, for different kinds of testing. Note also that
751 the L (locale) option also adjusts the tables. */
752
753 /* This is the set of tables distributed as default with PCRE. It recognizes
754 only ASCII characters. */
755
756 static const pcre_uint8 tables0[] = {
757
758 /* This table is a lower casing table. */
759
760 0, 1, 2, 3, 4, 5, 6, 7,
761 8, 9, 10, 11, 12, 13, 14, 15,
762 16, 17, 18, 19, 20, 21, 22, 23,
763 24, 25, 26, 27, 28, 29, 30, 31,
764 32, 33, 34, 35, 36, 37, 38, 39,
765 40, 41, 42, 43, 44, 45, 46, 47,
766 48, 49, 50, 51, 52, 53, 54, 55,
767 56, 57, 58, 59, 60, 61, 62, 63,
768 64, 97, 98, 99,100,101,102,103,
769 104,105,106,107,108,109,110,111,
770 112,113,114,115,116,117,118,119,
771 120,121,122, 91, 92, 93, 94, 95,
772 96, 97, 98, 99,100,101,102,103,
773 104,105,106,107,108,109,110,111,
774 112,113,114,115,116,117,118,119,
775 120,121,122,123,124,125,126,127,
776 128,129,130,131,132,133,134,135,
777 136,137,138,139,140,141,142,143,
778 144,145,146,147,148,149,150,151,
779 152,153,154,155,156,157,158,159,
780 160,161,162,163,164,165,166,167,
781 168,169,170,171,172,173,174,175,
782 176,177,178,179,180,181,182,183,
783 184,185,186,187,188,189,190,191,
784 192,193,194,195,196,197,198,199,
785 200,201,202,203,204,205,206,207,
786 208,209,210,211,212,213,214,215,
787 216,217,218,219,220,221,222,223,
788 224,225,226,227,228,229,230,231,
789 232,233,234,235,236,237,238,239,
790 240,241,242,243,244,245,246,247,
791 248,249,250,251,252,253,254,255,
792
793 /* This table is a case flipping table. */
794
795 0, 1, 2, 3, 4, 5, 6, 7,
796 8, 9, 10, 11, 12, 13, 14, 15,
797 16, 17, 18, 19, 20, 21, 22, 23,
798 24, 25, 26, 27, 28, 29, 30, 31,
799 32, 33, 34, 35, 36, 37, 38, 39,
800 40, 41, 42, 43, 44, 45, 46, 47,
801 48, 49, 50, 51, 52, 53, 54, 55,
802 56, 57, 58, 59, 60, 61, 62, 63,
803 64, 97, 98, 99,100,101,102,103,
804 104,105,106,107,108,109,110,111,
805 112,113,114,115,116,117,118,119,
806 120,121,122, 91, 92, 93, 94, 95,
807 96, 65, 66, 67, 68, 69, 70, 71,
808 72, 73, 74, 75, 76, 77, 78, 79,
809 80, 81, 82, 83, 84, 85, 86, 87,
810 88, 89, 90,123,124,125,126,127,
811 128,129,130,131,132,133,134,135,
812 136,137,138,139,140,141,142,143,
813 144,145,146,147,148,149,150,151,
814 152,153,154,155,156,157,158,159,
815 160,161,162,163,164,165,166,167,
816 168,169,170,171,172,173,174,175,
817 176,177,178,179,180,181,182,183,
818 184,185,186,187,188,189,190,191,
819 192,193,194,195,196,197,198,199,
820 200,201,202,203,204,205,206,207,
821 208,209,210,211,212,213,214,215,
822 216,217,218,219,220,221,222,223,
823 224,225,226,227,228,229,230,231,
824 232,233,234,235,236,237,238,239,
825 240,241,242,243,244,245,246,247,
826 248,249,250,251,252,253,254,255,
827
828 /* This table contains bit maps for various character classes. Each map is 32
829 bytes long and the bits run from the least significant end of each byte. The
830 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
831 graph, print, punct, and cntrl. Other classes are built from combinations. */
832
833 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837
838 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
839 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842
843 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857
858 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
859 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862
863 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
864 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
865 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
867
868 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
869 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
870 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
872
873 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
874 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
875 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
877
878 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
882
883 /* This table identifies various classes of character by individual bits:
884 0x01 white space character
885 0x02 letter
886 0x04 decimal digit
887 0x08 hexadecimal digit
888 0x10 alphanumeric or '_'
889 0x80 regular expression metacharacter or binary zero
890 */
891
892 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
893 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
894 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
895 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
896 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
897 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
898 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
899 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
900 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
901 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
902 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
903 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
904 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
905 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
906 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
907 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
908 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
909 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
910 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
911 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
912 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
913 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
914 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
915 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
924
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. */
928
static const pcre_uint8 tables1[] = {

/* First 256 bytes: a lower-casing table. Entries are the identity except that
65-90 map to 97-122 and 192-222 map to 224-254 (215 maps to itself). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Next 256 bytes: a case-flipping table. As well as 65-90 <-> 97-122, the
ranges 192-222 and 224-254 are swapped with each other (215, 223, 247 and 255
map to themselves). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Next 320 bytes: ten 32-byte bit maps. NOTE(review): presumably these are
the character class maps in the same order as in the default tables earlier in
this file (space, xdigit, digit, upper, lower, word, graph, print, punct,
cntrl) - confirm against the table-building code. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Final 256 bytes: one flag byte per character value. NOTE(review): presumably
the bits have the same meanings as in the default tables earlier in this file
(0x01 white space, 0x02 letter, 0x04 decimal digit, 0x08 hexadecimal digit,
0x10 alphanumeric or '_', 0x80 metacharacter or binary zero) - confirm. Unlike
the default tables, many values above 127 carry non-zero flags here. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1067
1068
1069
1070
1071 #ifndef HAVE_STRERROR
1072 /*************************************************
1073 * Provide strerror() for non-ANSI libraries *
1074 *************************************************/
1075
1076 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1077 in their libraries, but can provide the same facility by this simple
1078 alternative function. */
1079
extern int sys_nerr;
extern char *sys_errlist[];

/* Look the error number up in sys_errlist; numbers outside 0..sys_nerr-1 get
a fixed fallback text. */

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr)
  {
  return sys_errlist[n];
  }
return "unknown error number";
}
1089 #endif /* HAVE_STRERROR */
1090
1091
1092 /*************************************************
1093 * JIT memory callback *
1094 *************************************************/
1095
1096 static pcre_jit_stack* jit_callback(void *arg)
1097 {
1098 jit_was_used = TRUE;
1099 return (pcre_jit_stack *)arg;
1100 }
1101
1102
1103 #if !defined NOUTF || defined SUPPORT_PCRE16
1104 /*************************************************
1105 * Convert UTF-8 string to value *
1106 *************************************************/
1107
1108 /* This function takes one or more bytes that represents a UTF-8 character,
1109 and returns the value of the character.
1110
1111 Argument:
1112 utf8bytes a pointer to the byte vector
1113 vptr a pointer to an int to receive the value
1114
1115 Returns: > 0 => the number of bytes consumed
1116 -6 to 0 => malformed UTF-8 character at offset = (-return)
1117 */
1118
1119 static int
1120 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1121 {
1122 int c = *utf8bytes++;
1123 int d = c;
1124 int i, j, s;
1125
1126 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1127 {
1128 if ((d & 0x80) == 0) break;
1129 d <<= 1;
1130 }
1131
1132 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1133 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1134
1135 /* i now has a value in the range 1-5 */
1136
1137 s = 6*i;
1138 d = (c & utf8_table3[i]) << s;
1139
1140 for (j = 0; j < i; j++)
1141 {
1142 c = *utf8bytes++;
1143 if ((c & 0xc0) != 0x80) return -(j+1);
1144 s -= 6;
1145 d |= (c & 0x3f) << s;
1146 }
1147
1148 /* Check that encoding was the correct unique one */
1149
1150 for (j = 0; j < utf8_table1_size; j++)
1151 if (d <= utf8_table1[j]) break;
1152 if (j != i) return -(i+1);
1153
1154 /* Valid value */
1155
1156 *vptr = d;
1157 return i+1;
1158 }
1159 #endif /* NOUTF || SUPPORT_PCRE16 */
1160
1161
1162
1163 #if !defined NOUTF || defined SUPPORT_PCRE16
1164 /*************************************************
1165 * Convert character value to UTF-8 *
1166 *************************************************/
1167
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
*/
1177
1178 static int
1179 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1180 {
1181 register int i, j;
1182 for (i = 0; i < utf8_table1_size; i++)
1183 if (cvalue <= utf8_table1[i]) break;
1184 utf8bytes += i;
1185 for (j = i; j > 0; j--)
1186 {
1187 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1188 cvalue >>= 6;
1189 }
1190 *utf8bytes = utf8_table2[i] | cvalue;
1191 return i + 1;
1192 }
1193 #endif
1194
1195
1196 #ifdef SUPPORT_PCRE16
1197 /*************************************************
1198 * Convert a string to 16-bit *
1199 *************************************************/
1200
1201 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1202 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1203 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1204 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1205 result is always left in buffer16.
1206
1207 Note that this function does not object to surrogate values. This is
1208 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1209 for the purpose of testing that they are correctly faulted.
1210
1211 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1212 in UTF-8 so that values greater than 255 can be handled.
1213
1214 Arguments:
1215 data TRUE if converting a data line; FALSE for a regex
1216 p points to a byte string
1217 utf true if UTF-8 (to be converted to UTF-16)
1218 len number of bytes in the string (excluding trailing zero)
1219
1220 Returns: number of 16-bit data items used (excluding trailing zero)
1221 OR -1 if a UTF-8 string is malformed
1222 OR -2 if a value > 0x10ffff is encountered
1223 OR -3 if a value > 0xffff is encountered when not in UTF mode
1224 */
1225
1226 static int
1227 to16(int data, pcre_uint8 *p, int utf, int len)
1228 {
1229 pcre_uint16 *pp;
1230
1231 if (buffer16_size < 2*len + 2)
1232 {
1233 if (buffer16 != NULL) free(buffer16);
1234 buffer16_size = 2*len + 2;
1235 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1236 if (buffer16 == NULL)
1237 {
1238 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1239 exit(1);
1240 }
1241 }
1242
1243 pp = buffer16;
1244
1245 if (!utf && !data)
1246 {
1247 while (len-- > 0) *pp++ = *p++;
1248 }
1249
1250 else
1251 {
1252 int c = 0;
1253 while (len > 0)
1254 {
1255 int chlen = utf82ord(p, &c);
1256 if (chlen <= 0) return -1;
1257 if (c > 0x10ffff) return -2;
1258 p += chlen;
1259 len -= chlen;
1260 if (c < 0x10000) *pp++ = c; else
1261 {
1262 if (!utf) return -3;
1263 c -= 0x10000;
1264 *pp++ = 0xD800 | (c >> 10);
1265 *pp++ = 0xDC00 | (c & 0x3ff);
1266 }
1267 }
1268 }
1269
1270 *pp = 0;
1271 return pp - buffer16;
1272 }
1273 #endif
1274
1275
1276 /*************************************************
1277 * Read or extend an input line *
1278 *************************************************/
1279
1280 /* Input lines are read into buffer, but both patterns and data lines can be
1281 continued over multiple input lines. In addition, if the buffer fills up, we
1282 want to automatically expand it so as to be able to handle extremely large
1283 lines that are needed for certain stress tests. When the input buffer is
1284 expanded, the other two buffers must also be expanded likewise, and the
1285 contents of pbuffer, which are a copy of the input for callouts, must be
1286 preserved (for when expansion happens for a data line). This is not the most
1287 optimal way of handling this, but hey, this is just a test program!
1288
1289 Arguments:
1290 f the file to read
1291 start where in buffer to start (this *must* be within buffer)
1292 prompt for stdin or readline()
1293
1294 Returns: pointer to the start of new data
1295 could be a copy of start, or could be moved
1296 NULL if no data read and EOF reached
1297 */
1298
1299 static pcre_uint8 *
1300 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1301 {
1302 pcre_uint8 *here = start;
1303
1304 for (;;)
1305 {
1306 size_t rlen = (size_t)(buffer_size - (here - buffer));
1307
1308 if (rlen > 1000)
1309 {
1310 int dlen;
1311
1312 /* If libreadline or libedit support is required, use readline() to read a
1313 line if the input is a terminal. Note that readline() removes the trailing
1314 newline, so we must put it back again, to be compatible with fgets(). */
1315
1316 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1317 if (isatty(fileno(f)))
1318 {
1319 size_t len;
1320 char *s = readline(prompt);
1321 if (s == NULL) return (here == start)? NULL : start;
1322 len = strlen(s);
1323 if (len > 0) add_history(s);
1324 if (len > rlen - 1) len = rlen - 1;
1325 memcpy(here, s, len);
1326 here[len] = '\n';
1327 here[len+1] = 0;
1328 free(s);
1329 }
1330 else
1331 #endif
1332
1333 /* Read the next line by normal means, prompting if the file is stdin. */
1334
1335 {
1336 if (f == stdin) printf("%s", prompt);
1337 if (fgets((char *)here, rlen, f) == NULL)
1338 return (here == start)? NULL : start;
1339 }
1340
1341 dlen = (int)strlen((char *)here);
1342 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1343 here += dlen;
1344 }
1345
1346 else
1347 {
1348 int new_buffer_size = 2*buffer_size;
1349 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1350 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1351 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1352
1353 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1354 {
1355 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1356 exit(1);
1357 }
1358
1359 memcpy(new_buffer, buffer, buffer_size);
1360 memcpy(new_pbuffer, pbuffer, buffer_size);
1361
1362 buffer_size = new_buffer_size;
1363
1364 start = new_buffer + (start - buffer);
1365 here = new_buffer + (here - buffer);
1366
1367 free(buffer);
1368 free(dbuffer);
1369 free(pbuffer);
1370
1371 buffer = new_buffer;
1372 dbuffer = new_dbuffer;
1373 pbuffer = new_pbuffer;
1374 }
1375 }
1376
1377 return NULL; /* Control never gets here */
1378 }
1379
1380
1381
1382 /*************************************************
1383 * Read number from string *
1384 *************************************************/
1385
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer

Returns:     the value, as an int
*/
1396
1397 static int
1398 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1399 {
1400 int result = 0;
1401 while(*str != 0 && isspace(*str)) str++;
1402 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1403 *endptr = str;
1404 return(result);
1405 }
1406
1407
1408
1409 /*************************************************
1410 * Print one character *
1411 *************************************************/
1412
1413 /* Print a single character either literally, or as a hex escape. */
1414
1415 static int pchar(int c, FILE *f)
1416 {
1417 if (PRINTOK(c))
1418 {
1419 if (f != NULL) fprintf(f, "%c", c);
1420 return 1;
1421 }
1422
1423 if (c < 0x100)
1424 {
1425 if (use_utf)
1426 {
1427 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1428 return 6;
1429 }
1430 else
1431 {
1432 if (f != NULL) fprintf(f, "\\x%02x", c);
1433 return 4;
1434 }
1435 }
1436
1437 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1438 return (c <= 0x000000ff)? 6 :
1439 (c <= 0x00000fff)? 7 :
1440 (c <= 0x0000ffff)? 8 :
1441 (c <= 0x000fffff)? 9 : 10;
1442 }
1443
1444
1445
1446 #ifdef SUPPORT_PCRE8
1447 /*************************************************
1448 * Print 8-bit character string *
1449 *************************************************/
1450
1451 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1452 If handed a NULL file, just counts chars without printing. */
1453
1454 static int pchars(pcre_uint8 *p, int length, FILE *f)
1455 {
1456 int c = 0;
1457 int yield = 0;
1458
1459 if (length < 0)
1460 length = strlen((char *)p);
1461
1462 while (length-- > 0)
1463 {
1464 #if !defined NOUTF
1465 if (use_utf)
1466 {
1467 int rc = utf82ord(p, &c);
1468 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1469 {
1470 length -= rc - 1;
1471 p += rc;
1472 yield += pchar(c, f);
1473 continue;
1474 }
1475 }
1476 #endif
1477 c = *p++;
1478 yield += pchar(c, f);
1479 }
1480
1481 return yield;
1482 }
1483 #endif
1484
1485
1486
1487 #ifdef SUPPORT_PCRE16
1488 /*************************************************
1489 * Find length of 0-terminated 16-bit string *
1490 *************************************************/
1491
1492 static int strlen16(PCRE_SPTR16 p)
1493 {
1494 int len = 0;
1495 while (*p++ != 0) len++;
1496 return len;
1497 }
1498 #endif /* SUPPORT_PCRE16 */
1499
1500
1501 #ifdef SUPPORT_PCRE16
1502 /*************************************************
1503 * Print 16-bit character string *
1504 *************************************************/
1505
1506 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1507 If handed a NULL file, just counts chars without printing. */
1508
1509 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1510 {
1511 int yield = 0;
1512
1513 if (length < 0)
1514 length = strlen16(p);
1515
1516 while (length-- > 0)
1517 {
1518 int c = *p++ & 0xffff;
1519 #if !defined NOUTF
1520 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1521 {
1522 int d = *p & 0xffff;
1523 if (d >= 0xDC00 && d < 0xDFFF)
1524 {
1525 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1526 length--;
1527 p++;
1528 }
1529 }
1530 #endif
1531 yield += pchar(c, f);
1532 }
1533
1534 return yield;
1535 }
1536 #endif /* SUPPORT_PCRE16 */
1537
1538
1539
1540 #ifdef SUPPORT_PCRE8
1541 /*************************************************
1542 * Read a capture name (8-bit) and check it *
1543 *************************************************/
1544
1545 static pcre_uint8 *
1546 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1547 {
1548 pcre_uint8 *npp = *pp;
1549 while (isalnum(*p)) *npp++ = *p++;
1550 *npp++ = 0;
1551 *npp = 0;
1552 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1553 {
1554 fprintf(outfile, "no parentheses with name \"");
1555 PCHARSV(*pp, 0, -1, outfile);
1556 fprintf(outfile, "\"\n");
1557 }
1558
1559 *pp = npp;
1560 return p;
1561 }
1562 #endif /* SUPPORT_PCRE8 */
1563
1564
1565
1566 #ifdef SUPPORT_PCRE16
1567 /*************************************************
1568 * Read a capture name (16-bit) and check it *
1569 *************************************************/
1570
1571 /* Note that the text being read is 8-bit. */
1572
1573 static pcre_uint8 *
1574 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1575 {
1576 pcre_uint16 *npp = *pp;
1577 while (isalnum(*p)) *npp++ = *p++;
1578 *npp++ = 0;
1579 *npp = 0;
1580 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1581 {
1582 fprintf(outfile, "no parentheses with name \"");
1583 PCHARSV(*pp, 0, -1, outfile);
1584 fprintf(outfile, "\"\n");
1585 }
1586 *pp = npp;
1587 return p;
1588 }
1589 #endif /* SUPPORT_PCRE16 */
1590
1591
1592
1593 /*************************************************
1594 * Callout function *
1595 *************************************************/
1596
1597 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1598 the match. Yield zero unless more callouts than the fail count, or the callout
1599 data is not zero. */
1600
/* Argument:
  cb         the callout block passed in by PCRE

Returns:     the callout data value if a callout data pointer was supplied and
             the value is not zero; otherwise 1 when the callout number is
             callout_fail_id and callout_count has reached callout_fail_count,
             and 0 in all other cases
*/

static int callout(pcre_callout_block *cb)
{
/* The subject is echoed only on the first callout of a match or when extra
detail was requested; otherwise f is NULL. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, pre_start, post_start, subject_length;

/* With extra detail, show the callout number, the last capture, and every
capture slot below capture_top. f is outfile whenever callout_extra is set. */

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  for (i = 0; i < cb->capture_top * 2; i += 2)
    {
    if (cb->offset_vector[i] < 0)
      fprintf(f, "%2d: <unset>\n", i/2);
    else
      {
      fprintf(f, "%2d: ", i/2);
      PCHARSV(cb->subject, cb->offset_vector[i],
        cb->offset_vector[i+1] - cb->offset_vector[i], f);
      fprintf(f, "\n");
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

if (f != NULL) fprintf(f, "--->");

/* NOTE(review): PCHARS and PCHARSV are macros defined elsewhere in the file;
PCHARS presumably stores the printed width in its first argument - confirm. */

PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
  cb->current_position - cb->start_match, f);

PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);

PCHARSV(cb->subject, cb->current_position,
  cb->subject_length - cb->current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
    else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* One circumflex goes under the start of the match and, if anything has been
matched so far, a second one under the current position. */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad beyond the end of the subject, then show the next pattern item, taken
from the saved copy of the pattern in pbuffer (one character if the item
length is given as zero). */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Report the mark only when its pointer differs from the one seen last. */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, 0, -1, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* A non-zero callout data value is reported and becomes the yield. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

/* Only callouts numbered callout_fail_id are counted towards the fail count. */

return (cb->callout_number != callout_fail_id)? 0 :
       (++callout_count >= callout_fail_count)? 1 : 0;
}
1700
1701
1702 /*************************************************
1703 * Local malloc functions *
1704 *************************************************/
1705
1706 /* Alternative malloc function, to test functionality and save the size of a
1707 compiled re, which is the first store request that pcre_compile() makes. The
1708 show_malloc variable is set only during matching. */
1709
1710 static void *new_malloc(size_t size)
1711 {
1712 void *block = malloc(size);
1713 gotten_store = size;
1714 if (first_gotten_store == 0) first_gotten_store = size;
1715 if (show_malloc)
1716 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1717 return block;
1718 }
1719
1720 static void new_free(void *block)
1721 {
1722 if (show_malloc)
1723 fprintf(outfile, "free %p\n", block);
1724 free(block);
1725 }
1726
1727 /* For recursion malloc/free, to test stacking calls */
1728
1729 static void *stack_malloc(size_t size)
1730 {
1731 void *block = malloc(size);
1732 if (show_malloc)
1733 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1734 return block;
1735 }
1736
1737 static void stack_free(void *block)
1738 {
1739 if (show_malloc)
1740 fprintf(outfile, "stack_free %p\n", block);
1741 free(block);
1742 }
1743
1744
1745 /*************************************************
1746 * Call pcre_fullinfo() *
1747 *************************************************/
1748
1749 /* Get one piece of information from the pcre_fullinfo() function. When only
1750 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1751 value, but the code is defensive.
1752
1753 Arguments:
1754 re compiled regex
1755 study study data
1756 option PCRE_INFO_xxx option
1757 ptr where to put the data
1758
1759 Returns: 0 when OK, < 0 on error
1760 */
1761
1762 static int
1763 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1764 {
1765 int rc;
1766
1767 if (use_pcre16)
1768 #ifdef SUPPORT_PCRE16
1769 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1770 #else
1771 rc = PCRE_ERROR_BADMODE;
1772 #endif
1773 else
1774 #ifdef SUPPORT_PCRE8
1775 rc = pcre_fullinfo(re, study, option, ptr);
1776 #else
1777 rc = PCRE_ERROR_BADMODE;
1778 #endif
1779
1780 if (rc < 0)
1781 {
1782 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1783 use_pcre16? "16" : "", option);
1784 if (rc == PCRE_ERROR_BADMODE)
1785 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1786 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1787 }
1788
1789 return rc;
1790 }
1791
1792
1793
1794 /*************************************************
1795 * Swap byte functions *
1796 *************************************************/
1797
1798 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1799 value, respectively.
1800
1801 Arguments:
1802 value any number
1803
1804 Returns: the byte swapped value
1805 */
1806
1807 static pcre_uint32
1808 swap_uint32(pcre_uint32 value)
1809 {
1810 return ((value & 0x000000ff) << 24) |
1811 ((value & 0x0000ff00) << 8) |
1812 ((value & 0x00ff0000) >> 8) |
1813 (value >> 24);
1814 }
1815
1816 static pcre_uint16
1817 swap_uint16(pcre_uint16 value)
1818 {
1819 return (value >> 8) | (value << 8);
1820 }
1821
1822
1823
1824 /*************************************************
1825 * Flip bytes in a compiled pattern *
1826 *************************************************/
1827
1828 /* This function is called if the 'F' option was present on a pattern that is
1829 to be written to a file. We flip the bytes of all the integer fields in the
1830 regex data block and the study block. In 16-bit mode this also flips relevant
1831 bytes in the pattern itself. This is to make it possible to test PCRE's
1832 ability to reload byte-flipped patterns, e.g. those compiled on a different
1833 architecture. */
1834
1835 static void
1836 regexflip(pcre *ere, pcre_extra *extra)
1837 {
1838 REAL_PCRE *re = (REAL_PCRE *)ere;
1839 #ifdef SUPPORT_PCRE16
1840 int op;
1841 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1842 int length = re->name_count * re->name_entry_size;
1843 #ifdef SUPPORT_UTF
1844 BOOL utf = (re->options & PCRE_UTF16) != 0;
1845 BOOL utf16_char = FALSE;
1846 #endif /* SUPPORT_UTF */
1847 #endif /* SUPPORT_PCRE16 */
1848
1849 /* Always flip the bytes in the main data block and study blocks. */
1850
1851 re->magic_number = REVERSED_MAGIC_NUMBER;
1852 re->size = swap_uint32(re->size);
1853 re->options = swap_uint32(re->options);
1854 re->flags = swap_uint16(re->flags);
1855 re->top_bracket = swap_uint16(re->top_bracket);
1856 re->top_backref = swap_uint16(re->top_backref);
1857 re->first_char = swap_uint16(re->first_char);
1858 re->req_char = swap_uint16(re->req_char);
1859 re->name_table_offset = swap_uint16(re->name_table_offset);
1860 re->name_entry_size = swap_uint16(re->name_entry_size);
1861 re->name_count = swap_uint16(re->name_count);
1862
1863 if (extra != NULL)
1864 {
1865 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1866 rsd->size = swap_uint32(rsd->size);
1867 rsd->flags = swap_uint32(rsd->flags);
1868 rsd->minlength = swap_uint32(rsd->minlength);
1869 }
1870
1871 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1872 in the name table, if present, and then in the pattern itself. */
1873
1874 #ifdef SUPPORT_PCRE16
1875 if (!use_pcre16) return;
1876
1877 while(TRUE)
1878 {
1879 /* Swap previous characters. */
1880 while (length-- > 0)
1881 {
1882 *ptr = swap_uint16(*ptr);
1883 ptr++;
1884 }
1885 #ifdef SUPPORT_UTF
1886 if (utf16_char)
1887 {
1888 if ((ptr[-1] & 0xfc00) == 0xd800)
1889 {
1890 /* We know that there is only one extra character in UTF-16. */
1891 *ptr = swap_uint16(*ptr);
1892 ptr++;
1893 }
1894 }
1895 utf16_char = FALSE;
1896 #endif /* SUPPORT_UTF */
1897
1898 /* Get next opcode. */
1899
1900 length = 0;
1901 op = *ptr;
1902 *ptr++ = swap_uint16(op);
1903
1904 switch (op)
1905 {
1906 case OP_END:
1907 return;
1908
1909 #ifdef SUPPORT_UTF
1910 case OP_CHAR:
1911 case OP_CHARI:
1912 case OP_NOT:
1913 case OP_NOTI:
1914 case OP_STAR:
1915 case OP_MINSTAR:
1916 case OP_PLUS:
1917 case OP_MINPLUS:
1918 case OP_QUERY:
1919 case OP_MINQUERY:
1920 case OP_UPTO:
1921 case OP_MINUPTO:
1922 case OP_EXACT:
1923 case OP_POSSTAR:
1924 case OP_POSPLUS:
1925 case OP_POSQUERY:
1926 case OP_POSUPTO:
1927 case OP_STARI:
1928 case OP_MINSTARI:
1929 case OP_PLUSI:
1930 case OP_MINPLUSI:
1931 case OP_QUERYI:
1932 case OP_MINQUERYI:
1933 case OP_UPTOI:
1934 case OP_MINUPTOI:
1935 case OP_EXACTI:
1936 case OP_POSSTARI:
1937 case OP_POSPLUSI:
1938 case OP_POSQUERYI:
1939 case OP_POSUPTOI:
1940 case OP_NOTSTAR:
1941 case OP_NOTMINSTAR:
1942 case OP_NOTPLUS:
1943 case OP_NOTMINPLUS:
1944 case OP_NOTQUERY:
1945 case OP_NOTMINQUERY:
1946 case OP_NOTUPTO:
1947 case OP_NOTMINUPTO:
1948 case OP_NOTEXACT:
1949 case OP_NOTPOSSTAR:
1950 case OP_NOTPOSPLUS:
1951 case OP_NOTPOSQUERY:
1952 case OP_NOTPOSUPTO:
1953 case OP_NOTSTARI:
1954 case OP_NOTMINSTARI:
1955 case OP_NOTPLUSI:
1956 case OP_NOTMINPLUSI:
1957 case OP_NOTQUERYI:
1958 case OP_NOTMINQUERYI:
1959 case OP_NOTUPTOI:
1960 case OP_NOTMINUPTOI:
1961 case OP_NOTEXACTI:
1962 case OP_NOTPOSSTARI:
1963 case OP_NOTPOSPLUSI:
1964 case OP_NOTPOSQUERYI:
1965 case OP_NOTPOSUPTOI:
1966 if (utf) utf16_char = TRUE;
1967 #endif
1968 /* Fall through. */
1969
1970 default:
1971 length = OP_lengths16[op] - 1;
1972 break;
1973
1974 case OP_CLASS:
1975 case OP_NCLASS:
1976 /* Skip the character bit map. */
1977 ptr += 32/sizeof(pcre_uint16);
1978 length = 0;
1979 break;
1980
1981 case OP_XCLASS:
1982 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1983 if (LINK_SIZE > 1)
1984 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1985 - (1 + LINK_SIZE + 1));
1986 else
1987 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1988
1989 /* Reverse the size of the XCLASS instance. */
1990 *ptr = swap_uint16(*ptr);
1991 ptr++;
1992 if (LINK_SIZE > 1)
1993 {
1994 *ptr = swap_uint16(*ptr);
1995 ptr++;
1996 }
1997
1998 op = *ptr;
1999 *ptr = swap_uint16(op);
2000 ptr++;
2001 if ((op & XCL_MAP) != 0)
2002 {
2003 /* Skip the character bit map. */
2004 ptr += 32/sizeof(pcre_uint16);
2005 length -= 32/sizeof(pcre_uint16);
2006 }
2007 break;
2008 }
2009 }
2010 /* Control should never reach here in 16 bit mode. */
2011 #endif /* SUPPORT_PCRE16 */
2012 }
2013
2014
2015
2016 /*************************************************
2017 * Check match or recursion limit *
2018 *************************************************/
2019
2020 static int
2021 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2022 int start_offset, int options, int *use_offsets, int use_size_offsets,
2023 int flag, unsigned long int *limit, int errnumber, const char *msg)
2024 {
2025 int count;
2026 int min = 0;
2027 int mid = 64;
2028 int max = -1;
2029
2030 extra->flags |= flag;
2031
2032 for (;;)
2033 {
2034 *limit = mid;
2035
2036 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2037 use_offsets, use_size_offsets);
2038
2039 if (count == errnumber)
2040 {
2041 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2042 min = mid;
2043 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2044 }
2045
2046 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2047 count == PCRE_ERROR_PARTIAL)
2048 {
2049 if (mid == min + 1)
2050 {
2051 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2052 break;
2053 }
2054 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2055 max = mid;
2056 mid = (min + mid)/2;
2057 }
2058 else break; /* Some other error */
2059 }
2060
2061 extra->flags &= ~flag;
2062 return count;
2063 }
2064
2065
2066
2067 /*************************************************
2068 * Case-independent strncmp() function *
2069 *************************************************/
2070
2071 /*
2072 Arguments:
2073 s first string
2074 t second string
2075 n number of characters to compare
2076
2077 Returns: < 0, = 0, or > 0, according to the comparison
2078 */
2079
2080 static int
2081 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2082 {
2083 while (n--)
2084 {
2085 int c = tolower(*s++) - tolower(*t++);
2086 if (c) return c;
2087 }
2088 return 0;
2089 }
2090
2091
2092
2093 /*************************************************
2094 * Check newline indicator *
2095 *************************************************/
2096
2097 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2098 a message and return 0 if there is no match.
2099
2100 Arguments:
2101 p points after the leading '<'
2102 f file for error message
2103
2104 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2105 */
2106
2107 static int
2108 check_newline(pcre_uint8 *p, FILE *f)
2109 {
2110 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2111 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2112 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2113 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2114 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2115 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2116 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2117 fprintf(f, "Unknown newline type at: <%s\n", p);
2118 return 0;
2119 }
2120
2121
2122
2123 /*************************************************
2124 * Usage function *
2125 *************************************************/
2126
/* Write the command-line help text to stdout. The lines for the 16-bit
library, DFA matching, and the POSIX interface are included only when the
corresponding support is compiled in, and the readline remark depends on
whether a readline-style library is in use. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);

/* The values accepted as the argument of -C */

fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);

fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n", stdout);

/* The study options */

fputs(" -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n", stdout);

/* The timing options */

fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2177
2178
2179
2180 /*************************************************
2181 * Main Program *
2182 *************************************************/
2183
2184 /* Read lines from named file or stdin and write to named file or stdout; lines
2185 consist of a regular expression, in delimiters and optionally followed by
2186 options, followed by a set of test data, terminated by an empty line. */
2187
2188 int main(int argc, char **argv)
2189 {
2190 FILE *infile = stdin;
2191 const char *version;
2192 int options = 0;
2193 int study_options = 0;
2194 int default_find_match_limit = FALSE;
2195 int op = 1;
2196 int timeit = 0;
2197 int timeitm = 0;
2198 int showinfo = 0;
2199 int showstore = 0;
2200 int force_study = -1;
2201 int force_study_options = 0;
2202 int quiet = 0;
2203 int size_offsets = 45;
2204 int size_offsets_max;
2205 int *offsets = NULL;
2206 int debug = 0;
2207 int done = 0;
2208 int all_use_dfa = 0;
2209 int verify_jit = 0;
2210 int yield = 0;
2211 int stack_size;
2212
2213 #if !defined NOPOSIX
2214 int posix = 0;
2215 #endif
2216 #if !defined NODFA
2217 int *dfa_workspace = NULL;
2218 #endif
2219
2220 pcre_jit_stack *jit_stack = NULL;
2221
2222 /* These vectors store, end-to-end, a list of zero-terminated captured
2223 substring names, each list itself being terminated by an empty name. Assume
2224 that 1024 is plenty long enough for the few names we'll be testing. It is
2225 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2226 for the actual memory, to ensure alignment. */
2227
2228 pcre_uint16 copynames[1024];
2229 pcre_uint16 getnames[1024];
2230
2231 #ifdef SUPPORT_PCRE16
2232 pcre_uint16 *cn16ptr;
2233 pcre_uint16 *gn16ptr;
2234 #endif
2235
2236 #ifdef SUPPORT_PCRE8
2237 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2238 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2239 pcre_uint8 *cn8ptr;
2240 pcre_uint8 *gn8ptr;
2241 #endif
2242
2243 /* Get buffers from malloc() so that valgrind will check their misuse when
2244 debugging. They grow automatically when very long lines are read. The 16-bit
2245 buffer (buffer16) is obtained only if needed. */
2246
2247 buffer = (pcre_uint8 *)malloc(buffer_size);
2248 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2249 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2250
2251 /* The outfile variable is static so that new_malloc can use it. */
2252
2253 outfile = stdout;
2254
2255 /* The following _setmode() stuff is some Windows magic that tells its runtime
2256 library to translate CRLF into a single LF character. At least, that's what
2257 I've been told: never having used Windows I take this all on trust. Originally
2258 it set 0x8000, but then I was advised that _O_BINARY was better. */
2259
2260 #if defined(_WIN32) || defined(WIN32)
2261 _setmode( _fileno( stdout ), _O_BINARY );
2262 #endif
2263
2264 /* Get the version number: both pcre_version() and pcre16_version() give the
2265 same answer. We just need to ensure that we call one that is available. */
2266
2267 #ifdef SUPPORT_PCRE8
2268 version = pcre_version();
2269 #else
2270 version = pcre16_version();
2271 #endif
2272
2273 /* Scan options */
2274
2275 while (argc > 1 && argv[op][0] == '-')
2276 {
2277 pcre_uint8 *endptr;
2278 char *arg = argv[op];
2279
2280 if (strcmp(arg, "-m") == 0) showstore = 1;
2281 else if (strcmp(arg, "-s") == 0) force_study = 0;
2282
2283 else if (strncmp(arg, "-s+", 3) == 0)
2284 {
2285 arg += 3;
2286 if (*arg == '+') { arg++; verify_jit = TRUE; }
2287 force_study = 1;
2288 if (*arg == 0)
2289 force_study_options = jit_study_bits[6];
2290 else if (*arg >= '1' && *arg <= '7')
2291 force_study_options = jit_study_bits[*arg - '1'];
2292 else goto BAD_ARG;
2293 }
2294 else if (strcmp(arg, "-16") == 0)
2295 {
2296 #ifdef SUPPORT_PCRE16
2297 use_pcre16 = 1;
2298 #else
2299 printf("** This version of PCRE was built without 16-bit support\n");
2300 exit(1);
2301 #endif
2302 }
2303 else if (strcmp(arg, "-q") == 0) quiet = 1;
2304 else if (strcmp(arg, "-b") == 0) debug = 1;
2305 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2306 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2307 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2308 #if !defined NODFA
2309 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2310 #endif
2311 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2312 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2313 *endptr == 0))
2314 {
2315 op++;
2316 argc--;
2317 }
2318 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2319 {
2320 int both = arg[2] == 0;
2321 int temp;
2322 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2323 *endptr == 0))
2324 {
2325 timeitm = temp;
2326 op++;
2327 argc--;
2328 }
2329 else timeitm = LOOPREPEAT;
2330 if (both) timeit = timeitm;
2331 }
2332 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2333 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2334 *endptr == 0))
2335 {
2336 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2337 printf("PCRE: -S not supported on this OS\n");
2338 exit(1);
2339 #else
2340 int rc;
2341 struct rlimit rlim;
2342 getrlimit(RLIMIT_STACK, &rlim);
2343 rlim.rlim_cur = stack_size * 1024 * 1024;
2344 rc = setrlimit(RLIMIT_STACK, &rlim);
2345 if (rc != 0)
2346 {
2347 printf("PCRE: setrlimit() failed with error %d\n", rc);
2348 exit(1);
2349 }
2350 op++;
2351 argc--;
2352 #endif
2353 }
2354 #if !defined NOPOSIX
2355 else if (strcmp(arg, "-p") == 0) posix = 1;
2356 #endif
2357 else if (strcmp(arg, "-C") == 0)
2358 {
2359 int rc;
2360 unsigned long int lrc;
2361
2362 if (argc > 2)
2363 {
2364 if (strcmp(argv[op + 1], "linksize") == 0)
2365 {
2366 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2367 printf("%d\n", rc);
2368 yield = rc;
2369 goto EXIT;
2370 }
2371 if (strcmp(argv[op + 1], "pcre8") == 0)
2372 {
2373 #ifdef SUPPORT_PCRE8
2374 printf("1\n");
2375 yield = 1;
2376 #else
2377 printf("0\n");
2378 yield = 0;
2379 #endif
2380 goto EXIT;
2381 }
2382 if (strcmp(argv[op + 1], "pcre16") == 0)
2383 {
2384 #ifdef SUPPORT_PCRE16
2385 printf("1\n");
2386 yield = 1;
2387 #else
2388 printf("0\n");
2389 yield = 0;
2390 #endif
2391 goto EXIT;
2392 }
2393 if (strcmp(argv[op + 1], "utf") == 0)
2394 {
2395 #ifdef SUPPORT_PCRE8
2396 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2397 printf("%d\n", rc);
2398 yield = rc;
2399 #else
2400 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2401 printf("%d\n", rc);
2402 yield = rc;
2403 #endif
2404 goto EXIT;
2405 }
2406 if (strcmp(argv[op + 1], "ucp") == 0)
2407 {
2408 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409 printf("%d\n", rc);
2410 yield = rc;
2411 goto EXIT;
2412 }
2413 if (strcmp(argv[op + 1], "jit") == 0)
2414 {
2415 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2416 printf("%d\n", rc);
2417 yield = rc;
2418 goto EXIT;
2419 }
2420 if (strcmp(argv[op + 1], "newline") == 0)
2421 {
2422 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2423 /* Note that these values are always the ASCII values, even
2424 in EBCDIC environments. CR is 13 and NL is 10. */
2425 printf("%s\n", (rc == 13)? "CR" :
2426 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2427 (rc == -2)? "ANYCRLF" :
2428 (rc == -1)? "ANY" : "???");
2429 goto EXIT;
2430 }
2431 printf("Unknown -C option: %s\n", argv[op + 1]);
2432 goto EXIT;
2433 }
2434
2435 printf("PCRE version %s\n", version);
2436 printf("Compiled with\n");
2437
2438 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2439 are set, either both UTFs are supported or both are not supported. */
2440
2441 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2442 printf(" 8-bit and 16-bit support\n");
2443 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2444 if (rc)
2445 printf(" UTF-8 and UTF-16 support\n");
2446 else
2447 printf(" No UTF-8 or UTF-16 support\n");
2448 #elif defined SUPPORT_PCRE8
2449 printf(" 8-bit support only\n");
2450 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2451 printf(" %sUTF-8 support\n", rc? "" : "No ");
2452 #else
2453 printf(" 16-bit support only\n");
2454 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2455 printf(" %sUTF-16 support\n", rc? "" : "No ");
2456 #endif
2457
2458 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2459 printf(" %sUnicode properties support\n", rc? "" : "No ");
2460 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2461 if (rc)
2462 {
2463 const char *arch;
2464 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2465 printf(" Just-in-time compiler support: %s\n", arch);
2466 }
2467 else
2468 printf(" No just-in-time compiler support\n");
2469 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2470 /* Note that these values are always the ASCII values, even
2471 in EBCDIC environments. CR is 13 and NL is 10. */
2472 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2473 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2474 (rc == -2)? "ANYCRLF" :
2475 (rc == -1)? "ANY" : "???");
2476 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2477 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2478 "all Unicode newlines");
2479 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2480 printf(" Internal link size = %d\n", rc);
2481 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2482 printf(" POSIX malloc threshold = %d\n", rc);
2483 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2484 printf(" Default match limit = %ld\n", lrc);
2485 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2486 printf(" Default recursion depth limit = %ld\n", lrc);
2487 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2488 printf(" Match recursion uses %s", rc? "stack" : "heap");
2489 if (showstore)
2490 {
2491 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2492 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2493 }
2494 printf("\n");
2495 goto EXIT;
2496 }
2497 else if (strcmp(arg, "-help") == 0 ||
2498 strcmp(arg, "--help") == 0)
2499 {
2500 usage();
2501 goto EXIT;
2502 }
2503 else
2504 {
2505 BAD_ARG:
2506 printf("** Unknown or malformed option %s\n", arg);
2507 usage();
2508 yield = 1;
2509 goto EXIT;
2510 }
2511 op++;
2512 argc--;
2513 }
2514
2515 /* Get the store for the offsets vector, and remember what it was */
2516
2517 size_offsets_max = size_offsets;
2518 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2519 if (offsets == NULL)
2520 {
2521 printf("** Failed to get %d bytes of memory for offsets vector\n",
2522 (int)(size_offsets_max * sizeof(int)));
2523 yield = 1;
2524 goto EXIT;
2525 }
2526
2527 /* Sort out the input and output files */
2528
2529 if (argc > 1)
2530 {
2531 infile = fopen(argv[op], INPUT_MODE);
2532 if (infile == NULL)
2533 {
2534 printf("** Failed to open %s\n", argv[op]);
2535 yield = 1;
2536 goto EXIT;
2537 }
2538 }
2539
2540 if (argc > 2)
2541 {
2542 outfile = fopen(argv[op+1], OUTPUT_MODE);
2543 if (outfile == NULL)
2544 {
2545 printf("** Failed to open %s\n", argv[op+1]);
2546 yield = 1;
2547 goto EXIT;
2548 }
2549 }
2550
2551 /* Set alternative malloc function */
2552
2553 #ifdef SUPPORT_PCRE8
2554 pcre_malloc = new_malloc;
2555 pcre_free = new_free;
2556 pcre_stack_malloc = stack_malloc;
2557 pcre_stack_free = stack_free;
2558 #endif
2559
2560 #ifdef SUPPORT_PCRE16
2561 pcre16_malloc = new_malloc;
2562 pcre16_free = new_free;
2563 pcre16_stack_malloc = stack_malloc;
2564 pcre16_stack_free = stack_free;
2565 #endif
2566
2567 /* Heading line unless quiet, then prompt for first regex if stdin */
2568
2569 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2570
2571 /* Main loop */
2572
2573 while (!done)
2574 {
2575 pcre *re = NULL;
2576 pcre_extra *extra = NULL;
2577
2578 #if !defined NOPOSIX /* There are still compilers that require no indent */
2579 regex_t preg;
2580 int do_posix = 0;
2581 #endif
2582
2583 const char *error;
2584 pcre_uint8 *markptr;
2585 pcre_uint8 *p, *pp, *ppp;
2586 pcre_uint8 *to_file = NULL;
2587 const pcre_uint8 *tables = NULL;
2588 unsigned long int get_options;
2589 unsigned long int true_size, true_study_size = 0;
2590 size_t size, regex_gotten_store;
2591 int do_allcaps = 0;
2592 int do_mark = 0;
2593 int do_study = 0;
2594 int no_force_study = 0;
2595 int do_debug = debug;
2596 int do_G = 0;
2597 int do_g = 0;
2598 int do_showinfo = showinfo;
2599 int do_showrest = 0;
2600 int do_showcaprest = 0;
2601 int do_flip = 0;
2602 int erroroffset, len, delimiter, poffset;
2603
2604 #if !defined NODFA
2605 int dfa_matched = 0;
2606 #endif
2607
2608 use_utf = 0;
2609 debug_lengths = 1;
2610
2611 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2612 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2613 fflush(outfile);
2614
2615 p = buffer;
2616 while (isspace(*p)) p++;
2617 if (*p == 0) continue;
2618
2619 /* See if the pattern is to be loaded pre-compiled from a file. */
2620
2621 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2622 {
2623 pcre_uint32 magic;
2624 pcre_uint8 sbuf[8];
2625 FILE *f;
2626
2627 p++;
2628 if (*p == '!')
2629 {
2630 do_debug = TRUE;
2631 do_showinfo = TRUE;
2632 p++;
2633 }
2634
2635 pp = p + (int)strlen((char *)p);
2636 while (isspace(pp[-1])) pp--;
2637 *pp = 0;
2638
2639 f = fopen((char *)p, "rb");
2640 if (f == NULL)
2641 {
2642 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2643 continue;
2644 }
2645
2646 first_gotten_store = 0;
2647 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2648
2649 true_size =
2650 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2651 true_study_size =
2652 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2653
2654 re = (pcre *)new_malloc(true_size);
2655 if (re == NULL)
2656 {
2657 printf("** Failed to get %d bytes of memory for pcre object\n",
2658 (int)true_size);
2659 yield = 1;
2660 goto EXIT;
2661 }
2662 regex_gotten_store = first_gotten_store;
2663
2664 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2665
2666 magic = ((REAL_PCRE *)re)->magic_number;
2667 if (magic != MAGIC_NUMBER)
2668 {
2669 if (swap_uint32(magic) == MAGIC_NUMBER)
2670 {
2671 do_flip = 1;
2672 }
2673 else
2674 {
2675 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2676 new_free(re);
2677 fclose(f);
2678 continue;
2679 }
2680 }
2681
2682 /* We hide the byte-invert info for little and big endian tests. */
2683 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2684 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2685
2686 /* Now see if there is any following study data. */
2687
2688 if (true_study_size != 0)
2689 {
2690 pcre_study_data *psd;
2691
2692 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2693 extra->flags = PCRE_EXTRA_STUDY_DATA;
2694
2695 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2696 extra->study_data = psd;
2697
2698 if (fread(psd, 1, true_study_size, f) != true_study_size)
2699 {
2700 FAIL_READ:
2701 fprintf(outfile, "Failed to read data from %s\n", p);
2702 if (extra != NULL)
2703 {
2704 PCRE_FREE_STUDY(extra);
2705 }
2706 new_free(re);
2707 fclose(f);
2708 continue;
2709 }
2710 fprintf(outfile, "Study data loaded from %s\n", p);
2711 do_study = 1; /* To get the data output if requested */
2712 }
2713 else fprintf(outfile, "No study data\n");
2714
2715 /* Flip the necessary bytes. */
2716 if (do_flip)
2717 {
2718 int rc;
2719 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2720 if (rc == PCRE_ERROR_BADMODE)
2721 {
2722 /* Simulate the result of the function call below. */
2723 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2724 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2725 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2726 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2727 new_free(re);
2728 fclose(f);
2729 continue;
2730 }
2731 }
2732
2733 /* Need to know if UTF-8 for printing data strings. */
2734
2735 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2736 {
2737 new_free(re);
2738 fclose(f);
2739 continue;
2740 }
2741 use_utf = (get_options & PCRE_UTF8) != 0;
2742
2743 fclose(f);
2744 goto SHOW_INFO;
2745 }
2746
2747 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2748 the pattern; if it isn't complete, read more. */
2749
2750 delimiter = *p++;
2751
2752 if (isalnum(delimiter) || delimiter == '\\')
2753 {
2754 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2755 goto SKIP_DATA;
2756 }
2757
2758 pp = p;
2759 poffset = (int)(p - buffer);
2760
2761 for(;;)
2762 {
2763 while (*pp != 0)
2764 {
2765 if (*pp == '\\' && pp[1] != 0) pp++;
2766 else if (*pp == delimiter) break;
2767 pp++;
2768 }
2769 if (*pp != 0) break;
2770 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2771 {
2772 fprintf(outfile, "** Unexpected EOF\n");
2773 done = 1;
2774 goto CONTINUE;
2775 }
2776 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2777 }
2778
2779 /* The buffer may have moved while being extended; reset the start of data
2780 pointer to the correct relative point in the buffer. */
2781
2782 p = buffer + poffset;
2783
2784 /* If the first character after the delimiter is backslash, make
2785 the pattern end with backslash. This is purely to provide a way
2786 of testing for the error message when a pattern ends with backslash. */
2787
2788 if (pp[1] == '\\') *pp++ = '\\';
2789
2790 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2791 for callouts. */
2792
2793 *pp++ = 0;
2794 strcpy((char *)pbuffer, (char *)p);
2795
2796 /* Look for options after final delimiter */
2797
2798 options = 0;
2799 study_options = 0;
2800 log_store = showstore; /* default from command line */
2801
2802 while (*pp != 0)
2803 {
2804 switch (*pp++)
2805 {
2806 case 'f': options |= PCRE_FIRSTLINE; break;
2807 case 'g': do_g = 1; break;
2808 case 'i': options |= PCRE_CASELESS; break;
2809 case 'm': options |= PCRE_MULTILINE; break;
2810 case 's': options |= PCRE_DOTALL; break;
2811 case 'x': options |= PCRE_EXTENDED; break;
2812
2813 case '+':
2814 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2815 break;
2816
2817 case '=': do_allcaps = 1; break;
2818 case 'A': options |= PCRE_ANCHORED; break;
2819 case 'B': do_debug = 1; break;
2820 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2821 case 'D': do_debug = do_showinfo = 1; break;
2822 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2823 case 'F': do_flip = 1; break;
2824 case 'G': do_G = 1; break;
2825 case 'I': do_showinfo = 1; break;
2826 case 'J': options |= PCRE_DUPNAMES; break;
2827 case 'K': do_mark = 1; break;
2828 case 'M': log_store = 1; break;
2829 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2830
2831 #if !defined NOPOSIX
2832 case 'P': do_posix = 1; break;
2833 #endif
2834
2835 case 'S':
2836 if (do_study == 0)
2837 {
2838 do_study = 1;
2839 if (*pp == '+')
2840 {
2841 if (*(++pp) == '+')
2842 {
2843 verify_jit = TRUE;
2844 pp++;
2845 }
2846 if (*pp >= '1' && *pp <= '7')
2847 study_options |= jit_study_bits[*pp++ - '1'];
2848 else
2849 study_options |= jit_study_bits[6];
2850 }
2851 }
2852 else
2853 {
2854 do_study = 0;
2855 no_force_study = 1;
2856 }
2857 break;
2858
2859 case 'U': options |= PCRE_UNGREEDY; break;
2860 case 'W': options |= PCRE_UCP; break;
2861 case 'X': options |= PCRE_EXTRA; break;
2862 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2863 case 'Z': debug_lengths = 0; break;
2864 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2865 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2866
2867 case 'T':
2868 switch (*pp++)
2869 {
2870 case '0': tables = tables0; break;
2871 case '1': tables = tables1; break;
2872
2873 case '\r':
2874 case '\n':
2875 case ' ':
2876 case 0:
2877 fprintf(outfile, "** Missing table number after /T\n");
2878 goto SKIP_DATA;
2879
2880 default:
2881 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2882 goto SKIP_DATA;
2883 }
2884 break;
2885
2886 case 'L':
2887 ppp = pp;
2888 /* The '\r' test here is so that it works on Windows. */
2889 /* The '0' test is just in case this is an unterminated line. */
2890 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2891 *ppp = 0;
2892 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2893 {
2894 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2895 goto SKIP_DATA;
2896 }
2897 locale_set = 1;
2898 tables = PCRE_MAKETABLES;
2899 pp = ppp;
2900 break;
2901
2902 case '>':
2903 to_file = pp;
2904 while (*pp != 0) pp++;
2905 while (isspace(pp[-1])) pp--;
2906 *pp = 0;
2907 break;
2908
2909 case '<':
2910 {
2911 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2912 {
2913 options |= PCRE_JAVASCRIPT_COMPAT;
2914 pp += 3;
2915 }
2916 else
2917 {
2918 int x = check_newline(pp, outfile);
2919 if (x == 0) goto SKIP_DATA;
2920 options |= x;
2921 while (*pp++ != '>');
2922 }
2923 }
2924 break;
2925
2926 case '\r': /* So that it works in Windows */
2927 case '\n':
2928 case ' ':
2929 break;
2930
2931 default:
2932 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2933 goto SKIP_DATA;
2934 }
2935 }
2936
2937 /* Handle compiling via the POSIX interface, which doesn't support the
2938 timing, showing, or debugging options, nor the ability to pass over
2939 local character tables. Neither does it have 16-bit support. */
2940
2941 #if !defined NOPOSIX
2942 if (posix || do_posix)
2943 {
2944 int rc;
2945 int cflags = 0;
2946
2947 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2948 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2949 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2950 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2951 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2952 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2953 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2954
2955 first_gotten_store = 0;
2956 rc = regcomp(&preg, (char *)p, cflags);
2957
2958 /* Compilation failed; go back for another re, skipping to blank line
2959 if non-interactive. */
2960
2961 if (rc != 0)
2962 {
2963 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2964 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2965 goto SKIP_DATA;
2966 }
2967 }
2968
2969 /* Handle compiling via the native interface */
2970
2971 else
2972 #endif /* !defined NOPOSIX */
2973
2974 {
2975 /* In 16-bit mode, convert the input. */
2976
2977 #ifdef SUPPORT_PCRE16
2978 if (use_pcre16)
2979 {
2980 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2981 {
2982 case -1:
2983 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2984 "converted to UTF-16\n");
2985 goto SKIP_DATA;
2986
2987 case -2:
2988 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2989 "cannot be converted to UTF-16\n");
2990 goto SKIP_DATA;
2991
2992 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2993 fprintf(outfile, "**Failed: character value greater than 0xffff "
2994 "cannot be converted to 16-bit in non-UTF mode\n");
2995 goto SKIP_DATA;
2996
2997 default:
2998 break;
2999 }
3000 p = (pcre_uint8 *)buffer16;
3001 }
3002 #endif
3003
3004 /* Compile many times when timing */
3005
3006 if (timeit > 0)
3007 {
3008 register int i;
3009 clock_t time_taken;
3010 clock_t start_time = clock();
3011 for (i = 0; i < timeit; i++)
3012 {
3013 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3014 if (re != NULL) free(re);
3015 }
3016 time_taken = clock() - start_time;
3017 fprintf(outfile, "Compile time %.4f milliseconds\n",
3018 (((double)time_taken * 1000.0) / (double)timeit) /
3019 (double)CLOCKS_PER_SEC);
3020 }
3021
3022 first_gotten_store = 0;
3023 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3024
3025 /* Compilation failed; go back for another re, skipping to blank line
3026 if non-interactive. */
3027
3028 if (re == NULL)
3029 {
3030 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3031 SKIP_DATA:
3032 if (infile != stdin)
3033 {
3034 for (;;)
3035 {
3036 if (extend_inputline(infile, buffer, NULL) == NULL)
3037 {
3038 done = 1;
3039 goto CONTINUE;
3040 }
3041 len = (int)strlen((char *)buffer);
3042 while (len > 0 && isspace(buffer[len-1])) len--;
3043 if (len == 0) break;
3044 }
3045 fprintf(outfile, "\n");
3046 }
3047 goto CONTINUE;
3048 }
3049
3050 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3051 within the regex; check for this so that we know how to process the data
3052 lines. */
3053
3054 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3055 goto SKIP_DATA;
3056 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3057
3058 /* Extract the size for possible writing before possibly flipping it,
3059 and remember the store that was got. */
3060
3061 true_size = ((REAL_PCRE *)re)->size;
3062 regex_gotten_store = first_gotten_store;
3063
3064 /* Output code size information if requested */
3065
3066 if (log_store)
3067 fprintf(outfile, "Memory allocation (code space): %d\n",
3068 (int)(first_gotten_store -
3069 sizeof(REAL_PCRE) -
3070 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3071
3072 /* If -s or /S was present, study the regex to generate additional info to
3073 help with the matching, unless the pattern has the SS option, which
3074 suppresses the effect of /S (used for a few test patterns where studying is
3075 never sensible). */
3076
3077 if (do_study || (force_study >= 0 && !no_force_study))
3078 {
3079 if (timeit > 0)
3080 {
3081 register int i;
3082 clock_t time_taken;
3083 clock_t start_time = clock();
3084 for (i = 0; i < timeit; i++)
3085 {
3086 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3087 }
3088 time_taken = clock() - start_time;
3089 if (extra != NULL)
3090 {
3091 PCRE_FREE_STUDY(extra);
3092 }
3093 fprintf(outfile, " Study time %.4f milliseconds\n",
3094 (((double)time_taken * 1000.0) / (double)timeit) /
3095 (double)CLOCKS_PER_SEC);
3096 }
3097 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3098 if (error != NULL)
3099 fprintf(outfile, "Failed to study: %s\n", error);
3100 else if (extra != NULL)
3101 {
3102 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3103 if (log_store)
3104 {
3105 size_t jitsize;
3106 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3107 jitsize != 0)
3108 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3109 }
3110 }
3111 }
3112
3113 /* If /K was present, we set up for handling MARK data. */
3114
3115 if (do_mark)
3116 {
3117 if (extra == NULL)
3118 {
3119 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3120 extra->flags = 0;
3121 }
3122 extra->mark = &markptr;
3123 extra->flags |= PCRE_EXTRA_MARK;
3124 }
3125
3126 /* Extract and display information from the compiled data if required. */
3127
3128 SHOW_INFO:
3129
3130 if (do_debug)
3131 {
3132 fprintf(outfile, "------------------------------------------------------------------\n");
3133 PCRE_PRINTINT(re, outfile, debug_lengths);
3134 }
3135
3136 /* We already have the options in get_options (see above) */
3137
3138 if (do_showinfo)
3139 {
3140 unsigned long int all_options;
3141 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3142 hascrorlf, maxlookbehind;
3143 int nameentrysize, namecount;
3144 const pcre_uint8 *nametable;
3145
3146 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3147 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3148 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3149 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3150 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3151 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3152 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3153 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3154 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3155 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3156 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3157 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3158 != 0)
3159 goto SKIP_DATA;
3160
3161 if (size != regex_gotten_store) fprintf(outfile,
3162 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3163 (int)size, (int)regex_gotten_store);
3164
3165 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3166 if (backrefmax > 0)
3167 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3168
3169 if (namecount > 0)
3170 {
3171 fprintf(outfile, "Named capturing subpatterns:\n");
3172 while (namecount-- > 0)
3173 {
3174 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3175 int imm2_size = use_pcre16 ? 1 : 2;
3176 #else
3177 int imm2_size = IMM2_SIZE;
3178 #endif
3179 int length = (int)STRLEN(nametable + imm2_size);
3180 fprintf(outfile, " ");
3181 PCHARSV(nametable, imm2_size, length, outfile);
3182 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3183 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3184 fprintf(outfile, "%3d\n", use_pcre16?
3185 (int)(((PCRE_SPTR16)nametable)[0])
3186 :((int)nametable[0] << 8) | (int)nametable[1]);
3187 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3188 #else
3189 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3190 #ifdef SUPPORT_PCRE8
3191 nametable += nameentrysize;
3192 #else
3193 nametable += nameentrysize * 2;
3194 #endif
3195 #endif
3196 }
3197 }
3198
3199 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3200 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3201
3202 all_options = ((REAL_PCRE *)re)->options;
3203 if (do_flip) all_options = swap_uint32(all_options);
3204
3205 if (get_options == 0) fprintf(outfile, "No options\n");
3206 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3207 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3208 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3209 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3210 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3211 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3212 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3213 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3214 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3215 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3216 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3217 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3218 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3219 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3220 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3221 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3222 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3223 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3224
3225 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3226
3227 switch (get_options & PCRE_NEWLINE_BITS)
3228 {
3229 case PCRE_NEWLINE_CR:
3230 fprintf(outfile, "Forced newline sequence: CR\n");
3231 break;
3232
3233 case PCRE_NEWLINE_LF:
3234 fprintf(outfile, "Forced newline sequence: LF\n");
3235 break;
3236
3237 case PCRE_NEWLINE_CRLF:
3238 fprintf(outfile, "Forced newline sequence: CRLF\n");
3239 break;
3240
3241 case PCRE_NEWLINE_ANYCRLF:
3242 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3243 break;
3244
3245 case PCRE_NEWLINE_ANY:
3246 fprintf(outfile, "Forced newline sequence: ANY\n");
3247 break;
3248
3249 default:
3250 break;
3251 }
3252
3253 if (first_char == -1)
3254 {
3255 fprintf(outfile, "First char at start or follows newline\n");
3256 }
3257 else if (first_char < 0)
3258 {
3259 fprintf(outfile, "No first char\n");
3260 }
3261 else
3262 {
3263 const char *caseless =
3264 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3265 "" : " (caseless)";
3266
3267 if (PRINTOK(first_char))
3268 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3269 else
3270 {
3271 fprintf(outfile, "First char = ");
3272 pchar(first_char, outfile);
3273 fprintf(outfile, "%s\n", caseless);
3274 }
3275 }
3276
3277 if (need_char < 0)
3278 {
3279 fprintf(outfile, "No need char\n");
3280 }
3281 else
3282 {
3283 const char *caseless =
3284 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3285 "" : " (caseless)";
3286
3287 if (PRINTOK(need_char))
3288 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3289 else
3290 {
3291 fprintf(outfile, "Need char = ");
3292 pchar(need_char, outfile);
3293 fprintf(outfile, "%s\n", caseless);
3294 }
3295 }
3296
3297 if (maxlookbehind > 0)
3298 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3299
3300 /* Don't output study size; at present it is in any case a fixed
3301 value, but it varies, depending on the computer architecture, and
3302 so messes up the test suite. (And with the /F option, it might be
3303 flipped.) If study was forced by an external -s, don't show this
3304 information unless -i or -d was also present. This means that, except
3305 when auto-callouts are involved, the output from runs with and without
3306 -s should be identical. */
3307
3308 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3309 {
3310 if (extra == NULL)
3311 fprintf(outfile, "Study returned NULL\n");
3312 else
3313 {
3314 pcre_uint8 *start_bits = NULL;
3315 int minlength;
3316
3317 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3318 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3319
3320 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3321 {
3322 if (start_bits == NULL)
3323 fprintf(outfile, "No set of starting bytes\n");
3324 else
3325 {
3326 int i;
3327 int c = 24;
3328 fprintf(outfile, "Starting byte set: ");
3329 for (i = 0; i < 256; i++)
3330 {
3331 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3332 {
3333 if (c > 75)
3334 {
3335 fprintf(outfile, "\n ");
3336 c = 2;
3337 }
3338 if (PRINTOK(i) && i != ' ')
3339 {
3340 fprintf(outfile, "%c ", i);
3341 c += 2;
3342 }
3343 else
3344 {
3345 fprintf(outfile, "\\x%02x ", i);
3346 c += 5;
3347 }
3348 }
3349 }
3350 fprintf(outfile, "\n");
3351 }
3352 }
3353 }
3354
3355 /* Show this only if the JIT was set by /S, not by -s. */
3356
3357 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3358 {
3359 int jit;
3360 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3361 {
3362 if (jit)
3363 fprintf(outfile, "JIT study was successful\n");
3364 else
3365 #ifdef SUPPORT_JIT
3366 fprintf(outfile, "JIT study was not successful\n");
3367 #else
3368 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3369 #endif
3370 }
3371 }
3372 }
3373 }
3374
3375 /* If the '>' option was present, we write out the regex to a file, and
3376 that is all. The first 8 bytes of the file are the regex length and then
3377 the study length, in big-endian order. */
3378
3379 if (to_file != NULL)
3380 {
3381 FILE *f = fopen((char *)to_file, "wb");
3382 if (f == NULL)
3383 {
3384 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3385 }
3386 else
3387 {
3388 pcre_uint8 sbuf[8];
3389
3390 if (do_flip) regexflip(re, extra);
3391 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3392 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3393 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3394 sbuf[3] = (pcre_uint8)((true_size) & 255);
3395 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3396 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3397 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3398 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3399
3400 if (fwrite(sbuf, 1, 8, f) < 8 ||
3401 fwrite(re, 1, true_size, f) < true_size)
3402 {
3403 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3404 }
3405 else
3406 {
3407 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3408
3409 /* If there is study data, write it. */
3410
3411 if (extra != NULL)
3412 {
3413 if (fwrite(extra->study_data, 1, true_study_size, f) <
3414 true_study_size)
3415 {
3416 fprintf(outfile, "Write error on %s: %s\n", to_file,
3417 strerror(errno));
3418 }
3419 else fprintf(outfile, "Study data written to %s\n", to_file);
3420 }
3421 }
3422 fclose(f);
3423 }
3424
3425 new_free(re);
3426 if (extra != NULL)
3427 {
3428 PCRE_FREE_STUDY(extra);
3429 }
3430 if (locale_set)
3431 {
3432 new_free((void *)tables);
3433 setlocale(LC_CTYPE, "C");
3434 locale_set = 0;
3435 }
3436 continue; /* With next regex */
3437 }
3438 } /* End of non-POSIX compile */
3439
3440 /* Read data lines and test them */
3441
3442 for (;;)
3443 {
3444 pcre_uint8 *q;
3445 pcre_uint8 *bptr;
3446 int *use_offsets = offsets;
3447 int use_size_offsets = size_offsets;
3448 int callout_data = 0;
3449 int callout_data_set = 0;
3450 int count, c;
3451 int copystrings = 0;
3452 int find_match_limit = default_find_match_limit;
3453 int getstrings = 0;
3454 int getlist = 0;
3455 int gmatched = 0;
3456 int start_offset = 0;
3457 int start_offset_sign = 1;
3458 int g_notempty = 0;
3459 int use_dfa = 0;
3460
3461 *copynames = 0;
3462 *getnames = 0;
3463
3464 #ifdef SUPPORT_PCRE16
3465 cn16ptr = copynames;
3466 gn16ptr = getnames;
3467 #endif
3468 #ifdef SUPPORT_PCRE8
3469 cn8ptr = copynames8;
3470 gn8ptr = getnames8;
3471 #endif
3472
3473 SET_PCRE_CALLOUT(callout);
3474 first_callout = 1;
3475 last_callout_mark = NULL;
3476 callout_extra = 0;
3477 callout_count = 0;
3478 callout_fail_count = 999999;
3479 callout_fail_id = -1;
3480 show_malloc = 0;
3481 options = 0;
3482
3483 if (extra != NULL) extra->flags &=
3484 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3485
3486 len = 0;
3487 for (;;)
3488 {
3489 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3490 {
3491 if (len > 0) /* Reached EOF without hitting a newline */
3492 {
3493 fprintf(outfile, "\n");
3494 break;
3495 }
3496 done = 1;
3497 goto CONTINUE;
3498 }
3499 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3500 len = (int)strlen((char *)buffer);
3501 if (buffer[len-1] == '\n') break;
3502 }
3503
3504 while (len > 0 && isspace(buffer[len-1])) len--;
3505 buffer[len] = 0;
3506 if (len == 0) break;
3507
3508 p = buffer;
3509 while (isspace(*p)) p++;
3510
3511 bptr = q = dbuffer;
3512 while ((c = *p++) != 0)
3513 {
3514 int i = 0;
3515 int n = 0;
3516
3517 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3518 In non-UTF mode, allow the value of the byte to fall through to later,
3519 where values greater than 127 are turned into UTF-8 when running in
3520 16-bit mode. */
3521
3522 if (c != '\\')
3523 {
3524 if (use_utf)
3525 {
3526 *q++ = c;
3527 continue;
3528 }
3529 }
3530
3531 /* Handle backslash escapes */
3532
3533 else switch ((c = *p++))
3534 {
3535 case 'a': c = 7; break;
3536 case 'b': c = '\b'; break;
3537 case 'e': c = 27; break;
3538 case 'f': c = '\f'; break;
3539 case 'n': c = '\n'; break;
3540 case 'r': c = '\r'; break;
3541 case 't': c = '\t'; break;
3542 case 'v': c = '\v'; break;
3543
3544 case '0': case '1': case '2': case '3':
3545 case '4': case '5': case '6': case '7':
3546 c -= '0';
3547 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3548 c = c * 8 + *p++ - '0';
3549 break;
3550
3551 case 'x':
3552 if (*p == '{')
3553 {
3554 pcre_uint8 *pt = p;
3555 c = 0;
3556
3557 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3558 when isxdigit() is a macro that refers to its argument more than
3559 once. This is banned by the C Standard, but apparently happens in at
3560 least one MacOS environment. */
3561
3562 for (pt++; isxdigit(*pt); pt++)
3563 {
3564 if (++i == 9)
3565 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3566 "using only the first eight.\n");
3567 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3568 }
3569 if (*pt == '}')
3570 {
3571 p = pt + 1;
3572 break;
3573 }
3574 /* Not correct form for \x{...}; fall through */
3575 }
3576
3577 /* \x without {} always defines just one byte in 8-bit mode. This
3578 allows UTF-8 characters to be constructed byte by byte, and also allows
3579 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3580 Otherwise, pass it down to later code so that it can be turned into
3581 UTF-8 when running in 16-bit mode. */
3582
3583 c = 0;
3584 while (i++ < 2 && isxdigit(*p))
3585 {
3586 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3587 p++;
3588 }
3589 if (use_utf)
3590 {
3591 *q++ = c;
3592 continue;
3593 }
3594 break;
3595
3596 case 0: /* \ followed by EOF allows for an empty line */
3597 p--;
3598 continue;
3599
3600 case '>':
3601 if (*p == '-')
3602 {
3603 start_offset_sign = -1;
3604 p++;
3605 }
3606 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3607 start_offset *= start_offset_sign;
3608 continue;
3609
3610 case 'A': /* Option setting */
3611 options |= PCRE_ANCHORED;
3612 continue;
3613
3614 case 'B':
3615 options |= PCRE_NOTBOL;
3616 continue;
3617
3618 case 'C':
3619 if (isdigit(*p)) /* Set copy string */
3620 {
3621 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3622 copystrings |= 1 << n;
3623 }
3624 else if (isalnum(*p))
3625 {
3626 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3627 }
3628 else if (*p == '+')
3629 {
3630 callout_extra = 1;
3631 p++;
3632 }
3633 else if (*p == '-')
3634 {
3635 SET_PCRE_CALLOUT(NULL);
3636 p++;
3637 }
3638 else if (*p == '!')
3639 {
3640 callout_fail_id = 0;
3641 p++;
3642 while(isdigit(*p))
3643 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3644 callout_fail_count = 0;
3645 if (*p == '!')
3646 {
3647 p++;
3648 while(isdigit(*p))
3649 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3650 }
3651 }
3652 else if (*p == '*')
3653 {
3654 int sign = 1;
3655 callout_data = 0;
3656 if (*(++p) == '-') { sign = -1; p++; }
3657 while(isdigit(*p))
3658 callout_data = callout_data * 10 + *p++ - '0';
3659 callout_data *= sign;
3660 callout_data_set = 1;
3661 }
3662 continue;
3663
3664 #if !defined NODFA
3665 case 'D':
3666 #if !defined NOPOSIX
3667 if (posix || do_posix)
3668 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3669 else
3670 #endif
3671 use_dfa = 1;
3672 continue;
3673 #endif
3674
3675 #if !defined NODFA
3676 case 'F':
3677 options |= PCRE_DFA_SHORTEST;
3678 continue;
3679 #endif
3680
3681 case 'G':
3682 if (isdigit(*p))
3683 {
3684 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3685 getstrings |= 1 << n;
3686 }
3687 else if (isalnum(*p))
3688 {
3689 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3690 }
3691 continue;
3692
3693 case 'J':
3694 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3695 if (extra != NULL
3696 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3697 && extra->executable_jit != NULL)
3698 {
3699 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3700 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3701 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3702 }
3703 continue;
3704
3705 case 'L':
3706 getlist = 1;
3707 continue;
3708
3709 case 'M':
3710 find_match_limit = 1;
3711 continue;
3712
3713 case 'N':
3714 if ((options & PCRE_NOTEMPTY) != 0)
3715 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3716 else
3717 options |= PCRE_NOTEMPTY;
3718 continue;
3719
3720 case 'O':
3721 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3722 if (n > size_offsets_max)
3723 {
3724 size_offsets_max = n;
3725 free(offsets);
3726 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3727 if (offsets == NULL)
3728 {
3729 printf("** Failed to get %d bytes of memory for offsets vector\n",
3730 (int)(size_offsets_max * sizeof(int)));
3731 yield = 1;
3732 goto EXIT;
3733 }
3734 }
3735 use_size_offsets = n;
3736 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3737 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
3738 continue;
3739
3740 case 'P':
3741 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3742 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3743 continue;
3744
3745 case 'Q':
3746 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3747 if (extra == NULL)
3748 {
3749 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3750 extra->flags = 0;
3751 }
3752 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3753 extra->match_limit_recursion = n;
3754 continue;
3755
3756 case 'q':
3757 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3758 if (extra == NULL)
3759 {
3760 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3761 extra->flags = 0;
3762 }
3763 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3764 extra->match_limit = n;
3765 continue;
3766
3767 #if !defined NODFA
3768 case 'R':
3769 options |= PCRE_DFA_RESTART;
3770 continue;
3771 #endif
3772
3773 case 'S':
3774 show_malloc = 1;
3775 continue;
3776
3777 case 'Y':
3778 options |= PCRE_NO_START_OPTIMIZE;
3779 continue;
3780
3781 case 'Z':
3782 options |= PCRE_NOTEOL;
3783 continue;
3784
3785 case '?':
3786 options |= PCRE_NO_UTF8_CHECK;
3787 continue;
3788
3789 case '<':
3790 {
3791 int x = check_newline(p, outfile);
3792 if (x == 0) goto NEXT_DATA;
3793 options |= x;
3794 while (*p++ != '>');
3795 }
3796 continue;
3797 }
3798
3799 /* We now have a character value in c that may be greater than 255. In
3800 16-bit mode, we always convert characters to UTF-8 so that values greater
3801 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3802 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3803 mode must have come from \x{...} or octal constructs because values from
3804 \x.. get this far only in non-UTF mode. */
3805
3806 #if !defined NOUTF || defined SUPPORT_PCRE16
3807 if (use_pcre16 || use_utf)
3808 {
3809 pcre_uint8 buff8[8];
3810 int ii, utn;
3811 utn = ord2utf8(c, buff8);
3812 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3813 }
3814 else
3815 #endif
3816 {
3817 if (c > 255)
3818 {
3819 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3820 "and UTF-8 mode is not enabled.\n", c);
3821 fprintf(outfile, "** Truncation will probably give the wrong "
3822 "result.\n");
3823 }
3824 *q++ = c;
3825 }
3826 }
3827
3828 /* Reached end of subject string */
3829
3830 *q = 0;
3831 len = (int)(q - dbuffer);
3832
3833 /* Move the data to the end of the buffer so that a read over the end of
3834 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3835 we are using the POSIX interface, we must include the terminating zero. */
3836
3837 #if !defined NOPOSIX
3838 if (posix || do_posix)
3839 {
3840 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3841 bptr += buffer_size - len - 1;
3842 }
3843 else
3844 #endif
3845 {
3846 memmove(bptr + buffer_size - len, bptr, len);
3847 bptr += buffer_size - len;
3848 }
3849
3850 if ((all_use_dfa || use_dfa) && find_match_limit)
3851 {
3852 printf("**Match limit not relevant for DFA matching: ignored\n");
3853 find_match_limit = 0;
3854 }
3855
3856 /* Handle matching via the POSIX interface, which does not
3857 support timing or playing with the match limit or callout data. */
3858
3859 #if !defined NOPOSIX
3860 if (posix || do_posix)
3861 {
3862 int rc;
3863 int eflags = 0;
3864 regmatch_t *pmatch = NULL;
3865 if (use_size_offsets > 0)
3866 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3867 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3868 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3869 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3870
3871 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3872
3873 if (rc != 0)
3874 {
3875 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3876 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3877 }
3878 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3879 != 0)
3880 {
3881 fprintf(outfile, "Matched with REG_NOSUB\n");
3882 }
3883 else
3884 {
3885 size_t i;
3886 for (i = 0; i < (size_t)use_size_offsets; i++)
3887 {
3888 if (pmatch[i].rm_so >= 0)
3889 {
3890 fprintf(outfile, "%2d: ", (int)i);
3891 PCHARSV(dbuffer, pmatch[i].rm_so,
3892 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3893 fprintf(outfile, "\n");
3894 if (do_showcaprest || (i == 0 && do_showrest))
3895 {
3896 fprintf(outfile, "%2d+ ", (int)i);
3897 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3898 outfile);
3899 fprintf(outfile, "\n");
3900 }
3901 }
3902 }
3903 }
3904 free(pmatch);
3905 goto NEXT_DATA;
3906 }
3907
3908 #endif /* !defined NOPOSIX */
3909
3910 /* Handle matching via the native interface - repeats for /g and /G */
3911
3912 #ifdef SUPPORT_PCRE16
3913 if (use_pcre16)
3914 {
3915 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3916 switch(len)
3917 {
3918 case -1:
3919 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3920 "converted to UTF-16\n");
3921 goto NEXT_DATA;
3922
3923 case -2:
3924 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3925 "cannot be converted to UTF-16\n");
3926 goto NEXT_DATA;
3927
3928 case -3:
3929 fprintf(outfile, "**Failed: character value greater than 0xffff "
3930 "cannot be converted to 16-bit in non-UTF mode\n");
3931 goto NEXT_DATA;
3932
3933 default:
3934 break;
3935 }
3936 bptr = (pcre_uint8 *)buffer16;
3937 }
3938 #endif
3939
3940 /* Ensure that there is a JIT callback if we want to verify that JIT was
3941 actually used. If jit_stack == NULL, no stack has yet been assigned. */
3942
3943 if (verify_jit && jit_stack == NULL && extra != NULL)
3944 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3945
3946 for (;; gmatched++) /* Loop for /g or /G */
3947 {
3948 markptr = NULL;
3949 jit_was_used = FALSE;
3950
3951 if (timeitm > 0)
3952 {
3953 register int i;
3954 clock_t time_taken;
3955 clock_t start_time = clock();
3956
3957 #if !defined NODFA
3958 if (all_use_dfa || use_dfa)
3959 {
3960 if ((options & PCRE_DFA_RESTART) != 0)
3961 {
3962 fprintf(outfile, "Timing DFA restarts is not supported\n");
3963 break;
3964 }
3965 if (dfa_workspace == NULL)
3966 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3967 for (i = 0; i < timeitm; i++)
3968 {
3969 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3970 (options | g_notempty), use_offsets, use_size_offsets,
3971 dfa_workspace, DFA_WS_DIMENSION);
3972 }
3973 }
3974 else
3975 #endif
3976
3977 for (i = 0; i < timeitm; i++)
3978 {
3979 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3980 (options | g_notempty), use_offsets, use_size_offsets);
3981 }
3982 time_taken = clock() - start_time;
3983 fprintf(outfile, "Execute time %.4f milliseconds\n",
3984 (((double)time_taken * 1000.0) / (double)timeitm) /
3985 (double)CLOCKS_PER_SEC);
3986 }
3987
3988 /* If find_match_limit is set, we want to do repeated matches with
3989 varying limits in order to find the minimum value for the match limit and
3990 for the recursion limit. The match limits are relevant only to the normal
3991 running of pcre_exec(), so disable the JIT optimization. This makes it
3992 possible to run the same set of tests with and without JIT externally
3993 requested. */
3994
3995 if (find_match_limit)
3996 {
3997 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
3998 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3999 extra->flags = 0;
4000
4001 (void)check_match_limit(re, extra, bptr, len, start_offset,
4002 options|g_notempty, use_offsets, use_size_offsets,
4003 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4004 PCRE_ERROR_MATCHLIMIT, "match()");
4005
4006 count = check_match_limit(re, extra, bptr, len, start_offset,
4007 options|g_notempty, use_offsets, use_size_offsets,
4008 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4009 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4010 }
4011
4012 /* If callout_data is set, use the interface with additional data */
4013
4014 else if (callout_data_set)
4015 {
4016 if (extra == NULL)
4017 {
4018 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4019 extra->flags = 0;
4020 }
4021 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4022 extra->callout_data = &callout_data;
4023 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4024 options | g_notempty, use_offsets, use_size_offsets);
4025 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4026 }
4027
4028 /* The normal case is just to do the match once, with the default
4029 value of match_limit. */
4030
4031 #if !defined NODFA
4032 else if (all_use_dfa || use_dfa)
4033 {
4034 if (dfa_workspace == NULL)
4035 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4036 if (dfa_matched++ == 0)
4037 dfa_workspace[0] = -1; /* To catch bad restart */
4038 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4039 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4040 DFA_WS_DIMENSION);
4041 if (count == 0)
4042 {
4043 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4044 count = use_size_offsets/2;
4045 }
4046 }
4047 #endif
4048
4049 else
4050 {
4051 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4052 options | g_notempty, use_offsets, use_size_offsets);
4053 if (count == 0)
4054 {
4055 fprintf(outfile, "Matched, but too many substrings\n");
4056 count = use_size_offsets/3;
4057 }
4058 }
4059
4060 /* Matched */
4061
4062 if (count >= 0)
4063 {
4064 int i, maxcount;
4065 void *cnptr, *gnptr;
4066
4067 #if !defined NODFA
4068 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4069 #endif
4070 maxcount = use_size_offsets/3;
4071
4072 /* This is a check against a lunatic return value. */
4073
4074 if (count > maxcount)
4075 {
4076 fprintf(outfile,
4077 "** PCRE error: returned count %d is too big for offset size %d\n",
4078 count, use_size_offsets);
4079 count = use_size_offsets/3;
4080 if (do_g || do_G)
4081 {
4082 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4083 do_g = do_G = FALSE; /* Break g/G loop */
4084 }
4085 }
4086
4087 /* do_allcaps requests showing of all captures in the pattern, to check
4088 unset ones at the end. */
4089
4090 if (do_allcaps)
4091 {
4092 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4093 goto SKIP_DATA;
4094 count++; /* Allow for full match */
4095 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4096 }
4097
4098 /* Output the captured substrings */
4099
4100 for (i = 0; i < count * 2; i += 2)
4101 {
4102 if (use_offsets[i] < 0)
4103 {
4104 if (use_offsets[i] != -1)
4105 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4106 use_offsets[i], i);
4107 if (use_offsets[i+1] != -1)
4108 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4109 use_offsets[i+1], i+1);
4110 fprintf(outfile, "%2d: <unset>\n", i/2);
4111 }
4112 else
4113 {
4114 fprintf(outfile, "%2d: ", i/2);
4115 PCHARSV(bptr, use_offsets[i],
4116 use_offsets[i+1] - use_offsets[i], outfile);
4117 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4118 fprintf(outfile, "\n");
4119 if (do_showcaprest || (i == 0 && do_showrest))
4120 {
4121 fprintf(outfile, "%2d+ ", i/2);
4122 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4123 outfile);
4124 fprintf(outfile, "\n");
4125 }
4126 }
4127 }
4128
4129 if (markptr != NULL)
4130 {
4131 fprintf(outfile, "MK: ");
4132 PCHARSV(markptr, 0, -1, outfile);
4133 fprintf(outfile, "\n");
4134 }
4135
4136 for (i = 0; i < 32; i++)
4137 {
4138 if ((copystrings & (1 << i)) != 0)
4139 {
4140 int rc;
4141 char copybuffer[256];
4142 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4143 copybuffer, sizeof(copybuffer));
4144 if (rc < 0)
4145 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4146 else
4147 {
4148 fprintf(outfile, "%2dC ", i);
4149 PCHARSV(copybuffer, 0, rc, outfile);
4150 fprintf(outfile, " (%d)\n", rc);
4151 }
4152 }
4153 }
4154
4155 cnptr = copynames;
4156 for (;;)
4157 {
4158 int rc;
4159 char copybuffer[256];
4160
4161 if (use_pcre16)
4162 {
4163 if (*(pcre_uint16 *)cnptr == 0) break;
4164 }
4165 else
4166 {
4167 if (*(pcre_uint8 *)cnptr == 0) break;
4168 }
4169
4170 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4171 cnptr, copybuffer, sizeof(copybuffer));
4172
4173 if (rc < 0)
4174 {
4175 fprintf(outfile, "copy substring ");
4176 PCHARSV(cnptr, 0, -1, outfile);
4177 fprintf(outfile, " failed %d\n", rc);
4178 }
4179 else
4180 {
4181 fprintf(outfile, " C ");
4182 PCHARSV(copybuffer, 0, rc, outfile);
4183 fprintf(outfile, " (%d) ", rc);
4184 PCHARSV(cnptr, 0, -1, outfile);
4185 putc('\n', outfile);
4186 }
4187
4188 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4189 }
4190
4191 for (i = 0; i < 32; i++)
4192 {
4193 if ((getstrings & (1 << i)) != 0)
4194 {
4195 int rc;
4196 const char *substring;
4197 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4198 if (rc < 0)
4199 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4200 else
4201 {
4202 fprintf(outfile, "%2dG ", i);
4203 PCHARSV(substring, 0, rc, outfile);
4204 fprintf(outfile, " (%d)\n", rc);
4205 PCRE_FREE_SUBSTRING(substring);
4206 }
4207 }
4208 }
4209
4210 gnptr = getnames;
4211 for (;;)
4212 {
4213 int rc;
4214 const char *substring;
4215
4216 if (use_pcre16)
4217 {
4218 if (*(pcre_uint16 *)gnptr == 0) break;
4219 }
4220 else
4221 {
4222 if (*(pcre_uint8 *)gnptr == 0) break;
4223 }
4224
4225 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4226 gnptr, &substring);
4227 if (rc < 0)
4228 {
4229 fprintf(outfile, "get substring ");
4230 PCHARSV(gnptr, 0, -1, outfile);
4231 fprintf(outfile, " failed %d\n", rc);
4232 }
4233 else
4234 {
4235 fprintf(outfile, " G ");
4236 PCHARSV(substring, 0, rc, outfile);
4237 fprintf(outfile, " (%d) ", rc);
4238 PCHARSV(gnptr, 0, -1, outfile);
4239 PCRE_FREE_SUBSTRING(substring);
4240 putc('\n', outfile);
4241 }
4242
4243 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4244 }
4245
4246 if (getlist)
4247 {
4248 int rc;
4249 const char **stringlist;
4250 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4251 if (rc < 0)
4252 fprintf(outfile, "get substring list failed %d\n", rc);
4253 else
4254 {
4255 for (i = 0; i < count; i++)
4256 {
4257 fprintf(outfile, "%2dL ", i);
4258 PCHARSV(stringlist[i], 0, -1, outfile);
4259 putc('\n', outfile);
4260 }
4261 if (stringlist[i] != NULL)
4262 fprintf(outfile, "string list not terminated by NULL\n");
4263 PCRE_FREE_SUBSTRING_LIST(stringlist);
4264 }
4265 }
4266 }
4267
4268 /* There was a partial match */
4269
4270 else if (count == PCRE_ERROR_PARTIAL)
4271 {
4272 if (markptr == NULL) fprintf(outfile, "Partial match");
4273 else
4274 {
4275 fprintf(outfile, "Partial match, mark=");
4276 PCHARSV(markptr, 0, -1, outfile);
4277 }
4278 if (use_size_offsets > 1)
4279 {
4280 fprintf(outfile, ": ");
4281 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4282 outfile);
4283 }
4284 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4285 fprintf(outfile, "\n");
4286 break; /* Out of the /g loop */
4287 }
4288
4289 /* Failed to match. If this is a /g or /G loop and we previously set
4290 g_notempty after a null match, this is not necessarily the end. We want
4291 to advance the start offset, and continue. We won't be at the end of the
4292 string - that was checked before setting g_notempty.
4293
4294 Complication arises in the case when the newline convention is "any",
4295 "crlf", or "anycrlf". If the previous match was at the end of a line
4296 terminated by CRLF, an advance of one character just passes the \r,
4297 whereas we should prefer the longer newline sequence, as does the code in
4298 pcre_exec(). Fudge the offset value to achieve this. We check for a
4299 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4300 find the default.
4301
4302 Otherwise, in the case of UTF-8 matching, the advance must be one
4303 character, not one byte. */
4304
4305 else
4306 {
4307 if (g_notempty != 0)
4308 {
4309 int onechar = 1;
4310 unsigned int obits = ((REAL_PCRE *)re)->options;
4311 use_offsets[0] = start_offset;
4312 if ((obits & PCRE_NEWLINE_BITS) == 0)
4313 {
4314 int d;
4315 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4316 /* Note that these values are always the ASCII ones, even in
4317 EBCDIC environments. CR = 13, NL = 10. */
4318 obits = (d == 13)? PCRE_NEWLINE_CR :
4319 (d == 10)? PCRE_NEWLINE_LF :
4320 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4321 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4322 (d == -1)? PCRE_NEWLINE_ANY : 0;
4323 }
4324 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4325 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4326 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4327 &&
4328 start_offset < len - 1 &&
4329 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4330 (use_pcre16?
4331 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4332 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4333 :
4334 bptr[start_offset] == '\r'
4335 && bptr[start_offset + 1] == '\n')
4336 #elif defined SUPPORT_PCRE16
4337 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4338 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4339 #else
4340 bptr[start_offset] == '\r'
4341 && bptr[start_offset + 1] == '\n'
4342 #endif
4343 )
4344 onechar++;
4345 else if (use_utf)
4346 {
4347 while (start_offset + onechar < len)
4348 {
4349 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4350 onechar++;
4351 }
4352 }
4353 use_offsets[1] = start_offset + onechar;
4354 }
4355 else
4356 {
4357 switch(count)
4358 {
4359 case PCRE_ERROR_NOMATCH:
4360 if (gmatched == 0)
4361 {
4362 if (markptr == NULL)
4363 {
4364 fprintf(outfile, "No match");
4365 }
4366 else
4367 {
4368 fprintf(outfile, "No match, mark = ");
4369 PCHARSV(markptr, 0, -1, outfile);
4370 }
4371 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4372 putc('\n', outfile);
4373 }
4374 break;
4375
4376 case PCRE_ERROR_BADUTF8:
4377 case PCRE_ERROR_SHORTUTF8:
4378 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4379 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4380 use_pcre16? "16" : "8");
4381 if (use_size_offsets >= 2)
4382 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4383 use_offsets[1]);
4384 fprintf(outfile, "\n");
4385 break;
4386
4387 case PCRE_ERROR_BADUTF8_OFFSET:
4388 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4389 use_pcre16? "16" : "8");
4390 break;
4391
4392 default:
4393 if (count < 0 &&
4394 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4395 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4396 else
4397 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4398 break;
4399 }
4400
4401 break; /* Out of the /g loop */
4402 }
4403 }
4404
4405 /* If not /g or /G we are done */
4406
4407 if (!do_g && !do_G) break;
4408
4409 /* If we have matched an empty string, first check to see if we are at
4410 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4411 Perl's /g option does. This turns out to be rather cunning. First we set
4412 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4413 same point. If this fails (picked up above) we advance to the next
4414 character. */
4415
4416 g_notempty = 0;
4417
4418 if (use_offsets[0] == use_offsets[1])
4419 {
4420 if (use_offsets[0] == len) break;
4421 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4422 }
4423
4424 /* For /g, update the start offset, leaving the rest alone */
4425
4426 if (do_g) start_offset = use_offsets[1];
4427
4428 /* For /G, update the pointer and length */
4429
4430 else
4431 {
4432 bptr += use_offsets[1] * CHAR_SIZE;
4433 len -= use_offsets[1];
4434 }
4435 } /* End of loop for /g and /G */
4436
4437 NEXT_DATA: continue;
4438 } /* End of loop for data lines */
4439
4440 CONTINUE:
4441
4442 #if !defined NOPOSIX
4443 if (posix || do_posix) regfree(&preg);
4444 #endif
4445
4446 if (re != NULL) new_free(re);
4447 if (extra != NULL)
4448 {
4449 PCRE_FREE_STUDY(extra);
4450 }
4451 if (locale_set)
4452 {
4453 new_free((void *)tables);
4454 setlocale(LC_CTYPE, "C");
4455 locale_set = 0;
4456 }
4457 if (jit_stack != NULL)
4458 {
4459 PCRE_JIT_STACK_FREE(jit_stack);
4460 jit_stack = NULL;
4461 }
4462 }
4463
4464 if (infile == stdin) fprintf(outfile, "\n");
4465
4466 EXIT:
4467
4468 if (infile != NULL && infile != stdin) fclose(infile);
4469 if (outfile != NULL && outfile != stdout) fclose(outfile);
4470
4471 free(buffer);
4472 free(dbuffer);
4473 free(pbuffer);
4474 free(offsets);
4475
4476 #ifdef SUPPORT_PCRE16
4477 if (buffer16 != NULL) free(buffer16);
4478 #endif
4479
4480 #if !defined NODFA
4481 if (dfa_workspace != NULL)
4482 free(dfa_workspace);
4483 #endif
4484
4485 return yield;
4486 }
4487
4488 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5