/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 839 - (show annotations)
Fri Dec 30 13:22:28 2011 UTC (7 years, 10 months ago) by zherczeg
File MIME type: text/plain
File size: 128874 byte(s)
endianness fixes and JIT compiler update
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
#if defined _WIN32 || defined WIN32

/* ----- Windows ----- */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

/* Input is opened in text mode on Windows (no "b"). */
#define INPUT_MODE "r"

/* Windows spells these with a leading underscore. Some environments already
provide the plain names, so only supply the ones that are missing. */

#if !defined isatty
#define isatty _isatty
#endif

#if !defined fileno
#define fileno _fileno
#endif

/* A user-supplied fix for Borland Builder 5, which has setmode() rather than
_setmode(). */

#if defined __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else

/* ----- Not Windows ----- */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#define INPUT_MODE "rb"

#endif

/* The output mode is binary on every platform. */

#define OUTPUT_MODE "wb"
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
/* PRINTABLE(c) is a locale-independent printability test: 32..126 normally,
64..254 when built for EBCDIC. The argument is evaluated twice. */

#ifndef EBCDIC
#define PRINTABLE(c) (32 <= (c) && (c) < 127)
#else
#define PRINTABLE(c) (64 <= (c) && (c) < 255)
#endif

/* PRINTOK(c) defers to isprint() once locale_set is non-zero; until then it
uses the fixed PRINTABLE ranges. */

#define PRINTOK(c) (!locale_set? PRINTABLE(c) : isprint(c))
162
/* A build that supports only the 16-bit library cannot test the POSIX
wrapper, so force NOPOSIX on unless it has been defined already. */

#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
#ifndef NOPOSIX
#define NOPOSIX
#endif
#endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
/* No UTF support in the library means no UTF support here either. */
#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
201 #ifdef SUPPORT_PCRE8
202
/* 8-bit bindings. Each macro forwards to the matching pcre_xxx() function (or
to an 8-bit helper in this file), casting string arguments to the char pointers
that the 8-bit API takes. Where a macro's first parameter is a result variable
(lv, p, re, rc, count, n, extra in PCRE_STUDY8) the expansion is an assignment
to it, so these macros are meant to be used as complete statements. */

/* For the PCHARSxxx macros the string is passed as base + offset in separate
arguments; the offset is applied after the cast, so it counts pcre_uint8
units. The offset is parenthesized so that an expression argument cannot be
torn apart by the surrounding "+". */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* cn16 is accepted only so that the 8-bit and 16-bit variants share one
argument list; it is not used here. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define STRLEN8(p) ((int)strlen((char *)p))


#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* The second parameter is the compiled pattern. It used to be spelled "rc"
while the expansion referred to "re", so the argument was ignored and whatever
variable named "re" happened to be in scope at the call site was used
instead. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)
270
271 #endif /* SUPPORT_PCRE8 */
272
273 /* -----------------------------------------------------------*/
274
275 #ifdef SUPPORT_PCRE16
276
/* 16-bit bindings. Each macro forwards to the matching pcre16_xxx() function
(or to a 16-bit helper in this file), casting string arguments to the 16-bit
pointer types. Where a macro's first parameter is a result variable the
expansion is an assignment to it, so these macros are meant to be used as
complete statements. */

/* For the PCHARSxxx macros the string is passed as base + offset in separate
arguments; the offset is applied after the cast, so it counts 16-bit units.
The offset is parenthesized so that an expression argument cannot be torn
apart by the surrounding "+". */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* cn8 is accepted only so that the 8-bit and 16-bit variants share one
argument list; it is not used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = callout


#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)

/* The copy macros pass half of "size" to the library. NOTE(review): presumably
callers give the buffer size in bytes and the 16-bit API wants 16-bit units -
confirm at the call sites. "size" is parenthesized so that an argument such as
a + b is halved as a whole rather than becoming a + b/2. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study(extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* The output-pointer arguments below go through (void*) before the final
cast, as in the original code. */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* The second parameter is the compiled pattern. It used to be spelled "rc"
while the expansion referred to "re", so the argument was ignored and whatever
variable named "re" happened to be in scope at the call site was used
instead. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)
347
348 #endif /* SUPPORT_PCRE16 */
349
350
351 /* ----- Both modes are supported; a runtime test is needed, except for
352 pcre_config(), and the JIT stack functions, when it doesn't matter which
353 version is called. ----- */
354
355 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356
/* Size in bytes of one character unit in the currently selected mode. */

#define CHAR_SIZE (use_pcre16? 2:1)

/* Helper for the statement-like macros below: run the 16-bit form when
use_pcre16 is set, otherwise the 8-bit form. The do { } while (0) wrapper and
the braces make each generic macro a single statement that needs its trailing
semicolon and cannot pair up with an "else" belonging to the caller. */

#define PCRETEST_DISPATCH(stmt16, stmt8) \
  do { \
    if (use_pcre16) { stmt16; } \
    else { stmt8; } \
  } while (0)

#define PCHARS(lv, p, offset, len, f) \
  PCRETEST_DISPATCH( \
    PCHARS16(lv, p, offset, len, f), \
    PCHARS8(lv, p, offset, len, f))

#define PCHARSV(p, offset, len, f) \
  PCRETEST_DISPATCH( \
    PCHARSV16(p, offset, len, f), \
    PCHARSV8(p, offset, len, f))

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  PCRETEST_DISPATCH( \
    READ_CAPTURE_NAME16(p, cn8, cn16, re), \
    READ_CAPTURE_NAME8(p, cn8, cn16, re))

#define SET_PCRE_CALLOUT(callout) \
  PCRETEST_DISPATCH( \
    SET_PCRE_CALLOUT16(callout), \
    SET_PCRE_CALLOUT8(callout))

/* STRLEN is an expression, not a statement, so it stays a conditional. */

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  PCRETEST_DISPATCH( \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables), \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables))

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  PCRETEST_DISPATCH( \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size), \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size))

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  PCRETEST_DISPATCH( \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size), \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size))

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  PCRETEST_DISPATCH( \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace), \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace))

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  PCRETEST_DISPATCH( \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets), \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets))

#define PCRE_FREE_STUDY(extra) \
  PCRETEST_DISPATCH( \
    PCRE_FREE_STUDY16(extra), \
    PCRE_FREE_STUDY8(extra))

#define PCRE_FREE_SUBSTRING(substring) \
  PCRETEST_DISPATCH( \
    PCRE_FREE_SUBSTRING16(substring), \
    PCRE_FREE_SUBSTRING8(substring))

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  PCRETEST_DISPATCH( \
    PCRE_FREE_SUBSTRING_LIST16(listptr), \
    PCRE_FREE_SUBSTRING_LIST8(listptr))

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  PCRETEST_DISPATCH( \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr), \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr))

#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  PCRETEST_DISPATCH( \
    PCRE_GET_STRINGNUMBER16(n, re, ptr), \
    PCRE_GET_STRINGNUMBER8(n, re, ptr))

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  PCRETEST_DISPATCH( \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr), \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr))

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  PCRETEST_DISPATCH( \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr), \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr))

#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE pcre_jit_stack_free

/* An expression yielding the tables built by the library for the selected
mode. */

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  PCRETEST_DISPATCH( \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables), \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables))

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  PCRETEST_DISPATCH( \
    PCRE_PRINTINT16(re, outfile, debug_lengths), \
    PCRE_PRINTINT8(re, outfile, debug_lengths))

#define PCRE_STUDY(extra, re, options, error) \
  PCRETEST_DISPATCH( \
    PCRE_STUDY16(extra, re, options, error), \
    PCRE_STUDY8(extra, re, options, error))
496
497 /* ----- Only 8-bit mode is supported ----- */
498
499 #elif defined SUPPORT_PCRE8
500 #define CHAR_SIZE 1
501 #define PCHARS PCHARS8
502 #define PCHARSV PCHARSV8
503 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
504 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
505 #define STRLEN STRLEN8
506 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
507 #define PCRE_COMPILE PCRE_COMPILE8
508 #define PCRE_CONFIG pcre_config
509 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
511 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
512 #define PCRE_EXEC PCRE_EXEC8
513 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
514 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
515 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
516 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
517 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
518 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
519 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
520 #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
521 #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
522 #define PCRE_MAKETABLES pcre_maketables()
523 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524 #define PCRE_PRINTINT PCRE_PRINTINT8
525 #define PCRE_STUDY PCRE_STUDY8
526
527 /* ----- Only 16-bit mode is supported ----- */
528
529 #else
530 #define CHAR_SIZE 2
531 #define PCHARS PCHARS16
532 #define PCHARSV PCHARSV16
533 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
534 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
535 #define STRLEN STRLEN16
536 #define PCRE_ASSIGN_JIT_STACK pcre16_assign_jit_stack
537 #define PCRE_COMPILE PCRE_COMPILE16
538 #define PCRE_CONFIG pcre16_config
539 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
541 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
542 #define PCRE_EXEC PCRE_EXEC16
543 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
544 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
545 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
546 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
547 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
548 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
549 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
550 #define PCRE_JIT_STACK_ALLOC pcre16_jit_stack_alloc
551 #define PCRE_JIT_STACK_FREE pcre16_jit_stack_free
552 #define PCRE_MAKETABLES pcre16_maketables()
553 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554 #define PCRE_PRINTINT PCRE_PRINTINT16
555 #define PCRE_STUDY PCRE_STUDY16
556 #endif
557
558 /* ----- End of mode-specific function call macros ----- */
559
560
561 /* Other parameters */
562
/* Supply CLOCKS_PER_SEC if <time.h> did not: use CLK_TCK when that exists,
otherwise fall back to 100. */

#if !defined CLOCKS_PER_SEC
#if defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif
570
571 /* This is the default loop count for timing. */
572
573 #define LOOPREPEAT 500000
574
575 /* Static variables */
576
/* File-scope state. Objects with static storage duration start out as zero
or NULL whether or not an initializer is written; the initializers here only
spell that starting value out. */

static FILE *outfile = NULL;

static int log_store = 0;
static int debug_lengths = 0;
static int locale_set = 0;
static int show_malloc = 0;
static int use_utf = 0;

static int callout_count = 0;
static int callout_extra = 0;
static int callout_fail_count = 0;
static int callout_fail_id = 0;
static int first_callout = 0;
static const unsigned char *last_callout_mark = NULL;

static size_t gotten_store = 0;
static size_t first_gotten_store = 0;
591
592 /* The buffers grow automatically if very long input lines are encountered. */
593
594 static int buffer_size = 50000;
595 static pcre_uint8 *buffer = NULL;
596 static pcre_uint8 *dbuffer = NULL;
597 static pcre_uint8 *pbuffer = NULL;
598
599 /* Another buffer is needed for translation to 16-bit character strings. It will
600 be obtained and extended as required. */
601
602 #ifdef SUPPORT_PCRE16
603 static int buffer16_size = 0;
604 static pcre_uint16 *buffer16 = NULL;
605
606 #ifdef SUPPORT_PCRE8
607
608 /* We need the table of operator lengths that is used for 16-bit compiling, in
609 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611 appropriately for the 16-bit world. Just as a safety check, make sure that
612 COMPILE_PCRE16 is *not* set. */
613
614 #ifdef COMPILE_PCRE16
615 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616 #endif
617
618 #if LINK_SIZE == 2
619 #undef LINK_SIZE
620 #define LINK_SIZE 1
621 #elif LINK_SIZE == 3 || LINK_SIZE == 4
622 #undef LINK_SIZE
623 #define LINK_SIZE 2
624 #else
625 #error LINK_SIZE must be either 2, 3, or 4
626 #endif
627
628 #undef IMM2_SIZE
629 #define IMM2_SIZE 1
630
631 #endif /* SUPPORT_PCRE8 */
632
633 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
634 #endif /* SUPPORT_PCRE16 */
635
636 /* If we have 8-bit support, default use_pcre16 to false; if there is also
637 16-bit support, it can be changed by an option. If there is no 8-bit support,
638 there must be 16-bit support, so default it to 1. */
639
/* Start in 16-bit mode only when the 8-bit library is not available. */
#if !defined SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
645
646 /* Textual explanations for runtime error codes */
647
/* Message table for runtime errors. A NULL entry marks a code that has no
text here: the existing notes say such codes are either not errors, never
returned by the matching functions, or handled specially elsewhere.
NOTE(review): presumably indexed by the negated library return code - confirm
at the point of use. The table has 29 entries (indexes 0 to 28). */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error"
};
679
680
681 /*************************************************
682 * Alternate character tables *
683 *************************************************/
684
685 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
686 using the default tables of the library. However, the T option can be used to
687 select alternate sets of tables, for different kinds of testing. Note also that
688 the L (locale) option also adjusts the tables. */
689
690 /* This is the set of tables distributed as default with PCRE. It recognizes
691 only ASCII characters. */
692
693 static const pcre_uint8 tables0[] = {
694
695 /* This table is a lower casing table. */
696
697 0, 1, 2, 3, 4, 5, 6, 7,
698 8, 9, 10, 11, 12, 13, 14, 15,
699 16, 17, 18, 19, 20, 21, 22, 23,
700 24, 25, 26, 27, 28, 29, 30, 31,
701 32, 33, 34, 35, 36, 37, 38, 39,
702 40, 41, 42, 43, 44, 45, 46, 47,
703 48, 49, 50, 51, 52, 53, 54, 55,
704 56, 57, 58, 59, 60, 61, 62, 63,
705 64, 97, 98, 99,100,101,102,103,
706 104,105,106,107,108,109,110,111,
707 112,113,114,115,116,117,118,119,
708 120,121,122, 91, 92, 93, 94, 95,
709 96, 97, 98, 99,100,101,102,103,
710 104,105,106,107,108,109,110,111,
711 112,113,114,115,116,117,118,119,
712 120,121,122,123,124,125,126,127,
713 128,129,130,131,132,133,134,135,
714 136,137,138,139,140,141,142,143,
715 144,145,146,147,148,149,150,151,
716 152,153,154,155,156,157,158,159,
717 160,161,162,163,164,165,166,167,
718 168,169,170,171,172,173,174,175,
719 176,177,178,179,180,181,182,183,
720 184,185,186,187,188,189,190,191,
721 192,193,194,195,196,197,198,199,
722 200,201,202,203,204,205,206,207,
723 208,209,210,211,212,213,214,215,
724 216,217,218,219,220,221,222,223,
725 224,225,226,227,228,229,230,231,
726 232,233,234,235,236,237,238,239,
727 240,241,242,243,244,245,246,247,
728 248,249,250,251,252,253,254,255,
729
730 /* This table is a case flipping table. */
731
732 0, 1, 2, 3, 4, 5, 6, 7,
733 8, 9, 10, 11, 12, 13, 14, 15,
734 16, 17, 18, 19, 20, 21, 22, 23,
735 24, 25, 26, 27, 28, 29, 30, 31,
736 32, 33, 34, 35, 36, 37, 38, 39,
737 40, 41, 42, 43, 44, 45, 46, 47,
738 48, 49, 50, 51, 52, 53, 54, 55,
739 56, 57, 58, 59, 60, 61, 62, 63,
740 64, 97, 98, 99,100,101,102,103,
741 104,105,106,107,108,109,110,111,
742 112,113,114,115,116,117,118,119,
743 120,121,122, 91, 92, 93, 94, 95,
744 96, 65, 66, 67, 68, 69, 70, 71,
745 72, 73, 74, 75, 76, 77, 78, 79,
746 80, 81, 82, 83, 84, 85, 86, 87,
747 88, 89, 90,123,124,125,126,127,
748 128,129,130,131,132,133,134,135,
749 136,137,138,139,140,141,142,143,
750 144,145,146,147,148,149,150,151,
751 152,153,154,155,156,157,158,159,
752 160,161,162,163,164,165,166,167,
753 168,169,170,171,172,173,174,175,
754 176,177,178,179,180,181,182,183,
755 184,185,186,187,188,189,190,191,
756 192,193,194,195,196,197,198,199,
757 200,201,202,203,204,205,206,207,
758 208,209,210,211,212,213,214,215,
759 216,217,218,219,220,221,222,223,
760 224,225,226,227,228,229,230,231,
761 232,233,234,235,236,237,238,239,
762 240,241,242,243,244,245,246,247,
763 248,249,250,251,252,253,254,255,
764
765 /* This table contains bit maps for various character classes. Each map is 32
766 bytes long and the bits run from the least significant end of each byte. The
767 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
768 graph, print, punct, and cntrl. Other classes are built from combinations. */
769
770 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
771 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
772 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774
775 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
776 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779
780 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
781 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784
785 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
787 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789
790 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
792 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794
795 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
796 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
797 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799
800 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
801 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
802 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804
805 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
806 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
807 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809
810 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
811 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
812 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814
815 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
816 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819
820 /* This table identifies various classes of character by individual bits:
821 0x01 white space character
822 0x02 letter
823 0x04 decimal digit
824 0x08 hexadecimal digit
825 0x10 alphanumeric or '_'
826 0x80 regular expression metacharacter or binary zero
827 */
828
829 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
830 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
831 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
832 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
833 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
834 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
835 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
836 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
837 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
838 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
839 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
840 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
841 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
842 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
843 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
844 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
857 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
858 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
861
862 /* This is a set of tables that came originally from a Windows user. It seems to
863 be at least an approximation of ISO 8859. In particular, there are characters
864 greater than 128 that are marked as spaces, letters, etc. */
865
866 static const pcre_uint8 tables1[] = {
/* Bytes 0-255: lower-casing table. 65-90 ('A'-'Z') map to 97-122 ('a'-'z');
192-222 map to 224-254, except 215, which maps to itself. Every other value
maps to itself. */
867 0,1,2,3,4,5,6,7,
868 8,9,10,11,12,13,14,15,
869 16,17,18,19,20,21,22,23,
870 24,25,26,27,28,29,30,31,
871 32,33,34,35,36,37,38,39,
872 40,41,42,43,44,45,46,47,
873 48,49,50,51,52,53,54,55,
874 56,57,58,59,60,61,62,63,
875 64,97,98,99,100,101,102,103,
876 104,105,106,107,108,109,110,111,
877 112,113,114,115,116,117,118,119,
878 120,121,122,91,92,93,94,95,
879 96,97,98,99,100,101,102,103,
880 104,105,106,107,108,109,110,111,
881 112,113,114,115,116,117,118,119,
882 120,121,122,123,124,125,126,127,
883 128,129,130,131,132,133,134,135,
884 136,137,138,139,140,141,142,143,
885 144,145,146,147,148,149,150,151,
886 152,153,154,155,156,157,158,159,
887 160,161,162,163,164,165,166,167,
888 168,169,170,171,172,173,174,175,
889 176,177,178,179,180,181,182,183,
890 184,185,186,187,188,189,190,191,
891 224,225,226,227,228,229,230,231,
892 232,233,234,235,236,237,238,239,
893 240,241,242,243,244,245,246,215,
894 248,249,250,251,252,253,254,223,
895 224,225,226,227,228,229,230,231,
896 232,233,234,235,236,237,238,239,
897 240,241,242,243,244,245,246,247,
898 248,249,250,251,252,253,254,255,
/* Bytes 256-511: case-flipping table. 65-90 and 97-122 are exchanged, as are
192-222 and 224-254 (215 and 247 map to themselves). Every other value maps to
itself. */
899 0,1,2,3,4,5,6,7,
900 8,9,10,11,12,13,14,15,
901 16,17,18,19,20,21,22,23,
902 24,25,26,27,28,29,30,31,
903 32,33,34,35,36,37,38,39,
904 40,41,42,43,44,45,46,47,
905 48,49,50,51,52,53,54,55,
906 56,57,58,59,60,61,62,63,
907 64,97,98,99,100,101,102,103,
908 104,105,106,107,108,109,110,111,
909 112,113,114,115,116,117,118,119,
910 120,121,122,91,92,93,94,95,
911 96,65,66,67,68,69,70,71,
912 72,73,74,75,76,77,78,79,
913 80,81,82,83,84,85,86,87,
914 88,89,90,123,124,125,126,127,
915 128,129,130,131,132,133,134,135,
916 136,137,138,139,140,141,142,143,
917 144,145,146,147,148,149,150,151,
918 152,153,154,155,156,157,158,159,
919 160,161,162,163,164,165,166,167,
920 168,169,170,171,172,173,174,175,
921 176,177,178,179,180,181,182,183,
922 184,185,186,187,188,189,190,191,
923 224,225,226,227,228,229,230,231,
924 232,233,234,235,236,237,238,239,
925 240,241,242,243,244,245,246,215,
926 248,249,250,251,252,253,254,223,
927 192,193,194,195,196,197,198,199,
928 200,201,202,203,204,205,206,207,
929 208,209,210,211,212,213,214,247,
930 216,217,218,219,220,221,222,255,
/* Bytes 512-831: 320 bytes, i.e. ten groups of 32 bytes. NOTE(review):
presumably these are the per-class character bit maps (one bit per character
value) in the layout PCRE expects - confirm against pcre_maketables(). */
931 0,62,0,0,1,0,0,0,
932 0,0,0,0,0,0,0,0,
933 32,0,0,0,1,0,0,0,
934 0,0,0,0,0,0,0,0,
935 0,0,0,0,0,0,255,3,
936 126,0,0,0,126,0,0,0,
937 0,0,0,0,0,0,0,0,
938 0,0,0,0,0,0,0,0,
939 0,0,0,0,0,0,255,3,
940 0,0,0,0,0,0,0,0,
941 0,0,0,0,0,0,12,2,
942 0,0,0,0,0,0,0,0,
943 0,0,0,0,0,0,0,0,
944 254,255,255,7,0,0,0,0,
945 0,0,0,0,0,0,0,0,
946 255,255,127,127,0,0,0,0,
947 0,0,0,0,0,0,0,0,
948 0,0,0,0,254,255,255,7,
949 0,0,0,0,0,4,32,4,
950 0,0,0,128,255,255,127,255,
951 0,0,0,0,0,0,255,3,
952 254,255,255,135,254,255,255,7,
953 0,0,0,0,0,4,44,6,
954 255,255,127,255,255,255,127,255,
955 0,0,0,0,254,255,255,255,
956 255,255,255,255,255,255,255,127,
957 0,0,0,0,254,255,255,255,
958 255,255,255,255,255,255,255,255,
959 0,2,0,0,255,255,255,255,
960 255,255,255,255,255,255,255,127,
961 0,0,0,0,255,255,255,255,
962 255,255,255,255,255,255,255,255,
963 0,0,0,0,254,255,0,252,
964 1,0,0,248,1,0,0,120,
965 0,0,0,0,254,255,255,255,
966 0,0,128,0,0,0,128,0,
967 255,255,255,255,0,0,0,0,
968 0,0,0,0,0,0,0,128,
969 255,255,255,255,0,0,0,0,
970 0,0,0,0,0,0,0,0,
/* Bytes 832-1087: one flag byte per character value. NOTE(review): the values
are consistent with the bit assignments 0x01 white space, 0x02 letter, 0x04
decimal digit, 0x08 hexadecimal digit, 0x10 alphanumeric or '_', 0x80
metacharacter or binary zero (e.g. 28 = 0x1c for '0'-'9', 26 = 0x1a for
'a'-'f') - confirm against the PCRE character-type definitions. */
971 128,0,0,0,0,0,0,0,
972 0,1,1,0,1,1,0,0,
973 0,0,0,0,0,0,0,0,
974 0,0,0,0,0,0,0,0,
975 1,0,0,0,128,0,0,0,
976 128,128,128,128,0,0,128,0,
977 28,28,28,28,28,28,28,28,
978 28,28,0,0,0,0,0,128,
979 0,26,26,26,26,26,26,18,
980 18,18,18,18,18,18,18,18,
981 18,18,18,18,18,18,18,18,
982 18,18,18,128,128,0,128,16,
983 0,26,26,26,26,26,26,18,
984 18,18,18,18,18,18,18,18,
985 18,18,18,18,18,18,18,18,
986 18,18,18,128,128,0,0,0,
987 0,0,0,0,0,1,0,0,
988 0,0,0,0,0,0,0,0,
989 0,0,0,0,0,0,0,0,
990 0,0,0,0,0,0,0,0,
991 1,0,0,0,0,0,0,0,
992 0,0,18,0,0,0,0,0,
993 0,0,20,20,0,18,0,0,
994 0,20,18,0,0,0,0,0,
995 18,18,18,18,18,18,18,18,
996 18,18,18,18,18,18,18,18,
997 18,18,18,18,18,18,18,0,
998 18,18,18,18,18,18,18,18,
999 18,18,18,18,18,18,18,18,
1000 18,18,18,18,18,18,18,18,
1001 18,18,18,18,18,18,18,0,
1002 18,18,18,18,18,18,18,18
1003 };
1004
1005
1006
1007
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback strerror() for old systems (SunOS4 is the example given by the
original author) whose C library lacks it. The message is looked up in the
library's sys_errlist[] table, which holds sys_nerr entries.

Argument:
  n           an error number

Returns:      sys_errlist[n] when n is a valid index, otherwise the fixed
              string "unknown error number"
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
#endif /* HAVE_STRERROR */
1027
1028
1029 /*************************************************
1030 * JIT memory callback *
1031 *************************************************/
1032
1033 static pcre_jit_stack* jit_callback(void *arg)
1034 {
1035 return (pcre_jit_stack *)arg;
1036 }
1037
1038
1039 #if !defined NOUTF || defined SUPPORT_PCRE16
1040 /*************************************************
1041 * Convert UTF-8 string to value *
1042 *************************************************/
1043
1044 /* This function takes one or more bytes that represents a UTF-8 character,
1045 and returns the value of the character.
1046
1047 Argument:
1048 utf8bytes a pointer to the byte vector
1049 vptr a pointer to an int to receive the value
1050
1051 Returns: > 0 => the number of bytes consumed
1052 -6 to 0 => malformed UTF-8 character at offset = (-return)
1053 */
1054
1055 static int
1056 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1057 {
1058 int c = *utf8bytes++;
1059 int d = c;
1060 int i, j, s;
1061
1062 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1063 {
1064 if ((d & 0x80) == 0) break;
1065 d <<= 1;
1066 }
1067
1068 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1069 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1070
1071 /* i now has a value in the range 1-5 */
1072
1073 s = 6*i;
1074 d = (c & utf8_table3[i]) << s;
1075
1076 for (j = 0; j < i; j++)
1077 {
1078 c = *utf8bytes++;
1079 if ((c & 0xc0) != 0x80) return -(j+1);
1080 s -= 6;
1081 d |= (c & 0x3f) << s;
1082 }
1083
1084 /* Check that encoding was the correct unique one */
1085
1086 for (j = 0; j < utf8_table1_size; j++)
1087 if (d <= utf8_table1[j]) break;
1088 if (j != i) return -(i+1);
1089
1090 /* Valid value */
1091
1092 *vptr = d;
1093 return i+1;
1094 }
1095 #endif /* NOUTF || SUPPORT_PCRE16 */
1096
1097
1098
1099 #if !defined NOUTF || defined SUPPORT_PCRE16
1100 /*************************************************
1101 * Convert character value to UTF-8 *
1102 *************************************************/
1103
1104 /* This function takes an integer value in the range 0 - 0x7fffffff
1105 and encodes it as a UTF-8 character in 0 to 6 bytes.
1106
1107 Arguments:
1108 cvalue the character value
1109 utf8bytes pointer to buffer for result - at least 6 bytes long
1110
1111 Returns: number of characters placed in the buffer
1112 */
1113
1114 static int
1115 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1116 {
1117 register int i, j;
1118 for (i = 0; i < utf8_table1_size; i++)
1119 if (cvalue <= utf8_table1[i]) break;
1120 utf8bytes += i;
1121 for (j = i; j > 0; j--)
1122 {
1123 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1124 cvalue >>= 6;
1125 }
1126 *utf8bytes = utf8_table2[i] | cvalue;
1127 return i + 1;
1128 }
1129 #endif /* NOUTF || SUPPORT_PCRE16 */
1130
1131
1132
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Convert a byte string into 16-bit units, leaving the zero-terminated result
in buffer16 (which is reallocated when it is too small). The buffer is sized
at 2*len + 2 bytes: a straight copy needs exactly two bytes per input byte,
and when decoding UTF-8 each character below 0x10000 consumes at least one
input byte and yields one 16-bit unit, while each larger character consumes
four input bytes and yields two units.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be constructed for testing.

Patterns are either copied byte-for-unit (not UTF, not a data line) or decoded
as UTF-8; data lines are always decoded as UTF-8 so that values greater than
255 can be given.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *out;
  int needed = 2*len + 2;

  if (buffer16_size < needed)
  {
    if (buffer16 != NULL) free(buffer16);
    buffer16_size = needed;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
    {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
    }
  }

  out = buffer16;

  if (utf || data)
  {
    /* Decode the input as UTF-8, one character at a time. */

    int c = 0;
    while (len > 0)
    {
      int chlen = utf82ord(p, &c);
      if (chlen <= 0) return -1;
      if (c > 0x10ffff) return -2;
      p += chlen;
      len -= chlen;

      if (c >= 0x10000)
      {
        /* Needs a surrogate pair, which is only allowed in UTF mode. */
        if (!utf) return -3;
        c -= 0x10000;
        *out++ = 0xD800 | (c >> 10);
        *out++ = 0xDC00 | (c & 0x3ff);
      }
      else
        *out++ = c;
    }
  }
  else
  {
    /* Plain pattern: widen each byte to one 16-bit unit. */

    for (; len > 0; len--) *out++ = *p++;
  }

  *out = 0;
  return out - buffer16;
}
#endif
1211
1212
1213 /*************************************************
1214 * Read or extend an input line *
1215 *************************************************/
1216
1217 /* Input lines are read into buffer, but both patterns and data lines can be
1218 continued over multiple input lines. In addition, if the buffer fills up, we
1219 want to automatically expand it so as to be able to handle extremely large
1220 lines that are needed for certain stress tests. When the input buffer is
1221 expanded, the other two buffers must also be expanded likewise, and the
1222 contents of pbuffer, which are a copy of the input for callouts, must be
1223 preserved (for when expansion happens for a data line). This is not the most
1224 optimal way of handling this, but hey, this is just a test program!
1225
1226 Arguments:
1227 f the file to read
1228 start where in buffer to start (this *must* be within buffer)
1229 prompt for stdin or readline()
1230
1231 Returns: pointer to the start of new data
1232 could be a copy of start, or could be moved
1233 NULL if no data read and EOF reached
1234 */
1235
1236 static pcre_uint8 *
1237 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1238 {
1239 pcre_uint8 *here = start;
1240
1241 for (;;)
1242 {
1243 int rlen = (int)(buffer_size - (here - buffer));
1244
1245 if (rlen > 1000)
1246 {
1247 int dlen;
1248
1249 /* If libreadline support is required, use readline() to read a line if the
1250 input is a terminal. Note that readline() removes the trailing newline, so
1251 we must put it back again, to be compatible with fgets(). */
1252
1253 #ifdef SUPPORT_LIBREADLINE
1254 if (isatty(fileno(f)))
1255 {
1256 size_t len;
1257 char *s = readline(prompt);
1258 if (s == NULL) return (here == start)? NULL : start;
1259 len = strlen(s);
1260 if (len > 0) add_history(s);
1261 if (len > rlen - 1) len = rlen - 1;
1262 memcpy(here, s, len);
1263 here[len] = '\n';
1264 here[len+1] = 0;
1265 free(s);
1266 }
1267 else
1268 #endif
1269
1270 /* Read the next line by normal means, prompting if the file is stdin. */
1271
1272 {
1273 if (f == stdin) printf("%s", prompt);
1274 if (fgets((char *)here, rlen, f) == NULL)
1275 return (here == start)? NULL : start;
1276 }
1277
1278 dlen = (int)strlen((char *)here);
1279 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1280 here += dlen;
1281 }
1282
1283 else
1284 {
1285 int new_buffer_size = 2*buffer_size;
1286 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1287 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1288 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1289
1290 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1291 {
1292 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1293 exit(1);
1294 }
1295
1296 memcpy(new_buffer, buffer, buffer_size);
1297 memcpy(new_pbuffer, pbuffer, buffer_size);
1298
1299 buffer_size = new_buffer_size;
1300
1301 start = new_buffer + (start - buffer);
1302 here = new_buffer + (here - buffer);
1303
1304 free(buffer);
1305 free(dbuffer);
1306 free(pbuffer);
1307
1308 buffer = new_buffer;
1309 dbuffer = new_dbuffer;
1310 pbuffer = new_pbuffer;
1311 }
1312 }
1313
1314 return NULL; /* Control never gets here */
1315 }
1316
1317
1318
1319 /*************************************************
1320 * Read number from string *
1321 *************************************************/
1322
1323 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1324 around with conditional compilation, just do the job by hand. It is only used
1325 for unpicking arguments, so just keep it simple.
1326
1327 Arguments:
1328 str string to be converted
1329 endptr where to put the end pointer
1330
1331 Returns: the unsigned long
1332 */
1333
1334 static int
1335 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1336 {
1337 int result = 0;
1338 while(*str != 0 && isspace(*str)) str++;
1339 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1340 *endptr = str;
1341 return(result);
1342 }
1343
1344
1345
1346 /*************************************************
1347 * Print one character *
1348 *************************************************/
1349
1350 /* Print a single character either literally, or as a hex escape. */
1351
1352 static int pchar(int c, FILE *f)
1353 {
1354 if (PRINTOK(c))
1355 {
1356 if (f != NULL) fprintf(f, "%c", c);
1357 return 1;
1358 }
1359
1360 if (c < 0x100)
1361 {
1362 if (use_utf)
1363 {
1364 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1365 return 6;
1366 }
1367 else
1368 {
1369 if (f != NULL) fprintf(f, "\\x%02x", c);
1370 return 4;
1371 }
1372 }
1373
1374 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1375 return (c <= 0x000000ff)? 6 :
1376 (c <= 0x00000fff)? 7 :
1377 (c <= 0x0000ffff)? 8 :
1378 (c <= 0x000fffff)? 9 : 10;
1379 }
1380
1381
1382
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a byte string one character at a time through pchar(). When use_utf
is set, each character is decoded as UTF-8 where possible; a byte that does
not start a valid sequence, or whose sequence would run past the end of the
string, is printed on its own.

Arguments:
  p           the string
  length      number of bytes, or negative to use strlen()
  f           the output file, or NULL just to count without printing

Returns:      the total number of characters printed (sum of pchar() results)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
  int total = 0;

  if (length < 0)
    length = strlen((char *)p);

  while (length > 0)
  {
    int ch = *p;     /* default: treat the next byte as a character */
    int used = 1;    /* number of bytes this character occupies */

#if !defined NOUTF
    if (use_utf)
    {
      int decoded;
      int rc = utf82ord(p, &decoded);
      if (rc > 0 && rc <= length)   /* Mustn't run over the end */
      {
        ch = decoded;
        used = rc;
      }
    }
#endif

    p += used;
    length -= used;
    total += pchar(ch, f);
  }

  return total;
}
#endif
1421
1422
1423
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Count the 16-bit units that precede the terminating zero unit.

Argument:
  p           the zero-terminated 16-bit string

Returns:      the number of units, not counting the terminator
*/

static int strlen16(PCRE_SPTR16 p)
{
  PCRE_SPTR16 end = p;
  while (*end != 0) end++;
  return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1436
1437
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Print a 16-bit string one character at a time through pchar(). When use_utf
is set, a high surrogate (0xD800-0xDBFF) that is followed, within the string,
by a low surrogate (0xDC00-0xDFFF) is combined with it into a single character;
any other unit is printed on its own.

Arguments:
  p           the string
  length      number of 16-bit units, or negative to use strlen16()
  f           the output file, or NULL just to count without printing

Returns:      the total number of characters printed (sum of pchar() results)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
  int yield = 0;

  if (length < 0)
    length = strlen16(p);

  while (length-- > 0)
  {
    int c = *p++ & 0xffff;
#if !defined NOUTF
    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
      int d = *p & 0xffff;

      /* The low surrogate range is 0xDC00 to 0xDFFF inclusive; the upper
      bound must not exclude 0xDFFF itself. */

      if (d >= 0xDC00 && d <= 0xDFFF)
      {
        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
        length--;
        p++;
      }
    }
#endif
    yield += pchar(c, f);
  }

  return yield;
}
#endif  /* SUPPORT_PCRE16 */
1474
1475
1476
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters from p to the area at *pp, follow it
with two zero bytes, and look the name up in the compiled pattern. If
pcre_get_stringnumber() gives a negative result, a "no parentheses with name"
message is written to outfile.

Arguments:
  p           the text being read
  pp          in: where to store the name; out: points at the second of the
              two zero bytes that were written
  re          the compiled pattern

Returns:      pointer to the first character of p that is not alphanumeric
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
  pcre_uint8 *name = *pp;
  pcre_uint8 *dst = name;

  for (; isalnum(*p); p++)
    *dst++ = *p;

  /* Terminate the name and put one more zero after it. */

  dst[0] = 0;
  dst[1] = 0;
  dst++;

  if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
    fprintf(outfile, "no parentheses with name \"");
    PCHARSV(name, 0, -1, outfile);
    fprintf(outfile, "\"\n");
  }

  *pp = dst;
  return p;
}
#endif  /* SUPPORT_PCRE8 */
1500
1501
1502
#ifdef SUPPORT_PCRE16
/*************************************************
*    Read a capture name (16-bit) and check it   *
*************************************************/

/* The text being read is 8-bit, but the name is stored as 16-bit units. A run
of alphanumeric characters is copied from p to the area at *pp, followed by
two zero units, and the name is looked up in the compiled pattern. If
pcre16_get_stringnumber() gives a negative result, a "no parentheses with
name" message is written to outfile.

Arguments:
  p           the (8-bit) text being read
  pp          in: where to store the name; out: points at the second of the
              two zero units that were written
  re          the compiled pattern

Returns:      pointer to the first character of p that is not alphanumeric
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
  pcre_uint16 *name = *pp;
  pcre_uint16 *dst = name;

  for (; isalnum(*p); p++)
    *dst++ = *p;

  /* Terminate the name and put one more zero after it. */

  dst[0] = 0;
  dst[1] = 0;
  dst++;

  if (pcre16_get_stringnumber(re, (PCRE_SPTR16)name) < 0)
  {
    fprintf(outfile, "no parentheses with name \"");
    PCHARSV(name, 0, -1, outfile);
    fprintf(outfile, "\"\n");
  }

  *pp = dst;
  return p;
}
#endif  /* SUPPORT_PCRE16 */
1527
1528
1529
1530 /*************************************************
1531 * Callout function *
1532 *************************************************/
1533
1534 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1535 the match. Yield zero unless more callouts than the fail count, or the callout
1536 data is not zero. */
1537
1538 static int callout(pcre_callout_block *cb)
1539 {
1540 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1541 int i, pre_start, post_start, subject_length;
1542
1543 if (callout_extra)
1544 {
1545 fprintf(f, "Callout %d: last capture = %d\n",
1546 cb->callout_number, cb->capture_last);
1547
1548 for (i = 0; i < cb->capture_top * 2; i += 2)
1549 {
1550 if (cb->offset_vector[i] < 0)
1551 fprintf(f, "%2d: <unset>\n", i/2);
1552 else
1553 {
1554 fprintf(f, "%2d: ", i/2);
1555 PCHARSV(cb->subject, cb->offset_vector[i],
1556 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1557 fprintf(f, "\n");
1558 }
1559 }
1560 }
1561
1562 /* Re-print the subject in canonical form, the first time or if giving full
1563 datails. On subsequent calls in the same match, we use pchars just to find the
1564 printed lengths of the substrings. */
1565
1566 if (f != NULL) fprintf(f, "--->");
1567
1568 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1569 PCHARS(post_start, cb->subject, cb->start_match,
1570 cb->current_position - cb->start_match, f);
1571
1572 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1573
1574 PCHARSV(cb->subject, cb->current_position,
1575 cb->subject_length - cb->current_position, f);
1576
1577 if (f != NULL) fprintf(f, "\n");
1578
1579 /* Always print appropriate indicators, with callout number if not already
1580 shown. For automatic callouts, show the pattern offset. */
1581
1582 if (cb->callout_number == 255)
1583 {
1584 fprintf(outfile, "%+3d ", cb->pattern_position);
1585 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1586 }
1587 else
1588 {
1589 if (callout_extra) fprintf(outfile, " ");
1590 else fprintf(outfile, "%3d ", cb->callout_number);
1591 }
1592
1593 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1594 fprintf(outfile, "^");
1595
1596 if (post_start > 0)
1597 {
1598 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1599 fprintf(outfile, "^");
1600 }
1601
1602 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1603 fprintf(outfile, " ");
1604
1605 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1606 pbuffer + cb->pattern_position);
1607
1608 fprintf(outfile, "\n");
1609 first_callout = 0;
1610
1611 if (cb->mark != last_callout_mark)
1612 {
1613 if (cb->mark == NULL)
1614 fprintf(outfile, "Latest Mark: <unset>\n");
1615 else
1616 {
1617 fprintf(outfile, "Latest Mark: ");
1618 PCHARSV(cb->mark, 0, -1, outfile);
1619 putc('\n', outfile);
1620 }
1621 last_callout_mark = cb->mark;
1622 }
1623
1624 if (cb->callout_data != NULL)
1625 {
1626 int callout_data = *((int *)(cb->callout_data));
1627 if (callout_data != 0)
1628 {
1629 fprintf(outfile, "Callout data = %d\n", callout_data);
1630 return callout_data;
1631 }
1632 }
1633
1634 return (cb->callout_number != callout_fail_id)? 0 :
1635 (++callout_count >= callout_fail_count)? 1 : 0;
1636 }
1637
1638
1639 /*************************************************
1640 * Local malloc functions *
1641 *************************************************/
1642
1643 /* Alternative malloc function, to test functionality and save the size of a
1644 compiled re, which is the first store request that pcre_compile() makes. The
1645 show_malloc variable is set only during matching. */
1646
1647 static void *new_malloc(size_t size)
1648 {
1649 void *block = malloc(size);
1650 gotten_store = size;
1651 if (first_gotten_store == 0) first_gotten_store = size;
1652 if (show_malloc)
1653 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1654 return block;
1655 }
1656
1657 static void new_free(void *block)
1658 {
1659 if (show_malloc)
1660 fprintf(outfile, "free %p\n", block);
1661 free(block);
1662 }
1663
1664 /* For recursion malloc/free, to test stacking calls */
1665
1666 static void *stack_malloc(size_t size)
1667 {
1668 void *block = malloc(size);
1669 if (show_malloc)
1670 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1671 return block;
1672 }
1673
1674 static void stack_free(void *block)
1675 {
1676 if (show_malloc)
1677 fprintf(outfile, "stack_free %p\n", block);
1678 free(block);
1679 }
1680
1681
1682 /*************************************************
1683 * Call pcre_fullinfo() *
1684 *************************************************/
1685
1686 /* Get one piece of information from the pcre_fullinfo() function. When only
1687 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1688 value, but the code is defensive.
1689
1690 Arguments:
1691 re compiled regex
1692 study study data
1693 option PCRE_INFO_xxx option
1694 ptr where to put the data
1695
1696 Returns: 0 when OK, < 0 on error
1697 */
1698
1699 static int
1700 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1701 {
1702 int rc;
1703
1704 if (use_pcre16)
1705 #ifdef SUPPORT_PCRE16
1706 rc = pcre16_fullinfo(re, study, option, ptr);
1707 #else
1708 rc = PCRE_ERROR_BADMODE;
1709 #endif
1710 else
1711 #ifdef SUPPORT_PCRE8
1712 rc = pcre_fullinfo(re, study, option, ptr);
1713 #else
1714 rc = PCRE_ERROR_BADMODE;
1715 #endif
1716
1717 if (rc < 0)
1718 {
1719 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1720 use_pcre16? "16" : "", option);
1721 if (rc == PCRE_ERROR_BADMODE)
1722 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1723 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1724 }
1725
1726 return rc;
1727 }
1728
1729
1730
1731 /*************************************************
1732 * Swap byte functions *
1733 *************************************************/
1734
1735 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1736 value, respectively.
1737
1738 Arguments:
1739 value any number
1740
1741 Returns: the byte swapped value
1742 */
1743
1744 static pcre_uint32
1745 swap_uint32(pcre_uint32 value)
1746 {
1747 return ((value & 0x000000ff) << 24) |
1748 ((value & 0x0000ff00) << 8) |
1749 ((value & 0x00ff0000) >> 8) |
1750 (value >> 24);
1751 }
1752
1753 static pcre_uint16
1754 swap_uint16(pcre_uint16 value)
1755 {
1756 return (value >> 8) | (value << 8);
1757 }
1758
1759
1760
1761 /*************************************************
1762 * Flip bytes in a compiled pattern *
1763 *************************************************/
1764
1765 /* This function is called if the 'F' option was present on a pattern that is
1766 to be written to a file. We flip the bytes of all the integer fields in the
1767 regex data block and the study block. In 16-bit mode this also flips relevant
1768 bytes in the pattern itself. This is to make it possible to test PCRE's
1769 ability to reload byte-flipped patterns, e.g. those compiled on a different
1770 architecture. */
1771
1772 static void
1773 regexflip(pcre *ere, pcre_extra *extra)
1774 {
1775 real_pcre *re = (real_pcre *)ere;
1776 #ifdef SUPPORT_PCRE16
1777 int op;
1778 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1779 int length = re->name_count * re->name_entry_size;
1780 #ifdef SUPPORT_UTF
1781 BOOL utf = (re->options & PCRE_UTF16) != 0;
1782 BOOL utf16_char = FALSE;
1783 #endif /* SUPPORT_UTF */
1784 #endif /* SUPPORT_PCRE16 */
1785
1786 /* Always flip the bytes in the main data block and study blocks. */
1787
1788 re->magic_number = REVERSED_MAGIC_NUMBER;
1789 re->size = swap_uint32(re->size);
1790 re->options = swap_uint32(re->options);
1791 re->flags = swap_uint16(re->flags);
1792 re->top_bracket = swap_uint16(re->top_bracket);
1793 re->top_backref = swap_uint16(re->top_backref);
1794 re->first_char = swap_uint16(re->first_char);
1795 re->req_char = swap_uint16(re->req_char);
1796 re->name_table_offset = swap_uint16(re->name_table_offset);
1797 re->name_entry_size = swap_uint16(re->name_entry_size);
1798 re->name_count = swap_uint16(re->name_count);
1799
1800 if (extra != NULL)
1801 {
1802 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1803 rsd->size = swap_uint32(rsd->size);
1804 rsd->flags = swap_uint32(rsd->flags);
1805 rsd->minlength = swap_uint32(rsd->minlength);
1806 }
1807
1808 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1809 in the name table, if present, and then in the pattern itself. */
1810
1811 #ifdef SUPPORT_PCRE16
1812 if (!use_pcre16) return;
1813
1814 while(TRUE)
1815 {
1816 /* Swap previous characters. */
1817 while (length-- > 0)
1818 {
1819 *ptr = swap_uint16(*ptr);
1820 ptr++;
1821 }
1822 #ifdef SUPPORT_UTF
1823 if (utf16_char)
1824 {
1825 if ((ptr[-1] & 0xfc00) == 0xd800)
1826 {
1827 /* We know that there is only one extra character in UTF-16. */
1828 *ptr = swap_uint16(*ptr);
1829 ptr++;
1830 }
1831 }
1832 utf16_char = FALSE;
1833 #endif /* SUPPORT_UTF */
1834
1835 /* Get next opcode. */
1836
1837 length = 0;
1838 op = *ptr;
1839 *ptr++ = swap_uint16(op);
1840
1841 switch (op)
1842 {
1843 case OP_END:
1844 return;
1845
1846 #ifdef SUPPORT_UTF
1847 case OP_CHAR:
1848 case OP_CHARI:
1849 case OP_NOT:
1850 case OP_NOTI:
1851 case OP_STAR:
1852 case OP_MINSTAR:
1853 case OP_PLUS:
1854 case OP_MINPLUS:
1855 case OP_QUERY:
1856 case OP_MINQUERY:
1857 case OP_UPTO:
1858 case OP_MINUPTO:
1859 case OP_EXACT:
1860 case OP_POSSTAR:
1861 case OP_POSPLUS:
1862 case OP_POSQUERY:
1863 case OP_POSUPTO:
1864 case OP_STARI:
1865 case OP_MINSTARI:
1866 case OP_PLUSI:
1867 case OP_MINPLUSI:
1868 case OP_QUERYI:
1869 case OP_MINQUERYI:
1870 case OP_UPTOI:
1871 case OP_MINUPTOI:
1872 case OP_EXACTI:
1873 case OP_POSSTARI:
1874 case OP_POSPLUSI:
1875 case OP_POSQUERYI:
1876 case OP_POSUPTOI:
1877 case OP_NOTSTAR:
1878 case OP_NOTMINSTAR:
1879 case OP_NOTPLUS:
1880 case OP_NOTMINPLUS:
1881 case OP_NOTQUERY:
1882 case OP_NOTMINQUERY:
1883 case OP_NOTUPTO:
1884 case OP_NOTMINUPTO:
1885 case OP_NOTEXACT:
1886 case OP_NOTPOSSTAR:
1887 case OP_NOTPOSPLUS:
1888 case OP_NOTPOSQUERY:
1889 case OP_NOTPOSUPTO:
1890 case OP_NOTSTARI:
1891 case OP_NOTMINSTARI:
1892 case OP_NOTPLUSI:
1893 case OP_NOTMINPLUSI:
1894 case OP_NOTQUERYI:
1895 case OP_NOTMINQUERYI:
1896 case OP_NOTUPTOI:
1897 case OP_NOTMINUPTOI:
1898 case OP_NOTEXACTI:
1899 case OP_NOTPOSSTARI:
1900 case OP_NOTPOSPLUSI:
1901 case OP_NOTPOSQUERYI:
1902 case OP_NOTPOSUPTOI:
1903 if (utf) utf16_char = TRUE;
1904 #endif
1905 /* Fall through. */
1906
1907 default:
1908 length = OP_lengths16[op] - 1;
1909 break;
1910
1911 case OP_CLASS:
1912 case OP_NCLASS:
1913 /* Skip the character bit map. */
1914 ptr += 32/sizeof(pcre_uint16);
1915 length = 0;
1916 break;
1917
1918 case OP_XCLASS:
1919 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1920 if (LINK_SIZE > 1)
1921 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1922 - (1 + LINK_SIZE + 1));
1923 else
1924 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1925
1926 /* Reverse the size of the XCLASS instance. */
1927 *ptr = swap_uint16(*ptr);
1928 ptr++;
1929 if (LINK_SIZE > 1)
1930 {
1931 *ptr = swap_uint16(*ptr);
1932 ptr++;
1933 }
1934
1935 op = *ptr;
1936 *ptr = swap_uint16(op);
1937 ptr++;
1938 if ((op & XCL_MAP) != 0)
1939 {
1940 /* Skip the character bit map. */
1941 ptr += 32/sizeof(pcre_uint16);
1942 length -= 32/sizeof(pcre_uint16);
1943 }
1944 break;
1945 }
1946 }
1947 /* Control should never reach here in 16 bit mode. */
1948 #endif /* SUPPORT_PCRE16 */
1949 }
1950
1951
1952
1953 /*************************************************
1954 * Check match or recursion limit *
1955 *************************************************/
1956
1957 static int
1958 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1959 int start_offset, int options, int *use_offsets, int use_size_offsets,
1960 int flag, unsigned long int *limit, int errnumber, const char *msg)
1961 {
1962 int count;
1963 int min = 0;
1964 int mid = 64;
1965 int max = -1;
1966
1967 extra->flags |= flag;
1968
1969 for (;;)
1970 {
1971 *limit = mid;
1972
1973 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1974 use_offsets, use_size_offsets);
1975
1976 if (count == errnumber)
1977 {
1978 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1979 min = mid;
1980 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1981 }
1982
1983 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1984 count == PCRE_ERROR_PARTIAL)
1985 {
1986 if (mid == min + 1)
1987 {
1988 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1989 break;
1990 }
1991 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1992 max = mid;
1993 mid = (min + mid)/2;
1994 }
1995 else break; /* Some other error */
1996 }
1997
1998 extra->flags &= ~flag;
1999 return count;
2000 }
2001
2002
2003
2004 /*************************************************
2005 * Case-independent strncmp() function *
2006 *************************************************/
2007
2008 /*
2009 Arguments:
2010 s first string
2011 t second string
2012 n number of characters to compare
2013
2014 Returns: < 0, = 0, or > 0, according to the comparison
2015 */
2016
2017 static int
2018 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2019 {
2020 while (n--)
2021 {
2022 int c = tolower(*s++) - tolower(*t++);
2023 if (c) return c;
2024 }
2025 return 0;
2026 }
2027
2028
2029
2030 /*************************************************
2031 * Check newline indicator *
2032 *************************************************/
2033
2034 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2035 a message and return 0 if there is no match.
2036
2037 Arguments:
2038 p points after the leading '<'
2039 f file for error message
2040
2041 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2042 */
2043
2044 static int
2045 check_newline(pcre_uint8 *p, FILE *f)
2046 {
2047 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2048 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2049 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2050 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2051 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2052 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2053 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2054 fprintf(f, "Unknown newline type at: <%s\n", p);
2055 return 0;
2056 }
2057
2058
2059
2060 /*************************************************
2061 * Usage function *
2062 *************************************************/
2063
/* Write the command line synopsis and the list of options to stdout. The
lines describing readline(), -16, -dfa and -p depend on the build macros
SUPPORT_LIBREADLINE, SUPPORT_PCRE16, NODFA and NOPOSIX respectively. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf(" -16 use 16-bit interface\n");
#endif
printf(" -b show compiled code (bytecode)\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");

/* The question marks are written as escapes because two adjacent question
marks followed by a closing square bracket form a trigraph, which compilers
that translate trigraphs would turn into a single bracket, losing two of the
three question marks from the output. */

printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ?\?\?]\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2110
2111
2112
2113 /*************************************************
2114 * Main Program *
2115 *************************************************/
2116
2117 /* Read lines from named file or stdin and write to named file or stdout; lines
2118 consist of a regular expression, in delimiters and optionally followed by
2119 options, followed by a set of test data, terminated by an empty line. */
2120
2121 int main(int argc, char **argv)
2122 {
2123 FILE *infile = stdin;
2124 const char *version;
2125 int options = 0;
2126 int study_options = 0;
2127 int default_find_match_limit = FALSE;
2128 int op = 1;
2129 int timeit = 0;
2130 int timeitm = 0;
2131 int showinfo = 0;
2132 int showstore = 0;
2133 int force_study = -1;
2134 int force_study_options = 0;
2135 int quiet = 0;
2136 int size_offsets = 45;
2137 int size_offsets_max;
2138 int *offsets = NULL;
2139 #if !defined NOPOSIX
2140 int posix = 0;
2141 #endif
2142 int debug = 0;
2143 int done = 0;
2144 int all_use_dfa = 0;
2145 int yield = 0;
2146 int stack_size;
2147
2148 pcre_jit_stack *jit_stack = NULL;
2149
2150 /* These vectors store, end-to-end, a list of zero-terminated captured
2151 substring names, each list itself being terminated by an empty name. Assume
2152 that 1024 is plenty long enough for the few names we'll be testing. It is
2153 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2154 for the actual memory, to ensure alignment. By defining these variables always
2155 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2156 #ifdefs in the code. */
2157
2158 pcre_uint16 copynames[1024];
2159 pcre_uint16 getnames[1024];
2160
2161 pcre_uint16 *cn16ptr;
2162 pcre_uint16 *gn16ptr;
2163
2164 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2165 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2166 pcre_uint8 *cn8ptr;
2167 pcre_uint8 *gn8ptr;
2168
2169 /* Get buffers from malloc() so that valgrind will check their misuse when
2170 debugging. They grow automatically when very long lines are read. The 16-bit
2171 buffer (buffer16) is obtained only if needed. */
2172
2173 buffer = (pcre_uint8 *)malloc(buffer_size);
2174 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2175 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2176
2177 /* The outfile variable is static so that new_malloc can use it. */
2178
2179 outfile = stdout;
2180
2181 /* The following _setmode() stuff is some Windows magic that tells its runtime
2182 library to translate CRLF into a single LF character. At least, that's what
2183 I've been told: never having used Windows I take this all on trust. Originally
2184 it set 0x8000, but then I was advised that _O_BINARY was better. */
2185
2186 #if defined(_WIN32) || defined(WIN32)
2187 _setmode( _fileno( stdout ), _O_BINARY );
2188 #endif
2189
2190 /* Get the version number: both pcre_version() and pcre16_version() give the
2191 same answer. We just need to ensure that we call one that is available. */
2192
2193 #ifdef SUPPORT_PCRE8
2194 version = pcre_version();
2195 #else
2196 version = pcre16_version();
2197 #endif
2198
2199 /* Scan options */
2200
2201 while (argc > 1 && argv[op][0] == '-')
2202 {
2203 pcre_uint8 *endptr;
2204
2205 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2206 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2207 else if (strcmp(argv[op], "-s+") == 0)
2208 {
2209 force_study = 1;
2210 force_study_options = PCRE_STUDY_JIT_COMPILE;
2211 }
2212 else if (strcmp(argv[op], "-16") == 0)
2213 {
2214 #ifdef SUPPORT_PCRE16
2215 use_pcre16 = 1;
2216 #else
2217 printf("** This version of PCRE was built without 16-bit support\n");
2218 exit(1);
2219 #endif
2220 }
2221 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2222 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2223 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2224 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2225 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2226 #if !defined NODFA
2227 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2228 #endif
2229 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2230 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2231 *endptr == 0))
2232 {
2233 op++;
2234 argc--;
2235 }
2236 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2237 {
2238 int both = argv[op][2] == 0;
2239 int temp;
2240 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2241 *endptr == 0))
2242 {
2243 timeitm = temp;
2244 op++;
2245 argc--;
2246 }
2247 else timeitm = LOOPREPEAT;
2248 if (both) timeit = timeitm;
2249 }
2250 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2251 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2252 *endptr == 0))
2253 {
2254 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2255 printf("PCRE: -S not supported on this OS\n");
2256 exit(1);
2257 #else
2258 int rc;
2259 struct rlimit rlim;
2260 getrlimit(RLIMIT_STACK, &rlim);
2261 rlim.rlim_cur = stack_size * 1024 * 1024;
2262 rc = setrlimit(RLIMIT_STACK, &rlim);
2263 if (rc != 0)
2264 {
2265 printf("PCRE: setrlimit() failed with error %d\n", rc);
2266 exit(1);
2267 }
2268 op++;
2269 argc--;
2270 #endif
2271 }
2272 #if !defined NOPOSIX
2273 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2274 #endif
2275 else if (strcmp(argv[op], "-C") == 0)
2276 {
2277 int rc;
2278 unsigned long int lrc;
2279
2280 if (argc > 2)
2281 {
2282 if (strcmp(argv[op + 1], "linksize") == 0)
2283 {
2284 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2285 printf("%d\n", rc);
2286 yield = rc;
2287 goto EXIT;
2288 }
2289 if (strcmp(argv[op + 1], "pcre8") == 0)
2290 {
2291 #ifdef SUPPORT_PCRE8
2292 printf("1\n");
2293 yield = 1;
2294 #else
2295 printf("0\n");
2296 yield = 0;
2297 #endif
2298 goto EXIT;
2299 }
2300 if (strcmp(argv[op + 1], "pcre16") == 0)
2301 {
2302 #ifdef SUPPORT_PCRE16
2303 printf("1\n");
2304 yield = 1;
2305 #else
2306 printf("0\n");
2307 yield = 0;
2308 #endif
2309 goto EXIT;
2310 }
2311 if (strcmp(argv[op + 1], "utf") == 0)
2312 {
2313 #ifdef SUPPORT_PCRE8
2314 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2315 printf("%d\n", rc);
2316 yield = rc;
2317 #else
2318 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2319 printf("%d\n", rc);
2320 yield = rc;
2321 #endif
2322 goto EXIT;
2323 }
2324 if (strcmp(argv[op + 1], "ucp") == 0)
2325 {
2326 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2327 printf("%d\n", rc);
2328 yield = rc;
2329 goto EXIT;
2330 }
2331 if (strcmp(argv[op + 1], "jit") == 0)
2332 {
2333 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2334 printf("%d\n", rc);
2335 yield = rc;
2336 goto EXIT;
2337 }
2338 if (strcmp(argv[op + 1], "newline") == 0)
2339 {
2340 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2341 /* Note that these values are always the ASCII values, even
2342 in EBCDIC environments. CR is 13 and NL is 10. */
2343 printf("%s\n", (rc == 13)? "CR" :
2344 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2345 (rc == -2)? "ANYCRLF" :
2346 (rc == -1)? "ANY" : "???");
2347 goto EXIT;
2348 }
2349 printf("Unknown -C option: %s\n", argv[op + 1]);
2350 goto EXIT;
2351 }
2352
2353 printf("PCRE version %s\n", version);
2354 printf("Compiled with\n");
2355
2356 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2357 are set, either both UTFs are supported or both are not supported. */
2358
2359 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2360 printf(" 8-bit and 16-bit support\n");
2361 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2362 if (rc)
2363 printf(" UTF-8 and UTF-16 support\n");
2364 else
2365 printf(" No UTF-8 or UTF-16 support\n");
2366 #elif defined SUPPORT_PCRE8
2367 printf(" 8-bit support only\n");
2368 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2369 printf(" %sUTF-8 support\n", rc? "" : "No ");
2370 #else
2371 printf(" 16-bit support only\n");
2372 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2373 printf(" %sUTF-16 support\n", rc? "" : "No ");
2374 #endif
2375
2376 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2377 printf(" %sUnicode properties support\n", rc? "" : "No ");
2378 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2379 if (rc)
2380 printf(" Just-in-time compiler support\n");
2381 else
2382 printf(" No just-in-time compiler support\n");
2383 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2384 /* Note that these values are always the ASCII values, even
2385 in EBCDIC environments. CR is 13 and NL is 10. */
2386 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2387 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2388 (rc == -2)? "ANYCRLF" :
2389 (rc == -1)? "ANY" : "???");
2390 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2391 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2392 "all Unicode newlines");
2393 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2394 printf(" Internal link size = %d\n", rc);
2395 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2396 printf(" POSIX malloc threshold = %d\n", rc);
2397 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2398 printf(" Default match limit = %ld\n", lrc);
2399 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2400 printf(" Default recursion depth limit = %ld\n", lrc);
2401 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2402 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2403 goto EXIT;
2404 }
2405 else if (strcmp(argv[op], "-help") == 0 ||
2406 strcmp(argv[op], "--help") == 0)
2407 {
2408 usage();
2409 goto EXIT;
2410 }
2411 else
2412 {
2413 printf("** Unknown or malformed option %s\n", argv[op]);
2414 usage();
2415 yield = 1;
2416 goto EXIT;
2417 }
2418 op++;
2419 argc--;
2420 }
2421
2422 /* Get the store for the offsets vector, and remember what it was */
2423
2424 size_offsets_max = size_offsets;
2425 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2426 if (offsets == NULL)
2427 {
2428 printf("** Failed to get %d bytes of memory for offsets vector\n",
2429 (int)(size_offsets_max * sizeof(int)));
2430 yield = 1;
2431 goto EXIT;
2432 }
2433
2434 /* Sort out the input and output files */
2435
2436 if (argc > 1)
2437 {
2438 infile = fopen(argv[op], INPUT_MODE);
2439 if (infile == NULL)
2440 {
2441 printf("** Failed to open %s\n", argv[op]);
2442 yield = 1;
2443 goto EXIT;
2444 }
2445 }
2446
2447 if (argc > 2)
2448 {
2449 outfile = fopen(argv[op+1], OUTPUT_MODE);
2450 if (outfile == NULL)
2451 {
2452 printf("** Failed to open %s\n", argv[op+1]);
2453 yield = 1;
2454 goto EXIT;
2455 }
2456 }
2457
2458 /* Set alternative malloc function */
2459
2460 #ifdef SUPPORT_PCRE8
2461 pcre_malloc = new_malloc;
2462 pcre_free = new_free;
2463 pcre_stack_malloc = stack_malloc;
2464 pcre_stack_free = stack_free;
2465 #endif
2466
2467 #ifdef SUPPORT_PCRE16
2468 pcre16_malloc = new_malloc;
2469 pcre16_free = new_free;
2470 pcre16_stack_malloc = stack_malloc;
2471 pcre16_stack_free = stack_free;
2472 #endif
2473
2474 /* Heading line unless quiet, then prompt for first regex if stdin */
2475
2476 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2477
2478 /* Main loop */
2479
2480 while (!done)
2481 {
2482 pcre *re = NULL;
2483 pcre_extra *extra = NULL;
2484
2485 #if !defined NOPOSIX /* There are still compilers that require no indent */
2486 regex_t preg;
2487 int do_posix = 0;
2488 #endif
2489
2490 const char *error;
2491 pcre_uint8 *markptr;
2492 pcre_uint8 *p, *pp, *ppp;
2493 pcre_uint8 *to_file = NULL;
2494 const pcre_uint8 *tables = NULL;
2495 unsigned long int true_size, true_study_size = 0;
2496 size_t size, regex_gotten_store;
2497 int do_allcaps = 0;
2498 int do_mark = 0;
2499 int do_study = 0;
2500 int no_force_study = 0;
2501 int do_debug = debug;
2502 int do_G = 0;
2503 int do_g = 0;
2504 int do_showinfo = showinfo;
2505 int do_showrest = 0;
2506 int do_showcaprest = 0;
2507 int do_flip = 0;
2508 int erroroffset, len, delimiter, poffset;
2509
2510 use_utf = 0;
2511 debug_lengths = 1;
2512
2513 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2514 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2515 fflush(outfile);
2516
2517 p = buffer;
2518 while (isspace(*p)) p++;
2519 if (*p == 0) continue;
2520
2521 /* See if the pattern is to be loaded pre-compiled from a file. */
2522
2523 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2524 {
2525 unsigned long int magic, get_options;
2526 pcre_uint8 sbuf[8];
2527 FILE *f;
2528
2529 p++;
2530 if (*p == '!')
2531 {
2532 do_debug = TRUE;
2533 do_showinfo = TRUE;
2534 p++;
2535 }
2536
2537 pp = p + (int)strlen((char *)p);
2538 while (isspace(pp[-1])) pp--;
2539 *pp = 0;
2540
2541 f = fopen((char *)p, "rb");
2542 if (f == NULL)
2543 {
2544 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2545 continue;
2546 }
2547
2548 first_gotten_store = 0;
2549 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2550
2551 true_size =
2552 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2553 true_study_size =
2554 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2555
2556 re = (real_pcre *)new_malloc(true_size);
2557 regex_gotten_store = first_gotten_store;
2558
2559 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2560
2561 magic = ((real_pcre *)re)->magic_number;
2562 if (magic != MAGIC_NUMBER)
2563 {
2564 if (swap_uint32(magic) == MAGIC_NUMBER)
2565 {
2566 do_flip = 1;
2567 }
2568 else
2569 {
2570 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2571 fclose(f);
2572 continue;
2573 }
2574 }
2575
2576 /* We hide the byte-invert info for little and big endian tests. */
2577 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2578 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2579
2580 /* Now see if there is any following study data. */
2581
2582 if (true_study_size != 0)
2583 {
2584 pcre_study_data *psd;
2585
2586 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2587 extra->flags = PCRE_EXTRA_STUDY_DATA;
2588
2589 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2590 extra->study_data = psd;
2591
2592 if (fread(psd, 1, true_study_size, f) != true_study_size)
2593 {
2594 FAIL_READ:
2595 fprintf(outfile, "Failed to read data from %s\n", p);
2596 if (extra != NULL)
2597 {
2598 PCRE_FREE_STUDY(extra);
2599 }
2600 if (re != NULL) new_free(re);
2601 fclose(f);
2602 continue;
2603 }
2604 fprintf(outfile, "Study data loaded from %s\n", p);
2605 do_study = 1; /* To get the data output if requested */
2606 }
2607 else fprintf(outfile, "No study data\n");
2608
2609 /* Flip the necessary bytes. */
2610 if (do_flip)
2611 {
2612 int rc;
2613 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2614 if (rc == PCRE_ERROR_BADMODE)
2615 {
2616 /* Simulate the result of the function call below. */
2617 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2618 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2619 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2620 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2621 continue;
2622 }
2623 }
2624
2625 /* Need to know if UTF-8 for printing data strings. */
2626
2627 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2628 use_utf = (get_options & PCRE_UTF8) != 0;
2629
2630 fclose(f);
2631 goto SHOW_INFO;
2632 }
2633
2634 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2635 the pattern; if it isn't complete, read more. */
2636
2637 delimiter = *p++;
2638
2639 if (isalnum(delimiter) || delimiter == '\\')
2640 {
2641 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2642 goto SKIP_DATA;
2643 }
2644
2645 pp = p;
2646 poffset = (int)(p - buffer);
2647
2648 for(;;)
2649 {
2650 while (*pp != 0)
2651 {
2652 if (*pp == '\\' && pp[1] != 0) pp++;
2653 else if (*pp == delimiter) break;
2654 pp++;
2655 }
2656 if (*pp != 0) break;
2657 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2658 {
2659 fprintf(outfile, "** Unexpected EOF\n");
2660 done = 1;
2661 goto CONTINUE;
2662 }
2663 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2664 }
2665
2666 /* The buffer may have moved while being extended; reset the start of data
2667 pointer to the correct relative point in the buffer. */
2668
2669 p = buffer + poffset;
2670
2671 /* If the first character after the delimiter is backslash, make
2672 the pattern end with backslash. This is purely to provide a way
2673 of testing for the error message when a pattern ends with backslash. */
2674
2675 if (pp[1] == '\\') *pp++ = '\\';
2676
2677 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2678 for callouts. */
2679
2680 *pp++ = 0;
2681 strcpy((char *)pbuffer, (char *)p);
2682
2683 /* Look for options after final delimiter */
2684
2685 options = 0;
2686 study_options = 0;
2687 log_store = showstore; /* default from command line */
2688
2689 while (*pp != 0)
2690 {
2691 switch (*pp++)
2692 {
2693 case 'f': options |= PCRE_FIRSTLINE; break;
2694 case 'g': do_g = 1; break;
2695 case 'i': options |= PCRE_CASELESS; break;
2696 case 'm': options |= PCRE_MULTILINE; break;
2697 case 's': options |= PCRE_DOTALL; break;
2698 case 'x': options |= PCRE_EXTENDED; break;
2699
2700 case '+':
2701 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2702 break;
2703
2704 case '=': do_allcaps = 1; break;
2705 case 'A': options |= PCRE_ANCHORED; break;
2706 case 'B': do_debug = 1; break;
2707 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2708 case 'D': do_debug = do_showinfo = 1; break;
2709 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2710 case 'F': do_flip = 1; break;
2711 case 'G': do_G = 1; break;
2712 case 'I': do_showinfo = 1; break;
2713 case 'J': options |= PCRE_DUPNAMES; break;
2714 case 'K': do_mark = 1; break;
2715 case 'M': log_store = 1; break;
2716 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2717
2718 #if !defined NOPOSIX
2719 case 'P': do_posix = 1; break;
2720 #endif
2721
2722 case 'S':
2723 if (do_study == 0)
2724 {
2725 do_study = 1;
2726 if (*pp == '+')
2727 {
2728 study_options |= PCRE_STUDY_JIT_COMPILE;
2729 pp++;
2730 }
2731 }
2732 else
2733 {
2734 do_study = 0;
2735 no_force_study = 1;
2736 }
2737 break;
2738
2739 case 'U': options |= PCRE_UNGREEDY; break;
2740 case 'W': options |= PCRE_UCP; break;
2741 case 'X': options |= PCRE_EXTRA; break;
2742 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2743 case 'Z': debug_lengths = 0; break;
2744 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2745 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2746
2747 case 'T':
2748 switch (*pp++)
2749 {
2750 case '0': tables = tables0; break;
2751 case '1': tables = tables1; break;
2752
2753 case '\r':
2754 case '\n':
2755 case ' ':
2756 case 0:
2757 fprintf(outfile, "** Missing table number after /T\n");
2758 goto SKIP_DATA;
2759
2760 default:
2761 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2762 goto SKIP_DATA;
2763 }
2764 break;
2765
2766 case 'L':
2767 ppp = pp;
2768 /* The '\r' test here is so that it works on Windows. */
2769 /* The '0' test is just in case this is an unterminated line. */
2770 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2771 *ppp = 0;
2772 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2773 {
2774 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2775 goto SKIP_DATA;
2776 }
2777 locale_set = 1;
2778 tables = PCRE_MAKETABLES;
2779 pp = ppp;
2780 break;
2781
2782 case '>':
2783 to_file = pp;
2784 while (*pp != 0) pp++;
2785 while (isspace(pp[-1])) pp--;
2786 *pp = 0;
2787 break;
2788
2789 case '<':
2790 {
2791 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2792 {
2793 options |= PCRE_JAVASCRIPT_COMPAT;
2794 pp += 3;
2795 }
2796 else
2797 {
2798 int x = check_newline(pp, outfile);
2799 if (x == 0) goto SKIP_DATA;
2800 options |= x;
2801 while (*pp++ != '>');
2802 }
2803 }
2804 break;
2805
2806 case '\r': /* So that it works in Windows */
2807 case '\n':
2808 case ' ':
2809 break;
2810
2811 default:
2812 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2813 goto SKIP_DATA;
2814 }
2815 }
2816
2817 /* Handle compiling via the POSIX interface, which doesn't support the
2818 timing, showing, or debugging options, nor the ability to pass over
2819 local character tables. Neither does it have 16-bit support. */
2820
2821 #if !defined NOPOSIX
2822 if (posix || do_posix)
2823 {
2824 int rc;
2825 int cflags = 0;
2826
2827 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2828 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2829 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2830 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2831 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2832 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2833 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2834
2835 first_gotten_store = 0;
2836 rc = regcomp(&preg, (char *)p, cflags);
2837
2838 /* Compilation failed; go back for another re, skipping to blank line
2839 if non-interactive. */
2840
2841 if (rc != 0)
2842 {
2843 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2844 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2845 goto SKIP_DATA;
2846 }
2847 }
2848
2849 /* Handle compiling via the native interface */
2850
2851 else
2852 #endif /* !defined NOPOSIX */
2853
2854 {
2855 unsigned long int get_options;
2856
2857 /* In 16-bit mode, convert the input. */
2858
2859 #ifdef SUPPORT_PCRE16
2860 if (use_pcre16)
2861 {
2862 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2863 {
2864 case -1:
2865 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2866 "converted to UTF-16\n");
2867 goto SKIP_DATA;
2868
2869 case -2:
2870 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2871 "cannot be converted to UTF-16\n");
2872 goto SKIP_DATA;
2873
2874 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2875 fprintf(outfile, "**Failed: character value greater than 0xffff "
2876 "cannot be converted to 16-bit in non-UTF mode\n");
2877 goto SKIP_DATA;
2878
2879 default:
2880 break;
2881 }
2882 p = (pcre_uint8 *)buffer16;
2883 }
2884 #endif
2885
2886 /* Compile many times when timing */
2887
2888 if (timeit > 0)
2889 {
2890 register int i;
2891 clock_t time_taken;
2892 clock_t start_time = clock();
2893 for (i = 0; i < timeit; i++)
2894 {
2895 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2896 if (re != NULL) free(re);
2897 }
2898 time_taken = clock() - start_time;
2899 fprintf(outfile, "Compile time %.4f milliseconds\n",
2900 (((double)time_taken * 1000.0) / (double)timeit) /
2901 (double)CLOCKS_PER_SEC);
2902 }
2903
2904 first_gotten_store = 0;
2905 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2906
2907 /* Compilation failed; go back for another re, skipping to blank line
2908 if non-interactive. */
2909
2910 if (re == NULL)
2911 {
2912 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2913 SKIP_DATA:
2914 if (infile != stdin)
2915 {
2916 for (;;)
2917 {
2918 if (extend_inputline(infile, buffer, NULL) == NULL)
2919 {
2920 done = 1;
2921 goto CONTINUE;
2922 }
2923 len = (int)strlen((char *)buffer);
2924 while (len > 0 && isspace(buffer[len-1])) len--;
2925 if (len == 0) break;
2926 }
2927 fprintf(outfile, "\n");
2928 }
2929 goto CONTINUE;
2930 }
2931
2932 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2933 within the regex; check for this so that we know how to process the data
2934 lines. */
2935
2936 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2937 goto SKIP_DATA;
2938 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2939
2940 /* Extract the size for possible writing before possibly flipping it,
2941 and remember the store that was got. */
2942
2943 true_size = ((real_pcre *)re)->size;
2944 regex_gotten_store = first_gotten_store;
2945
2946 /* Output code size information if requested */
2947
2948 if (log_store)
2949 fprintf(outfile, "Memory allocation (code space): %d\n",
2950 (int)(first_gotten_store -
2951 sizeof(real_pcre) -
2952 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2953
2954 /* If -s or /S was present, study the regex to generate additional info to
2955 help with the matching, unless the pattern has the SS option, which
2956 suppresses the effect of /S (used for a few test patterns where studying is
2957 never sensible). */
2958
2959 if (do_study || (force_study >= 0 && !no_force_study))
2960 {
2961 if (timeit > 0)
2962 {
2963 register int i;
2964 clock_t time_taken;
2965 clock_t start_time = clock();
2966 for (i = 0; i < timeit; i++)
2967 {
2968 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2969 }
2970 time_taken = clock() - start_time;
2971 if (extra != NULL)
2972 {
2973 PCRE_FREE_STUDY(extra);
2974 }
2975 fprintf(outfile, " Study time %.4f milliseconds\n",
2976 (((double)time_taken * 1000.0) / (double)timeit) /
2977 (double)CLOCKS_PER_SEC);
2978 }
2979 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2980 if (error != NULL)
2981 fprintf(outfile, "Failed to study: %s\n", error);
2982 else if (extra != NULL)
2983 {
2984 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2985 if (log_store)
2986 {
2987 size_t jitsize;
2988 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2989 jitsize != 0)
2990 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2991 }
2992 }
2993 }
2994
2995 /* If /K was present, we set up for handling MARK data. */
2996
2997 if (do_mark)
2998 {
2999 if (extra == NULL)
3000 {
3001 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3002 extra->flags = 0;
3003 }
3004 extra->mark = &markptr;
3005 extra->flags |= PCRE_EXTRA_MARK;
3006 }
3007
3008 /* Extract and display information from the compiled data if required. */
3009
3010 SHOW_INFO:
3011
3012 if (do_debug)
3013 {
3014 fprintf(outfile, "------------------------------------------------------------------\n");
3015 PCRE_PRINTINT(re, outfile, debug_lengths);
3016 }
3017
3018 /* We already have the options in get_options (see above) */
3019
3020 if (do_showinfo)
3021 {
3022 unsigned long int all_options;
3023 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3024 hascrorlf;
3025 int nameentrysize, namecount;
3026 const pcre_uint8 *nametable;
3027
3028 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3029 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3030 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3031 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3032 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3033 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3034 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3035 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3036 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3037 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3038 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3039 != 0)
3040 goto SKIP_DATA;
3041
3042 if (size != regex_gotten_store) fprintf(outfile,
3043 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3044 (int)size, (int)regex_gotten_store);
3045
3046 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3047 if (backrefmax > 0)
3048 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3049
3050 if (namecount > 0)
3051 {
3052 fprintf(outfile, "Named capturing subpatterns:\n");
3053 while (namecount-- > 0)
3054 {
3055 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3056 int imm2_size = use_pcre16 ? 1 : 2;
3057 #else
3058 int imm2_size = IMM2_SIZE;
3059 #endif
3060 int length = (int)STRLEN(nametable + imm2_size);
3061 fprintf(outfile, " ");
3062 PCHARSV(nametable, imm2_size, length, outfile);
3063 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3064 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3065 fprintf(outfile, "%3d\n", use_pcre16?
3066 (int)(((PCRE_SPTR16)nametable)[0])
3067 :((int)nametable[0] << 8) | (int)nametable[1]);
3068 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3069 #else
3070 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3071 #ifdef SUPPORT_PCRE8
3072 nametable += nameentrysize;
3073 #else
3074 nametable += nameentrysize * 2;
3075 #endif
3076 #endif
3077 }
3078 }
3079
3080 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3081 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3082
3083 all_options = ((real_pcre *)re)->options;
3084 if (do_flip) all_options = swap_uint32(all_options);
3085
3086 if (get_options == 0) fprintf(outfile, "No options\n");
3087 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3088 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3089 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3090 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3091 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3092 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3093 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3094 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3095 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3096 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3097 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3098 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3099 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3100 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3101 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3102 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3103 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3104 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3105
3106 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3107
3108 switch (get_options & PCRE_NEWLINE_BITS)
3109 {
3110 case PCRE_NEWLINE_CR:
3111 fprintf(outfile, "Forced newline sequence: CR\n");
3112 break;
3113
3114 case PCRE_NEWLINE_LF:
3115 fprintf(outfile, "Forced newline sequence: LF\n");
3116 break;
3117
3118 case PCRE_NEWLINE_CRLF:
3119 fprintf(outfile, "Forced newline sequence: CRLF\n");
3120 break;
3121
3122 case PCRE_NEWLINE_ANYCRLF:
3123 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3124 break;
3125
3126 case PCRE_NEWLINE_ANY:
3127 fprintf(outfile, "Forced newline sequence: ANY\n");
3128 break;
3129
3130 default:
3131 break;
3132 }
3133
3134 if (first_char == -1)
3135 {
3136 fprintf(outfile, "First char at start or follows newline\n");
3137 }
3138 else if (first_char < 0)
3139 {
3140 fprintf(outfile, "No first char\n");
3141 }
3142 else
3143 {
3144 const char *caseless =
3145 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3146 "" : " (caseless)";
3147
3148 if (PRINTOK(first_char))
3149 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3150 else
3151 {
3152 fprintf(outfile, "First char = ");
3153 pchar(first_char, outfile);
3154 fprintf(outfile, "%s\n", caseless);
3155 }
3156 }
3157
3158 if (need_char < 0)
3159 {
3160 fprintf(outfile, "No need char\n");
3161 }
3162 else
3163 {
3164 const char *caseless =
3165 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3166 "" : " (caseless)";
3167
3168 if (PRINTOK(need_char))
3169 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3170 else
3171 {
3172 fprintf(outfile, "Need char = ");
3173 pchar(need_char, outfile);
3174 fprintf(outfile, "%s\n", caseless);
3175 }
3176 }
3177
3178 /* Don't output study size; at present it is in any case a fixed
3179 value, but it varies, depending on the computer architecture, and
3180 so messes up the test suite. (And with the /F option, it might be
3181 flipped.) If study was forced by an external -s, don't show this
3182 information unless -i or -d was also present. This means that, except
3183 when auto-callouts are involved, the output from runs with and without
3184 -s should be identical. */
3185
3186 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3187 {
3188 if (extra == NULL)
3189 fprintf(outfile, "Study returned NULL\n");
3190 else
3191 {
3192 pcre_uint8 *start_bits = NULL;
3193 int minlength;
3194
3195 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3196 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3197
3198 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3199 {
3200 if (start_bits == NULL)
3201 fprintf(outfile, "No set of starting bytes\n");
3202 else
3203 {
3204 int i;
3205 int c = 24;
3206 fprintf(outfile, "Starting byte set: ");
3207 for (i = 0; i < 256; i++)
3208 {
3209 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3210 {
3211 if (c > 75)
3212 {
3213 fprintf(outfile, "\n ");
3214 c = 2;
3215 }
3216 if (PRINTOK(i) && i != ' ')
3217 {
3218 fprintf(outfile, "%c ", i);
3219 c += 2;
3220 }
3221 else
3222 {
3223 fprintf(outfile, "\\x%02x ", i);
3224 c += 5;
3225 }
3226 }
3227 }
3228 fprintf(outfile, "\n");
3229 }
3230 }
3231 }
3232
3233 /* Show this only if the JIT was set by /S, not by -s. */
3234
3235 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3236 {
3237 int jit;
3238 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3239 {
3240 if (jit)
3241 fprintf(outfile, "JIT study was successful\n");
3242 else
3243 #ifdef SUPPORT_JIT
3244 fprintf(outfile, "JIT study was not successful\n");
3245 #else
3246 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3247 #endif
3248 }
3249 }
3250 }
3251 }
3252
3253 /* If the '>' option was present, we write out the regex to a file, and
3254 that is all. The first 8 bytes of the file are the regex length and then
3255 the study length, in big-endian order. */
3256
3257 if (to_file != NULL)
3258 {
3259 FILE *f = fopen((char *)to_file, "wb");
3260 if (f == NULL)
3261 {
3262 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3263 }
3264 else
3265 {
3266 pcre_uint8 sbuf[8];
3267
3268 if (do_flip) regexflip(re, extra);
3269 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3270 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3271 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3272 sbuf[3] = (pcre_uint8)((true_size) & 255);
3273 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3274 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3275 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3276 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3277
3278 if (fwrite(sbuf, 1, 8, f) < 8 ||
3279 fwrite(re, 1, true_size, f) < true_size)
3280 {
3281 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3282 }
3283 else
3284 {
3285 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3286
3287 /* If there is study data, write it. */
3288
3289 if (extra != NULL)
3290 {
3291 if (fwrite(extra->study_data, 1, true_study_size, f) <
3292 true_study_size)
3293 {
3294 fprintf(outfile, "Write error on %s: %s\n", to_file,
3295 strerror(errno));
3296 }
3297 else fprintf(outfile, "Study data written to %s\n", to_file);
3298 }
3299 }
3300 fclose(f);
3301 }
3302
3303 new_free(re);
3304 if (extra != NULL)
3305 {
3306 PCRE_FREE_STUDY(extra);
3307 }
3308 if (locale_set)
3309 {
3310 new_free((void *)tables);
3311 setlocale(LC_CTYPE, "C");
3312 locale_set = 0;
3313 }
3314 continue; /* With next regex */
3315 }
3316 } /* End of non-POSIX compile */
3317
3318 /* Read data lines and test them */
3319
3320 for (;;)
3321 {
3322 pcre_uint8 *q;
3323 pcre_uint8 *bptr;
3324 int *use_offsets = offsets;
3325 int use_size_offsets = size_offsets;
3326 int callout_data = 0;
3327 int callout_data_set = 0;
3328 int count, c;
3329 int copystrings = 0;
3330 int find_match_limit = default_find_match_limit;
3331 int getstrings = 0;
3332 int getlist = 0;
3333 int gmatched = 0;
3334 int start_offset = 0;
3335 int start_offset_sign = 1;
3336 int g_notempty = 0;
3337 int use_dfa = 0;
3338
3339 *copynames = 0;
3340 *getnames = 0;
3341
3342 cn16ptr = copynames;
3343 gn16ptr = getnames;
3344 cn8ptr = copynames8;
3345 gn8ptr = getnames8;
3346
3347 SET_PCRE_CALLOUT(callout);
3348 first_callout = 1;
3349 last_callout_mark = NULL;
3350 callout_extra = 0;
3351 callout_count = 0;
3352 callout_fail_count = 999999;
3353 callout_fail_id = -1;
3354 show_malloc = 0;
3355 options = 0;
3356
3357 if (extra != NULL) extra->flags &=
3358 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3359
3360 len = 0;
3361 for (;;)
3362 {
3363 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3364 {
3365 if (len > 0) /* Reached EOF without hitting a newline */
3366 {
3367 fprintf(outfile, "\n");
3368 break;
3369 }
3370 done = 1;
3371 goto CONTINUE;
3372 }
3373 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3374 len = (int)strlen((char *)buffer);
3375 if (buffer[len-1] == '\n') break;
3376 }
3377
3378 while (len > 0 && isspace(buffer[len-1])) len--;
3379 buffer[len] = 0;
3380 if (len == 0) break;
3381
3382 p = buffer;
3383 while (isspace(*p)) p++;
3384
3385 bptr = q = dbuffer;
3386 while ((c = *p++) != 0)
3387 {
3388 int i = 0;
3389 int n = 0;
3390
3391 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3392 In non-UTF mode, allow the value of the byte to fall through to later,
3393 where values greater than 127 are turned into UTF-8 when running in
3394 16-bit mode. */
3395
3396 if (c != '\\')
3397 {
3398 if (use_utf)
3399 {
3400 *q++ = c;
3401 continue;
3402 }
3403 }
3404
3405 /* Handle backslash escapes */
3406
3407 else switch ((c = *p++))
3408 {
3409 case 'a': c = 7; break;
3410 case 'b': c = '\b'; break;
3411 case 'e': c = 27; break;
3412 case 'f': c = '\f'; break;
3413 case 'n': c = '\n'; break;
3414 case 'r': c = '\r'; break;
3415 case 't': c = '\t'; break;
3416 case 'v': c = '\v'; break;
3417
3418 case '0': case '1': case '2': case '3':
3419 case '4': case '5': case '6': case '7':
3420 c -= '0';
3421 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3422 c = c * 8 + *p++ - '0';
3423 break;
3424
3425 case 'x':
3426 if (*p == '{')
3427 {
3428 pcre_uint8 *pt = p;
3429 c = 0;
3430
3431 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3432 when isxdigit() is a macro that refers to its argument more than
3433 once. This is banned by the C Standard, but apparently happens in at
3434 least one MacOS environment. */
3435
3436 for (pt++; isxdigit(*pt); pt++)
3437 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3438 if (*pt == '}')
3439 {
3440 p = pt + 1;
3441 break;
3442 }
3443 /* Not correct form for \x{...}; fall through */
3444 }
3445
3446 /* \x without {} always defines just one byte in 8-bit mode. This
3447 allows UTF-8 characters to be constructed byte by byte, and also allows
3448 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3449 Otherwise, pass it down to later code so that it can be turned into
3450 UTF-8 when running in 16-bit mode. */
3451
3452 c = 0;
3453 while (i++ < 2 && isxdigit(*p))
3454 {
3455 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3456 p++;
3457 }
3458 if (use_utf)
3459 {
3460 *q++ = c;
3461 continue;
3462 }
3463 break;
3464
3465 case 0: /* \ followed by EOF allows for an empty line */
3466 p--;
3467 continue;
3468
3469 case '>':
3470 if (*p == '-')
3471 {
3472 start_offset_sign = -1;
3473 p++;
3474 }
3475 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3476 start_offset *= start_offset_sign;
3477 continue;
3478
3479 case 'A': /* Option setting */
3480 options |= PCRE_ANCHORED;
3481 continue;
3482
3483 case 'B':
3484 options |= PCRE_NOTBOL;
3485 continue;
3486
3487 case 'C':
3488 if (isdigit(*p)) /* Set copy string */
3489 {
3490 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3491 copystrings |= 1 << n;
3492 }
3493 else if (isalnum(*p))
3494 {
3495 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3496 }
3497 else if (*p == '+')
3498 {
3499 callout_extra = 1;
3500 p++;
3501 }
3502 else if (*p == '-')
3503 {
3504 SET_PCRE_CALLOUT(NULL);
3505 p++;
3506 }
3507 else if (*p == '!')
3508 {
3509 callout_fail_id = 0;
3510 p++;
3511 while(isdigit(*p))
3512 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3513 callout_fail_count = 0;
3514 if (*p == '!')
3515 {
3516 p++;
3517 while(isdigit(*p))
3518 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3519 }
3520 }
3521 else if (*p == '*')
3522 {
3523 int sign = 1;
3524 callout_data = 0;
3525 if (*(++p) == '-') { sign = -1; p++; }
3526 while(isdigit(*p))
3527 callout_data = callout_data * 10 + *p++ - '0';
3528 callout_data *= sign;
3529 callout_data_set = 1;
3530 }
3531 continue;
3532
3533 #if !defined NODFA
3534 case 'D':
3535 #if !defined NOPOSIX
3536 if (posix || do_posix)
3537 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3538 else
3539 #endif
3540 use_dfa = 1;
3541 continue;
3542 #endif
3543
3544 #if !defined NODFA
3545 case 'F':
3546 options |= PCRE_DFA_SHORTEST;
3547 continue;
3548 #endif
3549
3550 case 'G':
3551 if (isdigit(*p))
3552 {
3553 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3554 getstrings |= 1 << n;
3555 }
3556 else if (isalnum(*p))
3557 {
3558 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3559 }
3560 continue;
3561
3562 case 'J':
3563 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3564 if (extra != NULL
3565 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3566 && extra->executable_jit != NULL)
3567 {
3568 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3569 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3570 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3571 }
3572 continue;
3573
3574 case 'L':
3575 getlist = 1;
3576 continue;
3577
3578 case 'M':
3579 find_match_limit = 1;
3580 continue;
3581
3582 case 'N':
3583 if ((options & PCRE_NOTEMPTY) != 0)
3584 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3585 else
3586 options |= PCRE_NOTEMPTY;
3587 continue;
3588
3589 case 'O':
3590 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3591 if (n > size_offsets_max)
3592 {
3593 size_offsets_max = n;
3594 free(offsets);
3595 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3596 if (offsets == NULL)
3597 {
3598 printf("** Failed to get %d bytes of memory for offsets vector\n",
3599 (int)(size_offsets_max * sizeof(int)));
3600 yield = 1;
3601 goto EXIT;
3602 }
3603 }
3604 use_size_offsets = n;
3605 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3606 continue;
3607
3608 case 'P':
3609 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3610 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3611 continue;
3612
3613 case 'Q':
3614 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3615 if (extra == NULL)
3616 {
3617 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3618 extra->flags = 0;
3619 }
3620 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3621 extra->match_limit_recursion = n;
3622 continue;
3623
3624 case 'q':
3625 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3626 if (extra == NULL)
3627 {
3628 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3629 extra->flags = 0;
3630 }
3631 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3632 extra->match_limit = n;
3633 continue;
3634
3635 #if !defined NODFA
3636 case 'R':
3637 options |= PCRE_DFA_RESTART;
3638 continue;
3639 #endif
3640
3641 case 'S':
3642 show_malloc = 1;
3643 continue;
3644
3645 case 'Y':
3646 options |= PCRE_NO_START_OPTIMIZE;
3647 continue;
3648
3649 case 'Z':
3650 options |= PCRE_NOTEOL;
3651 continue;
3652
3653 case '?':
3654 options |= PCRE_NO_UTF8_CHECK;
3655 continue;
3656
3657 case '<':
3658 {
3659 int x = check_newline(p, outfile);
3660 if (x == 0) goto NEXT_DATA;
3661 options |= x;
3662 while (*p++ != '>');
3663 }
3664 continue;
3665 }
3666
3667 /* We now have a character value in c that may be greater than 255. In
3668 16-bit mode, we always convert characters to UTF-8 so that values greater
3669 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3670 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3671 mode must have come from \x{...} or octal constructs because values from
3672 \x.. get this far only in non-UTF mode. */
3673
3674 if (use_pcre16 || use_utf)
3675 {
3676 pcre_uint8 buff8[8];
3677 int ii, utn;
3678 utn = ord2utf8(c, buff8);
3679 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3680 }
3681 else
3682 {
3683 if (c > 255)
3684 {
3685 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3686 "and UTF-8 mode is not enabled.\n", c);
3687 fprintf(outfile, "** Truncation will probably give the wrong "
3688 "result.\n");
3689 }
3690 *q++ = c;
3691 }
3692 }
3693
3694 /* Reached end of subject string */
3695
3696 *q = 0;
3697 len = (int)(q - dbuffer);
3698
3699 /* Move the data to the end of the buffer so that a read over the end of
3700 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3701 we are using the POSIX interface, we must include the terminating zero. */
3702
3703 #if !defined NOPOSIX
3704 if (posix || do_posix)
3705 {
3706 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3707 bptr += buffer_size - len - 1;
3708 }
3709 else
3710 #endif
3711 {
3712 memmove(bptr + buffer_size - len, bptr, len);
3713 bptr += buffer_size - len;
3714 }
3715
3716 if ((all_use_dfa || use_dfa) && find_match_limit)
3717 {
3718 printf("**Match limit not relevant for DFA matching: ignored\n");
3719 find_match_limit = 0;
3720 }
3721
3722 /* Handle matching via the POSIX interface, which does not
3723 support timing or playing with the match limit or callout data. */
3724
3725 #if !defined NOPOSIX
3726 if (posix || do_posix)
3727 {
3728 int rc;
3729 int eflags = 0;
3730 regmatch_t *pmatch = NULL;
3731 if (use_size_offsets > 0)
3732 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3733 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3734 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3735 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3736
3737 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3738
3739 if (rc != 0)
3740 {
3741 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3742 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3743 }
3744 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3745 != 0)
3746 {
3747 fprintf(outfile, "Matched with REG_NOSUB\n");
3748 }
3749 else
3750 {
3751 size_t i;
3752 for (i = 0; i < (size_t)use_size_offsets; i++)
3753 {
3754 if (pmatch[i].rm_so >= 0)
3755 {
3756 fprintf(outfile, "%2d: ", (int)i);
3757 PCHARSV(dbuffer, pmatch[i].rm_so,
3758 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3759 fprintf(outfile, "\n");
3760 if (do_showcaprest || (i == 0 && do_showrest))
3761 {
3762 fprintf(outfile, "%2d+ ", (int)i);
3763 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3764 outfile);
3765 fprintf(outfile, "\n");
3766 }
3767 }
3768 }
3769 }
3770 free(pmatch);
3771 goto NEXT_DATA;
3772 }
3773
3774 #endif /* !defined NOPOSIX */
3775
3776 /* Handle matching via the native interface - repeats for /g and /G */
3777
3778 #ifdef SUPPORT_PCRE16
3779 if (use_pcre16)
3780 {
3781 len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3782 switch(len)
3783 {
3784 case -1:
3785 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3786 "converted to UTF-16\n");
3787 goto NEXT_DATA;
3788
3789 case -2:
3790 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3791 "cannot be converted to UTF-16\n");
3792 goto NEXT_DATA;
3793
3794 case -3:
3795 fprintf(outfile, "**Failed: character value greater than 0xffff "
3796 "cannot be converted to 16-bit in non-UTF mode\n");
3797 goto NEXT_DATA;
3798
3799 default:
3800 break;
3801 }
3802 bptr = (pcre_uint8 *)buffer16;
3803 }
3804 #endif
3805
3806 for (;; gmatched++) /* Loop for /g or /G */
3807 {
3808 markptr = NULL;
3809
3810 if (timeitm > 0)
3811 {
3812 register int i;
3813 clock_t time_taken;
3814 clock_t start_time = clock();
3815
3816 #if !defined NODFA
3817 if (all_use_dfa || use_dfa)
3818 {
3819 int workspace[1000];
3820 for (i = 0; i < timeitm; i++)
3821 {
3822 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3823 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3824 (sizeof(workspace)/sizeof(int)));
3825 }
3826 }
3827 else
3828 #endif
3829
3830 for (i = 0; i < timeitm; i++)
3831 {
3832 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3833 (options | g_notempty), use_offsets, use_size_offsets);
3834 }
3835 time_taken = clock() - start_time;
3836 fprintf(outfile, "Execute time %.4f milliseconds\n",
3837 (((double)time_taken * 1000.0) / (double)timeitm) /
3838 (double)CLOCKS_PER_SEC);
3839 }
3840
3841 /* If find_match_limit is set, we want to do repeated matches with
3842 varying limits in order to find the minimum value for the match limit and
3843 for the recursion limit. The match limits are relevant only to the normal
3844 running of pcre_exec(), so disable the JIT optimization. This makes it
3845 possible to run the same set of tests with and without JIT externally
3846 requested. */
3847
3848 if (find_match_limit)
3849 {
3850 if (extra == NULL)
3851 {
3852 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3853 extra->flags = 0;
3854 }
3855 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3856
3857 (void)check_match_limit(re, extra, bptr, len, start_offset,
3858 options|g_notempty, use_offsets, use_size_offsets,
3859 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3860 PCRE_ERROR_MATCHLIMIT, "match()");
3861
3862 count = check_match_limit(re, extra, bptr, len, start_offset,
3863 options|g_notempty, use_offsets, use_size_offsets,
3864 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3865 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3866 }
3867
3868 /* If callout_data is set, use the interface with additional data */
3869
3870 else if (callout_data_set)
3871 {
3872 if (extra == NULL)
3873 {
3874 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3875 extra->flags = 0;
3876 }
3877 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3878 extra->callout_data = &callout_data;
3879 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3880 options | g_notempty, use_offsets, use_size_offsets);
3881 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3882 }
3883
3884 /* The normal case is just to do the match once, with the default
3885 value of match_limit. */
3886
3887 #if !defined NODFA
3888 else if (all_use_dfa || use_dfa)
3889 {
3890 int workspace[1000];
3891 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3892 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3893 (sizeof(workspace)/sizeof(int)));
3894 if (count == 0)
3895 {
3896 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3897 count = use_size_offsets/2;
3898 }
3899 }
3900 #endif
3901
3902 else
3903 {
3904 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3905 options | g_notempty, use_offsets, use_size_offsets);
3906 if (count == 0)
3907 {
3908 fprintf(outfile, "Matched, but too many substrings\n");
3909 count = use_size_offsets/3;
3910 }
3911 }
3912
3913 /* Matched */
3914
3915 if (count >= 0)
3916 {
3917 int i, maxcount;
3918 void *cnptr, *gnptr;
3919
3920 #if !defined NODFA
3921 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3922 #endif
3923 maxcount = use_size_offsets/3;
3924
3925 /* This is a check against a lunatic return value. */
3926
3927 if (count > maxcount)
3928 {
3929 fprintf(outfile,
3930 "** PCRE error: returned count %d is too big for offset size %d\n",
3931 count, use_size_offsets);
3932 count = use_size_offsets/3;
3933 if (do_g || do_G)
3934 {
3935 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3936 do_g = do_G = FALSE; /* Break g/G loop */
3937 }
3938 }
3939
3940 /* do_allcaps requests showing of all captures in the pattern, to check
3941 unset ones at the end. */
3942
3943 if (do_allcaps)
3944 {
3945 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3946 goto SKIP_DATA;
3947 count++; /* Allow for full match */
3948 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3949 }
3950
3951 /* Output the captured substrings */
3952
3953 for (i = 0; i < count * 2; i += 2)
3954 {
3955 if (use_offsets[i] < 0)
3956 {
3957 if (use_offsets[i] != -1)
3958 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3959 use_offsets[i], i);
3960 if (use_offsets[i+1] != -1)
3961 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3962 use_offsets[i+1], i+1);
3963 fprintf(outfile, "%2d: <unset>\n", i/2);
3964 }
3965 else
3966 {
3967 fprintf(outfile, "%2d: ", i/2);
3968 PCHARSV(bptr, use_offsets[i],
3969 use_offsets[i+1] - use_offsets[i], outfile);
3970 fprintf(outfile, "\n");
3971 if (do_showcaprest || (i == 0 && do_showrest))
3972 {
3973 fprintf(outfile, "%2d+ ", i/2);
3974 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3975 outfile);
3976 fprintf(outfile, "\n");
3977 }
3978 }
3979 }
3980
3981 if (markptr != NULL)
3982 {
3983 fprintf(outfile, "MK: ");
3984 PCHARSV(markptr, 0, -1, outfile);
3985 fprintf(outfile, "\n");
3986 }
3987
3988 for (i = 0; i < 32; i++)
3989 {
3990 if ((copystrings & (1 << i)) != 0)
3991 {
3992 int rc;
3993 char copybuffer[256];
3994 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3995 copybuffer, sizeof(copybuffer));
3996 if (rc < 0)
3997 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3998 else
3999 {
4000 fprintf(outfile, "%2dC ", i);
4001 PCHARSV(copybuffer, 0, rc, outfile);
4002 fprintf(outfile, " (%d)\n", rc);
4003 }
4004 }
4005 }
4006
4007 cnptr = copynames;
4008 for (;;)
4009 {
4010 int rc;
4011 char copybuffer[256];
4012
4013 if (use_pcre16)
4014 {
4015 if (*(pcre_uint16 *)cnptr == 0) break;
4016 }
4017 else
4018 {
4019 if (*(pcre_uint8 *)cnptr == 0) break;
4020 }
4021
4022 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4023 cnptr, copybuffer, sizeof(copybuffer));
4024
4025 if (rc < 0)
4026 {
4027 fprintf(outfile, "copy substring ");
4028 PCHARSV(cnptr, 0, -1, outfile);
4029 fprintf(outfile, " failed %d\n", rc);
4030 }
4031 else
4032 {
4033 fprintf(outfile, " C ");
4034 PCHARSV(copybuffer, 0, rc, outfile);
4035 fprintf(outfile, " (%d) ", rc);
4036 PCHARSV(cnptr, 0, -1, outfile);
4037 putc('\n', outfile);
4038 }
4039
4040 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4041 }
4042
4043 for (i = 0; i < 32; i++)
4044 {
4045 if ((getstrings & (1 << i)) != 0)
4046 {
4047 int rc;
4048 const char *substring;
4049 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4050 if (rc < 0)
4051 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4052 else
4053 {
4054 fprintf(outfile, "%2dG ", i);
4055 PCHARSV(substring, 0, rc, outfile);
4056 fprintf(outfile, " (%d)\n", rc);
4057 PCRE_FREE_SUBSTRING(substring);
4058 }
4059 }
4060 }
4061
4062 gnptr = getnames;
4063 for (;;)
4064 {
4065 int rc;
4066 const char *substring;
4067
4068 if (use_pcre16)
4069 {
4070 if (*(pcre_uint16 *)gnptr == 0) break;
4071 }
4072 else
4073 {
4074 if (*(pcre_uint8 *)gnptr == 0) break;
4075 }
4076
4077 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4078 gnptr, &substring);
4079 if (rc < 0)
4080 {
4081 fprintf(outfile, "get substring ");
4082 PCHARSV(gnptr, 0, -1, outfile);
4083 fprintf(outfile, " failed %d\n", rc);
4084 }
4085 else
4086 {
4087 fprintf(outfile, " G ");
4088 PCHARSV(substring, 0, rc, outfile);
4089 fprintf(outfile, " (%d) ", rc);
4090 PCHARSV(gnptr, 0, -1, outfile);
4091 PCRE_FREE_SUBSTRING(substring);
4092 putc('\n', outfile);
4093 }
4094
4095 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4096 }
4097
4098 if (getlist)
4099 {
4100 int rc;
4101 const char **stringlist;
4102 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4103 if (rc < 0)
4104 fprintf(outfile, "get substring list failed %d\n", rc);
4105 else
4106 {
4107 for (i = 0; i < count; i++)
4108 {
4109 fprintf(outfile, "%2dL ", i);
4110 PCHARSV(stringlist[i], 0, -1, outfile);
4111 putc('\n', outfile);
4112 }
4113 if (stringlist[i] != NULL)
4114 fprintf(outfile, "string list not terminated by NULL\n");
4115 PCRE_FREE_SUBSTRING_LIST(stringlist);
4116 }
4117 }
4118 }
4119
4120 /* There was a partial match */
4121
4122 else if (count == PCRE_ERROR_PARTIAL)
4123 {
4124 if (markptr == NULL) fprintf(outfile, "Partial match");
4125 else
4126 {
4127 fprintf(outfile, "Partial match, mark=");
4128 PCHARSV(markptr, 0, -1, outfile);
4129 }
4130 if (use_size_offsets > 1)
4131 {
4132 fprintf(outfile, ": ");
4133 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4134 outfile);
4135 }
4136 fprintf(outfile, "\n");
4137 break; /* Out of the /g loop */
4138 }
4139
4140 /* Failed to match. If this is a /g or /G loop and we previously set
4141 g_notempty after a null match, this is not necessarily the end. We want
4142 to advance the start offset, and continue. We won't be at the end of the
4143 string - that was checked before setting g_notempty.
4144
4145 Complication arises in the case when the newline convention is "any",
4146 "crlf", or "anycrlf". If the previous match was at the end of a line
4147 terminated by CRLF, an advance of one character just passes the \r,
4148 whereas we should prefer the longer newline sequence, as does the code in
4149 pcre_exec(). Fudge the offset value to achieve this. We check for a
4150 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4151 find the default.
4152
4153 Otherwise, in the case of UTF-8 matching, the advance must be one
4154 character, not one byte. */
4155
4156 else
4157 {
4158 if (g_notempty != 0)
4159 {
4160 int onechar = 1;
4161 unsigned int obits = ((real_pcre *)re)->options;
4162 use_offsets[0] = start_offset;
4163 if ((obits & PCRE_NEWLINE_BITS) == 0)
4164 {
4165 int d;
4166 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4167 /* Note that these values are always the ASCII ones, even in
4168 EBCDIC environments. CR = 13, NL = 10. */
4169 obits = (d == 13)? PCRE_NEWLINE_CR :
4170 (d == 10)? PCRE_NEWLINE_LF :
4171 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4172 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4173 (d == -1)? PCRE_NEWLINE_ANY : 0;
4174 }
4175 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4176 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4177 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4178 &&
4179 start_offset < len - 1 &&
4180 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4181 (use_pcre16?
4182 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4183 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4184 :
4185 bptr[start_offset] == '\r'
4186 && bptr[start_offset + 1] == '\n')
4187 #elif defined SUPPORT_PCRE16
4188 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4189 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4190 #else
4191 bptr[start_offset] == '\r'
4192 && bptr[start_offset + 1] == '\n'
4193 #endif
4194 )
4195 onechar++;
4196 else if (use_utf)
4197 {
4198 while (start_offset + onechar < len)
4199 {
4200 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4201 onechar++;
4202 }
4203 }
4204 use_offsets[1] = start_offset + onechar;
4205 }
4206 else
4207 {
4208 switch(count)
4209 {
4210 case PCRE_ERROR_NOMATCH:
4211 if (gmatched == 0)
4212 {
4213 if (markptr == NULL)
4214 {
4215 fprintf(outfile, "No match\n");
4216 }
4217 else
4218 {
4219 fprintf(outfile, "No match, mark = ");
4220 PCHARSV(markptr, 0, -1, outfile);
4221 putc('\n', outfile);
4222 }
4223 }
4224 break;
4225
4226 case PCRE_ERROR_BADUTF8:
4227 case PCRE_ERROR_SHORTUTF8:
4228 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4229 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4230 use_pcre16? "16" : "8");
4231 if (use_size_offsets >= 2)
4232 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4233 use_offsets[1]);
4234 fprintf(outfile, "\n");
4235 break;
4236
4237 case PCRE_ERROR_BADUTF8_OFFSET:
4238 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4239 use_pcre16? "16" : "8");
4240 break;
4241
4242 default:
4243 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4244 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4245 else
4246 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4247 break;
4248 }
4249
4250 break; /* Out of the /g loop */
4251 }
4252 }
4253
4254 /* If not /g or /G we are done */
4255
4256 if (!do_g && !do_G) break;
4257
4258 /* If we have matched an empty string, first check to see if we are at
4259 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4260 Perl's /g option does. This turns out to be rather cunning. First we set
4261 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4262 same point. If this fails (picked up above) we advance to the next
4263 character. */
4264
4265 g_notempty = 0;
4266
4267 if (use_offsets[0] == use_offsets[1])
4268 {
4269 if (use_offsets[0] == len) break;
4270 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4271 }
4272
4273 /* For /g, update the start offset, leaving the rest alone */
4274
4275 if (do_g) start_offset = use_offsets[1];
4276
4277 /* For /G, update the pointer and length */
4278
4279 else
4280 {
4281 bptr += use_offsets[1] * CHAR_SIZE;
4282 len -= use_offsets[1];
4283 }
4284 } /* End of loop for /g and /G */
4285
4286 NEXT_DATA: continue;
4287 } /* End of loop for data lines */
4288
4289 CONTINUE:
4290
4291 #if !defined NOPOSIX
4292 if (posix || do_posix) regfree(&preg);
4293 #endif
4294
4295 if (re != NULL) new_free(re);
4296 if (extra != NULL)
4297 {
4298 PCRE_FREE_STUDY(extra);
4299 }
4300 if (locale_set)
4301 {
4302 new_free((void *)tables);
4303 setlocale(LC_CTYPE, "C");
4304 locale_set = 0;
4305 }
4306 if (jit_stack != NULL)
4307 {
4308 PCRE_JIT_STACK_FREE(jit_stack);
4309 jit_stack = NULL;
4310 }
4311 }
4312
4313 if (infile == stdin) fprintf(outfile, "\n");
4314
4315 EXIT:
4316
4317 if (infile != NULL && infile != stdin) fclose(infile);
4318 if (outfile != NULL && outfile != stdout) fclose(outfile);
4319
4320 free(buffer);
4321 free(dbuffer);
4322 free(pbuffer);
4323 free(offsets);
4324
4325 #ifdef SUPPORT_PCRE16
4326 if (buffer16 != NULL) free(buffer16);
4327 #endif
4328
4329 return yield;
4330 }
4331
4332 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5