/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 847 - (show annotations)
Tue Jan 3 17:49:03 2012 UTC (3 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 128801 byte(s)
Error occurred while calculating annotation data.
fix signed/unsigned half load mismatches and JIT compiler update
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
#if !defined(_WIN32) && !defined(WIN32)

/* Everything that is not Windows. These two headers are needed for
setrlimit(). */

#include <sys/time.h>
#include <sys/resource.h>
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else  /* Windows */

#include <io.h>      /* For _setmode() */
#include <fcntl.h>   /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows uses the underscore-prefixed names for these, but some environments
already define the plain names, so each mapping is made only when missing. */

#if !defined isatty
#define isatty _isatty
#endif

#if !defined fileno
#define fileno _fileno
#endif

/* For Borland Builder 5, route _setmode() to setmode() (a fix sent in by a
user). */

#if defined __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif  /* Windows or not */
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
/* With 16-bit support and no 8-bit support, put the internal macros into
16-bit mode. */

#if !defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
#define COMPILE_PCRE16
#endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
/* 8-bit library version */

#if defined SUPPORT_PCRE8
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

/* 16-bit library version */

#if defined SUPPORT_PCRE16
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
/* NOTE(review): PCRE_INCLUDED is presumably tested inside pcre_tables.c to
tell it that it is being #included rather than compiled on its own - confirm. */
143 #define PCRE_INCLUDED
/* Discard any earlier definition of PRIV() and make it expand to the bare
name. NOTE(review): this assumes pcre_tables.c names its external symbols
through PRIV(), which is how they end up renamed here - confirm. */
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
/* Fixed printable range: 64..254 in an EBCDIC build, 32..126 otherwise. */

#if defined EBCDIC
#define PRINTABLE(c) (64 <= (c) && (c) <= 254)
#else
#define PRINTABLE(c) (32 <= (c) && (c) <= 126)
#endif

/* Use the fixed range until locale_set becomes non-zero; after that, defer to
isprint(). */

#define PRINTOK(c) (!locale_set? PRINTABLE(c) : isprint(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
201 #ifdef SUPPORT_PCRE8
202
/* ----- 8-bit mode: one macro per helper or library call -----

Macros whose first parameter is lv, rc, n, count, re or extra assign the value
returned by the call to that argument. String pointers are cast to the type the
8-bit function expects; the cast applies to the argument text exactly as
written, so pass a plain pointer and, for the PCHARS macros, give any
displacement through the separate offset parameter. */

/* Call pchars() on the string at p + offset, with length len and file f. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

/* The same, discarding the value that pchars() returns. */

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* The cn16 argument is accepted only so that the 8-bit and 16-bit versions
can be called with the same argument list; it is not used here. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

/* Length of a zero-terminated 8-bit string, as an int. */

#define STRLEN8(p) ((int)strlen((char *)p))


#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* The second parameter is the compiled pattern. It used to be spelled "rc"
while the expansion used "re", so the macro ignored its argument and picked up
whatever variable called re happened to be in scope at the point of use. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)
270
271 #endif /* SUPPORT_PCRE8 */
272
273 /* -----------------------------------------------------------*/
274
275 #ifdef SUPPORT_PCRE16
276
/* ----- 16-bit mode: one macro per helper or library call -----

These mirror the 8-bit macros argument for argument. Macros whose first
parameter is lv, rc, n, count, re or extra assign the value returned by the
call to that argument. String pointers are cast to the 16-bit types; the cast
applies to the argument text exactly as written, so pass a plain pointer and,
for the PCHARS macros, give any displacement through the separate offset
parameter (it is then counted in 16-bit units). */

/* Call pchars16() on the string at p + offset, with length len and file f. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* The same, discarding the value that pchars16() returns. */

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* The cn8 argument is accepted only so that the 8-bit and 16-bit versions
can be called with the same argument list; it is not used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

/* Value returned by strlen16() for the string, as an int. */

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = callout


#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)

/* In the two copy macros the size argument is halved before it is passed on.
It is parenthesized first so that an expression such as "a + b" is halved as a
whole. NOTE(review): presumably callers give the buffer size in bytes and the
library wants it in 16-bit units - confirm. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study(extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* The second parameter is the compiled pattern. It used to be spelled "rc"
while the expansion used "re", so the macro ignored its argument and picked up
whatever variable called re happened to be in scope at the point of use. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)
347
348 #endif /* SUPPORT_PCRE16 */
349
350
351 /* ----- Both modes are supported; a runtime test is needed, except for
352 pcre_config(), and the JIT stack functions, when it doesn't matter which
353 version is called. ----- */
354
355 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356
/* Each statement-like macro below tests use_pcre16 at run time and expands the
16-bit or the 8-bit version of the same macro with the same arguments. The
test is wrapped in do { ... } while (0) so that the macro behaves as a single
statement: a bare if/else expansion stops compiling as soon as the macro is
used as the body of an if that has its own else. Always follow a use with a
semicolon.

CHAR_SIZE, STRLEN and PCRE_MAKETABLES are expressions and are left as such.
The pcre_config() and JIT stack names are mapped to the 8-bit functions without
a test. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) { PCHARS16(lv, p, offset, len, f); } \
    else { PCHARS8(lv, p, offset, len, f); } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) { PCHARSV16(p, offset, len, f); } \
    else { PCHARSV8(p, offset, len, f); } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) { READ_CAPTURE_NAME16(p, cn8, cn16, re); } \
    else { READ_CAPTURE_NAME8(p, cn8, cn16, re); } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) { SET_PCRE_CALLOUT16(callout); } \
    else { SET_PCRE_CALLOUT8(callout); } \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      { PCRE_COMPILE16(re, pat, options, error, erroffset, tables); } \
    else \
      { PCRE_COMPILE8(re, pat, options, error, erroffset, tables); } \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      { PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
          namesptr, cbuffer, size); } \
    else \
      { PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
          namesptr, cbuffer, size); } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      { PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); } \
    else \
      { PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      { PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets, workspace, size_workspace); } \
    else \
      { PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets, workspace, size_workspace); } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      { PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets); } \
    else \
      { PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets); } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) { PCRE_FREE_STUDY16(extra); } \
    else { PCRE_FREE_STUDY8(extra); } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) { PCRE_FREE_SUBSTRING16(substring); } \
    else { PCRE_FREE_SUBSTRING8(substring); } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) { PCRE_FREE_SUBSTRING_LIST16(listptr); } \
    else { PCRE_FREE_SUBSTRING_LIST8(listptr); } \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
          getnamesptr, subsptr); } \
    else \
      { PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
          getnamesptr, subsptr); } \
  } while (0)

/* The second argument is handed on unchanged to the mode-specific macros. */

#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  do { \
    if (use_pcre16) { PCRE_GET_STRINGNUMBER16(n, re, ptr); } \
    else { PCRE_GET_STRINGNUMBER8(n, re, ptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); } \
    else \
      { PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); } \
    else \
      { PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); } \
  } while (0)

#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE pcre_jit_stack_free

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); } \
    else \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) { PCRE_PRINTINT16(re, outfile, debug_lengths); } \
    else { PCRE_PRINTINT8(re, outfile, debug_lengths); } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) { PCRE_STUDY16(extra, re, options, error); } \
    else { PCRE_STUDY8(extra, re, options, error); } \
  } while (0)
496
497 /* ----- Only 8-bit mode is supported ----- */
498
499 #elif defined SUPPORT_PCRE8
/* Size of one character unit */

#define CHAR_SIZE                        1

/* Helpers local to this program */

#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8

/* Library functions that are named directly */

#define PCRE_ASSIGN_JIT_STACK            pcre_assign_jit_stack
#define PCRE_CONFIG                      pcre_config
#define PCRE_JIT_STACK_ALLOC             pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE              pcre_jit_stack_free
#define PCRE_MAKETABLES                  pcre_maketables()

/* Library functions reached through the 8-bit call macros */

#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8
526
527 /* ----- Only 16-bit mode is supported ----- */
528
529 #else
/* Size of one character unit */

#define CHAR_SIZE                        2

/* Helpers local to this program */

#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16

/* Library functions that are named directly */

#define PCRE_ASSIGN_JIT_STACK            pcre16_assign_jit_stack
#define PCRE_CONFIG                      pcre16_config
#define PCRE_JIT_STACK_ALLOC             pcre16_jit_stack_alloc
#define PCRE_JIT_STACK_FREE              pcre16_jit_stack_free
#define PCRE_MAKETABLES                  pcre16_maketables()

/* Library functions reached through the 16-bit call macros */

#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
556 #endif
557
558 /* ----- End of mode-specific function call macros ----- */
559
560
561 /* Other parameters */
562
/* Supply CLOCKS_PER_SEC when the headers have not: take it from CLK_TCK if
that exists, and otherwise fall back to 100. */

#if !defined CLOCKS_PER_SEC && defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined CLOCKS_PER_SEC
#define CLOCKS_PER_SEC 100
#endif

/* Default number of repetitions of the loop used for timing. */

#define LOOPREPEAT 500000
574
575 /* Static variables */
576
/* Every object here has static storage duration, so the zero and NULL
initializers only spell out the values C gives them anyway. */

static FILE *outfile = NULL;

static int callout_count = 0;
static int callout_extra = 0;
static int callout_fail_count = 0;
static int callout_fail_id = 0;
static int debug_lengths = 0;
static int first_callout = 0;
static int locale_set = 0;
static int log_store = 0;
static int show_malloc = 0;
static int use_utf = 0;

static size_t first_gotten_store = 0;
static size_t gotten_store = 0;

static const unsigned char *last_callout_mark = NULL;
591
592 /* The buffers grow automatically if very long input lines are encountered. */
593
594 static int buffer_size = 50000;
595 static pcre_uint8 *buffer = NULL;
596 static pcre_uint8 *dbuffer = NULL;
597 static pcre_uint8 *pbuffer = NULL;
598
599 /* Another buffer is needed for translation to 16-bit character strings. It
600 will be obtained and extended as required. */
601
#if defined SUPPORT_PCRE16

static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

#if defined SUPPORT_PCRE8

/* The table of operator lengths used for 16-bit compiling is needed here, in
order to swap bytes in a pattern for saving/reloading testing. The data is
defined as a macro, so it can be instantiated below, but LINK_SIZE has to be
adjusted for the 16-bit world first (2 becomes 1; 3 and 4 become 2), and
IMM2_SIZE is reset to 1 as well. As a safety check, COMPILE_PCRE16 must *not*
be set at this point. */

#if defined COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

#if LINK_SIZE != 2 && LINK_SIZE != 3 && LINK_SIZE != 4
#error LINK_SIZE must be either 2, 3, or 4
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#undef LINK_SIZE
#define LINK_SIZE 2
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif  /* SUPPORT_PCRE8 */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif  /* SUPPORT_PCRE16 */
635
/* Without 8-bit support there must be 16-bit support, so use_pcre16 starts at
1. With 8-bit support it starts at 0; if there is also 16-bit support, it can
be changed by an option. */

#ifndef SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
645
646 /* Textual explanations for runtime error codes */
647
/* The number in front of each entry is its index in the array. A NULL entry
has no text of its own; the note beside it says why. */

static const char *errtexts[] = {
  /*  0 */  NULL,    /* 0 is no error */
  /*  1 */  NULL,    /* NOMATCH is handled specially */
  /*  2 */  "NULL argument passed",
  /*  3 */  "bad option value",
  /*  4 */  "magic number missing",
  /*  5 */  "unknown opcode - pattern overwritten?",
  /*  6 */  "no more memory",
  /*  7 */  NULL,    /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */  "match limit exceeded",
  /*  9 */  "callout error code",
  /* 10 */  NULL,    /* BADUTF8/16 is handled specially */
  /* 11 */  NULL,    /* BADUTF8/16 offset is handled specially */
  /* 12 */  NULL,    /* PARTIAL is handled specially */
  /* 13 */  "not used - internal error",
  /* 14 */  "internal error - pattern overwritten?",
  /* 15 */  "bad count value",
  /* 16 */  "item unsupported for DFA matching",
  /* 17 */  "backreference condition or recursion test not supported for DFA matching",
  /* 18 */  "match limit not supported for DFA matching",
  /* 19 */  "workspace size exceeded in DFA matching",
  /* 20 */  "too much recursion for DFA matching",
  /* 21 */  "recursion limit exceeded",
  /* 22 */  "not used - internal error",
  /* 23 */  "invalid combination of newline options",
  /* 24 */  "bad offset value",
  /* 25 */  NULL,    /* SHORTUTF8/16 is handled specially */
  /* 26 */  "nested recursion at the same subject position",
  /* 27 */  "JIT stack limit reached",
  /* 28 */  "pattern compiled in wrong mode: 8-bit/16-bit error"
};
679
680
681 /*************************************************
682 * Alternate character tables *
683 *************************************************/
684
685 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
686 using the default tables of the library. However, the T option can be used to
687 select alternate sets of tables, for different kinds of testing. Note also that
688 the L (locale) option also adjusts the tables. */
689
690 /* This is the set of tables distributed as default with PCRE. It recognizes
691 only ASCII characters. */
692
/* Layout of tables0, counted from the entries below (1088 bytes in all):
     0 ..  255   lower casing table
   256 ..  511   case flipping table
   512 ..  831   ten class bit maps of 32 bytes each
   832 .. 1087   character type flags */
693 static const pcre_uint8 tables0[] = {
694
695 /* This table is a lower casing table. */
696
697 0, 1, 2, 3, 4, 5, 6, 7,
698 8, 9, 10, 11, 12, 13, 14, 15,
699 16, 17, 18, 19, 20, 21, 22, 23,
700 24, 25, 26, 27, 28, 29, 30, 31,
701 32, 33, 34, 35, 36, 37, 38, 39,
702 40, 41, 42, 43, 44, 45, 46, 47,
703 48, 49, 50, 51, 52, 53, 54, 55,
704 56, 57, 58, 59, 60, 61, 62, 63,
705 64, 97, 98, 99,100,101,102,103,
706 104,105,106,107,108,109,110,111,
707 112,113,114,115,116,117,118,119,
708 120,121,122, 91, 92, 93, 94, 95,
709 96, 97, 98, 99,100,101,102,103,
710 104,105,106,107,108,109,110,111,
711 112,113,114,115,116,117,118,119,
712 120,121,122,123,124,125,126,127,
713 128,129,130,131,132,133,134,135,
714 136,137,138,139,140,141,142,143,
715 144,145,146,147,148,149,150,151,
716 152,153,154,155,156,157,158,159,
717 160,161,162,163,164,165,166,167,
718 168,169,170,171,172,173,174,175,
719 176,177,178,179,180,181,182,183,
720 184,185,186,187,188,189,190,191,
721 192,193,194,195,196,197,198,199,
722 200,201,202,203,204,205,206,207,
723 208,209,210,211,212,213,214,215,
724 216,217,218,219,220,221,222,223,
725 224,225,226,227,228,229,230,231,
726 232,233,234,235,236,237,238,239,
727 240,241,242,243,244,245,246,247,
728 248,249,250,251,252,253,254,255,
729
730 /* This table is a case flipping table. */
731
732 0, 1, 2, 3, 4, 5, 6, 7,
733 8, 9, 10, 11, 12, 13, 14, 15,
734 16, 17, 18, 19, 20, 21, 22, 23,
735 24, 25, 26, 27, 28, 29, 30, 31,
736 32, 33, 34, 35, 36, 37, 38, 39,
737 40, 41, 42, 43, 44, 45, 46, 47,
738 48, 49, 50, 51, 52, 53, 54, 55,
739 56, 57, 58, 59, 60, 61, 62, 63,
740 64, 97, 98, 99,100,101,102,103,
741 104,105,106,107,108,109,110,111,
742 112,113,114,115,116,117,118,119,
743 120,121,122, 91, 92, 93, 94, 95,
744 96, 65, 66, 67, 68, 69, 70, 71,
745 72, 73, 74, 75, 76, 77, 78, 79,
746 80, 81, 82, 83, 84, 85, 86, 87,
747 88, 89, 90,123,124,125,126,127,
748 128,129,130,131,132,133,134,135,
749 136,137,138,139,140,141,142,143,
750 144,145,146,147,148,149,150,151,
751 152,153,154,155,156,157,158,159,
752 160,161,162,163,164,165,166,167,
753 168,169,170,171,172,173,174,175,
754 176,177,178,179,180,181,182,183,
755 184,185,186,187,188,189,190,191,
756 192,193,194,195,196,197,198,199,
757 200,201,202,203,204,205,206,207,
758 208,209,210,211,212,213,214,215,
759 216,217,218,219,220,221,222,223,
760 224,225,226,227,228,229,230,231,
761 232,233,234,235,236,237,238,239,
762 240,241,242,243,244,245,246,247,
763 248,249,250,251,252,253,254,255,
764
765 /* This table contains bit maps for various character classes. Each map is 32
766 bytes long and the bits run from the least significant end of each byte. The
767 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
768 graph, print, punct, and cntrl. Other classes are built from combinations. */
769
/* space: bits set for characters 9-13 and 32 */
770 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
771 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
772 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774
/* xdigit: bits set for 48-57 (0-9), 65-70 (A-F) and 97-102 (a-f) */
775 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
776 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779
/* digit: bits set for 48-57 (0-9) */
780 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
781 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784
/* upper: bits set for 65-90 (A-Z) */
785 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
787 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789
/* lower: bits set for 97-122 (a-z) */
790 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
792 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794
/* word: bits set for 48-57, 65-90, 95 (underscore) and 97-122 */
795 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
796 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
797 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799
/* graph: bits set for 33-126 */
800 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
801 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
802 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804
/* print: bits set for 32-126 */
805 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
806 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
807 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809
/* punct: bits set for 33-47, 58-64, 91-96 and 123-126 */
810 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
811 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
812 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814
/* cntrl: bits set for 0-31 and 127 */
815 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
816 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819
820 /* This table identifies various classes of character by individual bits:
821 0x01 white space character
822 0x02 letter
823 0x04 decimal digit
824 0x08 hexadecimal digit
825 0x10 alphanumeric or '_'
826 0x80 regular expression metacharacter or binary zero
827 */
828
829 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
830 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
831 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
832 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
833 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
834 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
835 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
836 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
837 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
838 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
839 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
840 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
841 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
842 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
843 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
844 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
857 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
858 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
861
862 /* This is a set of tables that came originally from a Windows user. It seems to
863 be at least an approximation of ISO 8859. In particular, there are characters
864 greater than 128 that are marked as spaces, letters, etc. */
865
/* Layout of tables1, as visible from the data below (1088 bytes in all):

  entries    0 -  255   case-lowering map: entry i is i, except that 65-90
                        become 97-122 and 192-214, 216-222 become 224-246,
                        248-254
  entries  256 -  511   case-flipping map: the ranges above are exchanged in
                        both directions; 215, 223, 247 and 255 map to
                        themselves
  entries  512 -  831   320 bytes of bit patterns
                        NOTE(review): presumably the 32-byte character-class
                        bit maps - confirm against the table builder
  entries  832 - 1087   one flag byte per character; the values look like the
                        same flag bits that are documented for the final
                        section of the preceding table (e.g. 28 == 0x1c for
                        the digits) - TODO confirm
*/

866 static const pcre_uint8 tables1[] = {
/* Case-lowering map (256 entries) */
867 0,1,2,3,4,5,6,7,
868 8,9,10,11,12,13,14,15,
869 16,17,18,19,20,21,22,23,
870 24,25,26,27,28,29,30,31,
871 32,33,34,35,36,37,38,39,
872 40,41,42,43,44,45,46,47,
873 48,49,50,51,52,53,54,55,
874 56,57,58,59,60,61,62,63,
875 64,97,98,99,100,101,102,103,
876 104,105,106,107,108,109,110,111,
877 112,113,114,115,116,117,118,119,
878 120,121,122,91,92,93,94,95,
879 96,97,98,99,100,101,102,103,
880 104,105,106,107,108,109,110,111,
881 112,113,114,115,116,117,118,119,
882 120,121,122,123,124,125,126,127,
883 128,129,130,131,132,133,134,135,
884 136,137,138,139,140,141,142,143,
885 144,145,146,147,148,149,150,151,
886 152,153,154,155,156,157,158,159,
887 160,161,162,163,164,165,166,167,
888 168,169,170,171,172,173,174,175,
889 176,177,178,179,180,181,182,183,
890 184,185,186,187,188,189,190,191,
891 224,225,226,227,228,229,230,231,
892 232,233,234,235,236,237,238,239,
893 240,241,242,243,244,245,246,215,
894 248,249,250,251,252,253,254,223,
895 224,225,226,227,228,229,230,231,
896 232,233,234,235,236,237,238,239,
897 240,241,242,243,244,245,246,247,
898 248,249,250,251,252,253,254,255,
/* Case-flipping map (256 entries) */
899 0,1,2,3,4,5,6,7,
900 8,9,10,11,12,13,14,15,
901 16,17,18,19,20,21,22,23,
902 24,25,26,27,28,29,30,31,
903 32,33,34,35,36,37,38,39,
904 40,41,42,43,44,45,46,47,
905 48,49,50,51,52,53,54,55,
906 56,57,58,59,60,61,62,63,
907 64,97,98,99,100,101,102,103,
908 104,105,106,107,108,109,110,111,
909 112,113,114,115,116,117,118,119,
910 120,121,122,91,92,93,94,95,
911 96,65,66,67,68,69,70,71,
912 72,73,74,75,76,77,78,79,
913 80,81,82,83,84,85,86,87,
914 88,89,90,123,124,125,126,127,
915 128,129,130,131,132,133,134,135,
916 136,137,138,139,140,141,142,143,
917 144,145,146,147,148,149,150,151,
918 152,153,154,155,156,157,158,159,
919 160,161,162,163,164,165,166,167,
920 168,169,170,171,172,173,174,175,
921 176,177,178,179,180,181,182,183,
922 184,185,186,187,188,189,190,191,
923 224,225,226,227,228,229,230,231,
924 232,233,234,235,236,237,238,239,
925 240,241,242,243,244,245,246,215,
926 248,249,250,251,252,253,254,223,
927 192,193,194,195,196,197,198,199,
928 200,201,202,203,204,205,206,207,
929 208,209,210,211,212,213,214,247,
930 216,217,218,219,220,221,222,255,
/* 320 bytes of bit patterns (presumably class bit maps - see note above) */
931 0,62,0,0,1,0,0,0,
932 0,0,0,0,0,0,0,0,
933 32,0,0,0,1,0,0,0,
934 0,0,0,0,0,0,0,0,
935 0,0,0,0,0,0,255,3,
936 126,0,0,0,126,0,0,0,
937 0,0,0,0,0,0,0,0,
938 0,0,0,0,0,0,0,0,
939 0,0,0,0,0,0,255,3,
940 0,0,0,0,0,0,0,0,
941 0,0,0,0,0,0,12,2,
942 0,0,0,0,0,0,0,0,
943 0,0,0,0,0,0,0,0,
944 254,255,255,7,0,0,0,0,
945 0,0,0,0,0,0,0,0,
946 255,255,127,127,0,0,0,0,
947 0,0,0,0,0,0,0,0,
948 0,0,0,0,254,255,255,7,
949 0,0,0,0,0,4,32,4,
950 0,0,0,128,255,255,127,255,
951 0,0,0,0,0,0,255,3,
952 254,255,255,135,254,255,255,7,
953 0,0,0,0,0,4,44,6,
954 255,255,127,255,255,255,127,255,
955 0,0,0,0,254,255,255,255,
956 255,255,255,255,255,255,255,127,
957 0,0,0,0,254,255,255,255,
958 255,255,255,255,255,255,255,255,
959 0,2,0,0,255,255,255,255,
960 255,255,255,255,255,255,255,127,
961 0,0,0,0,255,255,255,255,
962 255,255,255,255,255,255,255,255,
963 0,0,0,0,254,255,0,252,
964 1,0,0,248,1,0,0,120,
965 0,0,0,0,254,255,255,255,
966 0,0,128,0,0,0,128,0,
967 255,255,255,255,0,0,0,0,
968 0,0,0,0,0,0,0,128,
969 255,255,255,255,0,0,0,0,
970 0,0,0,0,0,0,0,0,
/* One flag byte per character (256 entries) */
971 128,0,0,0,0,0,0,0,
972 0,1,1,0,1,1,0,0,
973 0,0,0,0,0,0,0,0,
974 0,0,0,0,0,0,0,0,
975 1,0,0,0,128,0,0,0,
976 128,128,128,128,0,0,128,0,
977 28,28,28,28,28,28,28,28,
978 28,28,0,0,0,0,0,128,
979 0,26,26,26,26,26,26,18,
980 18,18,18,18,18,18,18,18,
981 18,18,18,18,18,18,18,18,
982 18,18,18,128,128,0,128,16,
983 0,26,26,26,26,26,26,18,
984 18,18,18,18,18,18,18,18,
985 18,18,18,18,18,18,18,18,
986 18,18,18,128,128,0,0,0,
987 0,0,0,0,0,1,0,0,
988 0,0,0,0,0,0,0,0,
989 0,0,0,0,0,0,0,0,
990 0,0,0,0,0,0,0,0,
991 1,0,0,0,0,0,0,0,
992 0,0,18,0,0,0,0,0,
993 0,0,20,20,0,18,0,0,
994 0,20,18,0,0,0,0,0,
995 18,18,18,18,18,18,18,18,
996 18,18,18,18,18,18,18,18,
997 18,18,18,18,18,18,18,0,
998 18,18,18,18,18,18,18,18,
999 18,18,18,18,18,18,18,18,
1000 18,18,18,18,18,18,18,18,
1001 18,18,18,18,18,18,18,0,
1002 18,18,18,18,18,18,18,18
1003 };
1004
1005
1006
1007
1008 #ifndef HAVE_STRERROR
/*************************************************
*   Provide strerror() for non-ANSI libraries    *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is
one such system). The message text is taken from the traditional
sys_errlist[] table, whose number of entries is given by sys_nerr.

Argument:
  n           an error number

Returns:      the sys_errlist[] entry for n, or a fixed "unknown" string when
              n does not index the table
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
1026 #endif /* HAVE_STRERROR */
1027
1028
1029 /*************************************************
1030 * JIT memory callback *
1031 *************************************************/
1032
1033 static pcre_jit_stack* jit_callback(void *arg)
1034 {
1035 return (pcre_jit_stack *)arg;
1036 }
1037
1038
1039 #if !defined NOUTF || defined SUPPORT_PCRE16
1040 /*************************************************
1041 * Convert UTF-8 string to value *
1042 *************************************************/
1043
1044 /* This function takes one or more bytes that represents a UTF-8 character,
1045 and returns the value of the character.
1046
1047 Argument:
1048 utf8bytes a pointer to the byte vector
1049 vptr a pointer to an int to receive the value
1050
1051 Returns: > 0 => the number of bytes consumed
1052 -6 to 0 => malformed UTF-8 character at offset = (-return)
1053 */
1054
1055 static int
1056 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1057 {
1058 int c = *utf8bytes++;
1059 int d = c;
1060 int i, j, s;
1061
1062 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1063 {
1064 if ((d & 0x80) == 0) break;
1065 d <<= 1;
1066 }
1067
1068 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1069 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1070
1071 /* i now has a value in the range 1-5 */
1072
1073 s = 6*i;
1074 d = (c & utf8_table3[i]) << s;
1075
1076 for (j = 0; j < i; j++)
1077 {
1078 c = *utf8bytes++;
1079 if ((c & 0xc0) != 0x80) return -(j+1);
1080 s -= 6;
1081 d |= (c & 0x3f) << s;
1082 }
1083
1084 /* Check that encoding was the correct unique one */
1085
1086 for (j = 0; j < utf8_table1_size; j++)
1087 if (d <= utf8_table1[j]) break;
1088 if (j != i) return -(i+1);
1089
1090 /* Valid value */
1091
1092 *vptr = d;
1093 return i+1;
1094 }
1095 #endif /* NOUTF || SUPPORT_PCRE16 */
1096
1097
1098
1099 #if !defined NOUTF || defined SUPPORT_PCRE16
1100 /*************************************************
1101 * Convert character value to UTF-8 *
1102 *************************************************/
1103
1104 /* This function takes an integer value in the range 0 - 0x7fffffff
1105 and encodes it as a UTF-8 character in 0 to 6 bytes.
1106
1107 Arguments:
1108 cvalue the character value
1109 utf8bytes pointer to buffer for result - at least 6 bytes long
1110
1111 Returns: number of characters placed in the buffer
1112 */
1113
1114 static int
1115 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1116 {
1117 register int i, j;
1118 for (i = 0; i < utf8_table1_size; i++)
1119 if (cvalue <= utf8_table1[i]) break;
1120 utf8bytes += i;
1121 for (j = i; j > 0; j--)
1122 {
1123 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1124 cvalue >>= 6;
1125 }
1126 *utf8bytes = utf8_table2[i] | cvalue;
1127 return i + 1;
1128 }
1129 #endif
1130
1131
1132 #ifdef SUPPORT_PCRE16
1133 /*************************************************
1134 * Convert a string to 16-bit *
1135 *************************************************/
1136
1137 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1138 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1139 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1140 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1141 result is always left in buffer16.
1142
1143 Note that this function does not object to surrogate values. This is
1144 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1145 for the purpose of testing that they are correctly faulted.
1146
1147 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1148 in UTF-8 so that values greater than 255 can be handled.
1149
1150 Arguments:
1151 data TRUE if converting a data line; FALSE for a regex
1152 p points to a byte string
1153 utf true if UTF-8 (to be converted to UTF-16)
1154 len number of bytes in the string (excluding trailing zero)
1155
1156 Returns: number of 16-bit data items used (excluding trailing zero)
1157 OR -1 if a UTF-8 string is malformed
1158 OR -2 if a value > 0x10ffff is encountered
1159 OR -3 if a value > 0xffff is encountered when not in UTF mode
1160 */
1161
1162 static int
1163 to16(int data, pcre_uint8 *p, int utf, int len)
1164 {
1165 pcre_uint16 *pp;
1166
1167 if (buffer16_size < 2*len + 2)
1168 {
1169 if (buffer16 != NULL) free(buffer16);
1170 buffer16_size = 2*len + 2;
1171 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1172 if (buffer16 == NULL)
1173 {
1174 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1175 exit(1);
1176 }
1177 }
1178
1179 pp = buffer16;
1180
1181 if (!utf && !data)
1182 {
1183 while (len-- > 0) *pp++ = *p++;
1184 }
1185
1186 else
1187 {
1188 int c = 0;
1189 while (len > 0)
1190 {
1191 int chlen = utf82ord(p, &c);
1192 if (chlen <= 0) return -1;
1193 if (c > 0x10ffff) return -2;
1194 p += chlen;
1195 len -= chlen;
1196 if (c < 0x10000) *pp++ = c; else
1197 {
1198 if (!utf) return -3;
1199 c -= 0x10000;
1200 *pp++ = 0xD800 | (c >> 10);
1201 *pp++ = 0xDC00 | (c & 0x3ff);
1202 }
1203 }
1204 }
1205
1206 *pp = 0;
1207 return pp - buffer16;
1208 }
1209 #endif
1210
1211
1212 /*************************************************
1213 * Read or extend an input line *
1214 *************************************************/
1215
1216 /* Input lines are read into buffer, but both patterns and data lines can be
1217 continued over multiple input lines. In addition, if the buffer fills up, we
1218 want to automatically expand it so as to be able to handle extremely large
1219 lines that are needed for certain stress tests. When the input buffer is
1220 expanded, the other two buffers must also be expanded likewise, and the
1221 contents of pbuffer, which are a copy of the input for callouts, must be
1222 preserved (for when expansion happens for a data line). This is not the most
1223 optimal way of handling this, but hey, this is just a test program!
1224
1225 Arguments:
1226 f the file to read
1227 start where in buffer to start (this *must* be within buffer)
1228 prompt for stdin or readline()
1229
1230 Returns: pointer to the start of new data
1231 could be a copy of start, or could be moved
1232 NULL if no data read and EOF reached
1233 */
1234
1235 static pcre_uint8 *
1236 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1237 {
1238 pcre_uint8 *here = start;
1239
1240 for (;;)
1241 {
1242 int rlen = (int)(buffer_size - (here - buffer));
1243
1244 if (rlen > 1000)
1245 {
1246 int dlen;
1247
1248 /* If libreadline support is required, use readline() to read a line if the
1249 input is a terminal. Note that readline() removes the trailing newline, so
1250 we must put it back again, to be compatible with fgets(). */
1251
1252 #ifdef SUPPORT_LIBREADLINE
1253 if (isatty(fileno(f)))
1254 {
1255 size_t len;
1256 char *s = readline(prompt);
1257 if (s == NULL) return (here == start)? NULL : start;
1258 len = strlen(s);
1259 if (len > 0) add_history(s);
1260 if (len > rlen - 1) len = rlen - 1;
1261 memcpy(here, s, len);
1262 here[len] = '\n';
1263 here[len+1] = 0;
1264 free(s);
1265 }
1266 else
1267 #endif
1268
1269 /* Read the next line by normal means, prompting if the file is stdin. */
1270
1271 {
1272 if (f == stdin) printf("%s", prompt);
1273 if (fgets((char *)here, rlen, f) == NULL)
1274 return (here == start)? NULL : start;
1275 }
1276
1277 dlen = (int)strlen((char *)here);
1278 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1279 here += dlen;
1280 }
1281
1282 else
1283 {
1284 int new_buffer_size = 2*buffer_size;
1285 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1286 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1287 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1288
1289 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1290 {
1291 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1292 exit(1);
1293 }
1294
1295 memcpy(new_buffer, buffer, buffer_size);
1296 memcpy(new_pbuffer, pbuffer, buffer_size);
1297
1298 buffer_size = new_buffer_size;
1299
1300 start = new_buffer + (start - buffer);
1301 here = new_buffer + (here - buffer);
1302
1303 free(buffer);
1304 free(dbuffer);
1305 free(pbuffer);
1306
1307 buffer = new_buffer;
1308 dbuffer = new_dbuffer;
1309 pbuffer = new_pbuffer;
1310 }
1311 }
1312
1313 return NULL; /* Control never gets here */
1314 }
1315
1316
1317
1318 /*************************************************
1319 * Read number from string *
1320 *************************************************/
1321
1322 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1323 around with conditional compilation, just do the job by hand. It is only used
1324 for unpicking arguments, so just keep it simple.
1325
1326 Arguments:
1327 str string to be converted
1328 endptr where to put the end pointer
1329
1330 Returns: the unsigned long
1331 */
1332
1333 static int
1334 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1335 {
1336 int result = 0;
1337 while(*str != 0 && isspace(*str)) str++;
1338 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1339 *endptr = str;
1340 return(result);
1341 }
1342
1343
1344
1345 /*************************************************
1346 * Print one character *
1347 *************************************************/
1348
1349 /* Print a single character either literally, or as a hex escape. */
1350
1351 static int pchar(int c, FILE *f)
1352 {
1353 if (PRINTOK(c))
1354 {
1355 if (f != NULL) fprintf(f, "%c", c);
1356 return 1;
1357 }
1358
1359 if (c < 0x100)
1360 {
1361 if (use_utf)
1362 {
1363 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1364 return 6;
1365 }
1366 else
1367 {
1368 if (f != NULL) fprintf(f, "\\x%02x", c);
1369 return 4;
1370 }
1371 }
1372
1373 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1374 return (c <= 0x000000ff)? 6 :
1375 (c <= 0x00000fff)? 7 :
1376 (c <= 0x0000ffff)? 8 :
1377 (c <= 0x000fffff)? 9 : 10;
1378 }
1379
1380
1381
1382 #ifdef SUPPORT_PCRE8
1383 /*************************************************
1384 * Print 8-bit character string *
1385 *************************************************/
1386
1387 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1388 If handed a NULL file, just counts chars without printing. */
1389
1390 static int pchars(pcre_uint8 *p, int length, FILE *f)
1391 {
1392 int c = 0;
1393 int yield = 0;
1394
1395 if (length < 0)
1396 length = strlen((char *)p);
1397
1398 while (length-- > 0)
1399 {
1400 #if !defined NOUTF
1401 if (use_utf)
1402 {
1403 int rc = utf82ord(p, &c);
1404 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1405 {
1406 length -= rc - 1;
1407 p += rc;
1408 yield += pchar(c, f);
1409 continue;
1410 }
1411 }
1412 #endif
1413 c = *p++;
1414 yield += pchar(c, f);
1415 }
1416
1417 return yield;
1418 }
1419 #endif
1420
1421
1422
1423 #ifdef SUPPORT_PCRE16
1424 /*************************************************
1425 * Find length of 0-terminated 16-bit string *
1426 *************************************************/
1427
1428 static int strlen16(PCRE_SPTR16 p)
1429 {
1430 int len = 0;
1431 while (*p++ != 0) len++;
1432 return len;
1433 }
1434 #endif /* SUPPORT_PCRE16 */
1435
1436
1437 #ifdef SUPPORT_PCRE16
1438 /*************************************************
1439 * Print 16-bit character string *
1440 *************************************************/
1441
1442 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1443 If handed a NULL file, just counts chars without printing. */
1444
1445 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1446 {
1447 int yield = 0;
1448
1449 if (length < 0)
1450 length = strlen16(p);
1451
1452 while (length-- > 0)
1453 {
1454 int c = *p++ & 0xffff;
1455 #if !defined NOUTF
1456 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1457 {
1458 int d = *p & 0xffff;
1459 if (d >= 0xDC00 && d < 0xDFFF)
1460 {
1461 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1462 length--;
1463 p++;
1464 }
1465 }
1466 #endif
1467 yield += pchar(c, f);
1468 }
1469
1470 return yield;
1471 }
1472 #endif /* SUPPORT_PCRE16 */
1473
1474
1475
1476 #ifdef SUPPORT_PCRE8
1477 /*************************************************
1478 * Read a capture name (8-bit) and check it *
1479 *************************************************/
1480
1481 static pcre_uint8 *
1482 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1483 {
1484 pcre_uint8 *npp = *pp;
1485 while (isalnum(*p)) *npp++ = *p++;
1486 *npp++ = 0;
1487 *npp = 0;
1488 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1489 {
1490 fprintf(outfile, "no parentheses with name \"");
1491 PCHARSV(*pp, 0, -1, outfile);
1492 fprintf(outfile, "\"\n");
1493 }
1494
1495 *pp = npp;
1496 return p;
1497 }
1498 #endif /* SUPPORT_PCRE8 */
1499
1500
1501
1502 #ifdef SUPPORT_PCRE16
1503 /*************************************************
1504 * Read a capture name (16-bit) and check it *
1505 *************************************************/
1506
1507 /* Note that the text being read is 8-bit. */
1508
1509 static pcre_uint8 *
1510 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1511 {
1512 pcre_uint16 *npp = *pp;
1513 while (isalnum(*p)) *npp++ = *p++;
1514 *npp++ = 0;
1515 *npp = 0;
1516 if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1517 {
1518 fprintf(outfile, "no parentheses with name \"");
1519 PCHARSV(*pp, 0, -1, outfile);
1520 fprintf(outfile, "\"\n");
1521 }
1522 *pp = npp;
1523 return p;
1524 }
1525 #endif /* SUPPORT_PCRE16 */
1526
1527
1528
1529 /*************************************************
1530 * Callout function *
1531 *************************************************/
1532
1533 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1534 the match. Yield zero unless more callouts than the fail count, or the callout
1535 data is not zero. */
1536
1537 static int callout(pcre_callout_block *cb)
1538 {
1539 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1540 int i, pre_start, post_start, subject_length;
1541
1542 if (callout_extra)
1543 {
1544 fprintf(f, "Callout %d: last capture = %d\n",
1545 cb->callout_number, cb->capture_last);
1546
1547 for (i = 0; i < cb->capture_top * 2; i += 2)
1548 {
1549 if (cb->offset_vector[i] < 0)
1550 fprintf(f, "%2d: <unset>\n", i/2);
1551 else
1552 {
1553 fprintf(f, "%2d: ", i/2);
1554 PCHARSV(cb->subject, cb->offset_vector[i],
1555 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1556 fprintf(f, "\n");
1557 }
1558 }
1559 }
1560
1561 /* Re-print the subject in canonical form, the first time or if giving full
1562 datails. On subsequent calls in the same match, we use pchars just to find the
1563 printed lengths of the substrings. */
1564
1565 if (f != NULL) fprintf(f, "--->");
1566
1567 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1568 PCHARS(post_start, cb->subject, cb->start_match,
1569 cb->current_position - cb->start_match, f);
1570
1571 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1572
1573 PCHARSV(cb->subject, cb->current_position,
1574 cb->subject_length - cb->current_position, f);
1575
1576 if (f != NULL) fprintf(f, "\n");
1577
1578 /* Always print appropriate indicators, with callout number if not already
1579 shown. For automatic callouts, show the pattern offset. */
1580
1581 if (cb->callout_number == 255)
1582 {
1583 fprintf(outfile, "%+3d ", cb->pattern_position);
1584 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1585 }
1586 else
1587 {
1588 if (callout_extra) fprintf(outfile, " ");
1589 else fprintf(outfile, "%3d ", cb->callout_number);
1590 }
1591
1592 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1593 fprintf(outfile, "^");
1594
1595 if (post_start > 0)
1596 {
1597 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1598 fprintf(outfile, "^");
1599 }
1600
1601 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1602 fprintf(outfile, " ");
1603
1604 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1605 pbuffer + cb->pattern_position);
1606
1607 fprintf(outfile, "\n");
1608 first_callout = 0;
1609
1610 if (cb->mark != last_callout_mark)
1611 {
1612 if (cb->mark == NULL)
1613 fprintf(outfile, "Latest Mark: <unset>\n");
1614 else
1615 {
1616 fprintf(outfile, "Latest Mark: ");
1617 PCHARSV(cb->mark, 0, -1, outfile);
1618 putc('\n', outfile);
1619 }
1620 last_callout_mark = cb->mark;
1621 }
1622
1623 if (cb->callout_data != NULL)
1624 {
1625 int callout_data = *((int *)(cb->callout_data));
1626 if (callout_data != 0)
1627 {
1628 fprintf(outfile, "Callout data = %d\n", callout_data);
1629 return callout_data;
1630 }
1631 }
1632
1633 return (cb->callout_number != callout_fail_id)? 0 :
1634 (++callout_count >= callout_fail_count)? 1 : 0;
1635 }
1636
1637
1638 /*************************************************
1639 * Local malloc functions *
1640 *************************************************/
1641
1642 /* Alternative malloc function, to test functionality and save the size of a
1643 compiled re, which is the first store request that pcre_compile() makes. The
1644 show_malloc variable is set only during matching. */
1645
1646 static void *new_malloc(size_t size)
1647 {
1648 void *block = malloc(size);
1649 gotten_store = size;
1650 if (first_gotten_store == 0) first_gotten_store = size;
1651 if (show_malloc)
1652 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1653 return block;
1654 }
1655
1656 static void new_free(void *block)
1657 {
1658 if (show_malloc)
1659 fprintf(outfile, "free %p\n", block);
1660 free(block);
1661 }
1662
1663 /* For recursion malloc/free, to test stacking calls */
1664
1665 static void *stack_malloc(size_t size)
1666 {
1667 void *block = malloc(size);
1668 if (show_malloc)
1669 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1670 return block;
1671 }
1672
1673 static void stack_free(void *block)
1674 {
1675 if (show_malloc)
1676 fprintf(outfile, "stack_free %p\n", block);
1677 free(block);
1678 }
1679
1680
1681 /*************************************************
1682 * Call pcre_fullinfo() *
1683 *************************************************/
1684
1685 /* Get one piece of information from the pcre_fullinfo() function. When only
1686 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1687 value, but the code is defensive.
1688
1689 Arguments:
1690 re compiled regex
1691 study study data
1692 option PCRE_INFO_xxx option
1693 ptr where to put the data
1694
1695 Returns: 0 when OK, < 0 on error
1696 */
1697
1698 static int
1699 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1700 {
1701 int rc;
1702
1703 if (use_pcre16)
1704 #ifdef SUPPORT_PCRE16
1705 rc = pcre16_fullinfo(re, study, option, ptr);
1706 #else
1707 rc = PCRE_ERROR_BADMODE;
1708 #endif
1709 else
1710 #ifdef SUPPORT_PCRE8
1711 rc = pcre_fullinfo(re, study, option, ptr);
1712 #else
1713 rc = PCRE_ERROR_BADMODE;
1714 #endif
1715
1716 if (rc < 0)
1717 {
1718 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1719 use_pcre16? "16" : "", option);
1720 if (rc == PCRE_ERROR_BADMODE)
1721 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1722 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1723 }
1724
1725 return rc;
1726 }
1727
1728
1729
1730 /*************************************************
1731 * Swap byte functions *
1732 *************************************************/
1733
1734 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1735 value, respectively.
1736
1737 Arguments:
1738 value any number
1739
1740 Returns: the byte swapped value
1741 */
1742
1743 static pcre_uint32
1744 swap_uint32(pcre_uint32 value)
1745 {
1746 return ((value & 0x000000ff) << 24) |
1747 ((value & 0x0000ff00) << 8) |
1748 ((value & 0x00ff0000) >> 8) |
1749 (value >> 24);
1750 }
1751
1752 static pcre_uint16
1753 swap_uint16(pcre_uint16 value)
1754 {
1755 return (value >> 8) | (value << 8);
1756 }
1757
1758
1759
1760 /*************************************************
1761 * Flip bytes in a compiled pattern *
1762 *************************************************/
1763
1764 /* This function is called if the 'F' option was present on a pattern that is
1765 to be written to a file. We flip the bytes of all the integer fields in the
1766 regex data block and the study block. In 16-bit mode this also flips relevant
1767 bytes in the pattern itself. This is to make it possible to test PCRE's
1768 ability to reload byte-flipped patterns, e.g. those compiled on a different
1769 architecture. */
1770
1771 static void
1772 regexflip(pcre *ere, pcre_extra *extra)
1773 {
1774 real_pcre *re = (real_pcre *)ere;
1775 #ifdef SUPPORT_PCRE16
1776 int op;
1777 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1778 int length = re->name_count * re->name_entry_size;
1779 #ifdef SUPPORT_UTF
1780 BOOL utf = (re->options & PCRE_UTF16) != 0;
1781 BOOL utf16_char = FALSE;
1782 #endif /* SUPPORT_UTF */
1783 #endif /* SUPPORT_PCRE16 */
1784
1785 /* Always flip the bytes in the main data block and study blocks. */
1786
1787 re->magic_number = REVERSED_MAGIC_NUMBER;
1788 re->size = swap_uint32(re->size);
1789 re->options = swap_uint32(re->options);
1790 re->flags = swap_uint16(re->flags);
1791 re->top_bracket = swap_uint16(re->top_bracket);
1792 re->top_backref = swap_uint16(re->top_backref);
1793 re->first_char = swap_uint16(re->first_char);
1794 re->req_char = swap_uint16(re->req_char);
1795 re->name_table_offset = swap_uint16(re->name_table_offset);
1796 re->name_entry_size = swap_uint16(re->name_entry_size);
1797 re->name_count = swap_uint16(re->name_count);
1798
1799 if (extra != NULL)
1800 {
1801 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1802 rsd->size = swap_uint32(rsd->size);
1803 rsd->flags = swap_uint32(rsd->flags);
1804 rsd->minlength = swap_uint32(rsd->minlength);
1805 }
1806
1807 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1808 in the name table, if present, and then in the pattern itself. */
1809
1810 #ifdef SUPPORT_PCRE16
1811 if (!use_pcre16) return;
1812
1813 while(TRUE)
1814 {
1815 /* Swap previous characters. */
1816 while (length-- > 0)
1817 {
1818 *ptr = swap_uint16(*ptr);
1819 ptr++;
1820 }
1821 #ifdef SUPPORT_UTF
1822 if (utf16_char)
1823 {
1824 if ((ptr[-1] & 0xfc00) == 0xd800)
1825 {
1826 /* We know that there is only one extra character in UTF-16. */
1827 *ptr = swap_uint16(*ptr);
1828 ptr++;
1829 }
1830 }
1831 utf16_char = FALSE;
1832 #endif /* SUPPORT_UTF */
1833
1834 /* Get next opcode. */
1835
1836 length = 0;
1837 op = *ptr;
1838 *ptr++ = swap_uint16(op);
1839
1840 switch (op)
1841 {
1842 case OP_END:
1843 return;
1844
1845 #ifdef SUPPORT_UTF
1846 case OP_CHAR:
1847 case OP_CHARI:
1848 case OP_NOT:
1849 case OP_NOTI:
1850 case OP_STAR:
1851 case OP_MINSTAR:
1852 case OP_PLUS:
1853 case OP_MINPLUS:
1854 case OP_QUERY:
1855 case OP_MINQUERY:
1856 case OP_UPTO:
1857 case OP_MINUPTO:
1858 case OP_EXACT:
1859 case OP_POSSTAR:
1860 case OP_POSPLUS:
1861 case OP_POSQUERY:
1862 case OP_POSUPTO:
1863 case OP_STARI:
1864 case OP_MINSTARI:
1865 case OP_PLUSI:
1866 case OP_MINPLUSI:
1867 case OP_QUERYI:
1868 case OP_MINQUERYI:
1869 case OP_UPTOI:
1870 case OP_MINUPTOI:
1871 case OP_EXACTI:
1872 case OP_POSSTARI:
1873 case OP_POSPLUSI:
1874 case OP_POSQUERYI:
1875 case OP_POSUPTOI:
1876 case OP_NOTSTAR:
1877 case OP_NOTMINSTAR:
1878 case OP_NOTPLUS:
1879 case OP_NOTMINPLUS:
1880 case OP_NOTQUERY:
1881 case OP_NOTMINQUERY:
1882 case OP_NOTUPTO:
1883 case OP_NOTMINUPTO:
1884 case OP_NOTEXACT:
1885 case OP_NOTPOSSTAR:
1886 case OP_NOTPOSPLUS:
1887 case OP_NOTPOSQUERY:
1888 case OP_NOTPOSUPTO:
1889 case OP_NOTSTARI:
1890 case OP_NOTMINSTARI:
1891 case OP_NOTPLUSI:
1892 case OP_NOTMINPLUSI:
1893 case OP_NOTQUERYI:
1894 case OP_NOTMINQUERYI:
1895 case OP_NOTUPTOI:
1896 case OP_NOTMINUPTOI:
1897 case OP_NOTEXACTI:
1898 case OP_NOTPOSSTARI:
1899 case OP_NOTPOSPLUSI:
1900 case OP_NOTPOSQUERYI:
1901 case OP_NOTPOSUPTOI:
1902 if (utf) utf16_char = TRUE;
1903 #endif
1904 /* Fall through. */
1905
1906 default:
1907 length = OP_lengths16[op] - 1;
1908 break;
1909
1910 case OP_CLASS:
1911 case OP_NCLASS:
1912 /* Skip the character bit map. */
1913 ptr += 32/sizeof(pcre_uint16);
1914 length = 0;
1915 break;
1916
1917 case OP_XCLASS:
1918 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1919 if (LINK_SIZE > 1)
1920 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1921 - (1 + LINK_SIZE + 1));
1922 else
1923 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1924
1925 /* Reverse the size of the XCLASS instance. */
1926 *ptr = swap_uint16(*ptr);
1927 ptr++;
1928 if (LINK_SIZE > 1)
1929 {
1930 *ptr = swap_uint16(*ptr);
1931 ptr++;
1932 }
1933
1934 op = *ptr;
1935 *ptr = swap_uint16(op);
1936 ptr++;
1937 if ((op & XCL_MAP) != 0)
1938 {
1939 /* Skip the character bit map. */
1940 ptr += 32/sizeof(pcre_uint16);
1941 length -= 32/sizeof(pcre_uint16);
1942 }
1943 break;
1944 }
1945 }
1946 /* Control should never reach here in 16 bit mode. */
1947 #endif /* SUPPORT_PCRE16 */
1948 }
1949
1950
1951
1952 /*************************************************
1953 * Check match or recursion limit *
1954 *************************************************/
1955
1956 static int
1957 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1958 int start_offset, int options, int *use_offsets, int use_size_offsets,
1959 int flag, unsigned long int *limit, int errnumber, const char *msg)
1960 {
1961 int count;
1962 int min = 0;
1963 int mid = 64;
1964 int max = -1;
1965
1966 extra->flags |= flag;
1967
1968 for (;;)
1969 {
1970 *limit = mid;
1971
1972 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1973 use_offsets, use_size_offsets);
1974
1975 if (count == errnumber)
1976 {
1977 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1978 min = mid;
1979 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1980 }
1981
1982 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1983 count == PCRE_ERROR_PARTIAL)
1984 {
1985 if (mid == min + 1)
1986 {
1987 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1988 break;
1989 }
1990 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1991 max = mid;
1992 mid = (min + mid)/2;
1993 }
1994 else break; /* Some other error */
1995 }
1996
1997 extra->flags &= ~flag;
1998 return count;
1999 }
2000
2001
2002
2003 /*************************************************
2004 * Case-independent strncmp() function *
2005 *************************************************/
2006
2007 /*
2008 Arguments:
2009 s first string
2010 t second string
2011 n number of characters to compare
2012
2013 Returns: < 0, = 0, or > 0, according to the comparison
2014 */
2015
2016 static int
2017 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2018 {
2019 while (n--)
2020 {
2021 int c = tolower(*s++) - tolower(*t++);
2022 if (c) return c;
2023 }
2024 return 0;
2025 }
2026
2027
2028
2029 /*************************************************
2030 * Check newline indicator *
2031 *************************************************/
2032
2033 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2034 a message and return 0 if there is no match.
2035
2036 Arguments:
2037 p points after the leading '<'
2038 f file for error message
2039
2040 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2041 */
2042
2043 static int
2044 check_newline(pcre_uint8 *p, FILE *f)
2045 {
2046 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2047 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2048 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2049 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2050 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2051 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2052 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2053 fprintf(f, "Unknown newline type at: <%s\n", p);
2054 return 0;
2055 }
2056
2057
2058
2059 /*************************************************
2060 * Usage function *
2061 *************************************************/
2062
/* Write the command-line help text to stdout. The text is held as a table of
lines; preprocessor tests decide which of the build-dependent lines (readline,
16-bit, DFA, POSIX) are present in the table. */

static void
usage(void)
{
static const char *const help_text[] =
  {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use 16-bit interface\n",
#endif
  " -b show compiled code (bytecode)\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option\n",
  " and exit with its value. The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
int i;

for (i = 0; i < (int)(sizeof(help_text)/sizeof(help_text[0])); i++)
  fputs(help_text[i], stdout);
}
2109
2110
2111
2112 /*************************************************
2113 * Main Program *
2114 *************************************************/
2115
2116 /* Read lines from named file or stdin and write to named file or stdout; lines
2117 consist of a regular expression, in delimiters and optionally followed by
2118 options, followed by a set of test data, terminated by an empty line. */
2119
2120 int main(int argc, char **argv)
2121 {
2122 FILE *infile = stdin;
2123 const char *version;
2124 int options = 0;
2125 int study_options = 0;
2126 int default_find_match_limit = FALSE;
2127 int op = 1;
2128 int timeit = 0;
2129 int timeitm = 0;
2130 int showinfo = 0;
2131 int showstore = 0;
2132 int force_study = -1;
2133 int force_study_options = 0;
2134 int quiet = 0;
2135 int size_offsets = 45;
2136 int size_offsets_max;
2137 int *offsets = NULL;
2138 #if !defined NOPOSIX
2139 int posix = 0;
2140 #endif
2141 int debug = 0;
2142 int done = 0;
2143 int all_use_dfa = 0;
2144 int yield = 0;
2145 int stack_size;
2146
2147 pcre_jit_stack *jit_stack = NULL;
2148
2149 /* These vectors store, end-to-end, a list of zero-terminated captured
2150 substring names, each list itself being terminated by an empty name. Assume
2151 that 1024 is plenty long enough for the few names we'll be testing. It is
2152 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2153 for the actual memory, to ensure alignment. By defining these variables always
2154 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2155 #ifdefs in the code. */
2156
2157 pcre_uint16 copynames[1024];
2158 pcre_uint16 getnames[1024];
2159
2160 pcre_uint16 *cn16ptr;
2161 pcre_uint16 *gn16ptr;
2162
2163 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2164 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2165 pcre_uint8 *cn8ptr;
2166 pcre_uint8 *gn8ptr;
2167
2168 /* Get buffers from malloc() so that valgrind will check their misuse when
2169 debugging. They grow automatically when very long lines are read. The 16-bit
2170 buffer (buffer16) is obtained only if needed. */
2171
2172 buffer = (pcre_uint8 *)malloc(buffer_size);
2173 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2174 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2175
2176 /* The outfile variable is static so that new_malloc can use it. */
2177
2178 outfile = stdout;
2179
2180 /* The following _setmode() stuff is some Windows magic that tells its runtime
2181 library to translate CRLF into a single LF character. At least, that's what
2182 I've been told: never having used Windows I take this all on trust. Originally
2183 it set 0x8000, but then I was advised that _O_BINARY was better. */
2184
2185 #if defined(_WIN32) || defined(WIN32)
2186 _setmode( _fileno( stdout ), _O_BINARY );
2187 #endif
2188
2189 /* Get the version number: both pcre_version() and pcre16_version() give the
2190 same answer. We just need to ensure that we call one that is available. */
2191
2192 #ifdef SUPPORT_PCRE8
2193 version = pcre_version();
2194 #else
2195 version = pcre16_version();
2196 #endif
2197
2198 /* Scan options */
2199
2200 while (argc > 1 && argv[op][0] == '-')
2201 {
2202 pcre_uint8 *endptr;
2203
2204 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2205 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2206 else if (strcmp(argv[op], "-s+") == 0)
2207 {
2208 force_study = 1;
2209 force_study_options = PCRE_STUDY_JIT_COMPILE;
2210 }
2211 else if (strcmp(argv[op], "-16") == 0)
2212 {
2213 #ifdef SUPPORT_PCRE16
2214 use_pcre16 = 1;
2215 #else
2216 printf("** This version of PCRE was built without 16-bit support\n");
2217 exit(1);
2218 #endif
2219 }
2220 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2221 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2222 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2223 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2224 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2225 #if !defined NODFA
2226 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2227 #endif
2228 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2229 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2230 *endptr == 0))
2231 {
2232 op++;
2233 argc--;
2234 }
2235 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2236 {
2237 int both = argv[op][2] == 0;
2238 int temp;
2239 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2240 *endptr == 0))
2241 {
2242 timeitm = temp;
2243 op++;
2244 argc--;
2245 }
2246 else timeitm = LOOPREPEAT;
2247 if (both) timeit = timeitm;
2248 }
2249 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2250 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2251 *endptr == 0))
2252 {
2253 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2254 printf("PCRE: -S not supported on this OS\n");
2255 exit(1);
2256 #else
2257 int rc;
2258 struct rlimit rlim;
2259 getrlimit(RLIMIT_STACK, &rlim);
2260 rlim.rlim_cur = stack_size * 1024 * 1024;
2261 rc = setrlimit(RLIMIT_STACK, &rlim);
2262 if (rc != 0)
2263 {
2264 printf("PCRE: setrlimit() failed with error %d\n", rc);
2265 exit(1);
2266 }
2267 op++;
2268 argc--;
2269 #endif
2270 }
2271 #if !defined NOPOSIX
2272 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2273 #endif
2274 else if (strcmp(argv[op], "-C") == 0)
2275 {
2276 int rc;
2277 unsigned long int lrc;
2278
2279 if (argc > 2)
2280 {
2281 if (strcmp(argv[op + 1], "linksize") == 0)
2282 {
2283 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2284 printf("%d\n", rc);
2285 yield = rc;
2286 goto EXIT;
2287 }
2288 if (strcmp(argv[op + 1], "pcre8") == 0)
2289 {
2290 #ifdef SUPPORT_PCRE8
2291 printf("1\n");
2292 yield = 1;
2293 #else
2294 printf("0\n");
2295 yield = 0;
2296 #endif
2297 goto EXIT;
2298 }
2299 if (strcmp(argv[op + 1], "pcre16") == 0)
2300 {
2301 #ifdef SUPPORT_PCRE16
2302 printf("1\n");
2303 yield = 1;
2304 #else
2305 printf("0\n");
2306 yield = 0;
2307 #endif
2308 goto EXIT;
2309 }
2310 if (strcmp(argv[op + 1], "utf") == 0)
2311 {
2312 #ifdef SUPPORT_PCRE8
2313 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2314 printf("%d\n", rc);
2315 yield = rc;
2316 #else
2317 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2318 printf("%d\n", rc);
2319 yield = rc;
2320 #endif
2321 goto EXIT;
2322 }
2323 if (strcmp(argv[op + 1], "ucp") == 0)
2324 {
2325 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2326 printf("%d\n", rc);
2327 yield = rc;
2328 goto EXIT;
2329 }
2330 if (strcmp(argv[op + 1], "jit") == 0)
2331 {
2332 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2333 printf("%d\n", rc);
2334 yield = rc;
2335 goto EXIT;
2336 }
2337 if (strcmp(argv[op + 1], "newline") == 0)
2338 {
2339 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2340 /* Note that these values are always the ASCII values, even
2341 in EBCDIC environments. CR is 13 and NL is 10. */
2342 printf("%s\n", (rc == 13)? "CR" :
2343 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2344 (rc == -2)? "ANYCRLF" :
2345 (rc == -1)? "ANY" : "???");
2346 goto EXIT;
2347 }
2348 printf("Unknown -C option: %s\n", argv[op + 1]);
2349 goto EXIT;
2350 }
2351
2352 printf("PCRE version %s\n", version);
2353 printf("Compiled with\n");
2354
2355 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2356 are set, either both UTFs are supported or both are not supported. */
2357
2358 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2359 printf(" 8-bit and 16-bit support\n");
2360 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2361 if (rc)
2362 printf(" UTF-8 and UTF-16 support\n");
2363 else
2364 printf(" No UTF-8 or UTF-16 support\n");
2365 #elif defined SUPPORT_PCRE8
2366 printf(" 8-bit support only\n");
2367 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2368 printf(" %sUTF-8 support\n", rc? "" : "No ");
2369 #else
2370 printf(" 16-bit support only\n");
2371 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2372 printf(" %sUTF-16 support\n", rc? "" : "No ");
2373 #endif
2374
2375 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2376 printf(" %sUnicode properties support\n", rc? "" : "No ");
2377 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2378 if (rc)
2379 printf(" Just-in-time compiler support\n");
2380 else
2381 printf(" No just-in-time compiler support\n");
2382 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2383 /* Note that these values are always the ASCII values, even
2384 in EBCDIC environments. CR is 13 and NL is 10. */
2385 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2386 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2387 (rc == -2)? "ANYCRLF" :
2388 (rc == -1)? "ANY" : "???");
2389 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2390 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2391 "all Unicode newlines");
2392 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2393 printf(" Internal link size = %d\n", rc);
2394 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2395 printf(" POSIX malloc threshold = %d\n", rc);
2396 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2397 printf(" Default match limit = %ld\n", lrc);
2398 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2399 printf(" Default recursion depth limit = %ld\n", lrc);
2400 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2401 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2402 goto EXIT;
2403 }
2404 else if (strcmp(argv[op], "-help") == 0 ||
2405 strcmp(argv[op], "--help") == 0)
2406 {
2407 usage();
2408 goto EXIT;
2409 }
2410 else
2411 {
2412 printf("** Unknown or malformed option %s\n", argv[op]);
2413 usage();
2414 yield = 1;
2415 goto EXIT;
2416 }
2417 op++;
2418 argc--;
2419 }
2420
2421 /* Get the store for the offsets vector, and remember what it was */
2422
2423 size_offsets_max = size_offsets;
2424 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2425 if (offsets == NULL)
2426 {
2427 printf("** Failed to get %d bytes of memory for offsets vector\n",
2428 (int)(size_offsets_max * sizeof(int)));
2429 yield = 1;
2430 goto EXIT;
2431 }
2432
2433 /* Sort out the input and output files */
2434
2435 if (argc > 1)
2436 {
2437 infile = fopen(argv[op], INPUT_MODE);
2438 if (infile == NULL)
2439 {
2440 printf("** Failed to open %s\n", argv[op]);
2441 yield = 1;
2442 goto EXIT;
2443 }
2444 }
2445
2446 if (argc > 2)
2447 {
2448 outfile = fopen(argv[op+1], OUTPUT_MODE);
2449 if (outfile == NULL)
2450 {
2451 printf("** Failed to open %s\n", argv[op+1]);
2452 yield = 1;
2453 goto EXIT;
2454 }
2455 }
2456
2457 /* Set alternative malloc function */
2458
2459 #ifdef SUPPORT_PCRE8
2460 pcre_malloc = new_malloc;
2461 pcre_free = new_free;
2462 pcre_stack_malloc = stack_malloc;
2463 pcre_stack_free = stack_free;
2464 #endif
2465
2466 #ifdef SUPPORT_PCRE16
2467 pcre16_malloc = new_malloc;
2468 pcre16_free = new_free;
2469 pcre16_stack_malloc = stack_malloc;
2470 pcre16_stack_free = stack_free;
2471 #endif
2472
2473 /* Heading line unless quiet, then prompt for first regex if stdin */
2474
2475 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2476
2477 /* Main loop */
2478
2479 while (!done)
2480 {
2481 pcre *re = NULL;
2482 pcre_extra *extra = NULL;
2483
2484 #if !defined NOPOSIX /* There are still compilers that require no indent */
2485 regex_t preg;
2486 int do_posix = 0;
2487 #endif
2488
2489 const char *error;
2490 pcre_uint8 *markptr;
2491 pcre_uint8 *p, *pp, *ppp;
2492 pcre_uint8 *to_file = NULL;
2493 const pcre_uint8 *tables = NULL;
2494 unsigned long int get_options;
2495 unsigned long int true_size, true_study_size = 0;
2496 size_t size, regex_gotten_store;
2497 int do_allcaps = 0;
2498 int do_mark = 0;
2499 int do_study = 0;
2500 int no_force_study = 0;
2501 int do_debug = debug;
2502 int do_G = 0;
2503 int do_g = 0;
2504 int do_showinfo = showinfo;
2505 int do_showrest = 0;
2506 int do_showcaprest = 0;
2507 int do_flip = 0;
2508 int erroroffset, len, delimiter, poffset;
2509
2510 use_utf = 0;
2511 debug_lengths = 1;
2512
2513 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2514 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2515 fflush(outfile);
2516
2517 p = buffer;
2518 while (isspace(*p)) p++;
2519 if (*p == 0) continue;
2520
2521 /* See if the pattern is to be loaded pre-compiled from a file. */
2522
2523 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2524 {
2525 pcre_uint32 magic;
2526 pcre_uint8 sbuf[8];
2527 FILE *f;
2528
2529 p++;
2530 if (*p == '!')
2531 {
2532 do_debug = TRUE;
2533 do_showinfo = TRUE;
2534 p++;
2535 }
2536
2537 pp = p + (int)strlen((char *)p);
2538 while (isspace(pp[-1])) pp--;
2539 *pp = 0;
2540
2541 f = fopen((char *)p, "rb");
2542 if (f == NULL)
2543 {
2544 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2545 continue;
2546 }
2547
2548 first_gotten_store = 0;
2549 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2550
2551 true_size =
2552 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2553 true_study_size =
2554 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2555
2556 re = (real_pcre *)new_malloc(true_size);
2557 regex_gotten_store = first_gotten_store;
2558
2559 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2560
2561 magic = ((real_pcre *)re)->magic_number;
2562 if (magic != MAGIC_NUMBER)
2563 {
2564 if (swap_uint32(magic) == MAGIC_NUMBER)
2565 {
2566 do_flip = 1;
2567 }
2568 else
2569 {
2570 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2571 fclose(f);
2572 continue;
2573 }
2574 }
2575
2576 /* We hide the byte-invert info for little and big endian tests. */
2577 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2578 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2579
2580 /* Now see if there is any following study data. */
2581
2582 if (true_study_size != 0)
2583 {
2584 pcre_study_data *psd;
2585
2586 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2587 extra->flags = PCRE_EXTRA_STUDY_DATA;
2588
2589 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2590 extra->study_data = psd;
2591
2592 if (fread(psd, 1, true_study_size, f) != true_study_size)
2593 {
2594 FAIL_READ:
2595 fprintf(outfile, "Failed to read data from %s\n", p);
2596 if (extra != NULL)
2597 {
2598 PCRE_FREE_STUDY(extra);
2599 }
2600 if (re != NULL) new_free(re);
2601 fclose(f);
2602 continue;
2603 }
2604 fprintf(outfile, "Study data loaded from %s\n", p);
2605 do_study = 1; /* To get the data output if requested */
2606 }
2607 else fprintf(outfile, "No study data\n");
2608
2609 /* Flip the necessary bytes. */
2610 if (do_flip)
2611 {
2612 int rc;
2613 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2614 if (rc == PCRE_ERROR_BADMODE)
2615 {
2616 /* Simulate the result of the function call below. */
2617 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2618 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2619 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2620 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2621 continue;
2622 }
2623 }
2624
2625 /* Need to know if UTF-8 for printing data strings. */
2626
2627 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2628 use_utf = (get_options & PCRE_UTF8) != 0;
2629
2630 fclose(f);
2631 goto SHOW_INFO;
2632 }
2633
2634 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2635 the pattern; if it isn't complete, read more. */
2636
2637 delimiter = *p++;
2638
2639 if (isalnum(delimiter) || delimiter == '\\')
2640 {
2641 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2642 goto SKIP_DATA;
2643 }
2644
2645 pp = p;
2646 poffset = (int)(p - buffer);
2647
2648 for(;;)
2649 {
2650 while (*pp != 0)
2651 {
2652 if (*pp == '\\' && pp[1] != 0) pp++;
2653 else if (*pp == delimiter) break;
2654 pp++;
2655 }
2656 if (*pp != 0) break;
2657 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2658 {
2659 fprintf(outfile, "** Unexpected EOF\n");
2660 done = 1;
2661 goto CONTINUE;
2662 }
2663 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2664 }
2665
2666 /* The buffer may have moved while being extended; reset the start of data
2667 pointer to the correct relative point in the buffer. */
2668
2669 p = buffer + poffset;
2670
2671 /* If the first character after the delimiter is backslash, make
2672 the pattern end with backslash. This is purely to provide a way
2673 of testing for the error message when a pattern ends with backslash. */
2674
2675 if (pp[1] == '\\') *pp++ = '\\';
2676
2677 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2678 for callouts. */
2679
2680 *pp++ = 0;
2681 strcpy((char *)pbuffer, (char *)p);
2682
2683 /* Look for options after final delimiter */
2684
2685 options = 0;
2686 study_options = 0;
2687 log_store = showstore; /* default from command line */
2688
2689 while (*pp != 0)
2690 {
2691 switch (*pp++)
2692 {
2693 case 'f': options |= PCRE_FIRSTLINE; break;
2694 case 'g': do_g = 1; break;
2695 case 'i': options |= PCRE_CASELESS; break;
2696 case 'm': options |= PCRE_MULTILINE; break;
2697 case 's': options |= PCRE_DOTALL; break;
2698 case 'x': options |= PCRE_EXTENDED; break;
2699
2700 case '+':
2701 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2702 break;
2703
2704 case '=': do_allcaps = 1; break;
2705 case 'A': options |= PCRE_ANCHORED; break;
2706 case 'B': do_debug = 1; break;
2707 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2708 case 'D': do_debug = do_showinfo = 1; break;
2709 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2710 case 'F': do_flip = 1; break;
2711 case 'G': do_G = 1; break;
2712 case 'I': do_showinfo = 1; break;
2713 case 'J': options |= PCRE_DUPNAMES; break;
2714 case 'K': do_mark = 1; break;
2715 case 'M': log_store = 1; break;
2716 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2717
2718 #if !defined NOPOSIX
2719 case 'P': do_posix = 1; break;
2720 #endif
2721
2722 case 'S':
2723 if (do_study == 0)
2724 {
2725 do_study = 1;
2726 if (*pp == '+')
2727 {
2728 study_options |= PCRE_STUDY_JIT_COMPILE;
2729 pp++;
2730 }
2731 }
2732 else
2733 {
2734 do_study = 0;
2735 no_force_study = 1;
2736 }
2737 break;
2738
2739 case 'U': options |= PCRE_UNGREEDY; break;
2740 case 'W': options |= PCRE_UCP; break;
2741 case 'X': options |= PCRE_EXTRA; break;
2742 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2743 case 'Z': debug_lengths = 0; break;
2744 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2745 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2746
2747 case 'T':
2748 switch (*pp++)
2749 {
2750 case '0': tables = tables0; break;
2751 case '1': tables = tables1; break;
2752
2753 case '\r':
2754 case '\n':
2755 case ' ':
2756 case 0:
2757 fprintf(outfile, "** Missing table number after /T\n");
2758 goto SKIP_DATA;
2759
2760 default:
2761 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2762 goto SKIP_DATA;
2763 }
2764 break;
2765
2766 case 'L':
2767 ppp = pp;
2768 /* The '\r' test here is so that it works on Windows. */
2769 /* The '0' test is just in case this is an unterminated line. */
2770 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2771 *ppp = 0;
2772 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2773 {
2774 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2775 goto SKIP_DATA;
2776 }
2777 locale_set = 1;
2778 tables = PCRE_MAKETABLES;
2779 pp = ppp;
2780 break;
2781
2782 case '>':
2783 to_file = pp;
2784 while (*pp != 0) pp++;
2785 while (isspace(pp[-1])) pp--;
2786 *pp = 0;
2787 break;
2788
2789 case '<':
2790 {
2791 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2792 {
2793 options |= PCRE_JAVASCRIPT_COMPAT;
2794 pp += 3;
2795 }
2796 else
2797 {
2798 int x = check_newline(pp, outfile);
2799 if (x == 0) goto SKIP_DATA;
2800 options |= x;
2801 while (*pp++ != '>');
2802 }
2803 }
2804 break;
2805
2806 case '\r': /* So that it works in Windows */
2807 case '\n':
2808 case ' ':
2809 break;
2810
2811 default:
2812 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2813 goto SKIP_DATA;
2814 }
2815 }
2816
2817 /* Handle compiling via the POSIX interface, which doesn't support the
2818 timing, showing, or debugging options, nor the ability to pass over
2819 local character tables. Neither does it have 16-bit support. */
2820
2821 #if !defined NOPOSIX
2822 if (posix || do_posix)
2823 {
2824 int rc;
2825 int cflags = 0;
2826
2827 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2828 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2829 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2830 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2831 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2832 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2833 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2834
2835 first_gotten_store = 0;
2836 rc = regcomp(&preg, (char *)p, cflags);
2837
2838 /* Compilation failed; go back for another re, skipping to blank line
2839 if non-interactive. */
2840
2841 if (rc != 0)
2842 {
2843 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2844 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2845 goto SKIP_DATA;
2846 }
2847 }
2848
2849 /* Handle compiling via the native interface */
2850
2851 else
2852 #endif /* !defined NOPOSIX */
2853
2854 {
2855 /* In 16-bit mode, convert the input. */
2856
2857 #ifdef SUPPORT_PCRE16
2858 if (use_pcre16)
2859 {
2860 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2861 {
2862 case -1:
2863 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2864 "converted to UTF-16\n");
2865 goto SKIP_DATA;
2866
2867 case -2:
2868 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2869 "cannot be converted to UTF-16\n");
2870 goto SKIP_DATA;
2871
2872 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2873 fprintf(outfile, "**Failed: character value greater than 0xffff "
2874 "cannot be converted to 16-bit in non-UTF mode\n");
2875 goto SKIP_DATA;
2876
2877 default:
2878 break;
2879 }
2880 p = (pcre_uint8 *)buffer16;
2881 }
2882 #endif
2883
2884 /* Compile many times when timing */
2885
2886 if (timeit > 0)
2887 {
2888 register int i;
2889 clock_t time_taken;
2890 clock_t start_time = clock();
2891 for (i = 0; i < timeit; i++)
2892 {
2893 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2894 if (re != NULL) free(re);
2895 }
2896 time_taken = clock() - start_time;
2897 fprintf(outfile, "Compile time %.4f milliseconds\n",
2898 (((double)time_taken * 1000.0) / (double)timeit) /
2899 (double)CLOCKS_PER_SEC);
2900 }
2901
2902 first_gotten_store = 0;
2903 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2904
2905 /* Compilation failed; go back for another re, skipping to blank line
2906 if non-interactive. */
2907
2908 if (re == NULL)
2909 {
2910 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2911 SKIP_DATA:
2912 if (infile != stdin)
2913 {
2914 for (;;)
2915 {
2916 if (extend_inputline(infile, buffer, NULL) == NULL)
2917 {
2918 done = 1;
2919 goto CONTINUE;
2920 }
2921 len = (int)strlen((char *)buffer);
2922 while (len > 0 && isspace(buffer[len-1])) len--;
2923 if (len == 0) break;
2924 }
2925 fprintf(outfile, "\n");
2926 }
2927 goto CONTINUE;
2928 }
2929
2930 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2931 within the regex; check for this so that we know how to process the data
2932 lines. */
2933
2934 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2935 goto SKIP_DATA;
2936 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2937
2938 /* Extract the size for possible writing before possibly flipping it,
2939 and remember the store that was got. */
2940
2941 true_size = ((real_pcre *)re)->size;
2942 regex_gotten_store = first_gotten_store;
2943
2944 /* Output code size information if requested */
2945
2946 if (log_store)
2947 fprintf(outfile, "Memory allocation (code space): %d\n",
2948 (int)(first_gotten_store -
2949 sizeof(real_pcre) -
2950 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2951
2952 /* If -s or /S was present, study the regex to generate additional info to
2953 help with the matching, unless the pattern has the SS option, which
2954 suppresses the effect of /S (used for a few test patterns where studying is
2955 never sensible). */
2956
2957 if (do_study || (force_study >= 0 && !no_force_study))
2958 {
2959 if (timeit > 0)
2960 {
2961 register int i;
2962 clock_t time_taken;
2963 clock_t start_time = clock();
2964 for (i = 0; i < timeit; i++)
2965 {
2966 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2967 }
2968 time_taken = clock() - start_time;
2969 if (extra != NULL)
2970 {
2971 PCRE_FREE_STUDY(extra);
2972 }
2973 fprintf(outfile, " Study time %.4f milliseconds\n",
2974 (((double)time_taken * 1000.0) / (double)timeit) /
2975 (double)CLOCKS_PER_SEC);
2976 }
2977 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2978 if (error != NULL)
2979 fprintf(outfile, "Failed to study: %s\n", error);
2980 else if (extra != NULL)
2981 {
2982 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2983 if (log_store)
2984 {
2985 size_t jitsize;
2986 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2987 jitsize != 0)
2988 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2989 }
2990 }
2991 }
2992
2993 /* If /K was present, we set up for handling MARK data. */
2994
2995 if (do_mark)
2996 {
2997 if (extra == NULL)
2998 {
2999 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3000 extra->flags = 0;
3001 }
3002 extra->mark = &markptr;
3003 extra->flags |= PCRE_EXTRA_MARK;
3004 }
3005
3006 /* Extract and display information from the compiled data if required. */
3007
3008 SHOW_INFO:
3009
3010 if (do_debug)
3011 {
3012 fprintf(outfile, "------------------------------------------------------------------\n");
3013 PCRE_PRINTINT(re, outfile, debug_lengths);
3014 }
3015
3016 /* We already have the options in get_options (see above) */
3017
3018 if (do_showinfo)
3019 {
3020 unsigned long int all_options;
3021 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3022 hascrorlf;
3023 int nameentrysize, namecount;
3024 const pcre_uint8 *nametable;
3025
3026 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3027 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3028 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3029 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3030 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3031 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3032 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3033 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3034 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3035 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3036 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3037 != 0)
3038 goto SKIP_DATA;
3039
3040 if (size != regex_gotten_store) fprintf(outfile,
3041 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3042 (int)size, (int)regex_gotten_store);
3043
3044 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3045 if (backrefmax > 0)
3046 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3047
3048 if (namecount > 0)
3049 {
3050 fprintf(outfile, "Named capturing subpatterns:\n");
3051 while (namecount-- > 0)
3052 {
3053 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3054 int imm2_size = use_pcre16 ? 1 : 2;
3055 #else
3056 int imm2_size = IMM2_SIZE;
3057 #endif
3058 int length = (int)STRLEN(nametable + imm2_size);
3059 fprintf(outfile, " ");
3060 PCHARSV(nametable, imm2_size, length, outfile);
3061 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3062 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3063 fprintf(outfile, "%3d\n", use_pcre16?
3064 (int)(((PCRE_SPTR16)nametable)[0])
3065 :((int)nametable[0] << 8) | (int)nametable[1]);
3066 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3067 #else
3068 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3069 #ifdef SUPPORT_PCRE8
3070 nametable += nameentrysize;
3071 #else
3072 nametable += nameentrysize * 2;
3073 #endif
3074 #endif
3075 }
3076 }
3077
3078 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3079 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3080
3081 all_options = ((real_pcre *)re)->options;
3082 if (do_flip) all_options = swap_uint32(all_options);
3083
3084 if (get_options == 0) fprintf(outfile, "No options\n");
3085 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3086 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3087 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3088 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3089 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3090 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3091 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3092 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3093 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3094 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3095 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3096 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3097 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3098 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3099 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3100 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3101 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3102 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3103
3104 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3105
3106 switch (get_options & PCRE_NEWLINE_BITS)
3107 {
3108 case PCRE_NEWLINE_CR:
3109 fprintf(outfile, "Forced newline sequence: CR\n");
3110 break;
3111
3112 case PCRE_NEWLINE_LF:
3113 fprintf(outfile, "Forced newline sequence: LF\n");
3114 break;
3115
3116 case PCRE_NEWLINE_CRLF:
3117 fprintf(outfile, "Forced newline sequence: CRLF\n");
3118 break;
3119
3120 case PCRE_NEWLINE_ANYCRLF:
3121 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3122 break;
3123
3124 case PCRE_NEWLINE_ANY:
3125 fprintf(outfile, "Forced newline sequence: ANY\n");
3126 break;
3127
3128 default:
3129 break;
3130 }
3131
3132 if (first_char == -1)
3133 {
3134 fprintf(outfile, "First char at start or follows newline\n");
3135 }
3136 else if (first_char < 0)
3137 {
3138 fprintf(outfile, "No first char\n");
3139 }
3140 else
3141 {
3142 const char *caseless =
3143 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3144 "" : " (caseless)";
3145
3146 if (PRINTOK(first_char))
3147 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3148 else
3149 {
3150 fprintf(outfile, "First char = ");
3151 pchar(first_char, outfile);
3152 fprintf(outfile, "%s\n", caseless);
3153 }
3154 }
3155
3156 if (need_char < 0)
3157 {
3158 fprintf(outfile, "No need char\n");
3159 }
3160 else
3161 {
3162 const char *caseless =
3163 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3164 "" : " (caseless)";
3165
3166 if (PRINTOK(need_char))
3167 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3168 else
3169 {
3170 fprintf(outfile, "Need char = ");
3171 pchar(need_char, outfile);
3172 fprintf(outfile, "%s\n", caseless);
3173 }
3174 }
3175
3176 /* Don't output study size; at present it is in any case a fixed
3177 value, but it varies, depending on the computer architecture, and
3178 so messes up the test suite. (And with the /F option, it might be
3179 flipped.) If study was forced by an external -s, don't show this
3180 information unless -i or -d was also present. This means that, except
3181 when auto-callouts are involved, the output from runs with and without
3182 -s should be identical. */
3183
3184 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3185 {
3186 if (extra == NULL)
3187 fprintf(outfile, "Study returned NULL\n");
3188 else
3189 {
3190 pcre_uint8 *start_bits = NULL;
3191 int minlength;
3192
3193 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3194 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3195
3196 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3197 {
3198 if (start_bits == NULL)
3199 fprintf(outfile, "No set of starting bytes\n");
3200 else
3201 {
3202 int i;
3203 int c = 24;
3204 fprintf(outfile, "Starting byte set: ");
3205 for (i = 0; i < 256; i++)
3206 {
3207 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3208 {
3209 if (c > 75)
3210 {
3211 fprintf(outfile, "\n ");
3212 c = 2;
3213 }
3214 if (PRINTOK(i) && i != ' ')
3215 {
3216 fprintf(outfile, "%c ", i);
3217 c += 2;
3218 }
3219 else
3220 {
3221 fprintf(outfile, "\\x%02x ", i);
3222 c += 5;
3223 }
3224 }
3225 }
3226 fprintf(outfile, "\n");
3227 }
3228 }
3229 }
3230
3231 /* Show this only if the JIT was set by /S, not by -s. */
3232
3233 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3234 {
3235 int jit;
3236 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3237 {
3238 if (jit)
3239 fprintf(outfile, "JIT study was successful\n");
3240 else
3241 #ifdef SUPPORT_JIT
3242 fprintf(outfile, "JIT study was not successful\n");
3243 #else
3244 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3245 #endif
3246 }
3247 }
3248 }
3249 }
3250
3251 /* If the '>' option was present, we write out the regex to a file, and
3252 that is all. The first 8 bytes of the file are the regex length and then
3253 the study length, in big-endian order. */
3254
3255 if (to_file != NULL)
3256 {
3257 FILE *f = fopen((char *)to_file, "wb");
3258 if (f == NULL)
3259 {
3260 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3261 }
3262 else
3263 {
3264 pcre_uint8 sbuf[8];
3265
3266 if (do_flip) regexflip(re, extra);
3267 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3268 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3269 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3270 sbuf[3] = (pcre_uint8)((true_size) & 255);
3271 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3272 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3273 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3274 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3275
3276 if (fwrite(sbuf, 1, 8, f) < 8 ||
3277 fwrite(re, 1, true_size, f) < true_size)
3278 {
3279 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3280 }
3281 else
3282 {
3283 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3284
3285 /* If there is study data, write it. */
3286
3287 if (extra != NULL)
3288 {
3289 if (fwrite(extra->study_data, 1, true_study_size, f) <
3290 true_study_size)
3291 {
3292 fprintf(outfile, "Write error on %s: %s\n", to_file,
3293 strerror(errno));
3294 }
3295 else fprintf(outfile, "Study data written to %s\n", to_file);
3296 }
3297 }
3298 fclose(f);
3299 }
3300
3301 new_free(re);
3302 if (extra != NULL)
3303 {
3304 PCRE_FREE_STUDY(extra);
3305 }
3306 if (locale_set)
3307 {
3308 new_free((void *)tables);
3309 setlocale(LC_CTYPE, "C");
3310 locale_set = 0;
3311 }
3312 continue; /* With next regex */
3313 }
3314 } /* End of non-POSIX compile */
3315
3316 /* Read data lines and test them */
3317
3318 for (;;)
3319 {
3320 pcre_uint8 *q;
3321 pcre_uint8 *bptr;
3322 int *use_offsets = offsets;
3323 int use_size_offsets = size_offsets;
3324 int callout_data = 0;
3325 int callout_data_set = 0;
3326 int count, c;
3327 int copystrings = 0;
3328 int find_match_limit = default_find_match_limit;
3329 int getstrings = 0;
3330 int getlist = 0;
3331 int gmatched = 0;
3332 int start_offset = 0;
3333 int start_offset_sign = 1;
3334 int g_notempty = 0;
3335 int use_dfa = 0;
3336
3337 *copynames = 0;
3338 *getnames = 0;
3339
3340 cn16ptr = copynames;
3341 gn16ptr = getnames;
3342 cn8ptr = copynames8;
3343 gn8ptr = getnames8;
3344
3345 SET_PCRE_CALLOUT(callout);
3346 first_callout = 1;
3347 last_callout_mark = NULL;
3348 callout_extra = 0;
3349 callout_count = 0;
3350 callout_fail_count = 999999;
3351 callout_fail_id = -1;
3352 show_malloc = 0;
3353 options = 0;
3354
3355 if (extra != NULL) extra->flags &=
3356 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3357
3358 len = 0;
3359 for (;;)
3360 {
3361 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3362 {
3363 if (len > 0) /* Reached EOF without hitting a newline */
3364 {
3365 fprintf(outfile, "\n");
3366 break;
3367 }
3368 done = 1;
3369 goto CONTINUE;
3370 }
3371 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3372 len = (int)strlen((char *)buffer);
3373 if (buffer[len-1] == '\n') break;
3374 }
3375
3376 while (len > 0 && isspace(buffer[len-1])) len--;
3377 buffer[len] = 0;
3378 if (len == 0) break;
3379
3380 p = buffer;
3381 while (isspace(*p)) p++;
3382
3383 bptr = q = dbuffer;
3384 while ((c = *p++) != 0)
3385 {
3386 int i = 0;
3387 int n = 0;
3388
3389 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3390 In non-UTF mode, allow the value of the byte to fall through to later,
3391 where values greater than 127 are turned into UTF-8 when running in
3392 16-bit mode. */
3393
3394 if (c != '\\')
3395 {
3396 if (use_utf)
3397 {
3398 *q++ = c;
3399 continue;
3400 }
3401 }
3402
3403 /* Handle backslash escapes */
3404
3405 else switch ((c = *p++))
3406 {
3407 case 'a': c = 7; break;
3408 case 'b': c = '\b'; break;
3409 case 'e': c = 27; break;
3410 case 'f': c = '\f'; break;
3411 case 'n': c = '\n'; break;
3412 case 'r': c = '\r'; break;
3413 case 't': c = '\t'; break;
3414 case 'v': c = '\v'; break;
3415
3416 case '0': case '1': case '2': case '3':
3417 case '4': case '5': case '6': case '7':
3418 c -= '0';
3419 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3420 c = c * 8 + *p++ - '0';
3421 break;
3422
3423 case 'x':
3424 if (*p == '{')
3425 {
3426 pcre_uint8 *pt = p;
3427 c = 0;
3428
3429 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3430 when isxdigit() is a macro that refers to its argument more than
3431 once. This is banned by the C Standard, but apparently happens in at
3432 least one MacOS environment. */
3433
3434 for (pt++; isxdigit(*pt); pt++)
3435 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3436 if (*pt == '}')
3437 {
3438 p = pt + 1;
3439 break;
3440 }
3441 /* Not correct form for \x{...}; fall through */
3442 }
3443
3444 /* \x without {} always defines just one byte in 8-bit mode. This
3445 allows UTF-8 characters to be constructed byte by byte, and also allows
3446 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3447 Otherwise, pass it down to later code so that it can be turned into
3448 UTF-8 when running in 16-bit mode. */
3449
3450 c = 0;
3451 while (i++ < 2 && isxdigit(*p))
3452 {
3453 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3454 p++;
3455 }
3456 if (use_utf)
3457 {
3458 *q++ = c;
3459 continue;
3460 }
3461 break;
3462
3463 case 0: /* \ followed by EOF allows for an empty line */
3464 p--;
3465 continue;
3466
3467 case '>':
3468 if (*p == '-')
3469 {
3470 start_offset_sign = -1;
3471 p++;
3472 }
3473 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3474 start_offset *= start_offset_sign;
3475 continue;
3476
3477 case 'A': /* Option setting */
3478 options |= PCRE_ANCHORED;
3479 continue;
3480
3481 case 'B':
3482 options |= PCRE_NOTBOL;
3483 continue;
3484
3485 case 'C':
3486 if (isdigit(*p)) /* Set copy string */
3487 {
3488 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3489 copystrings |= 1 << n;
3490 }
3491 else if (isalnum(*p))
3492 {
3493 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3494 }
3495 else if (*p == '+')
3496 {
3497 callout_extra = 1;
3498 p++;
3499 }
3500 else if (*p == '-')
3501 {
3502 SET_PCRE_CALLOUT(NULL);
3503 p++;
3504 }
3505 else if (*p == '!')
3506 {
3507 callout_fail_id = 0;
3508 p++;
3509 while(isdigit(*p))
3510 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3511 callout_fail_count = 0;
3512 if (*p == '!')
3513 {
3514 p++;
3515 while(isdigit(*p))
3516 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3517 }
3518 }
3519 else if (*p == '*')
3520 {
3521 int sign = 1;
3522 callout_data = 0;
3523 if (*(++p) == '-') { sign = -1; p++; }
3524 while(isdigit(*p))
3525 callout_data = callout_data * 10 + *p++ - '0';
3526 callout_data *= sign;
3527 callout_data_set = 1;
3528 }
3529 continue;
3530
3531 #if !defined NODFA
3532 case 'D':
3533 #if !defined NOPOSIX
3534 if (posix || do_posix)
3535 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3536 else
3537 #endif
3538 use_dfa = 1;
3539 continue;
3540 #endif
3541
3542 #if !defined NODFA
3543 case 'F':
3544 options |= PCRE_DFA_SHORTEST;
3545 continue;
3546 #endif
3547
3548 case 'G':
3549 if (isdigit(*p))
3550 {
3551 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3552 getstrings |= 1 << n;
3553 }
3554 else if (isalnum(*p))
3555 {
3556 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3557 }
3558 continue;
3559
3560 case 'J':
3561 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3562 if (extra != NULL
3563 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3564 && extra->executable_jit != NULL)
3565 {
3566 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3567 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3568 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3569 }
3570 continue;
3571
3572 case 'L':
3573 getlist = 1;
3574 continue;
3575
3576 case 'M':
3577 find_match_limit = 1;
3578 continue;
3579
3580 case 'N':
3581 if ((options & PCRE_NOTEMPTY) != 0)
3582 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3583 else
3584 options |= PCRE_NOTEMPTY;
3585 continue;
3586
3587 case 'O':
3588 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3589 if (n > size_offsets_max)
3590 {
3591 size_offsets_max = n;
3592 free(offsets);
3593 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3594 if (offsets == NULL)
3595 {
3596 printf("** Failed to get %d bytes of memory for offsets vector\n",
3597 (int)(size_offsets_max * sizeof(int)));
3598 yield = 1;
3599 goto EXIT;
3600 }
3601 }
3602 use_size_offsets = n;
3603 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3604 continue;
3605
3606 case 'P':
3607 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3608 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3609 continue;
3610
3611 case 'Q':
3612 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3613 if (extra == NULL)
3614 {
3615 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3616 extra->flags = 0;
3617 }
3618 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3619 extra->match_limit_recursion = n;
3620 continue;
3621
3622 case 'q':
3623 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3624 if (extra == NULL)
3625 {
3626 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3627 extra->flags = 0;
3628 }
3629 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3630 extra->match_limit = n;
3631 continue;
3632
3633 #if !defined NODFA
3634 case 'R':
3635 options |= PCRE_DFA_RESTART;
3636 continue;
3637 #endif
3638
3639 case 'S':
3640 show_malloc = 1;
3641 continue;
3642
3643 case 'Y':
3644 options |= PCRE_NO_START_OPTIMIZE;
3645 continue;
3646
3647 case 'Z':
3648 options |= PCRE_NOTEOL;
3649 continue;
3650
3651 case '?':
3652 options |= PCRE_NO_UTF8_CHECK;
3653 continue;
3654
3655 case '<':
3656 {
3657 int x = check_newline(p, outfile);
3658 if (x == 0) goto NEXT_DATA;
3659 options |= x;
3660 while (*p++ != '>');
3661 }
3662 continue;
3663 }
3664
3665 /* We now have a character value in c that may be greater than 255. In
3666 16-bit mode, we always convert characters to UTF-8 so that values greater
3667 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3668 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3669 mode must have come from \x{...} or octal constructs because values from
3670 \x.. get this far only in non-UTF mode. */
3671
3672 #if !defined NOUTF || defined SUPPORT_PCRE16
3673 if (use_pcre16 || use_utf)
3674 {
3675 pcre_uint8 buff8[8];
3676 int ii, utn;
3677 utn = ord2utf8(c, buff8);
3678 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3679 }
3680 else
3681 #endif
3682 {
3683 if (c > 255)
3684 {
3685 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3686 "and UTF-8 mode is not enabled.\n", c);
3687 fprintf(outfile, "** Truncation will probably give the wrong "
3688 "result.\n");
3689 }
3690 *q++ = c;
3691 }
3692 }
3693
3694 /* Reached end of subject string */
3695
3696 *q = 0;
3697 len = (int)(q - dbuffer);
3698
3699 /* Move the data to the end of the buffer so that a read over the end of
3700 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3701 we are using the POSIX interface, we must include the terminating zero. */
3702
3703 #if !defined NOPOSIX
3704 if (posix || do_posix)
3705 {
3706 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3707 bptr += buffer_size - len - 1;
3708 }
3709 else
3710 #endif
3711 {
3712 memmove(bptr + buffer_size - len, bptr, len);
3713 bptr += buffer_size - len;
3714 }
3715
3716 if ((all_use_dfa || use_dfa) && find_match_limit)
3717 {
3718 printf("**Match limit not relevant for DFA matching: ignored\n");
3719 find_match_limit = 0;
3720 }
3721
3722 /* Handle matching via the POSIX interface, which does not
3723 support timing or playing with the match limit or callout data. */
3724
3725 #if !defined NOPOSIX
3726 if (posix || do_posix)
3727 {
3728 int rc;
3729 int eflags = 0;
3730 regmatch_t *pmatch = NULL;
3731 if (use_size_offsets > 0)
3732 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3733 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3734 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3735 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3736
3737 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3738
3739 if (rc != 0)
3740 {
3741 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3742 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3743 }
3744 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3745 != 0)
3746 {
3747 fprintf(outfile, "Matched with REG_NOSUB\n");
3748 }
3749 else
3750 {
3751 size_t i;
3752 for (i = 0; i < (size_t)use_size_offsets; i++)
3753 {
3754 if (pmatch[i].rm_so >= 0)
3755 {
3756 fprintf(outfile, "%2d: ", (int)i);
3757 PCHARSV(dbuffer, pmatch[i].rm_so,
3758 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3759 fprintf(outfile, "\n");
3760 if (do_showcaprest || (i == 0 && do_showrest))
3761 {
3762 fprintf(outfile, "%2d+ ", (int)i);
3763 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3764 outfile);
3765 fprintf(outfile, "\n");
3766 }
3767 }
3768 }
3769 }
3770 free(pmatch);
3771 goto NEXT_DATA;
3772 }
3773
3774 #endif /* !defined NOPOSIX */
3775
3776 /* Handle matching via the native interface - repeats for /g and /G */
3777
3778 #ifdef SUPPORT_PCRE16
3779 if (use_pcre16)
3780 {
3781 len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3782 switch(len)
3783 {
3784 case -1:
3785 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3786 "converted to UTF-16\n");
3787 goto NEXT_DATA;
3788
3789 case -2:
3790 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3791 "cannot be converted to UTF-16\n");
3792 goto NEXT_DATA;
3793
3794 case -3:
3795 fprintf(outfile, "**Failed: character value greater than 0xffff "
3796 "cannot be converted to 16-bit in non-UTF mode\n");
3797 goto NEXT_DATA;
3798
3799 default:
3800 break;
3801 }
3802 bptr = (pcre_uint8 *)buffer16;
3803 }
3804 #endif
3805
3806 for (;; gmatched++) /* Loop for /g or /G */
3807 {
3808 markptr = NULL;
3809
3810 if (timeitm > 0)
3811 {
3812 register int i;
3813 clock_t time_taken;
3814 clock_t start_time = clock();
3815
3816 #if !defined NODFA
3817 if (all_use_dfa || use_dfa)
3818 {
3819 int workspace[1000];
3820 for (i = 0; i < timeitm; i++)
3821 {
3822 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3823 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3824 (sizeof(workspace)/sizeof(int)));
3825 }
3826 }
3827 else
3828 #endif
3829
3830 for (i = 0; i < timeitm; i++)
3831 {
3832 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3833 (options | g_notempty), use_offsets, use_size_offsets);
3834 }
3835 time_taken = clock() - start_time;
3836 fprintf(outfile, "Execute time %.4f milliseconds\n",
3837 (((double)time_taken * 1000.0) / (double)timeitm) /
3838 (double)CLOCKS_PER_SEC);
3839 }
3840
3841 /* If find_match_limit is set, we want to do repeated matches with
3842 varying limits in order to find the minimum value for the match limit and
3843 for the recursion limit. The match limits are relevant only to the normal
3844 running of pcre_exec(), so disable the JIT optimization. This makes it
3845 possible to run the same set of tests with and without JIT externally
3846 requested. */
3847
3848 if (find_match_limit)
3849 {
3850 if (extra == NULL)
3851 {
3852 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3853 extra->flags = 0;
3854 }
3855 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3856
3857 (void)check_match_limit(re, extra, bptr, len, start_offset,
3858 options|g_notempty, use_offsets, use_size_offsets,
3859 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3860 PCRE_ERROR_MATCHLIMIT, "match()");
3861
3862 count = check_match_limit(re, extra, bptr, len, start_offset,
3863 options|g_notempty, use_offsets, use_size_offsets,
3864 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3865 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3866 }
3867
3868 /* If callout_data is set, use the interface with additional data */
3869
3870 else if (callout_data_set)
3871 {
3872 if (extra == NULL)
3873 {
3874 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3875 extra->flags = 0;
3876 }
3877 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3878 extra->callout_data = &callout_data;
3879 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3880 options | g_notempty, use_offsets, use_size_offsets);
3881 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3882 }
3883
3884 /* The normal case is just to do the match once, with the default
3885 value of match_limit. */
3886
3887 #if !defined NODFA
3888 else if (all_use_dfa || use_dfa)
3889 {
3890 int workspace[1000];
3891 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3892 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3893 (sizeof(workspace)/sizeof(int)));
3894 if (count == 0)
3895 {
3896 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3897 count = use_size_offsets/2;
3898 }
3899 }
3900 #endif
3901
3902 else
3903 {
3904 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3905 options | g_notempty, use_offsets, use_size_offsets);
3906 if (count == 0)
3907 {
3908 fprintf(outfile, "Matched, but too many substrings\n");
3909 count = use_size_offsets/3;
3910 }
3911 }
3912
3913 /* Matched */
3914
3915 if (count >= 0)
3916 {
3917 int i, maxcount;
3918 void *cnptr, *gnptr;
3919
3920 #if !defined NODFA
3921 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3922 #endif
3923 maxcount = use_size_offsets/3;
3924
3925 /* This is a check against a lunatic return value. */
3926
3927 if (count > maxcount)
3928 {
3929 fprintf(outfile,
3930 "** PCRE error: returned count %d is too big for offset size %d\n",
3931 count, use_size_offsets);
3932 count = use_size_offsets/3;
3933 if (do_g || do_G)
3934 {
3935 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3936 do_g = do_G = FALSE; /* Break g/G loop */
3937 }
3938 }
3939
3940 /* do_allcaps requests showing of all captures in the pattern, to check
3941 unset ones at the end. */
3942
3943 if (do_allcaps)
3944 {
3945 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3946 goto SKIP_DATA;
3947 count++; /* Allow for full match */
3948 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3949 }
3950
3951 /* Output the captured substrings */
3952
3953 for (i = 0; i < count * 2; i += 2)
3954 {
3955 if (use_offsets[i] < 0)
3956 {
3957 if (use_offsets[i] != -1)
3958 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3959 use_offsets[i], i);
3960 if (use_offsets[i+1] != -1)
3961 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3962 use_offsets[i+1], i+1);
3963 fprintf(outfile, "%2d: <unset>\n", i/2);
3964 }
3965 else
3966 {
3967 fprintf(outfile, "%2d: ", i/2);
3968 PCHARSV(bptr, use_offsets[i],
3969 use_offsets[i+1] - use_offsets[i], outfile);
3970 fprintf(outfile, "\n");
3971 if (do_showcaprest || (i == 0 && do_showrest))
3972 {
3973 fprintf(outfile, "%2d+ ", i/2);
3974 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3975 outfile);
3976 fprintf(outfile, "\n");
3977 }
3978 }
3979 }
3980
3981 if (markptr != NULL)
3982 {
3983 fprintf(outfile, "MK: ");
3984 PCHARSV(markptr, 0, -1, outfile);
3985 fprintf(outfile, "\n");
3986 }
3987
3988 for (i = 0; i < 32; i++)
3989 {
3990 if ((copystrings & (1 << i)) != 0)
3991 {
3992 int rc;
3993 char copybuffer[256];
3994 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3995 copybuffer, sizeof(copybuffer));
3996 if (rc < 0)
3997 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3998 else
3999 {
4000 fprintf(outfile, "%2dC ", i);
4001 PCHARSV(copybuffer, 0, rc, outfile);
4002 fprintf(outfile, " (%d)\n", rc);
4003 }
4004 }
4005 }
4006
4007 cnptr = copynames;
4008 for (;;)
4009 {
4010 int rc;
4011 char copybuffer[256];
4012
4013 if (use_pcre16)
4014 {
4015 if (*(pcre_uint16 *)cnptr == 0) break;
4016 }
4017 else
4018 {
4019 if (*(pcre_uint8 *)cnptr == 0) break;
4020 }
4021
4022 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4023 cnptr, copybuffer, sizeof(copybuffer));
4024
4025 if (rc < 0)
4026 {
4027 fprintf(outfile, "copy substring ");
4028 PCHARSV(cnptr, 0, -1, outfile);
4029 fprintf(outfile, " failed %d\n", rc);
4030 }
4031 else
4032 {
4033 fprintf(outfile, " C ");
4034 PCHARSV(copybuffer, 0, rc, outfile);
4035 fprintf(outfile, " (%d) ", rc);
4036 PCHARSV(cnptr, 0, -1, outfile);
4037 putc('\n', outfile);
4038 }
4039
4040 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4041 }
4042
4043 for (i = 0; i < 32; i++)
4044 {
4045 if ((getstrings & (1 << i)) != 0)
4046 {
4047 int rc;
4048 const char *substring;
4049 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4050 if (rc < 0)
4051 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4052 else
4053 {
4054 fprintf(outfile, "%2dG ", i);
4055 PCHARSV(substring, 0, rc, outfile);
4056 fprintf(outfile, " (%d)\n", rc);
4057 PCRE_FREE_SUBSTRING(substring);
4058 }
4059 }
4060 }
4061
4062 gnptr = getnames;
4063 for (;;)
4064 {
4065 int rc;
4066 const char *substring;
4067
4068 if (use_pcre16)
4069 {
4070 if (*(pcre_uint16 *)gnptr == 0) break;
4071 }
4072 else
4073 {
4074 if (*(pcre_uint8 *)gnptr == 0) break;
4075 }
4076
4077 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4078 gnptr, &substring);
4079 if (rc < 0)
4080 {
4081 fprintf(outfile, "get substring ");
4082 PCHARSV(gnptr, 0, -1, outfile);
4083 fprintf(outfile, " failed %d\n", rc);
4084 }
4085 else
4086 {
4087 fprintf(outfile, " G ");
4088 PCHARSV(substring, 0, rc, outfile);
4089 fprintf(outfile, " (%d) ", rc);
4090 PCHARSV(gnptr, 0, -1, outfile);
4091 PCRE_FREE_SUBSTRING(substring);
4092 putc('\n', outfile);
4093 }
4094
4095 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4096 }
4097
4098 if (getlist)
4099 {
4100 int rc;
4101 const char **stringlist;
4102 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4103 if (rc < 0)
4104 fprintf(outfile, "get substring list failed %d\n", rc);
4105 else
4106 {
4107 for (i = 0; i < count; i++)
4108 {
4109 fprintf(outfile, "%2dL ", i);
4110 PCHARSV(stringlist[i], 0, -1, outfile);
4111 putc('\n', outfile);
4112 }
4113 if (stringlist[i] != NULL)
4114 fprintf(outfile, "string list not terminated by NULL\n");
4115 PCRE_FREE_SUBSTRING_LIST(stringlist);
4116 }
4117 }
4118 }
4119
4120 /* There was a partial match */
4121
4122 else if (count == PCRE_ERROR_PARTIAL)
4123 {
4124 if (markptr == NULL) fprintf(outfile, "Partial match");
4125 else
4126 {
4127 fprintf(outfile, "Partial match, mark=");
4128 PCHARSV(markptr, 0, -1, outfile);
4129 }
4130 if (use_size_offsets > 1)
4131 {
4132 fprintf(outfile, ": ");
4133 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4134 outfile);
4135 }
4136 fprintf(outfile, "\n");
4137 break; /* Out of the /g loop */
4138 }
4139
4140 /* Failed to match. If this is a /g or /G loop and we previously set
4141 g_notempty after a null match, this is not necessarily the end. We want
4142 to advance the start offset, and continue. We won't be at the end of the
4143 string - that was checked before setting g_notempty.
4144
4145 Complication arises in the case when the newline convention is "any",
4146 "crlf", or "anycrlf". If the previous match was at the end of a line
4147 terminated by CRLF, an advance of one character just passes the \r,
4148 whereas we should prefer the longer newline sequence, as does the code in
4149 pcre_exec(). Fudge the offset value to achieve this. We check for a
4150 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4151 find the default.
4152
4153 Otherwise, in the case of UTF-8 matching, the advance must be one
4154 character, not one byte. */
4155
4156 else
4157 {
4158 if (g_notempty != 0)
4159 {
4160 int onechar = 1;
4161 unsigned int obits = ((real_pcre *)re)->options;
4162 use_offsets[0] = start_offset;
4163 if ((obits & PCRE_NEWLINE_BITS) == 0)
4164 {
4165 int d;
4166 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4167 /* Note that these values are always the ASCII ones, even in
4168 EBCDIC environments. CR = 13, NL = 10. */
4169 obits = (d == 13)? PCRE_NEWLINE_CR :
4170 (d == 10)? PCRE_NEWLINE_LF :
4171 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4172 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4173 (d == -1)? PCRE_NEWLINE_ANY : 0;
4174 }
4175 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4176 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4177 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4178 &&
4179 start_offset < len - 1 &&
4180 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4181 (use_pcre16?
4182 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4183 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4184 :
4185 bptr[start_offset] == '\r'
4186 && bptr[start_offset + 1] == '\n')
4187 #elif defined SUPPORT_PCRE16
4188 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4189 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4190 #else
4191 bptr[start_offset] == '\r'
4192 && bptr[start_offset + 1] == '\n'
4193 #endif
4194 )
4195 onechar++;
4196 else if (use_utf)
4197 {
4198 while (start_offset + onechar < len)
4199 {
4200 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4201 onechar++;
4202 }
4203 }
4204 use_offsets[1] = start_offset + onechar;
4205 }
4206 else
4207 {
4208 switch(count)
4209 {
4210 case PCRE_ERROR_NOMATCH:
4211 if (gmatched == 0)
4212 {
4213 if (markptr == NULL)
4214 {
4215 fprintf(outfile, "No match\n");
4216 }
4217 else
4218 {
4219 fprintf(outfile, "No match, mark = ");
4220 PCHARSV(markptr, 0, -1, outfile);
4221 putc('\n', outfile);
4222 }
4223 }
4224 break;
4225
4226 case PCRE_ERROR_BADUTF8:
4227 case PCRE_ERROR_SHORTUTF8:
4228 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4229 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4230 use_pcre16? "16" : "8");
4231 if (use_size_offsets >= 2)
4232 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4233 use_offsets[1]);
4234 fprintf(outfile, "\n");
4235 break;
4236
4237 case PCRE_ERROR_BADUTF8_OFFSET:
4238 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4239 use_pcre16? "16" : "8");
4240 break;
4241
4242 default:
4243 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4244 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4245 else
4246 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4247 break;
4248 }
4249
4250 break; /* Out of the /g loop */
4251 }
4252 }
4253
4254 /* If not /g or /G we are done */
4255
4256 if (!do_g && !do_G) break;
4257
4258 /* If we have matched an empty string, first check to see if we are at
4259 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4260 Perl's /g option does. This turns out to be rather cunning. First we set
4261 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4262 same point. If this fails (picked up above) we advance to the next
4263 character. */
4264
4265 g_notempty = 0;
4266
4267 if (use_offsets[0] == use_offsets[1])
4268 {
4269 if (use_offsets[0] == len) break;
4270 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4271 }
4272
4273 /* For /g, update the start offset, leaving the rest alone */
4274
4275 if (do_g) start_offset = use_offsets[1];
4276
4277 /* For /G, update the pointer and length */
4278
4279 else
4280 {
4281 bptr += use_offsets[1] * CHAR_SIZE;
4282 len -= use_offsets[1];
4283 }
4284 } /* End of loop for /g and /G */
4285
4286 NEXT_DATA: continue;
4287 } /* End of loop for data lines */
4288
4289 CONTINUE:
4290
4291 #if !defined NOPOSIX
4292 if (posix || do_posix) regfree(&preg);
4293 #endif
4294
4295 if (re != NULL) new_free(re);
4296 if (extra != NULL)
4297 {
4298 PCRE_FREE_STUDY(extra);
4299 }
4300 if (locale_set)
4301 {
4302 new_free((void *)tables);
4303 setlocale(LC_CTYPE, "C");
4304 locale_set = 0;
4305 }
4306 if (jit_stack != NULL)
4307 {
4308 PCRE_JIT_STACK_FREE(jit_stack);
4309 jit_stack = NULL;
4310 }
4311 }
4312
4313 if (infile == stdin) fprintf(outfile, "\n");
4314
4315 EXIT:
4316
4317 if (infile != NULL && infile != stdin) fclose(infile);
4318 if (outfile != NULL && outfile != stdout) fclose(outfile);
4319
4320 free(buffer);
4321 free(dbuffer);
4322 free(pbuffer);
4323 free(offsets);
4324
4325 #ifdef SUPPORT_PCRE16
4326 if (buffer16 != NULL) free(buffer16);
4327 #endif
4328
4329 return yield;
4330 }
4331
4332 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5