/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 890 - (show annotations)
Wed Jan 18 16:25:19 2012 UTC (7 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 130324 byte(s)
Error occurred while calculating annotation data.
Add PCRE_CONFIG_JITTARGET output to pcretest -C.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r" /* No "b" for input under Windows */
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb" /* "b" is kept for both modes outside Windows */
105 #define OUTPUT_MODE "wb"
106 #endif /* Windows / not Windows */
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
155 #ifdef EBCDIC
156 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) /* EBCDIC build: codes 64..254 */
157 #else
158 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) /* Otherwise: codes 32..126 */
159 #endif
160 /* PRINTOK uses isprint() when locale_set is non-zero, and PRINTABLE otherwise. */
161 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
181 #ifndef SUPPORT_UTF
182 #ifndef NOUTF
183 #define NOUTF
184 #endif
185 #endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
201 #ifdef SUPPORT_PCRE8
202 /* 8-bit wrappers. The offset argument is parenthesized so that an expression passed as the offset is added as a whole. */
203 #define PCHARS8(lv, p, offset, len, f) \
204 lv = pchars((pcre_uint8 *)(p) + (offset), len, f)
205
206 #define PCHARSV8(p, offset, len, f) \
207 (void)pchars((pcre_uint8 *)(p) + (offset), len, f)
208 /* cn16 is accepted so the argument list matches READ_CAPTURE_NAME16; it is not used here. */
209 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210 p = read_capture_name8(p, cn8, re)
211
212 #define STRLEN8(p) ((int)strlen((char *)p))
213
214 #define SET_PCRE_CALLOUT8(callout) \
215 pcre_callout = callout
216
217 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218 pcre_assign_jit_stack(extra, callback, userdata)
219
220 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221 re = pcre_compile((char *)pat, options, error, erroffset, tables)
222
223 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224 namesptr, cbuffer, size) \
225 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226 (char *)namesptr, cbuffer, size)
227
228 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230
231 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232 offsets, size_offsets, workspace, size_workspace) \
233 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234 offsets, size_offsets, workspace, size_workspace)
235
236 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237 offsets, size_offsets) \
238 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239 offsets, size_offsets)
240
241 #define PCRE_FREE_STUDY8(extra) \
242 pcre_free_study(extra)
243
244 #define PCRE_FREE_SUBSTRING8(substring) \
245 pcre_free_substring(substring)
246
247 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248 pcre_free_substring_list(listptr)
249
250 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 getnamesptr, subsptr) \
252 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)getnamesptr, subsptr)
254 /* NOTE(review): the rc parameter is not used; the expansion names re, which must be a variable in the caller's scope. */
255 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
256 n = pcre_get_stringnumber(re, (char *)ptr)
257
258 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260
261 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263
264 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266
267 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268 pcre_printint(re, outfile, debug_lengths)
269
270 #define PCRE_STUDY8(extra, re, options, error) \
271 extra = pcre_study(re, options, error)
272
273 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274 pcre_jit_stack_alloc(startsize, maxsize)
275
276 #define PCRE_JIT_STACK_FREE8(stack) \
277 pcre_jit_stack_free(stack)
278
279 #endif /* SUPPORT_PCRE8 */
280
281 /* -----------------------------------------------------------*/
282
283 #ifdef SUPPORT_PCRE16
284 /* 16-bit wrappers. The offset and size arguments are parenthesized so that expression arguments are added or halved as a whole. */
285 #define PCHARS16(lv, p, offset, len, f) \
286 lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)
287
288 #define PCHARSV16(p, offset, len, f) \
289 (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)
290 /* cn8 is accepted so the argument list matches READ_CAPTURE_NAME8; it is not used here. */
291 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292 p = read_capture_name16(p, cn16, re)
293
294 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295
296 #define SET_PCRE_CALLOUT16(callout) \
297 pcre16_callout = (int (*)(pcre16_callout_block *))callout
298
299 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300 pcre16_assign_jit_stack((pcre16_extra *)extra, \
301 (pcre16_jit_callback)callback, userdata)
302
303 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305 tables)
306 /* The two copy macros pass (size)/2 as the buffer length - presumably a byte count converted to 16-bit units; confirm against callers. */
307 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
308 namesptr, cbuffer, size) \
309 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
310 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)
311
312 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
313 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
314 (PCRE_UCHAR16 *)cbuffer, (size)/2)
315
316 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317 offsets, size_offsets, workspace, size_workspace) \
318 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320 workspace, size_workspace)
321
322 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323 offsets, size_offsets) \
324 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325 len, start_offset, options, offsets, size_offsets)
326
327 #define PCRE_FREE_STUDY16(extra) \
328 pcre16_free_study((pcre16_extra *)extra)
329
330 #define PCRE_FREE_SUBSTRING16(substring) \
331 pcre16_free_substring((PCRE_SPTR16)substring)
332
333 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335
336 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337 getnamesptr, subsptr) \
338 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340 /* NOTE(review): rc is not used; re is taken from the caller's scope and, unlike the other macros here, is passed without a (pcre16 *) cast. */
341 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
342 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
343
344 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346 (PCRE_SPTR16 *)(void*)subsptr)
347
348 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350 (PCRE_SPTR16 **)(void*)listptr)
351
352 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354 tables)
355
356 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357 pcre16_printint(re, outfile, debug_lengths)
358
359 #define PCRE_STUDY16(extra, re, options, error) \
360 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361
362 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364
365 #define PCRE_JIT_STACK_FREE16(stack) \
366 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367
368 #endif /* SUPPORT_PCRE16 */
369
370
371 /* ----- Both modes are supported; a runtime test is needed, except for
372 pcre_config(), and the JIT stack functions, when it doesn't matter which
373 version is called. ----- */
374
375 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376 /* Unless noted, each macro below is statement-like: it expands to an unbraced if/else on use_pcre16 that selects the 16-bit or the 8-bit wrapper, and the caller supplies the final semicolon. */
377 #define CHAR_SIZE (use_pcre16? 2:1)
378
379 #define PCHARS(lv, p, offset, len, f) \
380 if (use_pcre16) \
381 PCHARS16(lv, p, offset, len, f); \
382 else \
383 PCHARS8(lv, p, offset, len, f)
384
385 #define PCHARSV(p, offset, len, f) \
386 if (use_pcre16) \
387 PCHARSV16(p, offset, len, f); \
388 else \
389 PCHARSV8(p, offset, len, f)
390
391 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392 if (use_pcre16) \
393 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394 else \
395 READ_CAPTURE_NAME8(p, cn8, cn16, re)
396
397 #define SET_PCRE_CALLOUT(callout) \
398 if (use_pcre16) \
399 SET_PCRE_CALLOUT16(callout); \
400 else \
401 SET_PCRE_CALLOUT8(callout)
402 /* Expression form (conditional operator), so the result can be used as a value. */
403 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404
405 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406 if (use_pcre16) \
407 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408 else \
409 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410
411 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412 if (use_pcre16) \
413 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414 else \
415 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416 /* No runtime test: when both libraries are present the 8-bit pcre_config is always used. */
417 #define PCRE_CONFIG pcre_config
418
419 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420 namesptr, cbuffer, size) \
421 if (use_pcre16) \
422 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423 namesptr, cbuffer, size); \
424 else \
425 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426 namesptr, cbuffer, size)
427
428 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429 if (use_pcre16) \
430 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431 else \
432 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433
434 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435 offsets, size_offsets, workspace, size_workspace) \
436 if (use_pcre16) \
437 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438 offsets, size_offsets, workspace, size_workspace); \
439 else \
440 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441 offsets, size_offsets, workspace, size_workspace)
442
443 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444 offsets, size_offsets) \
445 if (use_pcre16) \
446 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447 offsets, size_offsets); \
448 else \
449 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450 offsets, size_offsets)
451
452 #define PCRE_FREE_STUDY(extra) \
453 if (use_pcre16) \
454 PCRE_FREE_STUDY16(extra); \
455 else \
456 PCRE_FREE_STUDY8(extra)
457
458 #define PCRE_FREE_SUBSTRING(substring) \
459 if (use_pcre16) \
460 PCRE_FREE_SUBSTRING16(substring); \
461 else \
462 PCRE_FREE_SUBSTRING8(substring)
463
464 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465 if (use_pcre16) \
466 PCRE_FREE_SUBSTRING_LIST16(listptr); \
467 else \
468 PCRE_FREE_SUBSTRING_LIST8(listptr)
469
470 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471 getnamesptr, subsptr) \
472 if (use_pcre16) \
473 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474 getnamesptr, subsptr); \
475 else \
476 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477 getnamesptr, subsptr)
478
479 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480 if (use_pcre16) \
481 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482 else \
483 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484
485 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486 if (use_pcre16) \
487 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488 else \
489 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490
491 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492 if (use_pcre16) \
493 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494 else \
495 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496 /* Expression form: yields the pcre_jit_stack pointer from whichever allocator is selected. */
497 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498 (use_pcre16 ? \
499 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501
502 #define PCRE_JIT_STACK_FREE(stack) \
503 if (use_pcre16) \
504 PCRE_JIT_STACK_FREE16(stack); \
505 else \
506 PCRE_JIT_STACK_FREE8(stack)
507 /* Expression form: takes no arguments and calls the maketables function of the selected library. */
508 #define PCRE_MAKETABLES \
509 (use_pcre16? pcre16_maketables() : pcre_maketables())
510
511 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512 if (use_pcre16) \
513 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514 else \
515 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516
517 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518 if (use_pcre16) \
519 PCRE_PRINTINT16(re, outfile, debug_lengths); \
520 else \
521 PCRE_PRINTINT8(re, outfile, debug_lengths)
522
523 #define PCRE_STUDY(extra, re, options, error) \
524 if (use_pcre16) \
525 PCRE_STUDY16(extra, re, options, error); \
526 else \
527 PCRE_STUDY8(extra, re, options, error)
528
529 /* ----- Only 8-bit mode is supported ----- */
530
531 #elif defined SUPPORT_PCRE8
532 #define CHAR_SIZE 1 /* Fixed at 1; no runtime test is needed in this configuration */
533 #define PCHARS PCHARS8
534 #define PCHARSV PCHARSV8
535 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
536 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
537 #define STRLEN STRLEN8
538 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
539 #define PCRE_COMPILE PCRE_COMPILE8
540 #define PCRE_CONFIG pcre_config
541 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
543 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
544 #define PCRE_EXEC PCRE_EXEC8
545 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
546 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
547 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
548 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
549 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
550 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
551 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
552 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
553 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
554 #define PCRE_MAKETABLES pcre_maketables()
555 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556 #define PCRE_PRINTINT PCRE_PRINTINT8
557 #define PCRE_STUDY PCRE_STUDY8
558
559 /* ----- Only 16-bit mode is supported ----- */
560
561 #else
562 #define CHAR_SIZE 2 /* Fixed at 2; no runtime test is needed in this configuration */
563 #define PCHARS PCHARS16
564 #define PCHARSV PCHARSV16
565 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
566 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
567 #define STRLEN STRLEN16
568 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
569 #define PCRE_COMPILE PCRE_COMPILE16
570 #define PCRE_CONFIG pcre16_config /* The only configuration that does not use pcre_config */
571 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
573 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
574 #define PCRE_EXEC PCRE_EXEC16
575 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
576 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
577 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
578 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
579 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
580 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
581 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
582 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
583 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
584 #define PCRE_MAKETABLES pcre16_maketables()
585 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586 #define PCRE_PRINTINT PCRE_PRINTINT16
587 #define PCRE_STUDY PCRE_STUDY16
588 #endif /* Selection on SUPPORT_PCRE8 / SUPPORT_PCRE16 */
589
590 /* ----- End of mode-specific function call macros ----- */
591
592
593 /* Other parameters */
594
595 #ifndef CLOCKS_PER_SEC
596 #ifdef CLK_TCK
597 #define CLOCKS_PER_SEC CLK_TCK /* Use CLK_TCK where the system provides it */
598 #else
599 #define CLOCKS_PER_SEC 100 /* Fallback when neither macro is defined */
600 #endif
601 #endif /* CLOCKS_PER_SEC */
602
603 /* This is the default loop count for timing. */
604
605 #define LOOPREPEAT 500000
606
607 /* Static variables */
608
609 static FILE *outfile;
610 static int log_store = 0;
611 static int callout_count;
612 static int callout_extra;
613 static int callout_fail_count;
614 static int callout_fail_id;
615 static int debug_lengths;
616 static int first_callout;
617 static int locale_set = 0; /* Read by PRINTOK: non-zero selects isprint() */
618 static int show_malloc;
619 static int use_utf;
620 static size_t gotten_store;
621 static size_t first_gotten_store = 0;
622 static const unsigned char *last_callout_mark = NULL;
623
624 /* The buffers grow automatically if very long input lines are encountered. */
625
626 static int buffer_size = 50000; /* Initial value; the three buffer pointers below start out NULL */
627 static pcre_uint8 *buffer = NULL;
628 static pcre_uint8 *dbuffer = NULL;
629 static pcre_uint8 *pbuffer = NULL;
630
631 /* Another buffer is needed for translation to 16-bit character strings. It will
632 be obtained and extended as required. */
633
634 #ifdef SUPPORT_PCRE16
635 static int buffer16_size = 0; /* Starts at 0 with buffer16 NULL */
636 static pcre_uint16 *buffer16 = NULL;
637
638 #ifdef SUPPORT_PCRE8
639
640 /* We need the table of operator lengths that is used for 16-bit compiling, in
641 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643 appropriately for the 16-bit world. Just as a safety check, make sure that
644 COMPILE_PCRE16 is *not* set. */
645
646 #ifdef COMPILE_PCRE16
647 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648 #endif
649
650 #if LINK_SIZE == 2
651 #undef LINK_SIZE
652 #define LINK_SIZE 1 /* NOTE(review): presumably 2 bytes = one 16-bit unit - confirm */
653 #elif LINK_SIZE == 3 || LINK_SIZE == 4
654 #undef LINK_SIZE
655 #define LINK_SIZE 2 /* NOTE(review): presumably rounded up to two 16-bit units - confirm */
656 #else
657 #error LINK_SIZE must be either 2, 3, or 4
658 #endif
659
660 #undef IMM2_SIZE
661 #define IMM2_SIZE 1
662
663 #endif /* SUPPORT_PCRE8 */
664 /* OP_LENGTHS is expanded here, so it sees the LINK_SIZE and IMM2_SIZE values in force at this point. */
665 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666 #endif /* SUPPORT_PCRE16 */
667
668 /* If we have 8-bit support, default use_pcre16 to false; if there is also
669 16-bit support, it can be changed by an option. If there is no 8-bit support,
670 there must be 16-bit support, so default it to 1. */
671
672 #ifdef SUPPORT_PCRE8
673 static int use_pcre16 = 0; /* 8-bit library available: start in 8-bit mode */
674 #else
675 static int use_pcre16 = 1; /* No 8-bit library: 16-bit mode from the start */
676 #endif /* SUPPORT_PCRE8 */
677
678 /* Textual explanations for runtime error codes. NULL entries are codes that are handled specially or never returned. NOTE(review): presumably indexed by the negated error code - confirm against the caller. */
679
680 static const char *errtexts[] = {
681 NULL, /* 0 is no error */
682 NULL, /* NOMATCH is handled specially */
683 "NULL argument passed",
684 "bad option value",
685 "magic number missing",
686 "unknown opcode - pattern overwritten?",
687 "no more memory",
688 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
689 "match limit exceeded",
690 "callout error code",
691 NULL, /* BADUTF8/16 is handled specially */
692 NULL, /* BADUTF8/16 offset is handled specially */
693 NULL, /* PARTIAL is handled specially */
694 "not used - internal error",
695 "internal error - pattern overwritten?",
696 "bad count value",
697 "item unsupported for DFA matching",
698 "backreference condition or recursion test not supported for DFA matching",
699 "match limit not supported for DFA matching",
700 "workspace size exceeded in DFA matching",
701 "too much recursion for DFA matching",
702 "recursion limit exceeded",
703 "not used - internal error",
704 "invalid combination of newline options",
705 "bad offset value",
706 NULL, /* SHORTUTF8/16 is handled specially */
707 "nested recursion at the same subject position",
708 "JIT stack limit reached",
709 "pattern compiled in wrong mode: 8-bit/16-bit error"
710 };
711
712
713 /*************************************************
714 * Alternate character tables *
715 *************************************************/
716
717 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718 using the default tables of the library. However, the T option can be used to
719 select alternate sets of tables, for different kinds of testing. Note also that
720 the L (locale) option also adjusts the tables. */
721
722 /* This is the set of tables distributed as default with PCRE. It recognizes
723 only ASCII characters. */
724
725 static const pcre_uint8 tables0[] = {
726
727 /* This table is a lower casing table. */
728
729 0, 1, 2, 3, 4, 5, 6, 7,
730 8, 9, 10, 11, 12, 13, 14, 15,
731 16, 17, 18, 19, 20, 21, 22, 23,
732 24, 25, 26, 27, 28, 29, 30, 31,
733 32, 33, 34, 35, 36, 37, 38, 39,
734 40, 41, 42, 43, 44, 45, 46, 47,
735 48, 49, 50, 51, 52, 53, 54, 55,
736 56, 57, 58, 59, 60, 61, 62, 63,
737 64, 97, 98, 99,100,101,102,103,
738 104,105,106,107,108,109,110,111,
739 112,113,114,115,116,117,118,119,
740 120,121,122, 91, 92, 93, 94, 95,
741 96, 97, 98, 99,100,101,102,103,
742 104,105,106,107,108,109,110,111,
743 112,113,114,115,116,117,118,119,
744 120,121,122,123,124,125,126,127,
745 128,129,130,131,132,133,134,135,
746 136,137,138,139,140,141,142,143,
747 144,145,146,147,148,149,150,151,
748 152,153,154,155,156,157,158,159,
749 160,161,162,163,164,165,166,167,
750 168,169,170,171,172,173,174,175,
751 176,177,178,179,180,181,182,183,
752 184,185,186,187,188,189,190,191,
753 192,193,194,195,196,197,198,199,
754 200,201,202,203,204,205,206,207,
755 208,209,210,211,212,213,214,215,
756 216,217,218,219,220,221,222,223,
757 224,225,226,227,228,229,230,231,
758 232,233,234,235,236,237,238,239,
759 240,241,242,243,244,245,246,247,
760 248,249,250,251,252,253,254,255,
761
762 /* This table is a case flipping table. */
763
764 0, 1, 2, 3, 4, 5, 6, 7,
765 8, 9, 10, 11, 12, 13, 14, 15,
766 16, 17, 18, 19, 20, 21, 22, 23,
767 24, 25, 26, 27, 28, 29, 30, 31,
768 32, 33, 34, 35, 36, 37, 38, 39,
769 40, 41, 42, 43, 44, 45, 46, 47,
770 48, 49, 50, 51, 52, 53, 54, 55,
771 56, 57, 58, 59, 60, 61, 62, 63,
772 64, 97, 98, 99,100,101,102,103,
773 104,105,106,107,108,109,110,111,
774 112,113,114,115,116,117,118,119,
775 120,121,122, 91, 92, 93, 94, 95,
776 96, 65, 66, 67, 68, 69, 70, 71,
777 72, 73, 74, 75, 76, 77, 78, 79,
778 80, 81, 82, 83, 84, 85, 86, 87,
779 88, 89, 90,123,124,125,126,127,
780 128,129,130,131,132,133,134,135,
781 136,137,138,139,140,141,142,143,
782 144,145,146,147,148,149,150,151,
783 152,153,154,155,156,157,158,159,
784 160,161,162,163,164,165,166,167,
785 168,169,170,171,172,173,174,175,
786 176,177,178,179,180,181,182,183,
787 184,185,186,187,188,189,190,191,
788 192,193,194,195,196,197,198,199,
789 200,201,202,203,204,205,206,207,
790 208,209,210,211,212,213,214,215,
791 216,217,218,219,220,221,222,223,
792 224,225,226,227,228,229,230,231,
793 232,233,234,235,236,237,238,239,
794 240,241,242,243,244,245,246,247,
795 248,249,250,251,252,253,254,255,
796
797 /* This table contains bit maps for various character classes. Each map is 32
798 bytes long and the bits run from the least significant end of each byte. The
799 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800 graph, print, punct, and cntrl. Other classes are built from combinations. */
801
802 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806
807 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811
812 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821
822 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826
827 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831
832 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836
837 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841
842 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846
847 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851
852 /* This table identifies various classes of character by individual bits:
853 0x01 white space character
854 0x02 letter
855 0x04 decimal digit
856 0x08 hexadecimal digit
857 0x10 alphanumeric or '_'
858 0x80 regular expression metacharacter or binary zero
859 */
860
861 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
862 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
865 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
866 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
867 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
868 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
869 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
870 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
871 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
872 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
873 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
874 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
875 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
876 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
877 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893
894 /* This is a set of tables that came originally from a Windows user. It seems to
895 be at least an approximation of ISO 8859. In particular, there are characters
896 greater than 128 that are marked as spaces, letters, etc. The data is 1088
bytes long and appears to follow the same four-part layout as the default
table set above: 256 bytes, 256 bytes, ten 32-byte bit maps, 256 bytes. */
897
898 static const pcre_uint8 tables1[] = {
/* Part 1 (256 bytes): case-folding map. 'A'-'Z' (65-90) map to 'a'-'z', and
192-222 map to 224-254, with 215 left as itself. */
899 0,1,2,3,4,5,6,7,
900 8,9,10,11,12,13,14,15,
901 16,17,18,19,20,21,22,23,
902 24,25,26,27,28,29,30,31,
903 32,33,34,35,36,37,38,39,
904 40,41,42,43,44,45,46,47,
905 48,49,50,51,52,53,54,55,
906 56,57,58,59,60,61,62,63,
907 64,97,98,99,100,101,102,103,
908 104,105,106,107,108,109,110,111,
909 112,113,114,115,116,117,118,119,
910 120,121,122,91,92,93,94,95,
911 96,97,98,99,100,101,102,103,
912 104,105,106,107,108,109,110,111,
913 112,113,114,115,116,117,118,119,
914 120,121,122,123,124,125,126,127,
915 128,129,130,131,132,133,134,135,
916 136,137,138,139,140,141,142,143,
917 144,145,146,147,148,149,150,151,
918 152,153,154,155,156,157,158,159,
919 160,161,162,163,164,165,166,167,
920 168,169,170,171,172,173,174,175,
921 176,177,178,179,180,181,182,183,
922 184,185,186,187,188,189,190,191,
923 224,225,226,227,228,229,230,231,
924 232,233,234,235,236,237,238,239,
925 240,241,242,243,244,245,246,215,
926 248,249,250,251,252,253,254,223,
927 224,225,226,227,228,229,230,231,
928 232,233,234,235,236,237,238,239,
929 240,241,242,243,244,245,246,247,
930 248,249,250,251,252,253,254,255,
/* Part 2 (256 bytes): case-swapping map. Upper-case entries hold the
lower-case code and lower-case entries hold the upper-case code. */
931 0,1,2,3,4,5,6,7,
932 8,9,10,11,12,13,14,15,
933 16,17,18,19,20,21,22,23,
934 24,25,26,27,28,29,30,31,
935 32,33,34,35,36,37,38,39,
936 40,41,42,43,44,45,46,47,
937 48,49,50,51,52,53,54,55,
938 56,57,58,59,60,61,62,63,
939 64,97,98,99,100,101,102,103,
940 104,105,106,107,108,109,110,111,
941 112,113,114,115,116,117,118,119,
942 120,121,122,91,92,93,94,95,
943 96,65,66,67,68,69,70,71,
944 72,73,74,75,76,77,78,79,
945 80,81,82,83,84,85,86,87,
946 88,89,90,123,124,125,126,127,
947 128,129,130,131,132,133,134,135,
948 136,137,138,139,140,141,142,143,
949 144,145,146,147,148,149,150,151,
950 152,153,154,155,156,157,158,159,
951 160,161,162,163,164,165,166,167,
952 168,169,170,171,172,173,174,175,
953 176,177,178,179,180,181,182,183,
954 184,185,186,187,188,189,190,191,
955 224,225,226,227,228,229,230,231,
956 232,233,234,235,236,237,238,239,
957 240,241,242,243,244,245,246,215,
958 248,249,250,251,252,253,254,223,
959 192,193,194,195,196,197,198,199,
960 200,201,202,203,204,205,206,207,
961 208,209,210,211,212,213,214,247,
962 216,217,218,219,220,221,222,255,
/* Part 3 (320 bytes): ten character-class bit maps of 32 bytes (four rows)
each. NOTE(review): presumably in the same class order as the default table
set above (space, xdigit, digit, upper, lower, word, graph, print, punct,
cntrl) - confirm. */
963 0,62,0,0,1,0,0,0,
964 0,0,0,0,0,0,0,0,
965 32,0,0,0,1,0,0,0,
966 0,0,0,0,0,0,0,0,
967 0,0,0,0,0,0,255,3,
968 126,0,0,0,126,0,0,0,
969 0,0,0,0,0,0,0,0,
970 0,0,0,0,0,0,0,0,
971 0,0,0,0,0,0,255,3,
972 0,0,0,0,0,0,0,0,
973 0,0,0,0,0,0,12,2,
974 0,0,0,0,0,0,0,0,
975 0,0,0,0,0,0,0,0,
976 254,255,255,7,0,0,0,0,
977 0,0,0,0,0,0,0,0,
978 255,255,127,127,0,0,0,0,
979 0,0,0,0,0,0,0,0,
980 0,0,0,0,254,255,255,7,
981 0,0,0,0,0,4,32,4,
982 0,0,0,128,255,255,127,255,
983 0,0,0,0,0,0,255,3,
984 254,255,255,135,254,255,255,7,
985 0,0,0,0,0,4,44,6,
986 255,255,127,255,255,255,127,255,
987 0,0,0,0,254,255,255,255,
988 255,255,255,255,255,255,255,127,
989 0,0,0,0,254,255,255,255,
990 255,255,255,255,255,255,255,255,
991 0,2,0,0,255,255,255,255,
992 255,255,255,255,255,255,255,127,
993 0,0,0,0,255,255,255,255,
994 255,255,255,255,255,255,255,255,
995 0,0,0,0,254,255,0,252,
996 1,0,0,248,1,0,0,120,
997 0,0,0,0,254,255,255,255,
998 0,0,128,0,0,0,128,0,
999 255,255,255,255,0,0,0,0,
1000 0,0,0,0,0,0,0,128,
1001 255,255,255,255,0,0,0,0,
1002 0,0,0,0,0,0,0,0,
/* Part 4 (256 bytes): per-character type flags. NOTE(review): the values
look like the flag bits documented for the default table set above (0x01
white space, 0x02 letter, 0x04 decimal digit, 0x08 hexadecimal digit, 0x10
alphanumeric or '_', 0x80 metacharacter or binary zero) - confirm. */
1003 128,0,0,0,0,0,0,0,
1004 0,1,1,0,1,1,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,0,0,
1007 1,0,0,0,128,0,0,0,
1008 128,128,128,128,0,0,128,0,
1009 28,28,28,28,28,28,28,28,
1010 28,28,0,0,0,0,0,128,
1011 0,26,26,26,26,26,26,18,
1012 18,18,18,18,18,18,18,18,
1013 18,18,18,18,18,18,18,18,
1014 18,18,18,128,128,0,128,16,
1015 0,26,26,26,26,26,26,18,
1016 18,18,18,18,18,18,18,18,
1017 18,18,18,18,18,18,18,18,
1018 18,18,18,128,128,0,0,0,
1019 0,0,0,0,0,1,0,0,
1020 0,0,0,0,0,0,0,0,
1021 0,0,0,0,0,0,0,0,
1022 0,0,0,0,0,0,0,0,
1023 1,0,0,0,0,0,0,0,
1024 0,0,18,0,0,0,0,0,
1025 0,0,20,20,0,18,0,0,
1026 0,20,18,0,0,0,0,0,
1027 18,18,18,18,18,18,18,18,
1028 18,18,18,18,18,18,18,18,
1029 18,18,18,18,18,18,18,0,
1030 18,18,18,18,18,18,18,18,
1031 18,18,18,18,18,18,18,18,
1032 18,18,18,18,18,18,18,18,
1033 18,18,18,18,18,18,18,0,
1034 18,18,18,18,18,18,18,18
1035 };
1036
1037
1038
1039
#ifndef HAVE_STRERROR
/*************************************************
*    Provide strerror() for non-ANSI libraries   *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror() but does export the sys_nerr/sys_errlist pair.

Argument:
  n         an error number

Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
            "unknown error number" string
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1059
1060
1061 /*************************************************
1062 * JIT memory callback *
1063 *************************************************/
1064
1065 static pcre_jit_stack* jit_callback(void *arg)
1066 {
1067 return (pcre_jit_stack *)arg;
1068 }
1069
1070
1071 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 /*************************************************
1073 * Convert UTF-8 string to value *
1074 *************************************************/
1075
1076 /* This function takes one or more bytes that represents a UTF-8 character,
1077 and returns the value of the character.
1078
1079 Argument:
1080 utf8bytes a pointer to the byte vector
1081 vptr a pointer to an int to receive the value
1082
1083 Returns: > 0 => the number of bytes consumed
1084 -6 to 0 => malformed UTF-8 character at offset = (-return)
1085 */
1086
1087 static int
1088 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089 {
1090 int c = *utf8bytes++;
1091 int d = c;
1092 int i, j, s;
1093
1094 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095 {
1096 if ((d & 0x80) == 0) break;
1097 d <<= 1;
1098 }
1099
1100 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102
1103 /* i now has a value in the range 1-5 */
1104
1105 s = 6*i;
1106 d = (c & utf8_table3[i]) << s;
1107
1108 for (j = 0; j < i; j++)
1109 {
1110 c = *utf8bytes++;
1111 if ((c & 0xc0) != 0x80) return -(j+1);
1112 s -= 6;
1113 d |= (c & 0x3f) << s;
1114 }
1115
1116 /* Check that encoding was the correct unique one */
1117
1118 for (j = 0; j < utf8_table1_size; j++)
1119 if (d <= utf8_table1[j]) break;
1120 if (j != i) return -(i+1);
1121
1122 /* Valid value */
1123
1124 *vptr = d;
1125 return i+1;
1126 }
1127 #endif /* NOUTF || SUPPORT_PCRE16 */
1128
1129
1130
1131 #if !defined NOUTF || defined SUPPORT_PCRE16
1132 /*************************************************
1133 * Convert character value to UTF-8 *
1134 *************************************************/
1135
1136 /* This function takes an integer value in the range 0 - 0x7fffffff
1137 and encodes it as a UTF-8 character in 0 to 6 bytes.
1138
1139 Arguments:
1140 cvalue the character value
1141 utf8bytes pointer to buffer for result - at least 6 bytes long
1142
1143 Returns: number of characters placed in the buffer
1144 */
1145
1146 static int
1147 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148 {
1149 register int i, j;
1150 for (i = 0; i < utf8_table1_size; i++)
1151 if (cvalue <= utf8_table1[i]) break;
1152 utf8bytes += i;
1153 for (j = i; j > 0; j--)
1154 {
1155 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156 cvalue >>= 6;
1157 }
1158 *utf8bytes = utf8_table2[i] | cvalue;
1159 return i + 1;
1160 }
1161 #endif
1162
1163
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is grown on demand to
2*len + 2 bytes. That is enough in every case: in non-UTF mode each input
byte becomes one 16-bit unit, and when decoding UTF-8 a value below 0x10000
becomes one unit from at least one input byte, while a higher value becomes
two units from four input bytes. The extra two bytes hold the trailing zero.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *out;
  int needed = 2*len + 2;

  /* Replace buffer16 when it is too small; old contents are not kept. */

  if (buffer16_size < needed)
  {
    if (buffer16 != NULL) free(buffer16);
    buffer16_size = needed;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
    {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
    }
  }

  out = buffer16;

  if (utf || data)
  {
    /* The input is decoded as UTF-8, one character at a time. */

    int c = 0;
    while (len > 0)
    {
      int used = utf82ord(p, &c);
      if (used <= 0) return -1;
      if (c > 0x10ffff) return -2;
      p += used;
      len -= used;

      if (c >= 0x10000)
      {
        /* Needs a surrogate pair, which is only allowed in UTF mode. */
        if (!utf) return -3;
        c -= 0x10000;
        *out++ = 0xD800 | (c >> 10);
        *out++ = 0xDC00 | (c & 0x3ff);
      }
      else
      {
        *out++ = c;
      }
    }
  }
  else
  {
    /* A non-UTF pattern: widen each byte to one 16-bit unit. */

    for (; len > 0; len--) *out++ = *p++;
  }

  *out = 0;
  return out - buffer16;
}
#endif
1242
1243
1244 /*************************************************
1245 * Read or extend an input line *
1246 *************************************************/
1247
1248 /* Input lines are read into buffer, but both patterns and data lines can be
1249 continued over multiple input lines. In addition, if the buffer fills up, we
1250 want to automatically expand it so as to be able to handle extremely large
1251 lines that are needed for certain stress tests. When the input buffer is
1252 expanded, the other two buffers must also be expanded likewise, and the
1253 contents of pbuffer, which are a copy of the input for callouts, must be
1254 preserved (for when expansion happens for a data line). This is not the most
1255 optimal way of handling this, but hey, this is just a test program!
1256
1257 Arguments:
1258 f the file to read
1259 start where in buffer to start (this *must* be within buffer)
1260 prompt for stdin or readline()
1261
1262 Returns: pointer to the start of new data
1263 could be a copy of start, or could be moved
1264 NULL if no data read and EOF reached
1265 */
1266
1267 static pcre_uint8 *
1268 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 {
1270 pcre_uint8 *here = start;
1271
1272 for (;;)
1273 {
1274 int rlen = (int)(buffer_size - (here - buffer));
1275
1276 if (rlen > 1000)
1277 {
1278 int dlen;
1279
1280 /* If libreadline support is required, use readline() to read a line if the
1281 input is a terminal. Note that readline() removes the trailing newline, so
1282 we must put it back again, to be compatible with fgets(). */
1283
1284 #ifdef SUPPORT_LIBREADLINE
1285 if (isatty(fileno(f)))
1286 {
1287 size_t len;
1288 char *s = readline(prompt);
1289 if (s == NULL) return (here == start)? NULL : start;
1290 len = strlen(s);
1291 if (len > 0) add_history(s);
1292 if (len > rlen - 1) len = rlen - 1;
1293 memcpy(here, s, len);
1294 here[len] = '\n';
1295 here[len+1] = 0;
1296 free(s);
1297 }
1298 else
1299 #endif
1300
1301 /* Read the next line by normal means, prompting if the file is stdin. */
1302
1303 {
1304 if (f == stdin) printf("%s", prompt);
1305 if (fgets((char *)here, rlen, f) == NULL)
1306 return (here == start)? NULL : start;
1307 }
1308
1309 dlen = (int)strlen((char *)here);
1310 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311 here += dlen;
1312 }
1313
1314 else
1315 {
1316 int new_buffer_size = 2*buffer_size;
1317 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320
1321 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322 {
1323 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324 exit(1);
1325 }
1326
1327 memcpy(new_buffer, buffer, buffer_size);
1328 memcpy(new_pbuffer, pbuffer, buffer_size);
1329
1330 buffer_size = new_buffer_size;
1331
1332 start = new_buffer + (start - buffer);
1333 here = new_buffer + (here - buffer);
1334
1335 free(buffer);
1336 free(dbuffer);
1337 free(pbuffer);
1338
1339 buffer = new_buffer;
1340 dbuffer = new_dbuffer;
1341 pbuffer = new_pbuffer;
1342 }
1343 }
1344
1345 return NULL; /* Control never gets here */
1346 }
1347
1348
1349
1350 /*************************************************
1351 * Read number from string *
1352 *************************************************/
1353
1354 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355 around with conditional compilation, just do the job by hand. It is only used
1356 for unpicking arguments, so just keep it simple.
1357
1358 Arguments:
1359 str string to be converted
1360 endptr where to put the end pointer
1361
1362 Returns: the unsigned long
1363 */
1364
1365 static int
1366 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 {
1368 int result = 0;
1369 while(*str != 0 && isspace(*str)) str++;
1370 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371 *endptr = str;
1372 return(result);
1373 }
1374
1375
1376
1377 /*************************************************
1378 * Print one character *
1379 *************************************************/
1380
1381 /* Print a single character either literally, or as a hex escape. */
1382
1383 static int pchar(int c, FILE *f)
1384 {
1385 if (PRINTOK(c))
1386 {
1387 if (f != NULL) fprintf(f, "%c", c);
1388 return 1;
1389 }
1390
1391 if (c < 0x100)
1392 {
1393 if (use_utf)
1394 {
1395 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396 return 6;
1397 }
1398 else
1399 {
1400 if (f != NULL) fprintf(f, "\\x%02x", c);
1401 return 4;
1402 }
1403 }
1404
1405 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406 return (c <= 0x000000ff)? 6 :
1407 (c <= 0x00000fff)? 7 :
1408 (c <= 0x0000ffff)? 8 :
1409 (c <= 0x000fffff)? 9 : 10;
1410 }
1411
1412
1413
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a byte string one character at a time via pchar(). In UTF mode
each UTF-8 sequence is decoded first; a byte that does not start a valid
sequence, or whose sequence would run past the end of the string, is output
as a single byte.

Arguments:
  p         the string
  length    number of bytes, or negative to use strlen()
  f         where to print, or NULL just to count without printing

Returns:    the number of characters printed (sum of the pchar() results)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
  int c = 0;
  int yield = 0;
  int remaining = (length < 0)? (int)strlen((char *)p) : length;

  while (remaining > 0)
  {
#if !defined NOUTF
    if (use_utf)
    {
      int used = utf82ord(p, &c);
      if (used > 0 && used <= remaining)   /* Mustn't run over the end */
      {
        remaining -= used;
        p += used;
        yield += pchar(c, f);
        continue;
      }
    }
#endif
    c = *p++;
    remaining--;
    yield += pchar(c, f);
  }

  return yield;
}
#endif
1452
1453
1454
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Argument:
  p         the string, ended by a zero 16-bit unit

Returns:    the number of 16-bit units before the terminating zero
*/

static int strlen16(PCRE_SPTR16 p)
{
  PCRE_SPTR16 end = p;
  while (*end != 0) end++;
  return (int)(end - p);
}
#endif /* SUPPORT_PCRE16 */
1467
1468
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Print a 16-bit string one character at a time via pchar(). In UTF mode
a high surrogate (0xD800-0xDBFF) that is directly followed by a low surrogate
(0xDC00-0xDFFF) is combined with it into one character; an unpaired
surrogate is output as it stands.

Arguments:
  p         the string
  length    number of 16-bit units, or negative to use strlen16()
  f         where to print, or NULL just to count without printing

Returns:    the number of characters printed (sum of the pchar() results)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
  int yield = 0;

  if (length < 0)
    length = strlen16(p);

  while (length-- > 0)
  {
    int c = *p++ & 0xffff;
#if !defined NOUTF
    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
      int d = *p & 0xffff;

      /* The low surrogate range includes 0xDFFF itself; leaving it out
      would split characters such as U+10FFFF (0xDBFF 0xDFFF) in two. */

      if (d >= 0xDC00 && d <= 0xDFFF)
      {
        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
        length--;
        p++;
      }
    }
#endif
    yield += pchar(c, f);
  }

  return yield;
}
#endif /* SUPPORT_PCRE16 */
1505
1506
1507
#ifdef SUPPORT_PCRE8
/*************************************************
*    Read a capture name (8-bit) and check it    *
*************************************************/

/* Copy a run of alphanumeric characters from the input to the output area,
follow it with two zero bytes, and look the name up in the compiled pattern.
If the pattern has no such name, a message is written to outfile; the name is
kept in the output area regardless.

Arguments:
  p         points to the name in the input text
  pp        points to the output pointer; on return it is left pointing at
              the second of the two zero bytes
  re        the compiled pattern

Returns:    pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
  pcre_uint8 *name = *pp;
  pcre_uint8 *out = name;

  for (; isalnum(*p); p++) *out++ = *p;

  out[0] = 0;
  out[1] = 0;
  out++;

  if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
    fprintf(outfile, "no parentheses with name \"");
    PCHARSV(name, 0, -1, outfile);
    fprintf(outfile, "\"\n");
  }

  *pp = out;
  return p;
}
#endif /* SUPPORT_PCRE8 */
1531
1532
1533
#ifdef SUPPORT_PCRE16
/*************************************************
*   Read a capture name (16-bit) and check it    *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric input byte is
widened to one 16-bit unit in the output area, the name is followed by two
zero units, and it is then looked up in the compiled 16-bit pattern. If the
pattern has no such name, a message is written to outfile; the name is kept
in the output area regardless.

Arguments:
  p         points to the name in the (8-bit) input text
  pp        points to the 16-bit output pointer; on return it is left
              pointing at the second of the two zero units
  re        the compiled pattern

Returns:    pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
  pcre_uint16 *name = *pp;
  pcre_uint16 *out = name;

  for (; isalnum(*p); p++) *out++ = *p;

  out[0] = 0;
  out[1] = 0;
  out++;

  if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
    fprintf(outfile, "no parentheses with name \"");
    PCHARSV(name, 0, -1, outfile);
    fprintf(outfile, "\"\n");
  }

  *pp = out;
  return p;
}
#endif /* SUPPORT_PCRE16 */
1558
1559
1560
1561 /*************************************************
1562 * Callout function *
1563 *************************************************/
1564
1565 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566 the match. Yield zero unless more callouts than the fail count, or the callout
1567 data is not zero. */
1568
1569 static int callout(pcre_callout_block *cb)
1570 {
1571 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572 int i, pre_start, post_start, subject_length;
1573
1574 if (callout_extra)
1575 {
1576 fprintf(f, "Callout %d: last capture = %d\n",
1577 cb->callout_number, cb->capture_last);
1578
1579 for (i = 0; i < cb->capture_top * 2; i += 2)
1580 {
1581 if (cb->offset_vector[i] < 0)
1582 fprintf(f, "%2d: <unset>\n", i/2);
1583 else
1584 {
1585 fprintf(f, "%2d: ", i/2);
1586 PCHARSV(cb->subject, cb->offset_vector[i],
1587 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588 fprintf(f, "\n");
1589 }
1590 }
1591 }
1592
1593 /* Re-print the subject in canonical form, the first time or if giving full
1594 datails. On subsequent calls in the same match, we use pchars just to find the
1595 printed lengths of the substrings. */
1596
1597 if (f != NULL) fprintf(f, "--->");
1598
1599 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600 PCHARS(post_start, cb->subject, cb->start_match,
1601 cb->current_position - cb->start_match, f);
1602
1603 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604
1605 PCHARSV(cb->subject, cb->current_position,
1606 cb->subject_length - cb->current_position, f);
1607
1608 if (f != NULL) fprintf(f, "\n");
1609
1610 /* Always print appropriate indicators, with callout number if not already
1611 shown. For automatic callouts, show the pattern offset. */
1612
1613 if (cb->callout_number == 255)
1614 {
1615 fprintf(outfile, "%+3d ", cb->pattern_position);
1616 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1617 }
1618 else
1619 {
1620 if (callout_extra) fprintf(outfile, " ");
1621 else fprintf(outfile, "%3d ", cb->callout_number);
1622 }
1623
1624 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625 fprintf(outfile, "^");
1626
1627 if (post_start > 0)
1628 {
1629 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630 fprintf(outfile, "^");
1631 }
1632
1633 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634 fprintf(outfile, " ");
1635
1636 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637 pbuffer + cb->pattern_position);
1638
1639 fprintf(outfile, "\n");
1640 first_callout = 0;
1641
1642 if (cb->mark != last_callout_mark)
1643 {
1644 if (cb->mark == NULL)
1645 fprintf(outfile, "Latest Mark: <unset>\n");
1646 else
1647 {
1648 fprintf(outfile, "Latest Mark: ");
1649 PCHARSV(cb->mark, 0, -1, outfile);
1650 putc('\n', outfile);
1651 }
1652 last_callout_mark = cb->mark;
1653 }
1654
1655 if (cb->callout_data != NULL)
1656 {
1657 int callout_data = *((int *)(cb->callout_data));
1658 if (callout_data != 0)
1659 {
1660 fprintf(outfile, "Callout data = %d\n", callout_data);
1661 return callout_data;
1662 }
1663 }
1664
1665 return (cb->callout_number != callout_fail_id)? 0 :
1666 (++callout_count >= callout_fail_count)? 1 : 0;
1667 }
1668
1669
1670 /*************************************************
1671 * Local malloc functions *
1672 *************************************************/
1673
1674 /* Alternative malloc function, to test functionality and save the size of a
1675 compiled re, which is the first store request that pcre_compile() makes. The
1676 show_malloc variable is set only during matching. */
1677
1678 static void *new_malloc(size_t size)
1679 {
1680 void *block = malloc(size);
1681 gotten_store = size;
1682 if (first_gotten_store == 0) first_gotten_store = size;
1683 if (show_malloc)
1684 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 return block;
1686 }
1687
1688 static void new_free(void *block)
1689 {
1690 if (show_malloc)
1691 fprintf(outfile, "free %p\n", block);
1692 free(block);
1693 }
1694
1695 /* For recursion malloc/free, to test stacking calls */
1696
1697 static void *stack_malloc(size_t size)
1698 {
1699 void *block = malloc(size);
1700 if (show_malloc)
1701 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 return block;
1703 }
1704
1705 static void stack_free(void *block)
1706 {
1707 if (show_malloc)
1708 fprintf(outfile, "stack_free %p\n", block);
1709 free(block);
1710 }
1711
1712
1713 /*************************************************
1714 * Call pcre_fullinfo() *
1715 *************************************************/
1716
1717 /* Get one piece of information from the pcre_fullinfo() function. When only
1718 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719 value, but the code is defensive.
1720
1721 Arguments:
1722 re compiled regex
1723 study study data
1724 option PCRE_INFO_xxx option
1725 ptr where to put the data
1726
1727 Returns: 0 when OK, < 0 on error
1728 */
1729
1730 static int
1731 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 {
1733 int rc;
1734
1735 if (use_pcre16)
1736 #ifdef SUPPORT_PCRE16
1737 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 #else
1739 rc = PCRE_ERROR_BADMODE;
1740 #endif
1741 else
1742 #ifdef SUPPORT_PCRE8
1743 rc = pcre_fullinfo(re, study, option, ptr);
1744 #else
1745 rc = PCRE_ERROR_BADMODE;
1746 #endif
1747
1748 if (rc < 0)
1749 {
1750 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751 use_pcre16? "16" : "", option);
1752 if (rc == PCRE_ERROR_BADMODE)
1753 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755 }
1756
1757 return rc;
1758 }
1759
1760
1761
1762 /*************************************************
1763 * Swap byte functions *
1764 *************************************************/
1765
1766 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767 value, respectively.
1768
1769 Arguments:
1770 value any number
1771
1772 Returns: the byte swapped value
1773 */
1774
1775 static pcre_uint32
1776 swap_uint32(pcre_uint32 value)
1777 {
1778 return ((value & 0x000000ff) << 24) |
1779 ((value & 0x0000ff00) << 8) |
1780 ((value & 0x00ff0000) >> 8) |
1781 (value >> 24);
1782 }
1783
1784 static pcre_uint16
1785 swap_uint16(pcre_uint16 value)
1786 {
1787 return (value >> 8) | (value << 8);
1788 }
1789
1790
1791
1792 /*************************************************
1793 * Flip bytes in a compiled pattern *
1794 *************************************************/
1795
1796 /* This function is called if the 'F' option was present on a pattern that is
1797 to be written to a file. We flip the bytes of all the integer fields in the
1798 regex data block and the study block. In 16-bit mode this also flips relevant
1799 bytes in the pattern itself. This is to make it possible to test PCRE's
1800 ability to reload byte-flipped patterns, e.g. those compiled on a different
1801 architecture. */
1802
1803 static void
1804 regexflip(pcre *ere, pcre_extra *extra)
1805 {
1806 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 #ifdef SUPPORT_PCRE16
1808 int op;
1809 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810 int length = re->name_count * re->name_entry_size;
1811 #ifdef SUPPORT_UTF
1812 BOOL utf = (re->options & PCRE_UTF16) != 0;
1813 BOOL utf16_char = FALSE;
1814 #endif /* SUPPORT_UTF */
1815 #endif /* SUPPORT_PCRE16 */
1816
1817 /* Always flip the bytes in the main data block and study blocks. */
1818
1819 re->magic_number = REVERSED_MAGIC_NUMBER;
1820 re->size = swap_uint32(re->size);
1821 re->options = swap_uint32(re->options);
1822 re->flags = swap_uint16(re->flags);
1823 re->top_bracket = swap_uint16(re->top_bracket);
1824 re->top_backref = swap_uint16(re->top_backref);
1825 re->first_char = swap_uint16(re->first_char);
1826 re->req_char = swap_uint16(re->req_char);
1827 re->name_table_offset = swap_uint16(re->name_table_offset);
1828 re->name_entry_size = swap_uint16(re->name_entry_size);
1829 re->name_count = swap_uint16(re->name_count);
1830
1831 if (extra != NULL)
1832 {
1833 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834 rsd->size = swap_uint32(rsd->size);
1835 rsd->flags = swap_uint32(rsd->flags);
1836 rsd->minlength = swap_uint32(rsd->minlength);
1837 }
1838
1839 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840 in the name table, if present, and then in the pattern itself. */
1841
1842 #ifdef SUPPORT_PCRE16
1843 if (!use_pcre16) return;
1844
1845 while(TRUE)
1846 {
1847 /* Swap previous characters. */
1848 while (length-- > 0)
1849 {
1850 *ptr = swap_uint16(*ptr);
1851 ptr++;
1852 }
1853 #ifdef SUPPORT_UTF
1854 if (utf16_char)
1855 {
1856 if ((ptr[-1] & 0xfc00) == 0xd800)
1857 {
1858 /* We know that there is only one extra character in UTF-16. */
1859 *ptr = swap_uint16(*ptr);
1860 ptr++;
1861 }
1862 }
1863 utf16_char = FALSE;
1864 #endif /* SUPPORT_UTF */
1865
1866 /* Get next opcode. */
1867
1868 length = 0;
1869 op = *ptr;
1870 *ptr++ = swap_uint16(op);
1871
1872 switch (op)
1873 {
1874 case OP_END:
1875 return;
1876
1877 #ifdef SUPPORT_UTF
1878 case OP_CHAR:
1879 case OP_CHARI:
1880 case OP_NOT:
1881 case OP_NOTI:
1882 case OP_STAR:
1883 case OP_MINSTAR:
1884 case OP_PLUS:
1885 case OP_MINPLUS:
1886 case OP_QUERY:
1887 case OP_MINQUERY:
1888 case OP_UPTO:
1889 case OP_MINUPTO:
1890 case OP_EXACT:
1891 case OP_POSSTAR:
1892 case OP_POSPLUS:
1893 case OP_POSQUERY:
1894 case OP_POSUPTO:
1895 case OP_STARI:
1896 case OP_MINSTARI:
1897 case OP_PLUSI:
1898 case OP_MINPLUSI:
1899 case OP_QUERYI:
1900 case OP_MINQUERYI:
1901 case OP_UPTOI:
1902 case OP_MINUPTOI:
1903 case OP_EXACTI:
1904 case OP_POSSTARI:
1905 case OP_POSPLUSI:
1906 case OP_POSQUERYI:
1907 case OP_POSUPTOI:
1908 case OP_NOTSTAR:
1909 case OP_NOTMINSTAR:
1910 case OP_NOTPLUS:
1911 case OP_NOTMINPLUS:
1912 case OP_NOTQUERY:
1913 case OP_NOTMINQUERY:
1914 case OP_NOTUPTO:
1915 case OP_NOTMINUPTO:
1916 case OP_NOTEXACT:
1917 case OP_NOTPOSSTAR:
1918 case OP_NOTPOSPLUS:
1919 case OP_NOTPOSQUERY:
1920 case OP_NOTPOSUPTO:
1921 case OP_NOTSTARI:
1922 case OP_NOTMINSTARI:
1923 case OP_NOTPLUSI:
1924 case OP_NOTMINPLUSI:
1925 case OP_NOTQUERYI:
1926 case OP_NOTMINQUERYI:
1927 case OP_NOTUPTOI:
1928 case OP_NOTMINUPTOI:
1929 case OP_NOTEXACTI:
1930 case OP_NOTPOSSTARI:
1931 case OP_NOTPOSPLUSI:
1932 case OP_NOTPOSQUERYI:
1933 case OP_NOTPOSUPTOI:
1934 if (utf) utf16_char = TRUE;
1935 #endif
1936 /* Fall through. */
1937
1938 default:
1939 length = OP_lengths16[op] - 1;
1940 break;
1941
1942 case OP_CLASS:
1943 case OP_NCLASS:
1944 /* Skip the character bit map. */
1945 ptr += 32/sizeof(pcre_uint16);
1946 length = 0;
1947 break;
1948
1949 case OP_XCLASS:
1950 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951 if (LINK_SIZE > 1)
1952 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953 - (1 + LINK_SIZE + 1));
1954 else
1955 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956
1957 /* Reverse the size of the XCLASS instance. */
1958 *ptr = swap_uint16(*ptr);
1959 ptr++;
1960 if (LINK_SIZE > 1)
1961 {
1962 *ptr = swap_uint16(*ptr);
1963 ptr++;
1964 }
1965
1966 op = *ptr;
1967 *ptr = swap_uint16(op);
1968 ptr++;
1969 if ((op & XCL_MAP) != 0)
1970 {
1971 /* Skip the character bit map. */
1972 ptr += 32/sizeof(pcre_uint16);
1973 length -= 32/sizeof(pcre_uint16);
1974 }
1975 break;
1976 }
1977 }
1978 /* Control should never reach here in 16 bit mode. */
1979 #endif /* SUPPORT_PCRE16 */
1980 }
1981
1982
1983
1984 /*************************************************
1985 * Check match or recursion limit *
1986 *************************************************/
1987
1988 static int
1989 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991 int flag, unsigned long int *limit, int errnumber, const char *msg)
1992 {
1993 int count;
1994 int min = 0;
1995 int mid = 64;
1996 int max = -1;
1997
1998 extra->flags |= flag;
1999
2000 for (;;)
2001 {
2002 *limit = mid;
2003
2004 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 use_offsets, use_size_offsets);
2006
2007 if (count == errnumber)
2008 {
2009 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010 min = mid;
2011 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012 }
2013
2014 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015 count == PCRE_ERROR_PARTIAL)
2016 {
2017 if (mid == min + 1)
2018 {
2019 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020 break;
2021 }
2022 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023 max = mid;
2024 mid = (min + mid)/2;
2025 }
2026 else break; /* Some other error */
2027 }
2028
2029 extra->flags &= ~flag;
2030 return count;
2031 }
2032
2033
2034
2035 /*************************************************
2036 * Case-independent strncmp() function *
2037 *************************************************/
2038
2039 /*
2040 Arguments:
2041 s first string
2042 t second string
2043 n number of characters to compare
2044
2045 Returns: < 0, = 0, or > 0, according to the comparison
2046 */
2047
2048 static int
2049 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2050 {
2051 while (n--)
2052 {
2053 int c = tolower(*s++) - tolower(*t++);
2054 if (c) return c;
2055 }
2056 return 0;
2057 }
2058
2059
2060
2061 /*************************************************
2062 * Check newline indicator *
2063 *************************************************/
2064
2065 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066 a message and return 0 if there is no match.
2067
2068 Arguments:
2069 p points after the leading '<'
2070 f file for error message
2071
2072 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073 */
2074
2075 static int
2076 check_newline(pcre_uint8 *p, FILE *f)
2077 {
2078 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 fprintf(f, "Unknown newline type at: <%s\n", p);
2086 return 0;
2087 }
2088
2089
2090
2091 /*************************************************
2092 * Usage function *
2093 *************************************************/
2094
/* Write a summary of the command line syntax and the available options to
stdout. The line about readline() depends on SUPPORT_LIBREADLINE, and the lines
for -16, -dfa and -p are present only when SUPPORT_PCRE16 is defined, NODFA is
not defined, and NOPOSIX is not defined, respectively. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);

/* The values that can follow -C */

fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);

fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n", stdout);

fputs(" -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2141
2142
2143
2144 /*************************************************
2145 * Main Program *
2146 *************************************************/
2147
2148 /* Read lines from named file or stdin and write to named file or stdout; lines
2149 consist of a regular expression, in delimiters and optionally followed by
2150 options, followed by a set of test data, terminated by an empty line. */
2151
2152 int main(int argc, char **argv)
2153 {
2154 FILE *infile = stdin;
2155 const char *version;
2156 int options = 0;
2157 int study_options = 0;
2158 int default_find_match_limit = FALSE;
2159 int op = 1;
2160 int timeit = 0;
2161 int timeitm = 0;
2162 int showinfo = 0;
2163 int showstore = 0;
2164 int force_study = -1;
2165 int force_study_options = 0;
2166 int quiet = 0;
2167 int size_offsets = 45;
2168 int size_offsets_max;
2169 int *offsets = NULL;
2170 #if !defined NOPOSIX
2171 int posix = 0;
2172 #endif
2173 int debug = 0;
2174 int done = 0;
2175 int all_use_dfa = 0;
2176 int yield = 0;
2177 int stack_size;
2178
2179 pcre_jit_stack *jit_stack = NULL;
2180
2181 /* These vectors store, end-to-end, a list of zero-terminated captured
2182 substring names, each list itself being terminated by an empty name. Assume
2183 that 1024 is plenty long enough for the few names we'll be testing. It is
2184 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185 for the actual memory, to ensure alignment. */
2186
2187 pcre_uint16 copynames[1024];
2188 pcre_uint16 getnames[1024];
2189
2190 #ifdef SUPPORT_PCRE16
2191 pcre_uint16 *cn16ptr;
2192 pcre_uint16 *gn16ptr;
2193 #endif
2194
2195 #ifdef SUPPORT_PCRE8
2196 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198 pcre_uint8 *cn8ptr;
2199 pcre_uint8 *gn8ptr;
2200 #endif
2201
2202 /* Get buffers from malloc() so that valgrind will check their misuse when
2203 debugging. They grow automatically when very long lines are read. The 16-bit
2204 buffer (buffer16) is obtained only if needed. */
2205
2206 buffer = (pcre_uint8 *)malloc(buffer_size);
2207 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209
2210 /* The outfile variable is static so that new_malloc can use it. */
2211
2212 outfile = stdout;
2213
2214 /* The following _setmode() stuff is some Windows magic that tells its runtime
2215 library to translate CRLF into a single LF character. At least, that's what
2216 I've been told: never having used Windows I take this all on trust. Originally
2217 it set 0x8000, but then I was advised that _O_BINARY was better. */
2218
2219 #if defined(_WIN32) || defined(WIN32)
2220 _setmode( _fileno( stdout ), _O_BINARY );
2221 #endif
2222
2223 /* Get the version number: both pcre_version() and pcre16_version() give the
2224 same answer. We just need to ensure that we call one that is available. */
2225
2226 #ifdef SUPPORT_PCRE8
2227 version = pcre_version();
2228 #else
2229 version = pcre16_version();
2230 #endif
2231
2232 /* Scan options */
2233
2234 while (argc > 1 && argv[op][0] == '-')
2235 {
2236 pcre_uint8 *endptr;
2237
2238 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240 else if (strcmp(argv[op], "-s+") == 0)
2241 {
2242 force_study = 1;
2243 force_study_options = PCRE_STUDY_JIT_COMPILE;
2244 }
2245 else if (strcmp(argv[op], "-16") == 0)
2246 {
2247 #ifdef SUPPORT_PCRE16
2248 use_pcre16 = 1;
2249 #else
2250 printf("** This version of PCRE was built without 16-bit support\n");
2251 exit(1);
2252 #endif
2253 }
2254 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2257 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2258 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259 #if !defined NODFA
2260 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261 #endif
2262 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264 *endptr == 0))
2265 {
2266 op++;
2267 argc--;
2268 }
2269 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2270 {
2271 int both = argv[op][2] == 0;
2272 int temp;
2273 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274 *endptr == 0))
2275 {
2276 timeitm = temp;
2277 op++;
2278 argc--;
2279 }
2280 else timeitm = LOOPREPEAT;
2281 if (both) timeit = timeitm;
2282 }
2283 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285 *endptr == 0))
2286 {
2287 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288 printf("PCRE: -S not supported on this OS\n");
2289 exit(1);
2290 #else
2291 int rc;
2292 struct rlimit rlim;
2293 getrlimit(RLIMIT_STACK, &rlim);
2294 rlim.rlim_cur = stack_size * 1024 * 1024;
2295 rc = setrlimit(RLIMIT_STACK, &rlim);
2296 if (rc != 0)
2297 {
2298 printf("PCRE: setrlimit() failed with error %d\n", rc);
2299 exit(1);
2300 }
2301 op++;
2302 argc--;
2303 #endif
2304 }
2305 #if !defined NOPOSIX
2306 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2307 #endif
2308 else if (strcmp(argv[op], "-C") == 0)
2309 {
2310 int rc;
2311 unsigned long int lrc;
2312
2313 if (argc > 2)
2314 {
2315 if (strcmp(argv[op + 1], "linksize") == 0)
2316 {
2317 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318 printf("%d\n", rc);
2319 yield = rc;
2320 goto EXIT;
2321 }
2322 if (strcmp(argv[op + 1], "pcre8") == 0)
2323 {
2324 #ifdef SUPPORT_PCRE8
2325 printf("1\n");
2326 yield = 1;
2327 #else
2328 printf("0\n");
2329 yield = 0;
2330 #endif
2331 goto EXIT;
2332 }
2333 if (strcmp(argv[op + 1], "pcre16") == 0)
2334 {
2335 #ifdef SUPPORT_PCRE16
2336 printf("1\n");
2337 yield = 1;
2338 #else
2339 printf("0\n");
2340 yield = 0;
2341 #endif
2342 goto EXIT;
2343 }
2344 if (strcmp(argv[op + 1], "utf") == 0)
2345 {
2346 #ifdef SUPPORT_PCRE8
2347 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348 printf("%d\n", rc);
2349 yield = rc;
2350 #else
2351 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352 printf("%d\n", rc);
2353 yield = rc;
2354 #endif
2355 goto EXIT;
2356 }
2357 if (strcmp(argv[op + 1], "ucp") == 0)
2358 {
2359 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360 printf("%d\n", rc);
2361 yield = rc;
2362 goto EXIT;
2363 }
2364 if (strcmp(argv[op + 1], "jit") == 0)
2365 {
2366 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367 printf("%d\n", rc);
2368 yield = rc;
2369 goto EXIT;
2370 }
2371 if (strcmp(argv[op + 1], "newline") == 0)
2372 {
2373 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374 /* Note that these values are always the ASCII values, even
2375 in EBCDIC environments. CR is 13 and NL is 10. */
2376 printf("%s\n", (rc == 13)? "CR" :
2377 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378 (rc == -2)? "ANYCRLF" :
2379 (rc == -1)? "ANY" : "???");
2380 goto EXIT;
2381 }
2382 printf("Unknown -C option: %s\n", argv[op + 1]);
2383 goto EXIT;
2384 }
2385
2386 printf("PCRE version %s\n", version);
2387 printf("Compiled with\n");
2388
2389 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390 are set, either both UTFs are supported or both are not supported. */
2391
2392 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393 printf(" 8-bit and 16-bit support\n");
2394 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395 if (rc)
2396 printf(" UTF-8 and UTF-16 support\n");
2397 else
2398 printf(" No UTF-8 or UTF-16 support\n");
2399 #elif defined SUPPORT_PCRE8
2400 printf(" 8-bit support only\n");
2401 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402 printf(" %sUTF-8 support\n", rc? "" : "No ");
2403 #else
2404 printf(" 16-bit support only\n");
2405 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406 printf(" %sUTF-16 support\n", rc? "" : "No ");
2407 #endif
2408
2409 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410 printf(" %sUnicode properties support\n", rc? "" : "No ");
2411 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412 if (rc)
2413 {
2414 const char *arch;
2415 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416 printf(" Just-in-time compiler support: %s\n", arch);
2417 }
2418 else
2419 printf(" No just-in-time compiler support\n");
2420 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421 /* Note that these values are always the ASCII values, even
2422 in EBCDIC environments. CR is 13 and NL is 10. */
2423 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2424 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425 (rc == -2)? "ANYCRLF" :
2426 (rc == -1)? "ANY" : "???");
2427 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429 "all Unicode newlines");
2430 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431 printf(" Internal link size = %d\n", rc);
2432 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433 printf(" POSIX malloc threshold = %d\n", rc);
2434 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435 printf(" Default match limit = %ld\n", lrc);
2436 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437 printf(" Default recursion depth limit = %ld\n", lrc);
2438 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2440 goto EXIT;
2441 }
2442 else if (strcmp(argv[op], "-help") == 0 ||
2443 strcmp(argv[op], "--help") == 0)
2444 {
2445 usage();
2446 goto EXIT;
2447 }
2448 else
2449 {
2450 printf("** Unknown or malformed option %s\n", argv[op]);
2451 usage();
2452 yield = 1;
2453 goto EXIT;
2454 }
2455 op++;
2456 argc--;
2457 }
2458
2459 /* Get the store for the offsets vector, and remember what it was */
2460
2461 size_offsets_max = size_offsets;
2462 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2463 if (offsets == NULL)
2464 {
2465 printf("** Failed to get %d bytes of memory for offsets vector\n",
2466 (int)(size_offsets_max * sizeof(int)));
2467 yield = 1;
2468 goto EXIT;
2469 }
2470
2471 /* Sort out the input and output files */
2472
2473 if (argc > 1)
2474 {
2475 infile = fopen(argv[op], INPUT_MODE);
2476 if (infile == NULL)
2477 {
2478 printf("** Failed to open %s\n", argv[op]);
2479 yield = 1;
2480 goto EXIT;
2481 }
2482 }
2483
2484 if (argc > 2)
2485 {
2486 outfile = fopen(argv[op+1], OUTPUT_MODE);
2487 if (outfile == NULL)
2488 {
2489 printf("** Failed to open %s\n", argv[op+1]);
2490 yield = 1;
2491 goto EXIT;
2492 }
2493 }
2494
2495 /* Set alternative malloc function */
2496
2497 #ifdef SUPPORT_PCRE8
2498 pcre_malloc = new_malloc;
2499 pcre_free = new_free;
2500 pcre_stack_malloc = stack_malloc;
2501 pcre_stack_free = stack_free;
2502 #endif
2503
2504 #ifdef SUPPORT_PCRE16
2505 pcre16_malloc = new_malloc;
2506 pcre16_free = new_free;
2507 pcre16_stack_malloc = stack_malloc;
2508 pcre16_stack_free = stack_free;
2509 #endif
2510
2511 /* Heading line unless quiet, then prompt for first regex if stdin */
2512
2513 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2514
2515 /* Main loop */
2516
2517 while (!done)
2518 {
2519 pcre *re = NULL;
2520 pcre_extra *extra = NULL;
2521
2522 #if !defined NOPOSIX /* There are still compilers that require no indent */
2523 regex_t preg;
2524 int do_posix = 0;
2525 #endif
2526
2527 const char *error;
2528 pcre_uint8 *markptr;
2529 pcre_uint8 *p, *pp, *ppp;
2530 pcre_uint8 *to_file = NULL;
2531 const pcre_uint8 *tables = NULL;
2532 unsigned long int get_options;
2533 unsigned long int true_size, true_study_size = 0;
2534 size_t size, regex_gotten_store;
2535 int do_allcaps = 0;
2536 int do_mark = 0;
2537 int do_study = 0;
2538 int no_force_study = 0;
2539 int do_debug = debug;
2540 int do_G = 0;
2541 int do_g = 0;
2542 int do_showinfo = showinfo;
2543 int do_showrest = 0;
2544 int do_showcaprest = 0;
2545 int do_flip = 0;
2546 int erroroffset, len, delimiter, poffset;
2547
2548 use_utf = 0;
2549 debug_lengths = 1;
2550
2551 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2552 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2553 fflush(outfile);
2554
2555 p = buffer;
2556 while (isspace(*p)) p++;
2557 if (*p == 0) continue;
2558
2559 /* See if the pattern is to be loaded pre-compiled from a file. */
2560
2561 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2562 {
2563 pcre_uint32 magic;
2564 pcre_uint8 sbuf[8];
2565 FILE *f;
2566
2567 p++;
2568 if (*p == '!')
2569 {
2570 do_debug = TRUE;
2571 do_showinfo = TRUE;
2572 p++;
2573 }
2574
2575 pp = p + (int)strlen((char *)p);
2576 while (isspace(pp[-1])) pp--;
2577 *pp = 0;
2578
2579 f = fopen((char *)p, "rb");
2580 if (f == NULL)
2581 {
2582 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2583 continue;
2584 }
2585
2586 first_gotten_store = 0;
2587 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2588
2589 true_size =
2590 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2591 true_study_size =
2592 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2593
2594 re = (pcre *)new_malloc(true_size);
2595 regex_gotten_store = first_gotten_store;
2596
2597 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2598
2599 magic = ((REAL_PCRE *)re)->magic_number;
2600 if (magic != MAGIC_NUMBER)
2601 {
2602 if (swap_uint32(magic) == MAGIC_NUMBER)
2603 {
2604 do_flip = 1;
2605 }
2606 else
2607 {
2608 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2609 fclose(f);
2610 continue;
2611 }
2612 }
2613
2614 /* We hide the byte-invert info for little and big endian tests. */
2615 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2616 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2617
2618 /* Now see if there is any following study data. */
2619
2620 if (true_study_size != 0)
2621 {
2622 pcre_study_data *psd;
2623
2624 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2625 extra->flags = PCRE_EXTRA_STUDY_DATA;
2626
2627 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2628 extra->study_data = psd;
2629
2630 if (fread(psd, 1, true_study_size, f) != true_study_size)
2631 {
2632 FAIL_READ:
2633 fprintf(outfile, "Failed to read data from %s\n", p);
2634 if (extra != NULL)
2635 {
2636 PCRE_FREE_STUDY(extra);
2637 }
2638 if (re != NULL) new_free(re);
2639 fclose(f);
2640 continue;
2641 }
2642 fprintf(outfile, "Study data loaded from %s\n", p);
2643 do_study = 1; /* To get the data output if requested */
2644 }
2645 else fprintf(outfile, "No study data\n");
2646
2647 /* Flip the necessary bytes. */
2648 if (do_flip)
2649 {
2650 int rc;
2651 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2652 if (rc == PCRE_ERROR_BADMODE)
2653 {
2654 /* Simulate the result of the function call below. */
2655 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2656 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2657 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2658 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2659 continue;
2660 }
2661 }
2662
2663 /* Need to know if UTF-8 for printing data strings. */
2664
2665 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2666 use_utf = (get_options & PCRE_UTF8) != 0;
2667
2668 fclose(f);
2669 goto SHOW_INFO;
2670 }
2671
2672 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2673 the pattern; if it isn't complete, read more. */
2674
2675 delimiter = *p++;
2676
2677 if (isalnum(delimiter) || delimiter == '\\')
2678 {
2679 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2680 goto SKIP_DATA;
2681 }
2682
2683 pp = p;
2684 poffset = (int)(p - buffer);
2685
2686 for(;;)
2687 {
2688 while (*pp != 0)
2689 {
2690 if (*pp == '\\' && pp[1] != 0) pp++;
2691 else if (*pp == delimiter) break;
2692 pp++;
2693 }
2694 if (*pp != 0) break;
2695 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2696 {
2697 fprintf(outfile, "** Unexpected EOF\n");
2698 done = 1;
2699 goto CONTINUE;
2700 }
2701 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2702 }
2703
2704 /* The buffer may have moved while being extended; reset the start of data
2705 pointer to the correct relative point in the buffer. */
2706
2707 p = buffer + poffset;
2708
2709 /* If the first character after the delimiter is backslash, make
2710 the pattern end with backslash. This is purely to provide a way
2711 of testing for the error message when a pattern ends with backslash. */
2712
2713 if (pp[1] == '\\') *pp++ = '\\';
2714
2715 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2716 for callouts. */
2717
2718 *pp++ = 0;
2719 strcpy((char *)pbuffer, (char *)p);
2720
2721 /* Look for options after final delimiter */
2722
2723 options = 0;
2724 study_options = 0;
2725 log_store = showstore; /* default from command line */
2726
2727 while (*pp != 0)
2728 {
2729 switch (*pp++)
2730 {
2731 case 'f': options |= PCRE_FIRSTLINE; break;
2732 case 'g': do_g = 1; break;
2733 case 'i': options |= PCRE_CASELESS; break;
2734 case 'm': options |= PCRE_MULTILINE; break;
2735 case 's': options |= PCRE_DOTALL; break;
2736 case 'x': options |= PCRE_EXTENDED; break;
2737
2738 case '+':
2739 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2740 break;
2741
2742 case '=': do_allcaps = 1; break;
2743 case 'A': options |= PCRE_ANCHORED; break;
2744 case 'B': do_debug = 1; break;
2745 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2746 case 'D': do_debug = do_showinfo = 1; break;
2747 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2748 case 'F': do_flip = 1; break;
2749 case 'G': do_G = 1; break;
2750 case 'I': do_showinfo = 1; break;
2751 case 'J': options |= PCRE_DUPNAMES; break;
2752 case 'K': do_mark = 1; break;
2753 case 'M': log_store = 1; break;
2754 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2755
2756 #if !defined NOPOSIX
2757 case 'P': do_posix = 1; break;
2758 #endif
2759
2760 case 'S':
2761 if (do_study == 0)
2762 {
2763 do_study = 1;
2764 if (*pp == '+')
2765 {
2766 study_options |= PCRE_STUDY_JIT_COMPILE;
2767 pp++;
2768 }
2769 }
2770 else
2771 {
2772 do_study = 0;
2773 no_force_study = 1;
2774 }
2775 break;
2776
2777 case 'U': options |= PCRE_UNGREEDY; break;
2778 case 'W': options |= PCRE_UCP; break;
2779 case 'X': options |= PCRE_EXTRA; break;
2780 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2781 case 'Z': debug_lengths = 0; break;
2782 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2783 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2784
2785 case 'T':
2786 switch (*pp++)
2787 {
2788 case '0': tables = tables0; break;
2789 case '1': tables = tables1; break;
2790
2791 case '\r':
2792 case '\n':
2793 case ' ':
2794 case 0:
2795 fprintf(outfile, "** Missing table number after /T\n");
2796 goto SKIP_DATA;
2797
2798 default:
2799 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2800 goto SKIP_DATA;
2801 }
2802 break;
2803
2804 case 'L':
2805 ppp = pp;
2806 /* The '\r' test here is so that it works on Windows. */
2807 /* The '0' test is just in case this is an unterminated line. */
2808 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2809 *ppp = 0;
2810 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2811 {
2812 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2813 goto SKIP_DATA;
2814 }
2815 locale_set = 1;
2816 tables = PCRE_MAKETABLES;
2817 pp = ppp;
2818 break;
2819
2820 case '>':
2821 to_file = pp;
2822 while (*pp != 0) pp++;
2823 while (isspace(pp[-1])) pp--;
2824 *pp = 0;
2825 break;
2826
2827 case '<':
2828 {
2829 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2830 {
2831 options |= PCRE_JAVASCRIPT_COMPAT;
2832 pp += 3;
2833 }
2834 else
2835 {
2836 int x = check_newline(pp, outfile);
2837 if (x == 0) goto SKIP_DATA;
2838 options |= x;
2839 while (*pp++ != '>');
2840 }
2841 }
2842 break;
2843
2844 case '\r': /* So that it works in Windows */
2845 case '\n':
2846 case ' ':
2847 break;
2848
2849 default:
2850 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2851 goto SKIP_DATA;
2852 }
2853 }
2854
2855 /* Handle compiling via the POSIX interface, which doesn't support the
2856 timing, showing, or debugging options, nor the ability to pass over
2857 local character tables. Neither does it have 16-bit support. */
2858
2859 #if !defined NOPOSIX
2860 if (posix || do_posix)
2861 {
2862 int rc;
2863 int cflags = 0;
2864
2865 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2866 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2867 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2868 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2869 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2870 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2871 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2872
2873 first_gotten_store = 0;
2874 rc = regcomp(&preg, (char *)p, cflags);
2875
2876 /* Compilation failed; go back for another re, skipping to blank line
2877 if non-interactive. */
2878
2879 if (rc != 0)
2880 {
2881 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2882 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2883 goto SKIP_DATA;
2884 }
2885 }
2886
2887 /* Handle compiling via the native interface */
2888
2889 else
2890 #endif /* !defined NOPOSIX */
2891
2892 {
2893 /* In 16-bit mode, convert the input. */
2894
2895 #ifdef SUPPORT_PCRE16
2896 if (use_pcre16)
2897 {
2898 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2899 {
2900 case -1:
2901 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2902 "converted to UTF-16\n");
2903 goto SKIP_DATA;
2904
2905 case -2:
2906 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2907 "cannot be converted to UTF-16\n");
2908 goto SKIP_DATA;
2909
2910 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2911 fprintf(outfile, "**Failed: character value greater than 0xffff "
2912 "cannot be converted to 16-bit in non-UTF mode\n");
2913 goto SKIP_DATA;
2914
2915 default:
2916 break;
2917 }
2918 p = (pcre_uint8 *)buffer16;
2919 }
2920 #endif
2921
2922 /* Compile many times when timing */
2923
2924 if (timeit > 0)
2925 {
2926 register int i;
2927 clock_t time_taken;
2928 clock_t start_time = clock();
2929 for (i = 0; i < timeit; i++)
2930 {
2931 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2932 if (re != NULL) free(re);
2933 }
2934 time_taken = clock() - start_time;
2935 fprintf(outfile, "Compile time %.4f milliseconds\n",
2936 (((double)time_taken * 1000.0) / (double)timeit) /
2937 (double)CLOCKS_PER_SEC);
2938 }
2939
2940 first_gotten_store = 0;
2941 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2942
2943 /* Compilation failed; go back for another re, skipping to blank line
2944 if non-interactive. */
2945
2946 if (re == NULL)
2947 {
2948 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2949 SKIP_DATA:
2950 if (infile != stdin)
2951 {
2952 for (;;)
2953 {
2954 if (extend_inputline(infile, buffer, NULL) == NULL)
2955 {
2956 done = 1;
2957 goto CONTINUE;
2958 }
2959 len = (int)strlen((char *)buffer);
2960 while (len > 0 && isspace(buffer[len-1])) len--;
2961 if (len == 0) break;
2962 }
2963 fprintf(outfile, "\n");
2964 }
2965 goto CONTINUE;
2966 }
2967
2968 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2969 within the regex; check for this so that we know how to process the data
2970 lines. */
2971
2972 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2973 goto SKIP_DATA;
2974 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2975
2976 /* Extract the size for possible writing before possibly flipping it,
2977 and remember the store that was got. */
2978
2979 true_size = ((REAL_PCRE *)re)->size;
2980 regex_gotten_store = first_gotten_store;
2981
2982 /* Output code size information if requested */
2983
2984 if (log_store)
2985 fprintf(outfile, "Memory allocation (code space): %d\n",
2986 (int)(first_gotten_store -
2987 sizeof(REAL_PCRE) -
2988 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2989
2990 /* If -s or /S was present, study the regex to generate additional info to
2991 help with the matching, unless the pattern has the SS option, which
2992 suppresses the effect of /S (used for a few test patterns where studying is
2993 never sensible). */
2994
2995 if (do_study || (force_study >= 0 && !no_force_study))
2996 {
2997 if (timeit > 0)
2998 {
2999 register int i;
3000 clock_t time_taken;
3001 clock_t start_time = clock();
3002 for (i = 0; i < timeit; i++)
3003 {
3004 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3005 }
3006 time_taken = clock() - start_time;
3007 if (extra != NULL)
3008 {
3009 PCRE_FREE_STUDY(extra);
3010 }
3011 fprintf(outfile, " Study time %.4f milliseconds\n",
3012 (((double)time_taken * 1000.0) / (double)timeit) /
3013 (double)CLOCKS_PER_SEC);
3014 }
3015 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3016 if (error != NULL)
3017 fprintf(outfile, "Failed to study: %s\n", error);
3018 else if (extra != NULL)
3019 {
3020 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3021 if (log_store)
3022 {
3023 size_t jitsize;
3024 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3025 jitsize != 0)
3026 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3027 }
3028 }
3029 }
3030
3031 /* If /K was present, we set up for handling MARK data. */
3032
3033 if (do_mark)
3034 {
3035 if (extra == NULL)
3036 {
3037 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3038 extra->flags = 0;
3039 }
3040 extra->mark = &markptr;
3041 extra->flags |= PCRE_EXTRA_MARK;
3042 }
3043
3044 /* Extract and display information from the compiled data if required. */
3045
3046 SHOW_INFO:
3047
3048 if (do_debug)
3049 {
3050 fprintf(outfile, "------------------------------------------------------------------\n");
3051 PCRE_PRINTINT(re, outfile, debug_lengths);
3052 }
3053
3054 /* We already have the options in get_options (see above) */
3055
3056 if (do_showinfo)
3057 {
3058 unsigned long int all_options;
3059 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3060 hascrorlf;
3061 int nameentrysize, namecount;
3062 const pcre_uint8 *nametable;
3063
3064 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3065 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3066 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3067 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3068 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3069 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3070 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3071 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3072 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3073 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3074 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3075 != 0)
3076 goto SKIP_DATA;
3077
3078 if (size != regex_gotten_store) fprintf(outfile,
3079 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3080 (int)size, (int)regex_gotten_store);
3081
3082 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3083 if (backrefmax > 0)
3084 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3085
3086 if (namecount > 0)
3087 {
3088 fprintf(outfile, "Named capturing subpatterns:\n");
3089 while (namecount-- > 0)
3090 {
3091 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3092 int imm2_size = use_pcre16 ? 1 : 2;
3093 #else
3094 int imm2_size = IMM2_SIZE;
3095 #endif
3096 int length = (int)STRLEN(nametable + imm2_size);
3097 fprintf(outfile, " ");
3098 PCHARSV(nametable, imm2_size, length, outfile);
3099 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3100 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3101 fprintf(outfile, "%3d\n", use_pcre16?
3102 (int)(((PCRE_SPTR16)nametable)[0])
3103 :((int)nametable[0] << 8) | (int)nametable[1]);
3104 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3105 #else
3106 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3107 #ifdef SUPPORT_PCRE8
3108 nametable += nameentrysize;
3109 #else
3110 nametable += nameentrysize * 2;
3111 #endif
3112 #endif
3113 }
3114 }
3115
3116 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3117 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3118
3119 all_options = ((REAL_PCRE *)re)->options;
3120 if (do_flip) all_options = swap_uint32(all_options);
3121
3122 if (get_options == 0) fprintf(outfile, "No options\n");
3123 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3124 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3125 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3126 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3127 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3128 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3129 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3130 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3131 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3132 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3133 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3134 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3135 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3136 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3137 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3138 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3139 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3140 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3141
3142 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3143
3144 switch (get_options & PCRE_NEWLINE_BITS)
3145 {
3146 case PCRE_NEWLINE_CR:
3147 fprintf(outfile, "Forced newline sequence: CR\n");
3148 break;
3149
3150 case PCRE_NEWLINE_LF:
3151 fprintf(outfile, "Forced newline sequence: LF\n");
3152 break;
3153
3154 case PCRE_NEWLINE_CRLF:
3155 fprintf(outfile, "Forced newline sequence: CRLF\n");
3156 break;
3157
3158 case PCRE_NEWLINE_ANYCRLF:
3159 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3160 break;
3161
3162 case PCRE_NEWLINE_ANY:
3163 fprintf(outfile, "Forced newline sequence: ANY\n");
3164 break;
3165
3166 default:
3167 break;
3168 }
3169
3170 if (first_char == -1)
3171 {
3172 fprintf(outfile, "First char at start or follows newline\n");
3173 }
3174 else if (first_char < 0)
3175 {
3176 fprintf(outfile, "No first char\n");
3177 }
3178 else
3179 {
3180 const char *caseless =
3181 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3182 "" : " (caseless)";
3183
3184 if (PRINTOK(first_char))
3185 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3186 else
3187 {
3188 fprintf(outfile, "First char = ");
3189 pchar(first_char, outfile);
3190 fprintf(outfile, "%s\n", caseless);
3191 }
3192 }
3193
3194 if (need_char < 0)
3195 {
3196 fprintf(outfile, "No need char\n");
3197 }
3198 else
3199 {
3200 const char *caseless =
3201 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3202 "" : " (caseless)";
3203
3204 if (PRINTOK(need_char))
3205 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3206 else
3207 {
3208 fprintf(outfile, "Need char = ");
3209 pchar(need_char, outfile);
3210 fprintf(outfile, "%s\n", caseless);
3211 }
3212 }
3213
3214 /* Don't output study size; at present it is in any case a fixed
3215 value, but it varies, depending on the computer architecture, and
3216 so messes up the test suite. (And with the /F option, it might be
3217 flipped.) If study was forced by an external -s, don't show this
3218 information unless -i or -d was also present. This means that, except
3219 when auto-callouts are involved, the output from runs with and without
3220 -s should be identical. */
3221
3222 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3223 {
3224 if (extra == NULL)
3225 fprintf(outfile, "Study returned NULL\n");
3226 else
3227 {
3228 pcre_uint8 *start_bits = NULL;
3229 int minlength;
3230
3231 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3232 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3233
3234 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3235 {
3236 if (start_bits == NULL)
3237 fprintf(outfile, "No set of starting bytes\n");
3238 else
3239 {
3240 int i;
3241 int c = 24;
3242 fprintf(outfile, "Starting byte set: ");
3243 for (i = 0; i < 256; i++)
3244 {
3245 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3246 {
3247 if (c > 75)
3248 {
3249 fprintf(outfile, "\n ");
3250 c = 2;
3251 }
3252 if (PRINTOK(i) && i != ' ')
3253 {
3254 fprintf(outfile, "%c ", i);
3255 c += 2;
3256 }
3257 else
3258 {
3259 fprintf(outfile, "\\x%02x ", i);
3260 c += 5;
3261 }
3262 }
3263 }
3264 fprintf(outfile, "\n");
3265 }
3266 }
3267 }
3268
3269 /* Show this only if the JIT was set by /S, not by -s. */
3270
3271 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3272 {
3273 int jit;
3274 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3275 {
3276 if (jit)
3277 fprintf(outfile, "JIT study was successful\n");
3278 else
3279 #ifdef SUPPORT_JIT
3280 fprintf(outfile, "JIT study was not successful\n");
3281 #else
3282 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3283 #endif
3284 }
3285 }
3286 }
3287 }
3288
3289 /* If the '>' option was present, we write out the regex to a file, and
3290 that is all. The first 8 bytes of the file are the regex length and then
3291 the study length, in big-endian order. */
3292
3293 if (to_file != NULL)
3294 {
3295 FILE *f = fopen((char *)to_file, "wb");
3296 if (f == NULL)
3297 {
3298 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3299 }
3300 else
3301 {
3302 pcre_uint8 sbuf[8];
3303
3304 if (do_flip) regexflip(re, extra);
3305 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3306 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3307 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3308 sbuf[3] = (pcre_uint8)((true_size) & 255);
3309 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3310 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3311 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3312 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3313
3314 if (fwrite(sbuf, 1, 8, f) < 8 ||
3315 fwrite(re, 1, true_size, f) < true_size)
3316 {
3317 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3318 }
3319 else
3320 {
3321 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3322
3323 /* If there is study data, write it. */
3324
3325 if (extra != NULL)
3326 {
3327 if (fwrite(extra->study_data, 1, true_study_size, f) <
3328 true_study_size)
3329 {
3330 fprintf(outfile, "Write error on %s: %s\n", to_file,
3331 strerror(errno));
3332 }
3333 else fprintf(outfile, "Study data written to %s\n", to_file);
3334 }
3335 }
3336 fclose(f);
3337 }
3338
3339 new_free(re);
3340 if (extra != NULL)
3341 {
3342 PCRE_FREE_STUDY(extra);
3343 }
3344 if (locale_set)
3345 {
3346 new_free((void *)tables);
3347 setlocale(LC_CTYPE, "C");
3348 locale_set = 0;
3349 }
3350 continue; /* With next regex */
3351 }
3352 } /* End of non-POSIX compile */
3353
3354 /* Read data lines and test them */
3355
3356 for (;;)
3357 {
3358 pcre_uint8 *q;
3359 pcre_uint8 *bptr;
3360 int *use_offsets = offsets;
3361 int use_size_offsets = size_offsets;
3362 int callout_data = 0;
3363 int callout_data_set = 0;
3364 int count, c;
3365 int copystrings = 0;
3366 int find_match_limit = default_find_match_limit;
3367 int getstrings = 0;
3368 int getlist = 0;
3369 int gmatched = 0;
3370 int start_offset = 0;
3371 int start_offset_sign = 1;
3372 int g_notempty = 0;
3373 int use_dfa = 0;
3374
3375 *copynames = 0;
3376 *getnames = 0;
3377
3378 #ifdef SUPPORT_PCRE16
3379 cn16ptr = copynames;
3380 gn16ptr = getnames;
3381 #endif
3382 #ifdef SUPPORT_PCRE8
3383 cn8ptr = copynames8;
3384 gn8ptr = getnames8;
3385 #endif
3386
3387 SET_PCRE_CALLOUT(callout);
3388 first_callout = 1;
3389 last_callout_mark = NULL;
3390 callout_extra = 0;
3391 callout_count = 0;
3392 callout_fail_count = 999999;
3393 callout_fail_id = -1;
3394 show_malloc = 0;
3395 options = 0;
3396
3397 if (extra != NULL) extra->flags &=
3398 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3399
3400 len = 0;
3401 for (;;)
3402 {
3403 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3404 {
3405 if (len > 0) /* Reached EOF without hitting a newline */
3406 {
3407 fprintf(outfile, "\n");
3408 break;
3409 }
3410 done = 1;
3411 goto CONTINUE;
3412 }
3413 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3414 len = (int)strlen((char *)buffer);
3415 if (buffer[len-1] == '\n') break;
3416 }
3417
3418 while (len > 0 && isspace(buffer[len-1])) len--;
3419 buffer[len] = 0;
3420 if (len == 0) break;
3421
3422 p = buffer;
3423 while (isspace(*p)) p++;
3424
3425 bptr = q = dbuffer;
3426 while ((c = *p++) != 0)
3427 {
3428 int i = 0;
3429 int n = 0;
3430
3431 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3432 In non-UTF mode, allow the value of the byte to fall through to later,
3433 where values greater than 127 are turned into UTF-8 when running in
3434 16-bit mode. */
3435
3436 if (c != '\\')
3437 {
3438 if (use_utf)
3439 {
3440 *q++ = c;
3441 continue;
3442 }
3443 }
3444
3445 /* Handle backslash escapes */
3446
3447 else switch ((c = *p++))
3448 {
3449 case 'a': c = 7; break;
3450 case 'b': c = '\b'; break;
3451 case 'e': c = 27; break;
3452 case 'f': c = '\f'; break;
3453 case 'n': c = '\n'; break;
3454 case 'r': c = '\r'; break;
3455 case 't': c = '\t'; break;
3456 case 'v': c = '\v'; break;
3457
3458 case '0': case '1': case '2': case '3':
3459 case '4': case '5': case '6': case '7':
3460 c -= '0';
3461 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3462 c = c * 8 + *p++ - '0';
3463 break;
3464
3465 case 'x':
3466 if (*p == '{')
3467 {
3468 pcre_uint8 *pt = p;
3469 c = 0;
3470
3471 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3472 when isxdigit() is a macro that refers to its argument more than
3473 once. This is banned by the C Standard, but apparently happens in at
3474 least one MacOS environment. */
3475
3476 for (pt++; isxdigit(*pt); pt++)
3477 {
3478 if (++i == 9)
3479 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3480 "using only the first eight.\n");
3481 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3482 }
3483 if (*pt == '}')
3484 {
3485 p = pt + 1;
3486 break;
3487 }
3488 /* Not correct form for \x{...}; fall through */
3489 }
3490
3491 /* \x without {} always defines just one byte in 8-bit mode. This
3492 allows UTF-8 characters to be constructed byte by byte, and also allows
3493 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3494 Otherwise, pass it down to later code so that it can be turned into
3495 UTF-8 when running in 16-bit mode. */
3496
3497 c = 0;
3498 while (i++ < 2 && isxdigit(*p))
3499 {
3500 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3501 p++;
3502 }
3503 if (use_utf)
3504 {
3505 *q++ = c;
3506 continue;
3507 }
3508 break;
3509
3510 case 0: /* \ followed by EOF allows for an empty line */
3511 p--;
3512 continue;
3513
3514 case '>':
3515 if (*p == '-')
3516 {
3517 start_offset_sign = -1;
3518 p++;
3519 }
3520 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3521 start_offset *= start_offset_sign;
3522 continue;
3523
3524 case 'A': /* Option setting */
3525 options |= PCRE_ANCHORED;
3526 continue;
3527
3528 case 'B':
3529 options |= PCRE_NOTBOL;
3530 continue;
3531
3532 case 'C':
3533 if (isdigit(*p)) /* Set copy string */
3534 {
3535 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3536 copystrings |= 1 << n;
3537 }
3538 else if (isalnum(*p))
3539 {
3540 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3541 }
3542 else if (*p == '+')
3543 {
3544 callout_extra = 1;
3545 p++;
3546 }
3547 else if (*p == '-')
3548 {
3549 SET_PCRE_CALLOUT(NULL);
3550 p++;
3551 }
3552 else if (*p == '!')
3553 {
3554 callout_fail_id = 0;
3555 p++;
3556 while(isdigit(*p))
3557 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3558 callout_fail_count = 0;
3559 if (*p == '!')
3560 {
3561 p++;
3562 while(isdigit(*p))
3563 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3564 }
3565 }
3566 else if (*p == '*')
3567 {
3568 int sign = 1;
3569 callout_data = 0;
3570 if (*(++p) == '-') { sign = -1; p++; }
3571 while(isdigit(*p))
3572 callout_data = callout_data * 10 + *p++ - '0';
3573 callout_data *= sign;
3574 callout_data_set = 1;
3575 }
3576 continue;
3577
3578 #if !defined NODFA
3579 case 'D':
3580 #if !defined NOPOSIX
3581 if (posix || do_posix)
3582 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3583 else
3584 #endif
3585 use_dfa = 1;
3586 continue;
3587 #endif
3588
3589 #if !defined NODFA
3590 case 'F':
3591 options |= PCRE_DFA_SHORTEST;
3592 continue;
3593 #endif
3594
3595 case 'G':
3596 if (isdigit(*p))
3597 {
3598 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3599 getstrings |= 1 << n;
3600 }
3601 else if (isalnum(*p))
3602 {
3603 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3604 }
3605 continue;
3606
3607 case 'J':
3608 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3609 if (extra != NULL
3610 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3611 && extra->executable_jit != NULL)
3612 {
3613 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3614 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3615 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3616 }
3617 continue;
3618
3619 case 'L':
3620 getlist = 1;
3621 continue;
3622
3623 case 'M':
3624 find_match_limit = 1;
3625 continue;
3626
3627 case 'N':
3628 if ((options & PCRE_NOTEMPTY) != 0)
3629 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3630 else
3631 options |= PCRE_NOTEMPTY;
3632 continue;
3633
3634 case 'O':
3635 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3636 if (n > size_offsets_max)
3637 {
3638 size_offsets_max = n;
3639 free(offsets);
3640 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3641 if (offsets == NULL)
3642 {
3643 printf("** Failed to get %d bytes of memory for offsets vector\n",
3644 (int)(size_offsets_max * sizeof(int)));
3645 yield = 1;
3646 goto EXIT;
3647 }
3648 }
3649 use_size_offsets = n;
3650 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3651 continue;
3652
3653 case 'P':
3654 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3655 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3656 continue;
3657
3658 case 'Q':
3659 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3660 if (extra == NULL)
3661 {
3662 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3663 extra->flags = 0;
3664 }
3665 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3666 extra->match_limit_recursion = n;
3667 continue;
3668
3669 case 'q':
3670 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3671 if (extra == NULL)
3672 {
3673 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3674 extra->flags = 0;
3675 }
3676 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3677 extra->match_limit = n;
3678 continue;
3679
3680 #if !defined NODFA
3681 case 'R':
3682 options |= PCRE_DFA_RESTART;
3683 continue;
3684 #endif
3685
3686 case 'S':
3687 show_malloc = 1;
3688 continue;
3689
3690 case 'Y':
3691 options |= PCRE_NO_START_OPTIMIZE;
3692 continue;
3693
3694 case 'Z':
3695 options |= PCRE_NOTEOL;
3696 continue;
3697
3698 case '?':
3699 options |= PCRE_NO_UTF8_CHECK;
3700 continue;
3701
3702 case '<':
3703 {
3704 int x = check_newline(p, outfile);
3705 if (x == 0) goto NEXT_DATA;
3706 options |= x;
3707 while (*p++ != '>');
3708 }
3709 continue;
3710 }
3711
3712 /* We now have a character value in c that may be greater than 255. In
3713 16-bit mode, we always convert characters to UTF-8 so that values greater
3714 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3715 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3716 mode must have come from \x{...} or octal constructs because values from
3717 \x.. get this far only in non-UTF mode. */
3718
3719 #if !defined NOUTF || defined SUPPORT_PCRE16
3720 if (use_pcre16 || use_utf)
3721 {
3722 pcre_uint8 buff8[8];
3723 int ii, utn;
3724 utn = ord2utf8(c, buff8);
3725 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3726 }
3727 else
3728 #endif
3729 {
3730 if (c > 255)
3731 {
3732 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3733 "and UTF-8 mode is not enabled.\n", c);
3734 fprintf(outfile, "** Truncation will probably give the wrong "
3735 "result.\n");
3736 }
3737 *q++ = c;
3738 }
3739 }
3740
3741 /* Reached end of subject string */
3742
3743 *q = 0;
3744 len = (int)(q - dbuffer);
3745
3746 /* Move the data to the end of the buffer so that a read over the end of
3747 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3748 we are using the POSIX interface, we must include the terminating zero. */
3749
3750 #if !defined NOPOSIX
3751 if (posix || do_posix)
3752 {
3753 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3754 bptr += buffer_size - len - 1;
3755 }
3756 else
3757 #endif
3758 {
3759 memmove(bptr + buffer_size - len, bptr, len);
3760 bptr += buffer_size - len;
3761 }
3762
3763 if ((all_use_dfa || use_dfa) && find_match_limit)
3764 {
3765 printf("**Match limit not relevant for DFA matching: ignored\n");
3766 find_match_limit = 0;
3767 }
3768
3769 /* Handle matching via the POSIX interface, which does not
3770 support timing or playing with the match limit or callout data. */
3771
3772 #if !defined NOPOSIX
3773 if (posix || do_posix)
3774 {
3775 int rc;
3776 int eflags = 0;
3777 regmatch_t *pmatch = NULL;
3778 if (use_size_offsets > 0)
3779 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3780 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3781 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3782 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3783
3784 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3785
3786 if (rc != 0)
3787 {
3788 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3789 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3790 }
3791 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3792 != 0)
3793 {
3794 fprintf(outfile, "Matched with REG_NOSUB\n");
3795 }
3796 else
3797 {
3798 size_t i;
3799 for (i = 0; i < (size_t)use_size_offsets; i++)
3800 {
3801 if (pmatch[i].rm_so >= 0)
3802 {
3803 fprintf(outfile, "%2d: ", (int)i);
3804 PCHARSV(dbuffer, pmatch[i].rm_so,
3805 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3806 fprintf(outfile, "\n");
3807 if (do_showcaprest || (i == 0 && do_showrest))
3808 {
3809 fprintf(outfile, "%2d+ ", (int)i);
3810 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3811 outfile);
3812 fprintf(outfile, "\n");
3813 }
3814 }
3815 }
3816 }
3817 free(pmatch);
3818 goto NEXT_DATA;
3819 }
3820
3821 #endif /* !defined NOPOSIX */
3822
3823 /* Handle matching via the native interface - repeats for /g and /G */
3824
3825 #ifdef SUPPORT_PCRE16
3826 if (use_pcre16)
3827 {
3828 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3829 switch(len)
3830 {
3831 case -1:
3832 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3833 "converted to UTF-16\n");
3834 goto NEXT_DATA;
3835
3836 case -2:
3837 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3838 "cannot be converted to UTF-16\n");
3839 goto NEXT_DATA;
3840
3841 case -3:
3842 fprintf(outfile, "**Failed: character value greater than 0xffff "
3843 "cannot be converted to 16-bit in non-UTF mode\n");
3844 goto NEXT_DATA;
3845
3846 default:
3847 break;
3848 }
3849 bptr = (pcre_uint8 *)buffer16;
3850 }
3851 #endif
3852
3853 for (;; gmatched++) /* Loop for /g or /G */
3854 {
3855 markptr = NULL;
3856
3857 if (timeitm > 0)
3858 {
3859 register int i;
3860 clock_t time_taken;
3861 clock_t start_time = clock();
3862
3863 #if !defined NODFA
3864 if (all_use_dfa || use_dfa)
3865 {
3866 int workspace[1000];
3867 for (i = 0; i < timeitm; i++)
3868 {
3869 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3870 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3871 (sizeof(workspace)/sizeof(int)));
3872 }
3873 }
3874 else
3875 #endif
3876
3877 for (i = 0; i < timeitm; i++)
3878 {
3879 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3880 (options | g_notempty), use_offsets, use_size_offsets);
3881 }
3882 time_taken = clock() - start_time;
3883 fprintf(outfile, "Execute time %.4f milliseconds\n",
3884 (((double)time_taken * 1000.0) / (double)timeitm) /
3885 (double)CLOCKS_PER_SEC);
3886 }
3887
3888 /* If find_match_limit is set, we want to do repeated matches with
3889 varying limits in order to find the minimum value for the match limit and
3890 for the recursion limit. The match limits are relevant only to the normal
3891 running of pcre_exec(), so disable the JIT optimization. This makes it
3892 possible to run the same set of tests with and without JIT externally
3893 requested. */
3894
3895 if (find_match_limit)
3896 {
3897 if (extra == NULL)
3898 {
3899 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3900 extra->flags = 0;
3901 }
3902 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3903
3904 (void)check_match_limit(re, extra, bptr, len, start_offset,
3905 options|g_notempty, use_offsets, use_size_offsets,
3906 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3907 PCRE_ERROR_MATCHLIMIT, "match()");
3908
3909 count = check_match_limit(re, extra, bptr, len, start_offset,
3910 options|g_notempty, use_offsets, use_size_offsets,
3911 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3912 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3913 }
3914
3915 /* If callout_data is set, use the interface with additional data */
3916
3917 else if (callout_data_set)
3918 {
3919 if (extra == NULL)
3920 {
3921 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3922 extra->flags = 0;
3923 }
3924 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3925 extra->callout_data = &callout_data;
3926 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3927 options | g_notempty, use_offsets, use_size_offsets);
3928 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3929 }
3930
3931 /* The normal case is just to do the match once, with the default
3932 value of match_limit. */
3933
3934 #if !defined NODFA
3935 else if (all_use_dfa || use_dfa)
3936 {
3937 int workspace[1000];
3938 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3939 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3940 (sizeof(workspace)/sizeof(int)));
3941 if (count == 0)
3942 {
3943 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3944 count = use_size_offsets/2;
3945 }
3946 }
3947 #endif
3948
3949 else
3950 {
3951 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3952 options | g_notempty, use_offsets, use_size_offsets);
3953 if (count == 0)
3954 {
3955 fprintf(outfile, "Matched, but too many substrings\n");
3956 count = use_size_offsets/3;
3957 }
3958 }
3959
3960 /* Matched */
3961
3962 if (count >= 0)
3963 {
3964 int i, maxcount;
3965 void *cnptr, *gnptr;
3966
3967 #if !defined NODFA
3968 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3969 #endif
3970 maxcount = use_size_offsets/3;
3971
3972 /* This is a check against a lunatic return value. */
3973
3974 if (count > maxcount)
3975 {
3976 fprintf(outfile,
3977 "** PCRE error: returned count %d is too big for offset size %d\n",
3978 count, use_size_offsets);
3979 count = use_size_offsets/3;
3980 if (do_g || do_G)
3981 {
3982 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3983 do_g = do_G = FALSE; /* Break g/G loop */
3984 }
3985 }
3986
3987 /* do_allcaps requests showing of all captures in the pattern, to check
3988 unset ones at the end. */
3989
3990 if (do_allcaps)
3991 {
3992 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3993 goto SKIP_DATA;
3994 count++; /* Allow for full match */
3995 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3996 }
3997
3998 /* Output the captured substrings */
3999
4000 for (i = 0; i < count * 2; i += 2)
4001 {
4002 if (use_offsets[i] < 0)
4003 {
4004 if (use_offsets[i] != -1)
4005 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4006 use_offsets[i], i);
4007 if (use_offsets[i+1] != -1)
4008 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4009 use_offsets[i+1], i+1);
4010 fprintf(outfile, "%2d: <unset>\n", i/2);
4011 }
4012 else
4013 {
4014 fprintf(outfile, "%2d: ", i/2);
4015 PCHARSV(bptr, use_offsets[i],
4016 use_offsets[i+1] - use_offsets[i], outfile);
4017 fprintf(outfile, "\n");
4018 if (do_showcaprest || (i == 0 && do_showrest))
4019 {
4020 fprintf(outfile, "%2d+ ", i/2);
4021 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4022 outfile);
4023 fprintf(outfile, "\n");
4024 }
4025 }
4026 }
4027
4028 if (markptr != NULL)
4029 {
4030 fprintf(outfile, "MK: ");
4031 PCHARSV(markptr, 0, -1, outfile);
4032 fprintf(outfile, "\n");
4033 }
4034
4035 for (i = 0; i < 32; i++)
4036 {
4037 if ((copystrings & (1 << i)) != 0)
4038 {
4039 int rc;
4040 char copybuffer[256];
4041 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4042 copybuffer, sizeof(copybuffer));
4043 if (rc < 0)
4044 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4045 else
4046 {
4047 fprintf(outfile, "%2dC ", i);
4048 PCHARSV(copybuffer, 0, rc, outfile);
4049 fprintf(outfile, " (%d)\n", rc);
4050 }
4051 }
4052 }
4053
4054 cnptr = copynames;
4055 for (;;)
4056 {
4057 int rc;
4058 char copybuffer[256];
4059
4060 if (use_pcre16)
4061 {
4062 if (*(pcre_uint16 *)cnptr == 0) break;
4063 }
4064 else
4065 {
4066 if (*(pcre_uint8 *)cnptr == 0) break;
4067 }
4068
4069 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4070 cnptr, copybuffer, sizeof(copybuffer));
4071
4072 if (rc < 0)
4073 {
4074 fprintf(outfile, "copy substring ");
4075 PCHARSV(cnptr, 0, -1, outfile);
4076 fprintf(outfile, " failed %d\n", rc);
4077 }
4078 else
4079 {
4080 fprintf(outfile, " C ");
4081 PCHARSV(copybuffer, 0, rc, outfile);
4082 fprintf(outfile, " (%d) ", rc);
4083 PCHARSV(cnptr, 0, -1, outfile);
4084 putc('\n', outfile);
4085 }
4086
4087 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4088 }
4089
4090 for (i = 0; i < 32; i++)
4091 {
4092 if ((getstrings & (1 << i)) != 0)
4093 {
4094 int rc;
4095 const char *substring;
4096 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4097 if (rc < 0)
4098 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4099 else
4100 {
4101 fprintf(outfile, "%2dG ", i);
4102 PCHARSV(substring, 0, rc, outfile);
4103 fprintf(outfile, " (%d)\n", rc);
4104 PCRE_FREE_SUBSTRING(substring);
4105 }
4106 }
4107 }
4108
4109 gnptr = getnames;
4110 for (;;)
4111 {
4112 int rc;
4113 const char *substring;
4114
4115 if (use_pcre16)
4116 {
4117 if (*(pcre_uint16 *)gnptr == 0) break;
4118 }
4119 else
4120 {
4121 if (*(pcre_uint8 *)gnptr == 0) break;
4122 }
4123
4124 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4125 gnptr, &substring);
4126 if (rc < 0)
4127 {
4128 fprintf(outfile, "get substring ");
4129 PCHARSV(gnptr, 0, -1, outfile);
4130 fprintf(outfile, " failed %d\n", rc);
4131 }
4132 else
4133 {
4134 fprintf(outfile, " G ");
4135 PCHARSV(substring, 0, rc, outfile);
4136 fprintf(outfile, " (%d) ", rc);
4137 PCHARSV(gnptr, 0, -1, outfile);
4138 PCRE_FREE_SUBSTRING(substring);
4139 putc('\n', outfile);
4140 }
4141
4142 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4143 }
4144
4145 if (getlist)
4146 {
4147 int rc;
4148 const char **stringlist;
4149 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4150 if (rc < 0)
4151 fprintf(outfile, "get substring list failed %d\n", rc);
4152 else
4153 {
4154 for (i = 0; i < count; i++)
4155 {
4156 fprintf(outfile, "%2dL ", i);
4157 PCHARSV(stringlist[i], 0, -1, outfile);
4158 putc('\n', outfile);
4159 }
4160 if (stringlist[i] != NULL)
4161 fprintf(outfile, "string list not terminated by NULL\n");
4162 PCRE_FREE_SUBSTRING_LIST(stringlist);
4163 }
4164 }
4165 }
4166
4167 /* There was a partial match */
4168
4169 else if (count == PCRE_ERROR_PARTIAL)
4170 {
4171 if (markptr == NULL) fprintf(outfile, "Partial match");
4172 else
4173 {
4174 fprintf(outfile, "Partial match, mark=");
4175 PCHARSV(markptr, 0, -1, outfile);
4176 }
4177 if (use_size_offsets > 1)
4178 {
4179 fprintf(outfile, ": ");
4180 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4181 outfile);
4182 }
4183 fprintf(outfile, "\n");
4184 break; /* Out of the /g loop */
4185 }
4186
4187 /* Failed to match. If this is a /g or /G loop and we previously set
4188 g_notempty after a null match, this is not necessarily the end. We want
4189 to advance the start offset, and continue. We won't be at the end of the
4190 string - that was checked before setting g_notempty.
4191
4192 Complication arises in the case when the newline convention is "any",
4193 "crlf", or "anycrlf". If the previous match was at the end of a line
4194 terminated by CRLF, an advance of one character just passes the \r,
4195 whereas we should prefer the longer newline sequence, as does the code in
4196 pcre_exec(). Fudge the offset value to achieve this. We check for a
4197 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4198 find the default.
4199
4200 Otherwise, in the case of UTF-8 matching, the advance must be one
4201 character, not one byte. */
4202
4203 else
4204 {
4205 if (g_notempty != 0)
4206 {
4207 int onechar = 1;
4208 unsigned int obits = ((REAL_PCRE *)re)->options;
4209 use_offsets[0] = start_offset;
4210 if ((obits & PCRE_NEWLINE_BITS) == 0)
4211 {
4212 int d;
4213 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4214 /* Note that these values are always the ASCII ones, even in
4215 EBCDIC environments. CR = 13, NL = 10. */
4216 obits = (d == 13)? PCRE_NEWLINE_CR :
4217 (d == 10)? PCRE_NEWLINE_LF :
4218 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4219 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4220 (d == -1)? PCRE_NEWLINE_ANY : 0;
4221 }
4222 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4223 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4224 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4225 &&
4226 start_offset < len - 1 &&
4227 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4228 (use_pcre16?
4229 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4230 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4231 :
4232 bptr[start_offset] == '\r'
4233 && bptr[start_offset + 1] == '\n')
4234 #elif defined SUPPORT_PCRE16
4235 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4236 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4237 #else
4238 bptr[start_offset] == '\r'
4239 && bptr[start_offset + 1] == '\n'
4240 #endif
4241 )
4242 onechar++;
4243 else if (use_utf)
4244 {
4245 while (start_offset + onechar < len)
4246 {
4247 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4248 onechar++;
4249 }
4250 }
4251 use_offsets[1] = start_offset + onechar;
4252 }
4253 else
4254 {
4255 switch(count)
4256 {
4257 case PCRE_ERROR_NOMATCH:
4258 if (gmatched == 0)
4259 {
4260 if (markptr == NULL)
4261 {
4262 fprintf(outfile, "No match\n");
4263 }
4264 else
4265 {
4266 fprintf(outfile, "No match, mark = ");
4267 PCHARSV(markptr, 0, -1, outfile);
4268 putc('\n', outfile);
4269 }
4270 }
4271 break;
4272
4273 case PCRE_ERROR_BADUTF8:
4274 case PCRE_ERROR_SHORTUTF8:
4275 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4276 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4277 use_pcre16? "16" : "8");
4278 if (use_size_offsets >= 2)
4279 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4280 use_offsets[1]);
4281 fprintf(outfile, "\n");
4282 break;
4283
4284 case PCRE_ERROR_BADUTF8_OFFSET:
4285 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4286 use_pcre16? "16" : "8");
4287 break;
4288
4289 default:
4290 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4291 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4292 else
4293 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4294 break;
4295 }
4296
4297 break; /* Out of the /g loop */
4298 }
4299 }
4300
4301 /* If not /g or /G we are done */
4302
4303 if (!do_g && !do_G) break;
4304
4305 /* If we have matched an empty string, first check to see if we are at
4306 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4307 Perl's /g option does. This turns out to be rather cunning. First we set
4308 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4309 same point. If this fails (picked up above) we advance to the next
4310 character. */
4311
4312 g_notempty = 0;
4313
4314 if (use_offsets[0] == use_offsets[1])
4315 {
4316 if (use_offsets[0] == len) break;
4317 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4318 }
4319
4320 /* For /g, update the start offset, leaving the rest alone */
4321
4322 if (do_g) start_offset = use_offsets[1];
4323
4324 /* For /G, update the pointer and length */
4325
4326 else
4327 {
4328 bptr += use_offsets[1] * CHAR_SIZE;
4329 len -= use_offsets[1];
4330 }
4331 } /* End of loop for /g and /G */
4332
4333 NEXT_DATA: continue;
4334 } /* End of loop for data lines */
4335
4336 CONTINUE:
4337
4338 #if !defined NOPOSIX
4339 if (posix || do_posix) regfree(&preg);
4340 #endif
4341
4342 if (re != NULL) new_free(re);
4343 if (extra != NULL)
4344 {
4345 PCRE_FREE_STUDY(extra);
4346 }
4347 if (locale_set)
4348 {
4349 new_free((void *)tables);
4350 setlocale(LC_CTYPE, "C");
4351 locale_set = 0;
4352 }
4353 if (jit_stack != NULL)
4354 {
4355 PCRE_JIT_STACK_FREE(jit_stack);
4356 jit_stack = NULL;
4357 }
4358 }
4359
4360 if (infile == stdin) fprintf(outfile, "\n");
4361
4362 EXIT:
4363
4364 if (infile != NULL && infile != stdin) fclose(infile);
4365 if (outfile != NULL && outfile != stdout) fclose(outfile);
4366
4367 free(buffer);
4368 free(dbuffer);
4369 free(pbuffer);
4370 free(offsets);
4371
4372 #ifdef SUPPORT_PCRE16
4373 if (buffer16 != NULL) free(buffer16);
4374 #endif
4375
4376 return yield;
4377 }
4378
4379 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5