/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 823 - (show annotations)
Sat Dec 24 17:43:22 2011 UTC (9 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 123641 byte(s)
Tidy pcretest source code and some 8/16 messages. Add "16" error codes.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r" /* no b flag: binary mode is wrong for input on Windows */
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb" /* the b flag makes no difference on Unix-like systems */
105 #define OUTPUT_MODE "wb"
106 #endif /* _WIN32 || WIN32 */
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119 #include "pcre_internal.h"
120
121 /* The pcre_printint() function, which prints the internal form of a compiled
122 regex, is held in a separate file so that (a) it can be compiled in either
123 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
124 when that is compiled in debug mode. Each prototype is guarded by its SUPPORT_PCREnn macro. */
125
126 #ifdef SUPPORT_PCRE8
127 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
128 #endif /* SUPPORT_PCRE8 */
129 #ifdef SUPPORT_PCRE16
130 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
131 #endif /* SUPPORT_PCRE16 */
132
133 /* We need access to some of the data tables that PCRE uses. So as not to have
134 to keep two copies, we include the source file here, changing the names of the
135 external symbols to prevent clashes. Each macro below drops the _pcre_ prefix. */
136
137 #define _pcre_ucp_gentype ucp_gentype
138 #define _pcre_ucp_typerange ucp_typerange
139 #define _pcre_utf8_table1 utf8_table1
140 #define _pcre_utf8_table1_size utf8_table1_size
141 #define _pcre_utf8_table2 utf8_table2
142 #define _pcre_utf8_table3 utf8_table3
143 #define _pcre_utf8_table4 utf8_table4
144 #define _pcre_utt utt
145 #define _pcre_utt_size utt_size
146 #define _pcre_utt_names utt_names
147 #define _pcre_OP_lengths OP_lengths
148
149 #include "pcre_tables.c" /* compiled here under the renamed symbols */
150
151 /* The definition of the macro PRINTABLE, which determines whether to print an
152 output character as-is or as a hex value when showing compiled patterns, is
153 the same as in the printint.src file. We use it here in cases when the locale
154 has not been explicitly changed, so as to get consistent output from systems
155 that differ in their output from isprint() even in the "C" locale. */
156
157 #ifdef EBCDIC
158 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) /* EBCDIC: codes 64..254 */
159 #else
160 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) /* otherwise: codes 32..126 */
161 #endif
162 /* PRINTABLE evaluates c twice. PRINTOK calls isprint() only when locale_set is nonzero. */
163 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
164
165 /* It is possible to compile this test program without including support for
166 testing the POSIX interface, though this is not available via the standard
167 Makefile. */
168
169 #if !defined NOPOSIX
170 #include "pcreposix.h"
171 #endif
172
173 /* It is also possible, originally for the benefit of a version that was
174 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
175 NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
176 automatically cut out the UTF support if PCRE is built without it. */
177
178 #ifndef SUPPORT_UTF
179 #ifndef NOUTF
180 #define NOUTF
181 #endif /* NOUTF */
182 #endif /* SUPPORT_UTF */
183
184 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
185 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
186 only from one place and is handled differently). I couldn't dream up any way of
187 using a single macro to do this in a generic way, because of the many different
188 argument requirements. We know that at least one of SUPPORT_PCRE8 and
189 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
190 use these in the definitions of generic macros.
191
192 **** Special note about the PCHARSxxx macros: the address of the string to be
193 printed is always given as two arguments: a base address followed by an offset.
194 The base address is cast to the correct data size for 8 or 16 bit data; the
195 offset is in units of this size. If the string were given as base+offset in one
196 argument, the casting might be incorrectly applied. */
197
198 #ifdef SUPPORT_PCRE8
199 /* 8-bit variants: each macro forwards to the 8-bit helper or pcre_xxx() function named in its body. */
200 #define PCHARS8(lv, p, offset, len, f) \
201 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
202 /* As PCHARS8, but the result of pchars() is discarded. Note that offset is not parenthesized. */
203 #define PCHARSV8(p, offset, len, f) \
204 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
205 /* cn16 is not used here; the parameter list matches READ_CAPTURE_NAME16. */
206 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
207 p = read_capture_name8(p, cn8, re)
208
209 #define SET_PCRE_CALLOUT8(callout) \
210 pcre_callout = callout
211
212 #define STRLEN8(p) ((int)strlen((char *)p))
213
214 /* Call wrappers: where the first parameter is a result variable, the call result is assigned to it. */
215 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
216 re = pcre_compile((char *)pat, options, error, erroffset, tables)
217
218 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
219 namesptr, cbuffer, size) \
220 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
221 (char *)namesptr, cbuffer, size)
222
223 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
224 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
225
226 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
227 offsets, size_offsets, workspace, size_workspace) \
228 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
229 offsets, size_offsets, workspace, size_workspace)
230
231 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232 offsets, size_offsets) \
233 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
234 offsets, size_offsets)
235
236 #define PCRE_FREE_STUDY8(extra) \
237 pcre_free_study(extra)
238
239 #define PCRE_FREE_SUBSTRING8(substring) \
240 pcre_free_substring(substring)
241
242 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
243 pcre_free_substring_list(listptr)
244
245 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
246 getnamesptr, subsptr) \
247 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
248 (char *)getnamesptr, subsptr)
249 /* NOTE(review): the rc parameter is unused; re is taken from the scope of the call site, not from the arguments. */
250 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
251 n = pcre_get_stringnumber(re, (char *)ptr)
252
253 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
254 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
255
256 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
257 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
258
259 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
260 pcre_pattern_to_host_byte_order(re, extra, tables)
261
262 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
263 pcre_printint(re, outfile, debug_lengths)
264
265 #define PCRE_STUDY8(extra, re, options, error) \
266 extra = pcre_study(re, options, error)
267
268 #endif /* SUPPORT_PCRE8 */
269
270 /* -----------------------------------------------------------*/
271
272 #ifdef SUPPORT_PCRE16
273 /* 16-bit variants: each macro forwards to the 16-bit helper or pcre16_xxx() function named in its body. */
274 #define PCHARS16(lv, p, offset, len, f) \
275 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
276 /* As PCHARS16, but the result of pchars16() is discarded. Note that offset is not parenthesized. */
277 #define PCHARSV16(p, offset, len, f) \
278 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
279 /* cn8 is not used here; the parameter list matches READ_CAPTURE_NAME8. */
280 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
281 p = read_capture_name16(p, cn16, re)
282
283 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
284
285 #define SET_PCRE_CALLOUT16(callout) \
286 pcre16_callout = callout
287
288 /* Call wrappers: where the first parameter is a result variable, the call result is assigned to it. */
289 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
290 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
291 /* In the two COPY macros below, size is divided by 2 (presumably bytes to 16-bit units) and is not parenthesized. */
292 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
293 namesptr, cbuffer, size) \
294 rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
295 (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)
296
297 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
298 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
299 (PCRE_SCHAR16 *)cbuffer, size/2)
300
301 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
302 offsets, size_offsets, workspace, size_workspace) \
303 count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
304 options, offsets, size_offsets, workspace, size_workspace)
305
306 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
307 offsets, size_offsets) \
308 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
309 options, offsets, size_offsets)
310
311 #define PCRE_FREE_STUDY16(extra) \
312 pcre16_free_study(extra)
313
314 #define PCRE_FREE_SUBSTRING16(substring) \
315 pcre16_free_substring((PCRE_SPTR16)substring)
316
317 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
318 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
319
320 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
321 getnamesptr, subsptr) \
322 rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
323 (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
324 /* NOTE(review): the rc parameter is unused; re is taken from the scope of the call site, not from the arguments. */
325 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
326 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
327
328 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
329 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
330 (PCRE_SPTR16 *)(void*)subsptr)
331
332 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
333 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
334 (PCRE_SPTR16 **)(void*)listptr)
335
336 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
337 pcre16_pattern_to_host_byte_order(re, extra, tables)
338
339 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
340 pcre16_printint(re, outfile, debug_lengths)
341
342 #define PCRE_STUDY16(extra, re, options, error) \
343 extra = pcre16_study(re, options, error)
344
345 #endif /* SUPPORT_PCRE16 */
346
347
348 /* ----- Both modes are supported; a runtime test is needed, except for
349 pcre_config(), and the JIT stack functions, when it doesn't matter which
350 version is called. ----- */
351 /* Macros below that expand to a bare if/else are statements: use each as a full statement ending in a semicolon. */
352 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
353 /* use_pcre16 selects the 16-bit variant at run time. CHAR_SIZE, STRLEN and PCRE_MAKETABLES are expressions. */
354 #define CHAR_SIZE (use_pcre16? 2:1)
355
356 #define PCHARS(lv, p, offset, len, f) \
357 if (use_pcre16) \
358 PCHARS16(lv, p, offset, len, f); \
359 else \
360 PCHARS8(lv, p, offset, len, f)
361
362 #define PCHARSV(p, offset, len, f) \
363 if (use_pcre16) \
364 PCHARSV16(p, offset, len, f); \
365 else \
366 PCHARSV8(p, offset, len, f)
367
368 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
369 if (use_pcre16) \
370 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
371 else \
372 READ_CAPTURE_NAME8(p, cn8, cn16, re)
373
374 #define SET_PCRE_CALLOUT(callout) \
375 if (use_pcre16) \
376 SET_PCRE_CALLOUT16(callout); \
377 else \
378 SET_PCRE_CALLOUT8(callout)
379
380 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
381
382 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack /* always the 8-bit function in this branch */
383
384 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
385 if (use_pcre16) \
386 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
387 else \
388 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
389
390 #define PCRE_CONFIG pcre_config /* always the 8-bit function in this branch */
391
392 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
393 namesptr, cbuffer, size) \
394 if (use_pcre16) \
395 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
396 namesptr, cbuffer, size); \
397 else \
398 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
399 namesptr, cbuffer, size)
400
401 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
402 if (use_pcre16) \
403 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
404 else \
405 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
406
407 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
408 offsets, size_offsets, workspace, size_workspace) \
409 if (use_pcre16) \
410 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
411 offsets, size_offsets, workspace, size_workspace); \
412 else \
413 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
414 offsets, size_offsets, workspace, size_workspace)
415
416 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
417 offsets, size_offsets) \
418 if (use_pcre16) \
419 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
420 offsets, size_offsets); \
421 else \
422 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
423 offsets, size_offsets)
424
425 #define PCRE_FREE_STUDY(extra) \
426 if (use_pcre16) \
427 PCRE_FREE_STUDY16(extra); \
428 else \
429 PCRE_FREE_STUDY8(extra)
430
431 #define PCRE_FREE_SUBSTRING(substring) \
432 if (use_pcre16) \
433 PCRE_FREE_SUBSTRING16(substring); \
434 else \
435 PCRE_FREE_SUBSTRING8(substring)
436
437 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
438 if (use_pcre16) \
439 PCRE_FREE_SUBSTRING_LIST16(listptr); \
440 else \
441 PCRE_FREE_SUBSTRING_LIST8(listptr)
442
443 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
444 getnamesptr, subsptr) \
445 if (use_pcre16) \
446 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
447 getnamesptr, subsptr); \
448 else \
449 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
450 getnamesptr, subsptr)
451
452 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
453 if (use_pcre16) \
454 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
455 else \
456 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
457
458 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
459 if (use_pcre16) \
460 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
461 else \
462 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
465 if (use_pcre16) \
466 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
467 else \
468 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
469
470 #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc /* always the 8-bit function in this branch */
471 #define PCRE_JIT_STACK_FREE pcre_jit_stack_free /* always the 8-bit function in this branch */
472
473 #define PCRE_MAKETABLES \
474 (use_pcre16? pcre16_maketables() : pcre_maketables())
475
476 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
477 if (use_pcre16) \
478 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
479 else \
480 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
481
482 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
483 if (use_pcre16) \
484 PCRE_PRINTINT16(re, outfile, debug_lengths); \
485 else \
486 PCRE_PRINTINT8(re, outfile, debug_lengths)
487
488 #define PCRE_STUDY(extra, re, options, error) \
489 if (use_pcre16) \
490 PCRE_STUDY16(extra, re, options, error); \
491 else \
492 PCRE_STUDY8(extra, re, options, error)
493
494 /* ----- Only 8-bit mode is supported ----- */
495 /* Every generic name maps directly to its 8-bit macro or function; no runtime test is made. */
496 #elif defined SUPPORT_PCRE8
497 #define CHAR_SIZE 1 /* same value the dual-mode definition gives when use_pcre16 is 0 */
498 #define PCHARS PCHARS8
499 #define PCHARSV PCHARSV8
500 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
501 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
502 #define STRLEN STRLEN8
503 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
504 #define PCRE_COMPILE PCRE_COMPILE8
505 #define PCRE_CONFIG pcre_config
506 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
507 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
508 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
509 #define PCRE_EXEC PCRE_EXEC8
510 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
511 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
512 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
513 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
514 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
515 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
516 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
517 #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
518 #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
519 #define PCRE_MAKETABLES pcre_maketables()
520 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
521 #define PCRE_PRINTINT PCRE_PRINTINT8
522 #define PCRE_STUDY PCRE_STUDY8
523
524 /* ----- Only 16-bit mode is supported ----- */
525
526 #else
527 #define CHAR_SIZE 2 /* 16-bit only: use_pcre16 is always 1 here, so match the dual-mode value (use_pcre16? 2:1) */
528 #define PCHARS PCHARS16 /* from here on, each generic name maps directly to its 16-bit macro or function */
529 #define PCHARSV PCHARSV16
530 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
531 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
532 #define STRLEN STRLEN16
533 #define PCRE_ASSIGN_JIT_STACK pcre16_assign_jit_stack
534 #define PCRE_COMPILE PCRE_COMPILE16
535 #define PCRE_CONFIG pcre16_config
536 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
537 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
538 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
539 #define PCRE_EXEC PCRE_EXEC16
540 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
541 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
542 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
543 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
544 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
545 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
546 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
547 #define PCRE_JIT_STACK_ALLOC pcre16_jit_stack_alloc
548 #define PCRE_JIT_STACK_FREE pcre16_jit_stack_free
549 #define PCRE_MAKETABLES pcre16_maketables()
550 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
551 #define PCRE_PRINTINT PCRE_PRINTINT16
552 #define PCRE_STUDY PCRE_STUDY16
553 #endif /* end of the 8-bit / 16-bit mode selection */
554
555 /* ----- End of mode-specific function call macros ----- */
556
557
558 /* Other parameters */
559 /* If CLOCKS_PER_SEC is not already defined, fall back to CLK_TCK when that exists, otherwise to 100. */
560 #ifndef CLOCKS_PER_SEC
561 #ifdef CLK_TCK
562 #define CLOCKS_PER_SEC CLK_TCK
563 #else
564 #define CLOCKS_PER_SEC 100
565 #endif /* CLK_TCK */
566 #endif /* CLOCKS_PER_SEC */
567
568 /* This is the default loop count for timing. */
569
570 #define LOOPREPEAT 500000
571
572 /* Static variables */
573 /* Those declared without an initializer start at zero or NULL, as all objects with static storage do. */
574 static FILE *outfile;
575 static int log_store = 0;
576 static int callout_count;
577 static int callout_extra;
578 static int callout_fail_count;
579 static int callout_fail_id;
580 static int debug_lengths;
581 static int first_callout;
582 static int locale_set = 0; /* tested by PRINTOK(): nonzero selects isprint() */
583 static int show_malloc;
584 static int use_utf;
585 static size_t gotten_store;
586 static size_t first_gotten_store = 0;
587 static const unsigned char *last_callout_mark = NULL;
588
589 /* The buffers grow automatically if very long input lines are encountered. */
590
591 static int buffer_size = 50000; /* starting value only; see the comment above */
592 static pcre_uint8 *buffer = NULL;
593 static pcre_uint8 *dbuffer = NULL;
594 static pcre_uint8 *pbuffer = NULL;
595
596 /* Another buffer is needed for translation to 16-bit character strings. It
597 will be obtained and extended as required. */
598
599 #ifdef SUPPORT_PCRE16
600 static int buffer16_size = 0;
601 static pcre_uint16 *buffer16 = NULL;
602
603 /* We need the table of operator lengths that is used for 16-bit compiling, in
604 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
605 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
606 appropriately for the 16-bit world. Just as a safety check, make sure that
607 COMPILE_PCRE16 is *not* set. */
608
609 #ifdef COMPILE_PCRE16
610 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
611 #endif
612 /* Remap LINK_SIZE for the 16-bit table: 2 becomes 1, and 3 or 4 becomes 2; any other value stops the build. */
613 #if LINK_SIZE == 2
614 #undef LINK_SIZE
615 #define LINK_SIZE 1
616 #elif LINK_SIZE == 3 || LINK_SIZE == 4
617 #undef LINK_SIZE
618 #define LINK_SIZE 2
619 #else
620 #error LINK_SIZE must be either 2, 3, or 4
621 #endif
622 /* OP_LENGTHS is expanded here, after the remapping, so it sees the adjusted LINK_SIZE. */
623 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
624 #endif /* SUPPORT_PCRE16 */
625
626 /* If we have 8-bit support, default use_pcre16 to false (0); if there is also
627 16-bit support, it can be changed by an option. If there is no 8-bit support,
628 there must be 16-bit support, so default it to true (1). */
629
630 #ifdef SUPPORT_PCRE8
631 static int use_pcre16 = 0; /* start with the 8-bit library */
632 #else
633 static int use_pcre16 = 1; /* the 16-bit library is the only one available */
634 #endif
635
636 /* Textual explanations for runtime error codes */
637 /* The number after each string is its array index; presumably the index is the negated PCRE error code (confirm at the lookup site). */
638 static const char *errtexts[] = {
639 NULL, /* 0 is no error */
640 NULL, /* NOMATCH is handled specially */
641 "NULL argument passed", /* 2 */
642 "bad option value", /* 3 */
643 "magic number missing", /* 4 */
644 "unknown opcode - pattern overwritten?", /* 5 */
645 "no more memory", /* 6 */
646 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
647 "match limit exceeded", /* 8 */
648 "callout error code", /* 9 */
649 NULL, /* BADUTF8/16 is handled specially */
650 NULL, /* BADUTF8/16 offset is handled specially */
651 NULL, /* PARTIAL is handled specially */
652 "not used - internal error", /* 13 */
653 "internal error - pattern overwritten?", /* 14 */
654 "bad count value", /* 15 */
655 "item unsupported for DFA matching", /* 16 */
656 "backreference condition or recursion test not supported for DFA matching", /* 17 */
657 "match limit not supported for DFA matching", /* 18 */
658 "workspace size exceeded in DFA matching", /* 19 */
659 "too much recursion for DFA matching", /* 20 */
660 "recursion limit exceeded", /* 21 */
661 "not used - internal error", /* 22 */
662 "invalid combination of newline options", /* 23 */
663 "bad offset value", /* 24 */
664 NULL, /* SHORTUTF8/16 is handled specially */
665 "nested recursion at the same subject position", /* 26 */
666 "JIT stack limit reached", /* 27 */
667 "pattern compiled in wrong mode (8-bit/16-bit error)" /* 28 */
668 };
669
670
671 /*************************************************
672 * Alternate character tables *
673 *************************************************/
674
675 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
676 using the default tables of the library. However, the T option can be used to
677 select alternate sets of tables, for different kinds of testing. Note also that
678 the L (locale) option also adjusts the tables. */
679
680 /* This is the set of tables distributed as default with PCRE. It recognizes
681 only ASCII characters. */
682
683 static const pcre_uint8 tables0[] = {
684 /* Layout: 256-byte lower-casing table, 256-byte case-flipping table, ten 32-byte class bit maps, 256-byte character-type table. */
685 /* This table is a lower casing table. */
686
687 0, 1, 2, 3, 4, 5, 6, 7,
688 8, 9, 10, 11, 12, 13, 14, 15,
689 16, 17, 18, 19, 20, 21, 22, 23,
690 24, 25, 26, 27, 28, 29, 30, 31,
691 32, 33, 34, 35, 36, 37, 38, 39,
692 40, 41, 42, 43, 44, 45, 46, 47,
693 48, 49, 50, 51, 52, 53, 54, 55,
694 56, 57, 58, 59, 60, 61, 62, 63,
695 64, 97, 98, 99,100,101,102,103,
696 104,105,106,107,108,109,110,111,
697 112,113,114,115,116,117,118,119,
698 120,121,122, 91, 92, 93, 94, 95,
699 96, 97, 98, 99,100,101,102,103,
700 104,105,106,107,108,109,110,111,
701 112,113,114,115,116,117,118,119,
702 120,121,122,123,124,125,126,127,
703 128,129,130,131,132,133,134,135,
704 136,137,138,139,140,141,142,143,
705 144,145,146,147,148,149,150,151,
706 152,153,154,155,156,157,158,159,
707 160,161,162,163,164,165,166,167,
708 168,169,170,171,172,173,174,175,
709 176,177,178,179,180,181,182,183,
710 184,185,186,187,188,189,190,191,
711 192,193,194,195,196,197,198,199,
712 200,201,202,203,204,205,206,207,
713 208,209,210,211,212,213,214,215,
714 216,217,218,219,220,221,222,223,
715 224,225,226,227,228,229,230,231,
716 232,233,234,235,236,237,238,239,
717 240,241,242,243,244,245,246,247,
718 248,249,250,251,252,253,254,255,
719
720 /* This table is a case flipping table. */
721
722 0, 1, 2, 3, 4, 5, 6, 7,
723 8, 9, 10, 11, 12, 13, 14, 15,
724 16, 17, 18, 19, 20, 21, 22, 23,
725 24, 25, 26, 27, 28, 29, 30, 31,
726 32, 33, 34, 35, 36, 37, 38, 39,
727 40, 41, 42, 43, 44, 45, 46, 47,
728 48, 49, 50, 51, 52, 53, 54, 55,
729 56, 57, 58, 59, 60, 61, 62, 63,
730 64, 97, 98, 99,100,101,102,103,
731 104,105,106,107,108,109,110,111,
732 112,113,114,115,116,117,118,119,
733 120,121,122, 91, 92, 93, 94, 95,
734 96, 65, 66, 67, 68, 69, 70, 71,
735 72, 73, 74, 75, 76, 77, 78, 79,
736 80, 81, 82, 83, 84, 85, 86, 87,
737 88, 89, 90,123,124,125,126,127,
738 128,129,130,131,132,133,134,135,
739 136,137,138,139,140,141,142,143,
740 144,145,146,147,148,149,150,151,
741 152,153,154,155,156,157,158,159,
742 160,161,162,163,164,165,166,167,
743 168,169,170,171,172,173,174,175,
744 176,177,178,179,180,181,182,183,
745 184,185,186,187,188,189,190,191,
746 192,193,194,195,196,197,198,199,
747 200,201,202,203,204,205,206,207,
748 208,209,210,211,212,213,214,215,
749 216,217,218,219,220,221,222,223,
750 224,225,226,227,228,229,230,231,
751 232,233,234,235,236,237,238,239,
752 240,241,242,243,244,245,246,247,
753 248,249,250,251,252,253,254,255,
754
755 /* This table contains bit maps for various character classes. Each map is 32
756 bytes long and the bits run from the least significant end of each byte. The
757 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
758 graph, print, punct, and cntrl. Other classes are built from combinations. */
759
760 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
761 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
762 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
763 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
764
765 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
766 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
767 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
768 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
769
770 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
771 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
772 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774
775 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
776 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779
780 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
781 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784
785 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
786 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
787 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789
790 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
791 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
792 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794
795 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
796 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
797 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799
800 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
801 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
802 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804
805 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
806 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
807 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809
810 /* This table identifies various classes of character by individual bits:
811 0x01 white space character
812 0x02 letter
813 0x04 decimal digit
814 0x08 hexadecimal digit
815 0x10 alphanumeric or '_'
816 0x80 regular expression metacharacter or binary zero
817 */
818
819 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
820 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
821 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
822 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
823 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
824 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
825 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
826 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
827 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
828 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
829 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
830 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
831 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
832 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
833 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
834 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
836 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
837 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
838 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
841 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
842 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
851
852 /* This is a set of tables that came originally from a Windows user. It seems to
853 be at least an approximation of ISO 8859. In particular, there are characters
854 greater than 128 that are marked as spaces, letters, etc. */
855
856 static const pcre_uint8 tables1[] = {
/* Entries 0-255: case-lowering map. Identity except that 65-90 map to 97-122
and 192-222 (other than 215) map to 224-254. */
857 0,1,2,3,4,5,6,7,
858 8,9,10,11,12,13,14,15,
859 16,17,18,19,20,21,22,23,
860 24,25,26,27,28,29,30,31,
861 32,33,34,35,36,37,38,39,
862 40,41,42,43,44,45,46,47,
863 48,49,50,51,52,53,54,55,
864 56,57,58,59,60,61,62,63,
865 64,97,98,99,100,101,102,103,
866 104,105,106,107,108,109,110,111,
867 112,113,114,115,116,117,118,119,
868 120,121,122,91,92,93,94,95,
869 96,97,98,99,100,101,102,103,
870 104,105,106,107,108,109,110,111,
871 112,113,114,115,116,117,118,119,
872 120,121,122,123,124,125,126,127,
873 128,129,130,131,132,133,134,135,
874 136,137,138,139,140,141,142,143,
875 144,145,146,147,148,149,150,151,
876 152,153,154,155,156,157,158,159,
877 160,161,162,163,164,165,166,167,
878 168,169,170,171,172,173,174,175,
879 176,177,178,179,180,181,182,183,
880 184,185,186,187,188,189,190,191,
881 224,225,226,227,228,229,230,231,
882 232,233,234,235,236,237,238,239,
883 240,241,242,243,244,245,246,215,
884 248,249,250,251,252,253,254,223,
885 224,225,226,227,228,229,230,231,
886 232,233,234,235,236,237,238,239,
887 240,241,242,243,244,245,246,247,
888 248,249,250,251,252,253,254,255,
/* Entries 256-511: case-flipping map. Upper-case codes map to their lower-case
partners as above, and the lower-case codes map back to upper case. */
889 0,1,2,3,4,5,6,7,
890 8,9,10,11,12,13,14,15,
891 16,17,18,19,20,21,22,23,
892 24,25,26,27,28,29,30,31,
893 32,33,34,35,36,37,38,39,
894 40,41,42,43,44,45,46,47,
895 48,49,50,51,52,53,54,55,
896 56,57,58,59,60,61,62,63,
897 64,97,98,99,100,101,102,103,
898 104,105,106,107,108,109,110,111,
899 112,113,114,115,116,117,118,119,
900 120,121,122,91,92,93,94,95,
901 96,65,66,67,68,69,70,71,
902 72,73,74,75,76,77,78,79,
903 80,81,82,83,84,85,86,87,
904 88,89,90,123,124,125,126,127,
905 128,129,130,131,132,133,134,135,
906 136,137,138,139,140,141,142,143,
907 144,145,146,147,148,149,150,151,
908 152,153,154,155,156,157,158,159,
909 160,161,162,163,164,165,166,167,
910 168,169,170,171,172,173,174,175,
911 176,177,178,179,180,181,182,183,
912 184,185,186,187,188,189,190,191,
913 224,225,226,227,228,229,230,231,
914 232,233,234,235,236,237,238,239,
915 240,241,242,243,244,245,246,215,
916 248,249,250,251,252,253,254,223,
917 192,193,194,195,196,197,198,199,
918 200,201,202,203,204,205,206,207,
919 208,209,210,211,212,213,214,247,
920 216,217,218,219,220,221,222,255,
/* Next 320 entries: presumably ten 32-byte character-class bit maps, in the
same order as the maps of the preceding table - TODO confirm the class order
against the PCRE table generator. */
921 0,62,0,0,1,0,0,0,
922 0,0,0,0,0,0,0,0,
923 32,0,0,0,1,0,0,0,
924 0,0,0,0,0,0,0,0,
925 0,0,0,0,0,0,255,3,
926 126,0,0,0,126,0,0,0,
927 0,0,0,0,0,0,0,0,
928 0,0,0,0,0,0,0,0,
929 0,0,0,0,0,0,255,3,
930 0,0,0,0,0,0,0,0,
931 0,0,0,0,0,0,12,2,
932 0,0,0,0,0,0,0,0,
933 0,0,0,0,0,0,0,0,
934 254,255,255,7,0,0,0,0,
935 0,0,0,0,0,0,0,0,
936 255,255,127,127,0,0,0,0,
937 0,0,0,0,0,0,0,0,
938 0,0,0,0,254,255,255,7,
939 0,0,0,0,0,4,32,4,
940 0,0,0,128,255,255,127,255,
941 0,0,0,0,0,0,255,3,
942 254,255,255,135,254,255,255,7,
943 0,0,0,0,0,4,44,6,
944 255,255,127,255,255,255,127,255,
945 0,0,0,0,254,255,255,255,
946 255,255,255,255,255,255,255,127,
947 0,0,0,0,254,255,255,255,
948 255,255,255,255,255,255,255,255,
949 0,2,0,0,255,255,255,255,
950 255,255,255,255,255,255,255,127,
951 0,0,0,0,255,255,255,255,
952 255,255,255,255,255,255,255,255,
953 0,0,0,0,254,255,0,252,
954 1,0,0,248,1,0,0,120,
955 0,0,0,0,254,255,255,255,
956 0,0,128,0,0,0,128,0,
957 255,255,255,255,0,0,0,0,
958 0,0,0,0,0,0,0,128,
959 255,255,255,255,0,0,0,0,
960 0,0,0,0,0,0,0,0,
/* Final 256 entries: one flag byte per character code. The values use the same
bit meanings as the character-type comment in the preceding table (1 = white
space, 2 = letter, 4 = decimal digit, 8 = hex digit, 16 = alphanumeric or '_',
128 = metacharacter or binary zero); here some codes above 127 are flagged as
spaces, letters or digits. */
961 128,0,0,0,0,0,0,0,
962 0,1,1,0,1,1,0,0,
963 0,0,0,0,0,0,0,0,
964 0,0,0,0,0,0,0,0,
965 1,0,0,0,128,0,0,0,
966 128,128,128,128,0,0,128,0,
967 28,28,28,28,28,28,28,28,
968 28,28,0,0,0,0,0,128,
969 0,26,26,26,26,26,26,18,
970 18,18,18,18,18,18,18,18,
971 18,18,18,18,18,18,18,18,
972 18,18,18,128,128,0,128,16,
973 0,26,26,26,26,26,26,18,
974 18,18,18,18,18,18,18,18,
975 18,18,18,18,18,18,18,18,
976 18,18,18,128,128,0,0,0,
977 0,0,0,0,0,1,0,0,
978 0,0,0,0,0,0,0,0,
979 0,0,0,0,0,0,0,0,
980 0,0,0,0,0,0,0,0,
981 1,0,0,0,0,0,0,0,
982 0,0,18,0,0,0,0,0,
983 0,0,20,20,0,18,0,0,
984 0,20,18,0,0,0,0,0,
985 18,18,18,18,18,18,18,18,
986 18,18,18,18,18,18,18,18,
987 18,18,18,18,18,18,18,0,
988 18,18,18,18,18,18,18,18,
989 18,18,18,18,18,18,18,18,
990 18,18,18,18,18,18,18,18,
991 18,18,18,18,18,18,18,0,
992 18,18,18,18,18,18,18,18
993 };
994
995
996
997
998 #ifndef HAVE_STRERROR
999 /*************************************************
1000 * Provide strerror() for non-ANSI libraries *
1001 *************************************************/
1002
1003 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1004 in their libraries, but can provide the same facility by this simple
1005 alternative function. */
1006
1007 extern int sys_nerr;
1008 extern char *sys_errlist[];
1009
1010 char *
1011 strerror(int n)
1012 {
1013 if (n < 0 || n >= sys_nerr) return "unknown error number";
1014 return sys_errlist[n];
1015 }
1016 #endif /* HAVE_STRERROR */
1017
1018
1019 /*************************************************
1020 * JIT memory callback *
1021 *************************************************/
1022
1023 static pcre_jit_stack* jit_callback(void *arg)
1024 {
1025 return (pcre_jit_stack *)arg;
1026 }
1027
1028
1029 #if !defined NOUTF
1030 /*************************************************
1031 * Convert UTF-8 string to value *
1032 *************************************************/
1033
1034 /* This function takes one or more bytes that represents a UTF-8 character,
1035 and returns the value of the character.
1036
1037 Argument:
1038 utf8bytes a pointer to the byte vector
1039 vptr a pointer to an int to receive the value
1040
1041 Returns: > 0 => the number of bytes consumed
1042 -6 to 0 => malformed UTF-8 character at offset = (-return)
1043 */
1044
1045 static int
1046 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1047 {
1048 int c = *utf8bytes++;
1049 int d = c;
1050 int i, j, s;
1051
1052 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1053 {
1054 if ((d & 0x80) == 0) break;
1055 d <<= 1;
1056 }
1057
1058 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1059 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1060
1061 /* i now has a value in the range 1-5 */
1062
1063 s = 6*i;
1064 d = (c & utf8_table3[i]) << s;
1065
1066 for (j = 0; j < i; j++)
1067 {
1068 c = *utf8bytes++;
1069 if ((c & 0xc0) != 0x80) return -(j+1);
1070 s -= 6;
1071 d |= (c & 0x3f) << s;
1072 }
1073
1074 /* Check that encoding was the correct unique one */
1075
1076 for (j = 0; j < utf8_table1_size; j++)
1077 if (d <= utf8_table1[j]) break;
1078 if (j != i) return -(i+1);
1079
1080 /* Valid value */
1081
1082 *vptr = d;
1083 return i+1;
1084 }
1085 #endif /* NOUTF */
1086
1087
1088
1089 #if !defined NOUTF
1090 /*************************************************
1091 * Convert character value to UTF-8 *
1092 *************************************************/
1093
1094 /* This function takes an integer value in the range 0 - 0x7fffffff
1095 and encodes it as a UTF-8 character in 1 to 6 bytes.
1096
1097 Arguments:
1098 cvalue the character value
1099 utf8bytes pointer to buffer for result - at least 6 bytes long
1100
1101 Returns: number of bytes placed in the buffer
1102 */
1103
1104 static int
1105 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1106 {
1107 register int i, j;
1108 for (i = 0; i < utf8_table1_size; i++)
1109 if (cvalue <= utf8_table1[i]) break;
1110 utf8bytes += i;
1111 for (j = i; j > 0; j--)
1112 {
1113 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1114 cvalue >>= 6;
1115 }
1116 *utf8bytes = utf8_table2[i] | cvalue;
1117 return i + 1;
1118 }
1119 #endif
1120
1121
1122
1123 #ifdef SUPPORT_PCRE16
1124 /*************************************************
1125 * Convert a string to 16-bit *
1126 *************************************************/
1127
1128 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1129 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1130 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1131 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1132 result is always left in buffer16.
1133
1134 Note that this function does not object to surrogate values. This is
1135 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1136 for the purpose of testing that they are correctly faulted.
1137
1138 Arguments:
1139 p points to a byte string
1140 utf true if UTF-8 (to be converted to UTF-16)
1141 len number of bytes in the string (excluding trailing zero)
1142
1143 Returns: number of 16-bit data items used (excluding trailing zero)
1144 OR -1 if a UTF-8 string is malformed
1145 OR -2 if a value > 0x10ffff is encountered
1146 */
1147
1148 static int
1149 to16(pcre_uint8 *p, int utf, int len)
1150 {
1151 pcre_uint16 *pp;
1152
1153 if (buffer16_size < 2*len + 2)
1154 {
1155 if (buffer16 != NULL) free(buffer16);
1156 buffer16_size = 2*len + 2;
1157 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1158 if (buffer16 == NULL)
1159 {
1160 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1161 exit(1);
1162 }
1163 }
1164
1165 pp = buffer16;
1166
1167 if (!utf)
1168 {
1169 while (len-- > 0) *pp++ = *p++;
1170 }
1171
1172 else
1173 {
1174 int c = 0;
1175 while (len > 0)
1176 {
1177 int chlen = utf82ord(p, &c);
1178 if (chlen <= 0) return -1;
1179 if (c > 0x10ffff) return -2;
1180 p += chlen;
1181 len -= chlen;
1182 if (c < 0x10000) *pp++ = c; else
1183 {
1184 c -= 0x10000;
1185 *pp++ = 0xD800 | (c >> 10);
1186 *pp++ = 0xDC00 | (c & 0x3ff);
1187 }
1188 }
1189 }
1190
1191 *pp = 0;
1192 return pp - buffer16;
1193 }
1194 #endif
1195
1196
1197 /*************************************************
1198 * Read or extend an input line *
1199 *************************************************/
1200
1201 /* Input lines are read into buffer, but both patterns and data lines can be
1202 continued over multiple input lines. In addition, if the buffer fills up, we
1203 want to automatically expand it so as to be able to handle extremely large
1204 lines that are needed for certain stress tests. When the input buffer is
1205 expanded, the other two buffers must also be expanded likewise, and the
1206 contents of pbuffer, which are a copy of the input for callouts, must be
1207 preserved (for when expansion happens for a data line). This is not the most
1208 optimal way of handling this, but hey, this is just a test program!
1209
1210 Arguments:
1211 f the file to read
1212 start where in buffer to start (this *must* be within buffer)
1213 prompt for stdin or readline()
1214
1215 Returns: pointer to the start of new data
1216 could be a copy of start, or could be moved
1217 NULL if no data read and EOF reached
1218 */
1219
1220 static pcre_uint8 *
1221 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1222 {
1223 pcre_uint8 *here = start;
1224
1225 for (;;)
1226 {
1227 int rlen = (int)(buffer_size - (here - buffer));
1228
1229 if (rlen > 1000)
1230 {
1231 int dlen;
1232
1233 /* If libreadline support is required, use readline() to read a line if the
1234 input is a terminal. Note that readline() removes the trailing newline, so
1235 we must put it back again, to be compatible with fgets(). */
1236
1237 #ifdef SUPPORT_LIBREADLINE
1238 if (isatty(fileno(f)))
1239 {
1240 size_t len;
1241 char *s = readline(prompt);
1242 if (s == NULL) return (here == start)? NULL : start;
1243 len = strlen(s);
1244 if (len > 0) add_history(s);
1245 if (len > rlen - 1) len = rlen - 1;
1246 memcpy(here, s, len);
1247 here[len] = '\n';
1248 here[len+1] = 0;
1249 free(s);
1250 }
1251 else
1252 #endif
1253
1254 /* Read the next line by normal means, prompting if the file is stdin. */
1255
1256 {
1257 if (f == stdin) printf("%s", prompt);
1258 if (fgets((char *)here, rlen, f) == NULL)
1259 return (here == start)? NULL : start;
1260 }
1261
1262 dlen = (int)strlen((char *)here);
1263 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1264 here += dlen;
1265 }
1266
1267 else
1268 {
1269 int new_buffer_size = 2*buffer_size;
1270 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1271 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1272 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1273
1274 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1275 {
1276 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1277 exit(1);
1278 }
1279
1280 memcpy(new_buffer, buffer, buffer_size);
1281 memcpy(new_pbuffer, pbuffer, buffer_size);
1282
1283 buffer_size = new_buffer_size;
1284
1285 start = new_buffer + (start - buffer);
1286 here = new_buffer + (here - buffer);
1287
1288 free(buffer);
1289 free(dbuffer);
1290 free(pbuffer);
1291
1292 buffer = new_buffer;
1293 dbuffer = new_dbuffer;
1294 pbuffer = new_pbuffer;
1295 }
1296 }
1297
1298 return NULL; /* Control never gets here */
1299 }
1300
1301
1302
1303 /*************************************************
1304 * Read number from string *
1305 *************************************************/
1306
1307 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1308 around with conditional compilation, just do the job by hand. It is only used
1309 for unpicking arguments, so just keep it simple.
1310
1311 Arguments:
1312 str string to be converted
1313 endptr where to put the end pointer
1314
1315 Returns: the value, as an int
1316 */
1317
1318 static int
1319 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1320 {
1321 int result = 0;
1322 while(*str != 0 && isspace(*str)) str++;
1323 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1324 *endptr = str;
1325 return(result);
1326 }
1327
1328
1329
1330 /*************************************************
1331 * Print one character *
1332 *************************************************/
1333
1334 /* Print a single character either literally, or as a hex escape. */
1335
1336 static int pchar(int c, FILE *f)
1337 {
1338 if (PRINTOK(c))
1339 {
1340 if (f != NULL) fprintf(f, "%c", c);
1341 return 1;
1342 }
1343
1344 if (c < 0x100)
1345 {
1346 if (use_utf)
1347 {
1348 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1349 return 6;
1350 }
1351 else
1352 {
1353 if (f != NULL) fprintf(f, "\\x%02x", c);
1354 return 4;
1355 }
1356 }
1357
1358 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1359 return (c <= 0x000000ff)? 6 :
1360 (c <= 0x00000fff)? 7 :
1361 (c <= 0x0000ffff)? 8 :
1362 (c <= 0x000fffff)? 9 : 10;
1363 }
1364
1365
1366
1367 #ifdef SUPPORT_PCRE8
1368 /*************************************************
1369 * Print 8-bit character string *
1370 *************************************************/
1371
1372 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1373 If handed a NULL file, just counts chars without printing. */
1374
1375 static int pchars(pcre_uint8 *p, int length, FILE *f)
1376 {
1377 int c = 0;
1378 int yield = 0;
1379
1380 if (length < 0)
1381 length = strlen((char *)p);
1382
1383 while (length-- > 0)
1384 {
1385 #if !defined NOUTF
1386 if (use_utf)
1387 {
1388 int rc = utf82ord(p, &c);
1389 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1390 {
1391 length -= rc - 1;
1392 p += rc;
1393 yield += pchar(c, f);
1394 continue;
1395 }
1396 }
1397 #endif
1398 c = *p++;
1399 yield += pchar(c, f);
1400 }
1401
1402 return yield;
1403 }
1404 #endif
1405
1406
1407
1408 #ifdef SUPPORT_PCRE16
1409 /*************************************************
1410 * Find length of 0-terminated 16-bit string *
1411 *************************************************/
1412
1413 static int strlen16(PCRE_SPTR16 p)
1414 {
1415 int len = 0;
1416 while (*p++ != 0) len++;
1417 return len;
1418 }
1419 #endif /* SUPPORT_PCRE16 */
1420
1421
1422 #ifdef SUPPORT_PCRE16
1423 /*************************************************
1424 * Print 16-bit character string *
1425 *************************************************/
1426
1427 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1428 If handed a NULL file, just counts chars without printing. */
1429
1430 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1431 {
1432 int yield = 0;
1433
1434 if (length < 0)
1435 length = strlen16(p);
1436
1437 while (length-- > 0)
1438 {
1439 int c = *p++ & 0xffff;
1440 #if !defined NOUTF
1441 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1442 {
1443 int d = *p & 0xffff;
1444 if (d >= 0xDC00 && d < 0xDFFF)
1445 {
1446 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1447 length--;
1448 p++;
1449 }
1450 }
1451 #endif
1452 yield += pchar(c, f);
1453 }
1454
1455 return yield;
1456 }
1457 #endif /* SUPPORT_PCRE16 */
1458
1459
1460
1461 #ifdef SUPPORT_PCRE8
1462 /*************************************************
1463 * Read a capture name (8-bit) and check it *
1464 *************************************************/
1465
1466 static pcre_uint8 *
1467 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1468 {
1469 pcre_uint8 *npp = *pp;
1470 while (isalnum(*p)) *npp++ = *p++;
1471 *npp++ = 0;
1472 *npp = 0;
1473 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1474 {
1475 fprintf(outfile, "no parentheses with name \"");
1476 PCHARSV(*pp, 0, -1, outfile);
1477 fprintf(outfile, "\"\n");
1478 }
1479
1480 *pp = npp;
1481 return p;
1482 }
1483 #endif /* SUPPORT_PCRE8 */
1484
1485
1486
1487 #ifdef SUPPORT_PCRE16
1488 /*************************************************
1489 * Read a capture name (16-bit) and check it *
1490 *************************************************/
1491
1492 /* Note that the text being read is 8-bit. */
1493
1494 static pcre_uint8 *
1495 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1496 {
1497 pcre_uint16 *npp = *pp;
1498 while (isalnum(*p)) *npp++ = *p++;
1499 *npp++ = 0;
1500 *npp = 0;
1501 if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1502 {
1503 fprintf(outfile, "no parentheses with name \"");
1504 PCHARSV(*pp, 0, -1, outfile);
1505 fprintf(outfile, "\"\n");
1506 }
1507 *pp = npp;
1508 return p;
1509 }
1510 #endif /* SUPPORT_PCRE16 */
1511
1512
1513
1514 /*************************************************
1515 * Callout function *
1516 *************************************************/
1517
1518 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1519 the match. Yield zero unless more callouts than the fail count, or the callout
1520 data is not zero. */
1521
1522 static int callout(pcre_callout_block *cb)
1523 {
1524 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1525 int i, pre_start, post_start, subject_length;
1526
1527 if (callout_extra)
1528 {
1529 fprintf(f, "Callout %d: last capture = %d\n",
1530 cb->callout_number, cb->capture_last);
1531
1532 for (i = 0; i < cb->capture_top * 2; i += 2)
1533 {
1534 if (cb->offset_vector[i] < 0)
1535 fprintf(f, "%2d: <unset>\n", i/2);
1536 else
1537 {
1538 fprintf(f, "%2d: ", i/2);
1539 PCHARSV(cb->subject, cb->offset_vector[i],
1540 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1541 fprintf(f, "\n");
1542 }
1543 }
1544 }
1545
1546 /* Re-print the subject in canonical form, the first time or if giving full
1547 datails. On subsequent calls in the same match, we use pchars just to find the
1548 printed lengths of the substrings. */
1549
1550 if (f != NULL) fprintf(f, "--->");
1551
1552 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1553 PCHARS(post_start, cb->subject, cb->start_match,
1554 cb->current_position - cb->start_match, f);
1555
1556 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1557
1558 PCHARSV(cb->subject, cb->current_position,
1559 cb->subject_length - cb->current_position, f);
1560
1561 if (f != NULL) fprintf(f, "\n");
1562
1563 /* Always print appropriate indicators, with callout number if not already
1564 shown. For automatic callouts, show the pattern offset. */
1565
1566 if (cb->callout_number == 255)
1567 {
1568 fprintf(outfile, "%+3d ", cb->pattern_position);
1569 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1570 }
1571 else
1572 {
1573 if (callout_extra) fprintf(outfile, " ");
1574 else fprintf(outfile, "%3d ", cb->callout_number);
1575 }
1576
1577 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1578 fprintf(outfile, "^");
1579
1580 if (post_start > 0)
1581 {
1582 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1583 fprintf(outfile, "^");
1584 }
1585
1586 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1587 fprintf(outfile, " ");
1588
1589 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1590 pbuffer + cb->pattern_position);
1591
1592 fprintf(outfile, "\n");
1593 first_callout = 0;
1594
1595 if (cb->mark != last_callout_mark)
1596 {
1597 if (cb->mark == NULL)
1598 fprintf(outfile, "Latest Mark: <unset>\n");
1599 else
1600 {
1601 fprintf(outfile, "Latest Mark: ");
1602 PCHARSV(cb->mark, 0, -1, outfile);
1603 putc('\n', outfile);
1604 }
1605 last_callout_mark = cb->mark;
1606 }
1607
1608 if (cb->callout_data != NULL)
1609 {
1610 int callout_data = *((int *)(cb->callout_data));
1611 if (callout_data != 0)
1612 {
1613 fprintf(outfile, "Callout data = %d\n", callout_data);
1614 return callout_data;
1615 }
1616 }
1617
1618 return (cb->callout_number != callout_fail_id)? 0 :
1619 (++callout_count >= callout_fail_count)? 1 : 0;
1620 }
1621
1622
1623 /*************************************************
1624 * Local malloc functions *
1625 *************************************************/
1626
1627 /* Alternative malloc function, to test functionality and save the size of a
1628 compiled re, which is the first store request that pcre_compile() makes. The
1629 show_malloc variable is set only during matching. */
1630
1631 static void *new_malloc(size_t size)
1632 {
1633 void *block = malloc(size);
1634 gotten_store = size;
1635 if (first_gotten_store == 0) first_gotten_store = size;
1636 if (show_malloc)
1637 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1638 return block;
1639 }
1640
1641 static void new_free(void *block)
1642 {
1643 if (show_malloc)
1644 fprintf(outfile, "free %p\n", block);
1645 free(block);
1646 }
1647
1648 /* For recursion malloc/free, to test stacking calls */
1649
1650 static void *stack_malloc(size_t size)
1651 {
1652 void *block = malloc(size);
1653 if (show_malloc)
1654 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1655 return block;
1656 }
1657
1658 static void stack_free(void *block)
1659 {
1660 if (show_malloc)
1661 fprintf(outfile, "stack_free %p\n", block);
1662 free(block);
1663 }
1664
1665
1666 /*************************************************
1667 * Call pcre_fullinfo() *
1668 *************************************************/
1669
1670 /* Get one piece of information from the pcre_fullinfo() function. When only
1671 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1672 value, but the code is defensive. */
1673
1674 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1675 {
1676 int rc;
1677
1678 if (use_pcre16)
1679 #ifdef SUPPORT_PCRE16
1680 rc = pcre16_fullinfo(re, study, option, ptr);
1681 #else
1682 rc = PCRE_ERROR_BADMODE;
1683 #endif
1684 else
1685 #ifdef SUPPORT_PCRE8
1686 rc = pcre_fullinfo(re, study, option, ptr);
1687 #else
1688 rc = PCRE_ERROR_BADMODE;
1689 #endif
1690
1691 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1692 use_pcre16? "16" : "", option);
1693 }
1694
1695
1696
1697 /*************************************************
1698 * Swap byte functions *
1699 *************************************************/
1700
1701 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1702 value, respectively.
1703
1704 Arguments:
1705 value any number
1706
1707 Returns: the byte swapped value
1708 */
1709
1710 static pcre_uint32
1711 swap_uint32(pcre_uint32 value)
1712 {
1713 return ((value & 0x000000ff) << 24) |
1714 ((value & 0x0000ff00) << 8) |
1715 ((value & 0x00ff0000) >> 8) |
1716 (value >> 24);
1717 }
1718
1719 static pcre_uint16
1720 swap_uint16(pcre_uint16 value)
1721 {
1722 return (value >> 8) | (value << 8);
1723 }
1724
1725
1726
1727 /*************************************************
1728 * Flip bytes in a compiled pattern *
1729 *************************************************/
1730
1731 /* This function is called if the 'F' option was present on a pattern that is
1732 to be written to a file. We flip the bytes of all the integer fields in the
1733 regex data block and the study block. In 16-bit mode this also flips relevant
1734 bytes in the pattern itself. This is to make it possible to test PCRE's
1735 ability to reload byte-flipped patterns, e.g. those compiled on a different
1736 architecture. */
1737
1738 static void
1739 regexflip(pcre *ere, pcre_extra *extra)
1740 {
1741 real_pcre *re = (real_pcre *)ere;
1742 #ifdef SUPPORT_PCRE16
1743 int op;
1744 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1745 int length = re->name_count * re->name_entry_size;
1746 #ifdef SUPPORT_UTF
1747 BOOL utf = (re->options & PCRE_UTF16) != 0;
1748 BOOL utf16_char = FALSE;
1749 #endif /* SUPPORT_UTF */
1750 #endif /* SUPPORT_PCRE16 */
1751
1752 /* Always flip the bytes in the main data block and study blocks. */
1753
1754 re->magic_number = REVERSED_MAGIC_NUMBER;
1755 re->size = swap_uint32(re->size);
1756 re->options = swap_uint32(re->options);
1757 re->flags = swap_uint16(re->flags);
1758 re->top_bracket = swap_uint16(re->top_bracket);
1759 re->top_backref = swap_uint16(re->top_backref);
1760 re->first_char = swap_uint16(re->first_char);
1761 re->req_char = swap_uint16(re->req_char);
1762 re->name_table_offset = swap_uint16(re->name_table_offset);
1763 re->name_entry_size = swap_uint16(re->name_entry_size);
1764 re->name_count = swap_uint16(re->name_count);
1765
1766 if (extra != NULL)
1767 {
1768 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1769 rsd->size = swap_uint32(rsd->size);
1770 rsd->flags = swap_uint32(rsd->flags);
1771 rsd->minlength = swap_uint32(rsd->minlength);
1772 }
1773
1774 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1775 in the name table, if present, and then in the pattern itself. */
1776
1777 #ifdef SUPPORT_PCRE16
1778 if (!use_pcre16) return;
1779
1780 while(TRUE)
1781 {
1782 /* Swap previous characters. */
1783 while (length-- > 0)
1784 {
1785 *ptr = swap_uint16(*ptr);
1786 ptr++;
1787 }
1788 #ifdef SUPPORT_UTF
1789 if (utf16_char)
1790 {
1791 if ((ptr[-1] & 0xfc00) == 0xd800)
1792 {
1793 /* We know that there is only one extra character in UTF-16. */
1794 *ptr = swap_uint16(*ptr);
1795 ptr++;
1796 }
1797 }
1798 utf16_char = FALSE;
1799 #endif /* SUPPORT_UTF */
1800
1801 /* Get next opcode. */
1802
1803 length = 0;
1804 op = *ptr;
1805 *ptr++ = swap_uint16(op);
1806
1807 switch (op)
1808 {
1809 case OP_END:
1810 return;
1811
1812 #ifdef SUPPORT_UTF
1813 case OP_CHAR:
1814 case OP_CHARI:
1815 case OP_NOT:
1816 case OP_NOTI:
1817 case OP_STAR:
1818 case OP_MINSTAR:
1819 case OP_PLUS:
1820 case OP_MINPLUS:
1821 case OP_QUERY:
1822 case OP_MINQUERY:
1823 case OP_UPTO:
1824 case OP_MINUPTO:
1825 case OP_EXACT:
1826 case OP_POSSTAR:
1827 case OP_POSPLUS:
1828 case OP_POSQUERY:
1829 case OP_POSUPTO:
1830 case OP_STARI:
1831 case OP_MINSTARI:
1832 case OP_PLUSI:
1833 case OP_MINPLUSI:
1834 case OP_QUERYI:
1835 case OP_MINQUERYI:
1836 case OP_UPTOI:
1837 case OP_MINUPTOI:
1838 case OP_EXACTI:
1839 case OP_POSSTARI:
1840 case OP_POSPLUSI:
1841 case OP_POSQUERYI:
1842 case OP_POSUPTOI:
1843 case OP_NOTSTAR:
1844 case OP_NOTMINSTAR:
1845 case OP_NOTPLUS:
1846 case OP_NOTMINPLUS:
1847 case OP_NOTQUERY:
1848 case OP_NOTMINQUERY:
1849 case OP_NOTUPTO:
1850 case OP_NOTMINUPTO:
1851 case OP_NOTEXACT:
1852 case OP_NOTPOSSTAR:
1853 case OP_NOTPOSPLUS:
1854 case OP_NOTPOSQUERY:
1855 case OP_NOTPOSUPTO:
1856 case OP_NOTSTARI:
1857 case OP_NOTMINSTARI:
1858 case OP_NOTPLUSI:
1859 case OP_NOTMINPLUSI:
1860 case OP_NOTQUERYI:
1861 case OP_NOTMINQUERYI:
1862 case OP_NOTUPTOI:
1863 case OP_NOTMINUPTOI:
1864 case OP_NOTEXACTI:
1865 case OP_NOTPOSSTARI:
1866 case OP_NOTPOSPLUSI:
1867 case OP_NOTPOSQUERYI:
1868 case OP_NOTPOSUPTOI:
1869 if (utf) utf16_char = TRUE;
1870 #endif
1871 /* Fall through. */
1872
1873 default:
1874 length = OP_lengths16[op] - 1;
1875 break;
1876
1877 case OP_CLASS:
1878 case OP_NCLASS:
1879 /* Skip the character bit map. */
1880 ptr += 32/sizeof(pcre_uint16);
1881 length = 0;
1882 break;
1883
1884 case OP_XCLASS:
1885 /* Reverse the size of the XCLASS instance. */
1886 ptr++;
1887 *ptr = swap_uint16(*ptr);
1888 if (LINK_SIZE > 1)
1889 {
1890 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1891 ptr++;
1892 *ptr = swap_uint16(*ptr);
1893 }
1894 ptr++;
1895
1896 if (LINK_SIZE > 1)
1897 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1898 (1 + LINK_SIZE + 1);
1899 else
1900 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1901
1902 op = *ptr;
1903 *ptr = swap_uint16(op);
1904 if ((op & XCL_MAP) != 0)
1905 {
1906 /* Skip the character bit map. */
1907 ptr += 32/sizeof(pcre_uint16);
1908 length -= 32/sizeof(pcre_uint16);
1909 }
1910 break;
1911 }
1912 }
1913 /* Control should never reach here in 16 bit mode. */
1914 #endif /* SUPPORT_PCRE16 */
1915 }
1916
1917
1918
1919 /*************************************************
1920 * Check match or recursion limit *
1921 *************************************************/
1922
1923 static int
1924 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1925 int start_offset, int options, int *use_offsets, int use_size_offsets,
1926 int flag, unsigned long int *limit, int errnumber, const char *msg)
1927 {
1928 int count;
1929 int min = 0;
1930 int mid = 64;
1931 int max = -1;
1932
1933 extra->flags |= flag;
1934
1935 for (;;)
1936 {
1937 *limit = mid;
1938
1939 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1940 use_offsets, use_size_offsets);
1941
1942 if (count == errnumber)
1943 {
1944 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1945 min = mid;
1946 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1947 }
1948
1949 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1950 count == PCRE_ERROR_PARTIAL)
1951 {
1952 if (mid == min + 1)
1953 {
1954 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1955 break;
1956 }
1957 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1958 max = mid;
1959 mid = (min + mid)/2;
1960 }
1961 else break; /* Some other error */
1962 }
1963
1964 extra->flags &= ~flag;
1965 return count;
1966 }
1967
1968
1969
1970 /*************************************************
1971 * Case-independent strncmp() function *
1972 *************************************************/
1973
/* Compare two zero-terminated strings, ignoring case as defined by tolower(),
looking at no more than n characters. The comparison also stops as soon as
both strings end, so strings shorter than n are never read beyond their
terminating zero. A value of n that is zero or negative compares nothing and
yields 0.

Arguments:
  s          first string
  t          second string
  n          maximum number of characters to compare

Returns:     < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
  for (; n > 0; n--)
    {
    int cs = tolower(*s++);
    int ct = tolower(*t++);
    if (cs != ct) return cs - ct;
    if (cs == 0) break;    /* Both strings ended together: they are equal */
    }
  return 0;
}
1993
1994
1995
1996 /*************************************************
1997 * Check newline indicator *
1998 *************************************************/
1999
2000 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2001 a message and return 0 if there is no match.
2002
2003 Arguments:
2004 p points after the leading '<'
2005 f file for error message
2006
2007 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2008 */
2009
2010 static int
2011 check_newline(pcre_uint8 *p, FILE *f)
2012 {
2013 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2014 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2015 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2016 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2017 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2018 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2019 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2020 fprintf(f, "Unknown newline type at: <%s\n", p);
2021 return 0;
2022 }
2023
2024
2025
2026 /*************************************************
2027 * Usage function *
2028 *************************************************/
2029
/* Write the command-line synopsis and the list of options to stdout. Lines
that describe optional features (readline, the 16-bit library, DFA matching,
the POSIX wrapper) are selected at compile time. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
  fputs(" -16 use 16-bit interface\n", stdout);
#endif

  fputs(" -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2067
2068
2069
2070 /*************************************************
2071 * Main Program *
2072 *************************************************/
2073
2074 /* Read lines from named file or stdin and write to named file or stdout; lines
2075 consist of a regular expression, in delimiters and optionally followed by
2076 options, followed by a set of test data, terminated by an empty line. */
2077
2078 int main(int argc, char **argv)
2079 {
2080 FILE *infile = stdin;
2081 const char *version;
2082 int options = 0;
2083 int study_options = 0;
2084 int default_find_match_limit = FALSE;
2085 int op = 1;
2086 int timeit = 0;
2087 int timeitm = 0;
2088 int showinfo = 0;
2089 int showstore = 0;
2090 int force_study = -1;
2091 int force_study_options = 0;
2092 int quiet = 0;
2093 int size_offsets = 45;
2094 int size_offsets_max;
2095 int *offsets = NULL;
2096 #if !defined NOPOSIX
2097 int posix = 0;
2098 #endif
2099 int debug = 0;
2100 int done = 0;
2101 int all_use_dfa = 0;
2102 int yield = 0;
2103 int stack_size;
2104
2105 pcre_jit_stack *jit_stack = NULL;
2106
2107 /* These vectors store, end-to-end, a list of zero-terminated captured
2108 substring names, each list itself being terminated by an empty name. Assume
2109 that 1024 is plenty long enough for the few names we'll be testing. It is
2110 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2111 for the actual memory, to ensure alignment. By defining these variables always
2112 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2113 #ifdefs in the code. */
2114
2115 pcre_uint16 copynames[1024];
2116 pcre_uint16 getnames[1024];
2117
2118 pcre_uint16 *cn16ptr;
2119 pcre_uint16 *gn16ptr;
2120
2121 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2122 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2123 pcre_uint8 *cn8ptr;
2124 pcre_uint8 *gn8ptr;
2125
2126 /* Get buffers from malloc() so that valgrind will check their misuse when
2127 debugging. They grow automatically when very long lines are read. The 16-bit
2128 buffer (buffer16) is obtained only if needed. */
2129
2130 buffer = (pcre_uint8 *)malloc(buffer_size);
2131 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2132 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2133
2134 /* The outfile variable is static so that new_malloc can use it. */
2135
2136 outfile = stdout;
2137
2138 /* The following _setmode() stuff is some Windows magic that tells its runtime
2139 library to translate CRLF into a single LF character. At least, that's what
2140 I've been told: never having used Windows I take this all on trust. Originally
2141 it set 0x8000, but then I was advised that _O_BINARY was better. */
2142
2143 #if defined(_WIN32) || defined(WIN32)
2144 _setmode( _fileno( stdout ), _O_BINARY );
2145 #endif
2146
2147 /* Get the version number: both pcre_version() and pcre16_version() give the
2148 same answer. We just need to ensure that we call one that is available. */
2149
2150 #ifdef SUPPORT_PCRE8
2151 version = pcre_version();
2152 #else
2153 version = pcre16_version();
2154 #endif
2155
2156 /* Scan options */
2157
2158 while (argc > 1 && argv[op][0] == '-')
2159 {
2160 pcre_uint8 *endptr;
2161
2162 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2163 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2164 else if (strcmp(argv[op], "-s+") == 0)
2165 {
2166 force_study = 1;
2167 force_study_options = PCRE_STUDY_JIT_COMPILE;
2168 }
2169 else if (strcmp(argv[op], "-16") == 0)
2170 {
2171 #ifdef SUPPORT_PCRE16
2172 use_pcre16 = 1;
2173 #else
2174 printf("** This version of PCRE was built without 16-bit support\n");
2175 exit(1);
2176 #endif
2177 }
2178 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2179 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2180 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2181 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2182 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2183 #if !defined NODFA
2184 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2185 #endif
2186 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2187 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2188 *endptr == 0))
2189 {
2190 op++;
2191 argc--;
2192 }
2193 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2194 {
2195 int both = argv[op][2] == 0;
2196 int temp;
2197 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2198 *endptr == 0))
2199 {
2200 timeitm = temp;
2201 op++;
2202 argc--;
2203 }
2204 else timeitm = LOOPREPEAT;
2205 if (both) timeit = timeitm;
2206 }
2207 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2208 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2209 *endptr == 0))
2210 {
2211 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2212 printf("PCRE: -S not supported on this OS\n");
2213 exit(1);
2214 #else
2215 int rc;
2216 struct rlimit rlim;
2217 getrlimit(RLIMIT_STACK, &rlim);
2218 rlim.rlim_cur = stack_size * 1024 * 1024;
2219 rc = setrlimit(RLIMIT_STACK, &rlim);
2220 if (rc != 0)
2221 {
2222 printf("PCRE: setrlimit() failed with error %d\n", rc);
2223 exit(1);
2224 }
2225 op++;
2226 argc--;
2227 #endif
2228 }
2229 #if !defined NOPOSIX
2230 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2231 #endif
2232 else if (strcmp(argv[op], "-C") == 0)
2233 {
2234 int rc;
2235 unsigned long int lrc;
2236 printf("PCRE version %s\n", version);
2237 printf("Compiled with\n");
2238
2239 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2240 are set, either both UTFs are supported or both are not supported. */
2241
2242 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2243 printf(" 8-bit and 16-bit support\n");
2244 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2245 if (rc)
2246 printf(" UTF-8 and UTF-16 support\n");
2247 else
2248 printf(" No UTF-8 or UTF-16 support\n");
2249 #elif defined SUPPORT_PCRE8
2250 printf(" 8-bit support only\n");
2251 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2252 printf(" %sUTF-8 support\n", rc? "" : "No ");
2253 #else
2254 printf(" 16-bit support only\n");
2255 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2256 printf(" %sUTF-16 support\n", rc? "" : "No ");
2257 #endif
2258
2259 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2260 printf(" %sUnicode properties support\n", rc? "" : "No ");
2261 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2262 if (rc)
2263 printf(" Just-in-time compiler support\n");
2264 else
2265 printf(" No just-in-time compiler support\n");
2266 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2267 /* Note that these values are always the ASCII values, even
2268 in EBCDIC environments. CR is 13 and NL is 10. */
2269 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2270 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2271 (rc == -2)? "ANYCRLF" :
2272 (rc == -1)? "ANY" : "???");
2273 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2274 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2275 "all Unicode newlines");
2276 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2277 printf(" Internal link size = %d\n", rc);
2278 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2279 printf(" POSIX malloc threshold = %d\n", rc);
2280 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2281 printf(" Default match limit = %ld\n", lrc);
2282 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2283 printf(" Default recursion depth limit = %ld\n", lrc);
2284 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2285 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2286 goto EXIT;
2287 }
2288 else if (strcmp(argv[op], "-help") == 0 ||
2289 strcmp(argv[op], "--help") == 0)
2290 {
2291 usage();
2292 goto EXIT;
2293 }
2294 else
2295 {
2296 printf("** Unknown or malformed option %s\n", argv[op]);
2297 usage();
2298 yield = 1;
2299 goto EXIT;
2300 }
2301 op++;
2302 argc--;
2303 }
2304
2305 /* Get the store for the offsets vector, and remember what it was */
2306
2307 size_offsets_max = size_offsets;
2308 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2309 if (offsets == NULL)
2310 {
2311 printf("** Failed to get %d bytes of memory for offsets vector\n",
2312 (int)(size_offsets_max * sizeof(int)));
2313 yield = 1;
2314 goto EXIT;
2315 }
2316
2317 /* Sort out the input and output files */
2318
2319 if (argc > 1)
2320 {
2321 infile = fopen(argv[op], INPUT_MODE);
2322 if (infile == NULL)
2323 {
2324 printf("** Failed to open %s\n", argv[op]);
2325 yield = 1;
2326 goto EXIT;
2327 }
2328 }
2329
2330 if (argc > 2)
2331 {
2332 outfile = fopen(argv[op+1], OUTPUT_MODE);
2333 if (outfile == NULL)
2334 {
2335 printf("** Failed to open %s\n", argv[op+1]);
2336 yield = 1;
2337 goto EXIT;
2338 }
2339 }
2340
2341 /* Set alternative malloc function */
2342
2343 #ifdef SUPPORT_PCRE8
2344 pcre_malloc = new_malloc;
2345 pcre_free = new_free;
2346 pcre_stack_malloc = stack_malloc;
2347 pcre_stack_free = stack_free;
2348 #endif
2349
2350 #ifdef SUPPORT_PCRE16
2351 pcre16_malloc = new_malloc;
2352 pcre16_free = new_free;
2353 pcre16_stack_malloc = stack_malloc;
2354 pcre16_stack_free = stack_free;
2355 #endif
2356
2357 /* Heading line unless quiet, then prompt for first regex if stdin */
2358
2359 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2360
2361 /* Main loop */
2362
2363 while (!done)
2364 {
2365 pcre *re = NULL;
2366 pcre_extra *extra = NULL;
2367
2368 #if !defined NOPOSIX /* There are still compilers that require no indent */
2369 regex_t preg;
2370 int do_posix = 0;
2371 #endif
2372
2373 const char *error;
2374 pcre_uint8 *markptr;
2375 pcre_uint8 *p, *pp, *ppp;
2376 pcre_uint8 *to_file = NULL;
2377 const pcre_uint8 *tables = NULL;
2378 unsigned long int true_size, true_study_size = 0;
2379 size_t size, regex_gotten_store;
2380 int do_allcaps = 0;
2381 int do_mark = 0;
2382 int do_study = 0;
2383 int no_force_study = 0;
2384 int do_debug = debug;
2385 int do_G = 0;
2386 int do_g = 0;
2387 int do_showinfo = showinfo;
2388 int do_showrest = 0;
2389 int do_showcaprest = 0;
2390 int do_flip = 0;
2391 int erroroffset, len, delimiter, poffset;
2392
2393 use_utf = 0;
2394 debug_lengths = 1;
2395
2396 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2397 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2398 fflush(outfile);
2399
2400 p = buffer;
2401 while (isspace(*p)) p++;
2402 if (*p == 0) continue;
2403
2404 /* See if the pattern is to be loaded pre-compiled from a file. */
2405
2406 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2407 {
2408 unsigned long int magic, get_options;
2409 pcre_uint8 sbuf[8];
2410 FILE *f;
2411
2412 p++;
2413 pp = p + (int)strlen((char *)p);
2414 while (isspace(pp[-1])) pp--;
2415 *pp = 0;
2416
2417 f = fopen((char *)p, "rb");
2418 if (f == NULL)
2419 {
2420 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2421 continue;
2422 }
2423
2424 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2425
2426 true_size =
2427 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2428 true_study_size =
2429 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2430
2431 re = (real_pcre *)new_malloc(true_size);
2432 regex_gotten_store = first_gotten_store;
2433
2434 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2435
2436 magic = ((real_pcre *)re)->magic_number;
2437 if (magic != MAGIC_NUMBER)
2438 {
2439 if (swap_uint32(magic) == MAGIC_NUMBER)
2440 {
2441 do_flip = 1;
2442 }
2443 else
2444 {
2445 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2446 fclose(f);
2447 continue;
2448 }
2449 }
2450
2451 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2452 do_flip? " (byte-inverted)" : "", p);
2453
2454 /* Now see if there is any following study data. */
2455
2456 if (true_study_size != 0)
2457 {
2458 pcre_study_data *psd;
2459
2460 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2461 extra->flags = PCRE_EXTRA_STUDY_DATA;
2462
2463 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2464 extra->study_data = psd;
2465
2466 if (fread(psd, 1, true_study_size, f) != true_study_size)
2467 {
2468 FAIL_READ:
2469 fprintf(outfile, "Failed to read data from %s\n", p);
2470 if (extra != NULL)
2471 {
2472 PCRE_FREE_STUDY(extra);
2473 }
2474 if (re != NULL) new_free(re);
2475 fclose(f);
2476 continue;
2477 }
2478 fprintf(outfile, "Study data loaded from %s\n", p);
2479 do_study = 1; /* To get the data output if requested */
2480 }
2481 else fprintf(outfile, "No study data\n");
2482
2483 /* Flip the necessary bytes. */
2484 if (do_flip)
2485 {
2486 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2487 }
2488
2489 /* Need to know if UTF-8 for printing data strings */
2490
2491 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2492 use_utf = (get_options & PCRE_UTF8) != 0;
2493
2494 fclose(f);
2495 goto SHOW_INFO;
2496 }
2497
2498 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2499 the pattern; if it isn't complete, read more. */
2500
2501 delimiter = *p++;
2502
2503 if (isalnum(delimiter) || delimiter == '\\')
2504 {
2505 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2506 goto SKIP_DATA;
2507 }
2508
2509 pp = p;
2510 poffset = (int)(p - buffer);
2511
2512 for(;;)
2513 {
2514 while (*pp != 0)
2515 {
2516 if (*pp == '\\' && pp[1] != 0) pp++;
2517 else if (*pp == delimiter) break;
2518 pp++;
2519 }
2520 if (*pp != 0) break;
2521 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2522 {
2523 fprintf(outfile, "** Unexpected EOF\n");
2524 done = 1;
2525 goto CONTINUE;
2526 }
2527 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2528 }
2529
2530 /* The buffer may have moved while being extended; reset the start of data
2531 pointer to the correct relative point in the buffer. */
2532
2533 p = buffer + poffset;
2534
2535 /* If the first character after the delimiter is backslash, make
2536 the pattern end with backslash. This is purely to provide a way
2537 of testing for the error message when a pattern ends with backslash. */
2538
2539 if (pp[1] == '\\') *pp++ = '\\';
2540
2541 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2542 for callouts. */
2543
2544 *pp++ = 0;
2545 strcpy((char *)pbuffer, (char *)p);
2546
2547 /* Look for options after final delimiter */
2548
2549 options = 0;
2550 study_options = 0;
2551 log_store = showstore; /* default from command line */
2552
2553 while (*pp != 0)
2554 {
2555 switch (*pp++)
2556 {
2557 case 'f': options |= PCRE_FIRSTLINE; break;
2558 case 'g': do_g = 1; break;
2559 case 'i': options |= PCRE_CASELESS; break;
2560 case 'm': options |= PCRE_MULTILINE; break;
2561 case 's': options |= PCRE_DOTALL; break;
2562 case 'x': options |= PCRE_EXTENDED; break;
2563
2564 case '+':
2565 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2566 break;
2567
2568 case '=': do_allcaps = 1; break;
2569 case 'A': options |= PCRE_ANCHORED; break;
2570 case 'B': do_debug = 1; break;
2571 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2572 case 'D': do_debug = do_showinfo = 1; break;
2573 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2574 case 'F': do_flip = 1; break;
2575 case 'G': do_G = 1; break;
2576 case 'I': do_showinfo = 1; break;
2577 case 'J': options |= PCRE_DUPNAMES; break;
2578 case 'K': do_mark = 1; break;
2579 case 'M': log_store = 1; break;
2580 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2581
2582 #if !defined NOPOSIX
2583 case 'P': do_posix = 1; break;
2584 #endif
2585
2586 case 'S':
2587 if (do_study == 0)
2588 {
2589 do_study = 1;
2590 if (*pp == '+')
2591 {
2592 study_options |= PCRE_STUDY_JIT_COMPILE;
2593 pp++;
2594 }
2595 }
2596 else
2597 {
2598 do_study = 0;
2599 no_force_study = 1;
2600 }
2601 break;
2602
2603 case 'U': options |= PCRE_UNGREEDY; break;
2604 case 'W': options |= PCRE_UCP; break;
2605 case 'X': options |= PCRE_EXTRA; break;
2606 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2607 case 'Z': debug_lengths = 0; break;
2608 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2609 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2610
2611 case 'T':
2612 switch (*pp++)
2613 {
2614 case '0': tables = tables0; break;
2615 case '1': tables = tables1; break;
2616
2617 case '\r':
2618 case '\n':
2619 case ' ':
2620 case 0:
2621 fprintf(outfile, "** Missing table number after /T\n");
2622 goto SKIP_DATA;
2623
2624 default:
2625 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2626 goto SKIP_DATA;
2627 }
2628 break;
2629
2630 case 'L':
2631 ppp = pp;
2632 /* The '\r' test here is so that it works on Windows. */
2633 /* The '0' test is just in case this is an unterminated line. */
2634 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2635 *ppp = 0;
2636 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2637 {
2638 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2639 goto SKIP_DATA;
2640 }
2641 locale_set = 1;
2642 tables = PCRE_MAKETABLES;
2643 pp = ppp;
2644 break;
2645
2646 case '>':
2647 to_file = pp;
2648 while (*pp != 0) pp++;
2649 while (isspace(pp[-1])) pp--;
2650 *pp = 0;
2651 break;
2652
2653 case '<':
2654 {
2655 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2656 {
2657 options |= PCRE_JAVASCRIPT_COMPAT;
2658 pp += 3;
2659 }
2660 else
2661 {
2662 int x = check_newline(pp, outfile);
2663 if (x == 0) goto SKIP_DATA;
2664 options |= x;
2665 while (*pp++ != '>');
2666 }
2667 }
2668 break;
2669
2670 case '\r': /* So that it works in Windows */
2671 case '\n':
2672 case ' ':
2673 break;
2674
2675 default:
2676 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2677 goto SKIP_DATA;
2678 }
2679 }
2680
2681 /* Handle compiling via the POSIX interface, which doesn't support the
2682 timing, showing, or debugging options, nor the ability to pass over
2683 local character tables. Neither does it have 16-bit support. */
2684
2685 #if !defined NOPOSIX
2686 if (posix || do_posix)
2687 {
2688 int rc;
2689 int cflags = 0;
2690
2691 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2692 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2693 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2694 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2695 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2696 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2697 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2698
2699 first_gotten_store = 0;
2700 rc = regcomp(&preg, (char *)p, cflags);
2701
2702 /* Compilation failed; go back for another re, skipping to blank line
2703 if non-interactive. */
2704
2705 if (rc != 0)
2706 {
2707 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2708 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2709 goto SKIP_DATA;
2710 }
2711 }
2712
2713 /* Handle compiling via the native interface */
2714
2715 else
2716 #endif /* !defined NOPOSIX */
2717
2718 {
2719 unsigned long int get_options;
2720
2721 /* In 16-bit mode, convert the input. */
2722
2723 #ifdef SUPPORT_PCRE16
2724 if (use_pcre16)
2725 {
2726 switch(to16(p, options & PCRE_UTF8, (int)strlen((char *)p)))
2727 {
2728 case -1:
2729 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2730 "converted to UTF-16\n");
2731 goto SKIP_DATA;
2732
2733 case -2:
2734 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2735 "cannot be converted to UTF-16\n");
2736 goto SKIP_DATA;
2737
2738 default:
2739 break;
2740 }
2741 p = (pcre_uint8 *)buffer16;
2742 }
2743 #endif
2744
2745 /* Compile many times when timing */
2746
2747 if (timeit > 0)
2748 {
2749 register int i;
2750 clock_t time_taken;
2751 clock_t start_time = clock();
2752 for (i = 0; i < timeit; i++)
2753 {
2754 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2755 if (re != NULL) free(re);
2756 }
2757 time_taken = clock() - start_time;
2758 fprintf(outfile, "Compile time %.4f milliseconds\n",
2759 (((double)time_taken * 1000.0) / (double)timeit) /
2760 (double)CLOCKS_PER_SEC);
2761 }
2762
2763 first_gotten_store = 0;
2764 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2765
2766 /* Compilation failed; go back for another re, skipping to blank line
2767 if non-interactive. */
2768
2769 if (re == NULL)
2770 {
2771 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2772 SKIP_DATA:
2773 if (infile != stdin)
2774 {
2775 for (;;)
2776 {
2777 if (extend_inputline(infile, buffer, NULL) == NULL)
2778 {
2779 done = 1;
2780 goto CONTINUE;
2781 }
2782 len = (int)strlen((char *)buffer);
2783 while (len > 0 && isspace(buffer[len-1])) len--;
2784 if (len == 0) break;
2785 }
2786 fprintf(outfile, "\n");
2787 }
2788 goto CONTINUE;
2789 }
2790
2791 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2792 within the regex; check for this so that we know how to process the data
2793 lines. */
2794
2795 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2796 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2797
2798 /* Extract the size for possible writing before possibly flipping it,
2799 and remember the store that was got. */
2800
2801 true_size = ((real_pcre *)re)->size;
2802 regex_gotten_store = first_gotten_store;
2803
2804 /* Output code size information if requested */
2805
2806 if (log_store)
2807 fprintf(outfile, "Memory allocation (code space): %d\n",
2808 (int)(first_gotten_store -
2809 sizeof(real_pcre) -
2810 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2811
2812 /* If -s or /S was present, study the regex to generate additional info to
2813 help with the matching, unless the pattern has the SS option, which
2814 suppresses the effect of /S (used for a few test patterns where studying is
2815 never sensible). */
2816
2817 if (do_study || (force_study >= 0 && !no_force_study))
2818 {
2819 if (timeit > 0)
2820 {
2821 register int i;
2822 clock_t time_taken;
2823 clock_t start_time = clock();
2824 for (i = 0; i < timeit; i++)
2825 {
2826 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2827 }
2828 time_taken = clock() - start_time;
2829 if (extra != NULL)
2830 {
2831 PCRE_FREE_STUDY(extra);
2832 }
2833 fprintf(outfile, " Study time %.4f milliseconds\n",
2834 (((double)time_taken * 1000.0) / (double)timeit) /
2835 (double)CLOCKS_PER_SEC);
2836 }
2837 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2838 if (error != NULL)
2839 fprintf(outfile, "Failed to study: %s\n", error);
2840 else if (extra != NULL)
2841 {
2842 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2843 if (log_store)
2844 {
2845 size_t jitsize;
2846 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2847 if (jitsize != 0)
2848 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2849 }
2850 }
2851 }
2852
2853 /* If /K was present, we set up for handling MARK data. */
2854
2855 if (do_mark)
2856 {
2857 if (extra == NULL)
2858 {
2859 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2860 extra->flags = 0;
2861 }
2862 extra->mark = &markptr;
2863 extra->flags |= PCRE_EXTRA_MARK;
2864 }
2865
2866 /* Extract and display information from the compiled data if required. */
2867
2868 SHOW_INFO:
2869
2870 if (do_debug)
2871 {
2872 fprintf(outfile, "------------------------------------------------------------------\n");
2873 PCRE_PRINTINT(re, outfile, debug_lengths);
2874 }
2875
2876 /* We already have the options in get_options (see above) */
2877
2878 if (do_showinfo)
2879 {
2880 unsigned long int all_options;
2881 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2882 hascrorlf;
2883 int nameentrysize, namecount;
2884 const pcre_uint8 *nametable;
2885
2886 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2887 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2888 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2889 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2890 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2891 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2892 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2893 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2894 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2895 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2896 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2897
2898 if (size != regex_gotten_store) fprintf(outfile,
2899 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2900 (int)size, (int)regex_gotten_store);
2901
2902 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2903 if (backrefmax > 0)
2904 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2905
2906 if (namecount > 0)
2907 {
2908 fprintf(outfile, "Named capturing subpatterns:\n");
2909 while (namecount-- > 0)
2910 {
2911 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2912 int imm2_size = use_pcre16 ? 1 : 2;
2913 #else
2914 int imm2_size = IMM2_SIZE;
2915 #endif
2916 int length = (int)STRLEN(nametable + imm2_size);
2917 fprintf(outfile, " ");
2918 PCHARSV(nametable, imm2_size, length, outfile);
2919 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
2920 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2921 fprintf(outfile, "%3d\n", use_pcre16?
2922 (int)(((PCRE_SPTR16)nametable)[0])
2923 :((int)nametable[0] << 8) | (int)nametable[1]);
2924 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
2925 #else
2926 fprintf(outfile, "%3d\n", GET2(nametable, 0));
2927 #ifdef SUPPORT_PCRE8
2928 nametable += nameentrysize;
2929 #else
2930 nametable += nameentrysize * 2;
2931 #endif
2932 #endif
2933 }
2934 }
2935
2936 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2937 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2938
2939 all_options = ((real_pcre *)re)->options;
2940 if (do_flip) all_options = swap_uint32(all_options);
2941
2942 if (get_options == 0) fprintf(outfile, "No options\n");
2943 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2944 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2945 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2946 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2947 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2948 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2949 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2950 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2951 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2952 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2953 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2954 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2955 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2956 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2957 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2958 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2959 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2960 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2961
2962 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2963
2964 switch (get_options & PCRE_NEWLINE_BITS)
2965 {
2966 case PCRE_NEWLINE_CR:
2967 fprintf(outfile, "Forced newline sequence: CR\n");
2968 break;
2969
2970 case PCRE_NEWLINE_LF:
2971 fprintf(outfile, "Forced newline sequence: LF\n");
2972 break;
2973
2974 case PCRE_NEWLINE_CRLF:
2975 fprintf(outfile, "Forced newline sequence: CRLF\n");
2976 break;
2977
2978 case PCRE_NEWLINE_ANYCRLF:
2979 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2980 break;
2981
2982 case PCRE_NEWLINE_ANY:
2983 fprintf(outfile, "Forced newline sequence: ANY\n");
2984 break;
2985
2986 default:
2987 break;
2988 }
2989
2990 if (first_char == -1)
2991 {
2992 fprintf(outfile, "First char at start or follows newline\n");
2993 }
2994 else if (first_char < 0)
2995 {
2996 fprintf(outfile, "No first char\n");
2997 }
2998 else
2999 {
3000 const char *caseless =
3001 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3002 "" : " (caseless)";
3003
3004 if (PRINTOK(first_char))
3005 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3006 else
3007 {
3008 fprintf(outfile, "First char = ");
3009 pchar(first_char, outfile);
3010 fprintf(outfile, "%s\n", caseless);
3011 }
3012 }
3013
3014 if (need_char < 0)
3015 {
3016 fprintf(outfile, "No need char\n");
3017 }
3018 else
3019 {
3020 const char *caseless =
3021 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3022 "" : " (caseless)";
3023
3024 if (PRINTOK(need_char))
3025 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3026 else
3027 {
3028 fprintf(outfile, "Need char = ");
3029 pchar(need_char, outfile);
3030 fprintf(outfile, "%s\n", caseless);
3031 }
3032 }
3033
3034 /* Don't output study size; at present it is in any case a fixed
3035 value, but it varies, depending on the computer architecture, and
3036 so messes up the test suite. (And with the /F option, it might be
3037 flipped.) If study was forced by an external -s, don't show this
3038 information unless -i or -d was also present. This means that, except
3039 when auto-callouts are involved, the output from runs with and without
3040 -s should be identical. */
3041
3042 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3043 {
3044 if (extra == NULL)
3045 fprintf(outfile, "Study returned NULL\n");
3046 else
3047 {
3048 pcre_uint8 *start_bits = NULL;
3049 int minlength;
3050
3051 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
3052 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3053
3054 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
3055 if (start_bits == NULL)
3056 fprintf(outfile, "No set of starting bytes\n");
3057 else
3058 {
3059 int i;
3060 int c = 24;
3061 fprintf(outfile, "Starting byte set: ");
3062 for (i = 0; i < 256; i++)
3063 {
3064 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3065 {
3066 if (c > 75)
3067 {
3068 fprintf(outfile, "\n ");
3069 c = 2;
3070 }
3071 if (PRINTOK(i) && i != ' ')
3072 {
3073 fprintf(outfile, "%c ", i);
3074 c += 2;
3075 }
3076 else
3077 {
3078 fprintf(outfile, "\\x%02x ", i);
3079 c += 5;
3080 }
3081 }
3082 }
3083 fprintf(outfile, "\n");
3084 }
3085 }
3086
3087 /* Show this only if the JIT was set by /S, not by -s. */
3088
3089 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3090 {
3091 int jit;
3092 new_info(re, extra, PCRE_INFO_JIT, &jit);
3093 if (jit)
3094 fprintf(outfile, "JIT study was successful\n");
3095 else
3096 #ifdef SUPPORT_JIT
3097 fprintf(outfile, "JIT study was not successful\n");
3098 #else
3099 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3100 #endif
3101 }
3102 }
3103 }
3104
3105 /* If the '>' option was present, we write out the regex to a file, and
3106 that is all. The first 8 bytes of the file are the regex length and then
3107 the study length, in big-endian order. */
3108
3109 if (to_file != NULL)
3110 {
3111 FILE *f = fopen((char *)to_file, "wb");
3112 if (f == NULL)
3113 {
3114 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3115 }
3116 else
3117 {
3118 pcre_uint8 sbuf[8];
3119
3120 if (do_flip) regexflip(re, extra);
3121 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3122 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3123 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3124 sbuf[3] = (pcre_uint8)((true_size) & 255);
3125 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3126 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3127 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3128 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3129
3130 if (fwrite(sbuf, 1, 8, f) < 8 ||
3131 fwrite(re, 1, true_size, f) < true_size)
3132 {
3133 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3134 }
3135 else
3136 {
3137 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3138
3139 /* If there is study data, write it. */
3140
3141 if (extra != NULL)
3142 {
3143 if (fwrite(extra->study_data, 1, true_study_size, f) <
3144 true_study_size)
3145 {
3146 fprintf(outfile, "Write error on %s: %s\n", to_file,
3147 strerror(errno));
3148 }
3149 else fprintf(outfile, "Study data written to %s\n", to_file);
3150 }
3151 }
3152 fclose(f);
3153 }
3154
3155 new_free(re);
3156 if (extra != NULL)
3157 {
3158 PCRE_FREE_STUDY(extra);
3159 }
3160 if (locale_set)
3161 {
3162 new_free((void *)tables);
3163 setlocale(LC_CTYPE, "C");
3164 locale_set = 0;
3165 }
3166 continue; /* With next regex */
3167 }
3168 } /* End of non-POSIX compile */
3169
3170 /* Read data lines and test them */
3171
3172 for (;;)
3173 {
3174 pcre_uint8 *q;
3175 pcre_uint8 *bptr;
3176 int *use_offsets = offsets;
3177 int use_size_offsets = size_offsets;
3178 int callout_data = 0;
3179 int callout_data_set = 0;
3180 int count, c;
3181 int copystrings = 0;
3182 int find_match_limit = default_find_match_limit;
3183 int getstrings = 0;
3184 int getlist = 0;
3185 int gmatched = 0;
3186 int start_offset = 0;
3187 int start_offset_sign = 1;
3188 int g_notempty = 0;
3189 int use_dfa = 0;
3190
3191 *copynames = 0;
3192 *getnames = 0;
3193
3194 cn16ptr = copynames;
3195 gn16ptr = getnames;
3196 cn8ptr = copynames8;
3197 gn8ptr = getnames8;
3198
3199 SET_PCRE_CALLOUT(callout);
3200 first_callout = 1;
3201 last_callout_mark = NULL;
3202 callout_extra = 0;
3203 callout_count = 0;
3204 callout_fail_count = 999999;
3205 callout_fail_id = -1;
3206 show_malloc = 0;
3207 options = 0;
3208
3209 if (extra != NULL) extra->flags &=
3210 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3211
3212 len = 0;
3213 for (;;)
3214 {
3215 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3216 {
3217 if (len > 0) /* Reached EOF without hitting a newline */
3218 {
3219 fprintf(outfile, "\n");
3220 break;
3221 }
3222 done = 1;
3223 goto CONTINUE;
3224 }
3225 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3226 len = (int)strlen((char *)buffer);
3227 if (buffer[len-1] == '\n') break;
3228 }
3229
3230 while (len > 0 && isspace(buffer[len-1])) len--;
3231 buffer[len] = 0;
3232 if (len == 0) break;
3233
3234 p = buffer;
3235 while (isspace(*p)) p++;
3236
3237 bptr = q = dbuffer;
3238 while ((c = *p++) != 0)
3239 {
3240 int i = 0;
3241 int n = 0;
3242
3243 if (c == '\\') switch ((c = *p++))
3244 {
3245 case 'a': c = 7; break;
3246 case 'b': c = '\b'; break;
3247 case 'e': c = 27; break;
3248 case 'f': c = '\f'; break;
3249 case 'n': c = '\n'; break;
3250 case 'r': c = '\r'; break;
3251 case 't': c = '\t'; break;
3252 case 'v': c = '\v'; break;
3253
3254 case '0': case '1': case '2': case '3':
3255 case '4': case '5': case '6': case '7':
3256 c -= '0';
3257 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3258 c = c * 8 + *p++ - '0';
3259
3260 #if !defined NOUTF
3261 if (use_utf && c > 255)
3262 {
3263 pcre_uint8 buff8[8];
3264 int ii, utn;
3265 utn = ord2utf8(c, buff8);
3266 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3267 c = buff8[ii]; /* Last byte */
3268 }
3269 #endif
3270 break;
3271
3272 case 'x':
3273
3274 /* Handle \x{..} specially - new Perl thing for utf8 */
3275
3276 #if !defined NOUTF
3277 if (*p == '{')
3278 {
3279 pcre_uint8 *pt = p;
3280 c = 0;
3281
3282 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3283 when isxdigit() is a macro that refers to its argument more than
3284 once. This is banned by the C Standard, but apparently happens in at
3285 least one MacOS environment. */
3286
3287 for (pt++; isxdigit(*pt); pt++)
3288 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3289 if (*pt == '}')
3290 {
3291 pcre_uint8 buff8[8];
3292 int ii, utn;
3293 if (use_utf)
3294 {
3295 utn = ord2utf8(c, buff8);
3296 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3297 c = buff8[ii]; /* Last byte */
3298 }
3299 else
3300 {
3301 if (c > 255)
3302 {
3303 if (use_pcre16)
3304 fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
3305 "** Because its input is first processed as 8-bit, pcretest "
3306 "does not\n** support such characters in 16-bit mode when "
3307 "UTF-16 is not set.\n", c);
3308 else
3309 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3310 "and UTF-8 mode is not enabled.\n", c);
3311
3312 fprintf(outfile, "** Truncation will probably give the wrong "
3313 "result.\n");
3314 }
3315 }
3316 p = pt + 1;
3317 break;
3318 }
3319 /* Not correct form; fall through */
3320 }
3321 #endif
3322
3323 /* Ordinary \x */
3324
3325 c = 0;
3326 while (i++ < 2 && isxdigit(*p))
3327 {
3328 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3329 p++;
3330 }
3331 break;
3332
3333 case 0: /* \ followed by EOF allows for an empty line */
3334 p--;
3335 continue;
3336
3337 case '>':
3338 if (*p == '-')
3339 {
3340 start_offset_sign = -1;
3341 p++;
3342 }
3343 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3344 start_offset *= start_offset_sign;
3345 continue;
3346
3347 case 'A': /* Option setting */
3348 options |= PCRE_ANCHORED;
3349 continue;
3350
3351 case 'B':
3352 options |= PCRE_NOTBOL;
3353 continue;
3354
3355 case 'C':
3356 if (isdigit(*p)) /* Set copy string */
3357 {
3358 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3359 copystrings |= 1 << n;
3360 }
3361 else if (isalnum(*p))
3362 {
3363 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3364 }
3365 else if (*p == '+')
3366 {
3367 callout_extra = 1;
3368 p++;
3369 }
3370 else if (*p == '-')
3371 {
3372 SET_PCRE_CALLOUT(NULL);
3373 p++;
3374 }
3375 else if (*p == '!')
3376 {
3377 callout_fail_id = 0;
3378 p++;
3379 while(isdigit(*p))
3380 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3381 callout_fail_count = 0;
3382 if (*p == '!')
3383 {
3384 p++;
3385 while(isdigit(*p))
3386 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3387 }
3388 }
3389 else if (*p == '*')
3390 {
3391 int sign = 1;
3392 callout_data = 0;
3393 if (*(++p) == '-') { sign = -1; p++; }
3394 while(isdigit(*p))
3395 callout_data = callout_data * 10 + *p++ - '0';
3396 callout_data *= sign;
3397 callout_data_set = 1;
3398 }
3399 continue;
3400
3401 #if !defined NODFA
3402 case 'D':
3403 #if !defined NOPOSIX
3404 if (posix || do_posix)
3405 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3406 else
3407 #endif
3408 use_dfa = 1;
3409 continue;
3410 #endif
3411
3412 #if !defined NODFA
3413 case 'F':
3414 options |= PCRE_DFA_SHORTEST;
3415 continue;
3416 #endif
3417
3418 case 'G':
3419 if (isdigit(*p))
3420 {
3421 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3422 getstrings |= 1 << n;
3423 }
3424 else if (isalnum(*p))
3425 {
3426 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3427 }
3428 continue;
3429
3430 case 'J':
3431 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3432 if (extra != NULL
3433 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3434 && extra->executable_jit != NULL)
3435 {
3436 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3437 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3438 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3439 }
3440 continue;
3441
3442 case 'L':
3443 getlist = 1;
3444 continue;
3445
3446 case 'M':
3447 find_match_limit = 1;
3448 continue;
3449
3450 case 'N':
3451 if ((options & PCRE_NOTEMPTY) != 0)
3452 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3453 else
3454 options |= PCRE_NOTEMPTY;
3455 continue;
3456
3457 case 'O':
3458 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3459 if (n > size_offsets_max)
3460 {
3461 size_offsets_max = n;
3462 free(offsets);
3463 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3464 if (offsets == NULL)
3465 {
3466 printf("** Failed to get %d bytes of memory for offsets vector\n",
3467 (int)(size_offsets_max * sizeof(int)));
3468 yield = 1;
3469 goto EXIT;
3470 }
3471 }
3472 use_size_offsets = n;
3473 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3474 continue;
3475
3476 case 'P':
3477 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3478 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3479 continue;
3480
3481 case 'Q':
3482 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3483 if (extra == NULL)
3484 {
3485 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3486 extra->flags = 0;
3487 }
3488 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3489 extra->match_limit_recursion = n;
3490 continue;
3491
3492 case 'q':
3493 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3494 if (extra == NULL)
3495 {
3496 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3497 extra->flags = 0;
3498 }
3499 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3500 extra->match_limit = n;
3501 continue;
3502
3503 #if !defined NODFA
3504 case 'R':
3505 options |= PCRE_DFA_RESTART;
3506 continue;
3507 #endif
3508
3509 case 'S':
3510 show_malloc = 1;
3511 continue;
3512
3513 case 'Y':
3514 options |= PCRE_NO_START_OPTIMIZE;
3515 continue;
3516
3517 case 'Z':
3518 options |= PCRE_NOTEOL;
3519 continue;
3520
3521 case '?':
3522 options |= PCRE_NO_UTF8_CHECK;
3523 continue;
3524
3525 case '<':
3526 {
3527 int x = check_newline(p, outfile);
3528 if (x == 0) goto NEXT_DATA;
3529 options |= x;
3530 while (*p++ != '>');
3531 }
3532 continue;
3533 }
3534 *q++ = c;
3535 }
3536 *q = 0;
3537 len = (int)(q - dbuffer);
3538
3539 /* Move the data to the end of the buffer so that a read over the end of
3540 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3541 we are using the POSIX interface, we must include the terminating zero. */
3542
3543 #if !defined NOPOSIX
3544 if (posix || do_posix)
3545 {
3546 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3547 bptr += buffer_size - len - 1;
3548 }
3549 else
3550 #endif
3551 {
3552 memmove(bptr + buffer_size - len, bptr, len);
3553 bptr += buffer_size - len;
3554 }
3555
3556 if ((all_use_dfa || use_dfa) && find_match_limit)
3557 {
3558 printf("**Match limit not relevant for DFA matching: ignored\n");
3559 find_match_limit = 0;
3560 }
3561
3562 /* Handle matching via the POSIX interface, which does not
3563 support timing or playing with the match limit or callout data. */
3564
3565 #if !defined NOPOSIX
3566 if (posix || do_posix)
3567 {
3568 int rc;
3569 int eflags = 0;
3570 regmatch_t *pmatch = NULL;
3571 if (use_size_offsets > 0)
3572 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3573 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3574 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3575 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3576
3577 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3578
3579 if (rc != 0)
3580 {
3581 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3582 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3583 }
3584 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3585 != 0)
3586 {
3587 fprintf(outfile, "Matched with REG_NOSUB\n");
3588 }
3589 else
3590 {
3591 size_t i;
3592 for (i = 0; i < (size_t)use_size_offsets; i++)
3593 {
3594 if (pmatch[i].rm_so >= 0)
3595 {
3596 fprintf(outfile, "%2d: ", (int)i);
3597 PCHARSV(dbuffer, pmatch[i].rm_so,
3598 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3599 fprintf(outfile, "\n");
3600 if (do_showcaprest || (i == 0 && do_showrest))
3601 {
3602 fprintf(outfile, "%2d+ ", (int)i);
3603 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3604 outfile);
3605 fprintf(outfile, "\n");
3606 }
3607 }
3608 }
3609 }
3610 free(pmatch);
3611 goto NEXT_DATA;
3612 }
3613
3614 #endif /* !defined NOPOSIX */
3615
3616 /* Handle matching via the native interface - repeats for /g and /G */
3617
3618 #ifdef SUPPORT_PCRE16
3619 if (use_pcre16)
3620 {
3621 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3622 switch(len)
3623 {
3624 case -1:
3625 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3626 "converted to UTF-16\n");
3627 goto NEXT_DATA;
3628
3629 case -2:
3630 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3631 "cannot be converted to UTF-16\n");
3632 goto NEXT_DATA;
3633
3634 default:
3635 break;
3636 }
3637 bptr = (pcre_uint8 *)buffer16;
3638 }
3639 #endif
3640
3641 for (;; gmatched++) /* Loop for /g or /G */
3642 {
3643 markptr = NULL;
3644
3645 if (timeitm > 0)
3646 {
3647 register int i;
3648 clock_t time_taken;
3649 clock_t start_time = clock();
3650
3651 #if !defined NODFA
3652 if (all_use_dfa || use_dfa)
3653 {
3654 int workspace[1000];
3655 for (i = 0; i < timeitm; i++)
3656 {
3657 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3658 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3659 (sizeof(workspace)/sizeof(int)));
3660 }
3661 }
3662 else
3663 #endif
3664
3665 for (i = 0; i < timeitm; i++)
3666 {
3667 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3668 (options | g_notempty), use_offsets, use_size_offsets);
3669 }
3670 time_taken = clock() - start_time;
3671 fprintf(outfile, "Execute time %.4f milliseconds\n",
3672 (((double)time_taken * 1000.0) / (double)timeitm) /
3673 (double)CLOCKS_PER_SEC);
3674 }
3675
3676 /* If find_match_limit is set, we want to do repeated matches with
3677 varying limits in order to find the minimum value for the match limit and
3678 for the recursion limit. The match limits are relevant only to the normal
3679 running of pcre_exec(), so disable the JIT optimization. This makes it
3680 possible to run the same set of tests with and without JIT externally
3681 requested. */
3682
3683 if (find_match_limit)
3684 {
3685 if (extra == NULL)
3686 {
3687 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3688 extra->flags = 0;
3689 }
3690 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3691
3692 (void)check_match_limit(re, extra, bptr, len, start_offset,
3693 options|g_notempty, use_offsets, use_size_offsets,
3694 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3695 PCRE_ERROR_MATCHLIMIT, "match()");
3696
3697 count = check_match_limit(re, extra, bptr, len, start_offset,
3698 options|g_notempty, use_offsets, use_size_offsets,
3699 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3700 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3701 }
3702
3703 /* If callout_data is set, use the interface with additional data */
3704
3705 else if (callout_data_set)
3706 {
3707 if (extra == NULL)
3708 {
3709 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3710 extra->flags = 0;
3711 }
3712 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3713 extra->callout_data = &callout_data;
3714 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3715 options | g_notempty, use_offsets, use_size_offsets);
3716 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3717 }
3718
3719 /* The normal case is just to do the match once, with the default
3720 value of match_limit. */
3721
3722 #if !defined NODFA
3723 else if (all_use_dfa || use_dfa)
3724 {
3725 int workspace[1000];
3726 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3727 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3728 (sizeof(workspace)/sizeof(int)));
3729 if (count == 0)
3730 {
3731 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3732 count = use_size_offsets/2;
3733 }
3734 }
3735 #endif
3736
3737 else
3738 {
3739 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3740 options | g_notempty, use_offsets, use_size_offsets);
3741 if (count == 0)
3742 {
3743 fprintf(outfile, "Matched, but too many substrings\n");
3744 count = use_size_offsets/3;
3745 }
3746 }
3747
3748 /* Matched */
3749
3750 if (count >= 0)
3751 {
3752 int i, maxcount;
3753 void *cnptr, *gnptr;
3754
3755 #if !defined NODFA
3756 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3757 #endif
3758 maxcount = use_size_offsets/3;
3759
3760 /* This is a check against a lunatic return value. */
3761
3762 if (count > maxcount)
3763 {
3764 fprintf(outfile,
3765 "** PCRE error: returned count %d is too big for offset size %d\n",
3766 count, use_size_offsets);
3767 count = use_size_offsets/3;
3768 if (do_g || do_G)
3769 {
3770 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3771 do_g = do_G = FALSE; /* Break g/G loop */
3772 }
3773 }
3774
3775 /* do_allcaps requests showing of all captures in the pattern, to check
3776 unset ones at the end. */
3777
3778 if (do_allcaps)
3779 {
3780 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3781 count++; /* Allow for full match */
3782 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3783 }
3784
3785 /* Output the captured substrings */
3786
3787 for (i = 0; i < count * 2; i += 2)
3788 {
3789 if (use_offsets[i] < 0)
3790 {
3791 if (use_offsets[i] != -1)
3792 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3793 use_offsets[i], i);
3794 if (use_offsets[i+1] != -1)
3795 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3796 use_offsets[i+1], i+1);
3797 fprintf(outfile, "%2d: <unset>\n", i/2);
3798 }
3799 else
3800 {
3801 fprintf(outfile, "%2d: ", i/2);
3802 PCHARSV(bptr, use_offsets[i],
3803 use_offsets[i+1] - use_offsets[i], outfile);
3804 fprintf(outfile, "\n");
3805 if (do_showcaprest || (i == 0 && do_showrest))
3806 {
3807 fprintf(outfile, "%2d+ ", i/2);
3808 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3809 outfile);
3810 fprintf(outfile, "\n");
3811 }
3812 }
3813 }
3814
3815 if (markptr != NULL)
3816 {
3817 fprintf(outfile, "MK: ");
3818 PCHARSV(markptr, 0, -1, outfile);
3819 fprintf(outfile, "\n");
3820 }
3821
3822 for (i = 0; i < 32; i++)
3823 {
3824 if ((copystrings & (1 << i)) != 0)
3825 {
3826 int rc;
3827 char copybuffer[256];
3828 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3829 copybuffer, sizeof(copybuffer));
3830 if (rc < 0)
3831 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3832 else
3833 {
3834 fprintf(outfile, "%2dC ", i);
3835 PCHARSV(copybuffer, 0, rc, outfile);
3836 fprintf(outfile, " (%d)\n", rc);
3837 }
3838 }
3839 }
3840
3841 cnptr = copynames;
3842 for (;;)
3843 {
3844 int rc;
3845 char copybuffer[256];
3846
3847 if (use_pcre16)
3848 {
3849 if (*(pcre_uint16 *)cnptr == 0) break;
3850 }
3851 else
3852 {
3853 if (*(pcre_uint8 *)cnptr == 0) break;
3854 }
3855
3856 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3857 cnptr, copybuffer, sizeof(copybuffer));
3858
3859 if (rc < 0)
3860 {
3861 fprintf(outfile, "copy substring ");
3862 PCHARSV(cnptr, 0, -1, outfile);
3863 fprintf(outfile, " failed %d\n", rc);
3864 }
3865 else
3866 {
3867 fprintf(outfile, " C ");
3868 PCHARSV(copybuffer, 0, rc, outfile);
3869 fprintf(outfile, " (%d) ", rc);
3870 PCHARSV(cnptr, 0, -1, outfile);
3871 putc('\n', outfile);
3872 }
3873
3874 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
3875 }
3876
3877 for (i = 0; i < 32; i++)
3878 {
3879 if ((getstrings & (1 << i)) != 0)
3880 {
3881 int rc;
3882 const char *substring;
3883 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
3884 if (rc < 0)
3885 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3886 else
3887 {
3888 fprintf(outfile, "%2dG ", i);
3889 PCHARSV(substring, 0, rc, outfile);
3890 fprintf(outfile, " (%d)\n", rc);
3891 PCRE_FREE_SUBSTRING(substring);
3892 }
3893 }
3894 }
3895
3896 gnptr = getnames;
3897 for (;;)
3898 {
3899 int rc;
3900 const char *substring;
3901
3902 if (use_pcre16)
3903 {
3904 if (*(pcre_uint16 *)gnptr == 0) break;
3905 }
3906 else
3907 {
3908 if (*(pcre_uint8 *)gnptr == 0) break;
3909 }
3910
3911 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3912 gnptr, &substring);
3913 if (rc < 0)
3914 {
3915 fprintf(outfile, "get substring ");
3916 PCHARSV(gnptr, 0, -1, outfile);
3917 fprintf(outfile, " failed %d\n", rc);
3918 }
3919 else
3920 {
3921 fprintf(outfile, " G ");
3922 PCHARSV(substring, 0, rc, outfile);
3923 fprintf(outfile, " (%d) ", rc);
3924 PCHARSV(gnptr, 0, -1, outfile);
3925 PCRE_FREE_SUBSTRING(substring);
3926 putc('\n', outfile);
3927 }
3928
3929 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
3930 }
3931
3932 if (getlist)
3933 {
3934 int rc;
3935 const char **stringlist;
3936 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
3937 if (rc < 0)
3938 fprintf(outfile, "get substring list failed %d\n", rc);
3939 else
3940 {
3941 for (i = 0; i < count; i++)
3942 {
3943 fprintf(outfile, "%2dL ", i);
3944 PCHARSV(stringlist[i], 0, -1, outfile);
3945 putc('\n', outfile);
3946 }
3947 if (stringlist[i] != NULL)
3948 fprintf(outfile, "string list not terminated by NULL\n");
3949 PCRE_FREE_SUBSTRING_LIST(stringlist);
3950 }
3951 }
3952 }
3953
3954 /* There was a partial match */
3955
3956 else if (count == PCRE_ERROR_PARTIAL)
3957 {
3958 if (markptr == NULL) fprintf(outfile, "Partial match");
3959 else
3960 {
3961 fprintf(outfile, "Partial match, mark=");
3962 PCHARSV(markptr, 0, -1, outfile);
3963 }
3964 if (use_size_offsets > 1)
3965 {
3966 fprintf(outfile, ": ");
3967 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
3968 outfile);
3969 }
3970 fprintf(outfile, "\n");
3971 break; /* Out of the /g loop */
3972 }
3973
3974 /* Failed to match. If this is a /g or /G loop and we previously set
3975 g_notempty after a null match, this is not necessarily the end. We want
3976 to advance the start offset, and continue. We won't be at the end of the
3977 string - that was checked before setting g_notempty.
3978
3979 Complication arises in the case when the newline convention is "any",
3980 "crlf", or "anycrlf". If the previous match was at the end of a line
3981 terminated by CRLF, an advance of one character just passes the \r,
3982 whereas we should prefer the longer newline sequence, as does the code in
3983 pcre_exec(). Fudge the offset value to achieve this. We check for a
3984 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
3985 find the default.
3986
3987 Otherwise, in the case of UTF-8 matching, the advance must be one
3988 character, not one byte. */
3989
3990 else
3991 {
3992 if (g_notempty != 0)
3993 {
3994 int onechar = 1;
3995 unsigned int obits = ((real_pcre *)re)->options;
3996 use_offsets[0] = start_offset;
3997 if ((obits & PCRE_NEWLINE_BITS) == 0)
3998 {
3999 int d;
4000 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4001 /* Note that these values are always the ASCII ones, even in
4002 EBCDIC environments. CR = 13, NL = 10. */
4003 obits = (d == 13)? PCRE_NEWLINE_CR :
4004 (d == 10)? PCRE_NEWLINE_LF :
4005 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4006 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4007 (d == -1)? PCRE_NEWLINE_ANY : 0;
4008 }
4009 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4010 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4011 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4012 &&
4013 start_offset < len - 1 &&
4014 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4015 (use_pcre16?
4016 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4017 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4018 :
4019 bptr[start_offset] == '\r'
4020 && bptr[start_offset + 1] == '\n')
4021 #else
4022 bptr[start_offset] == '\r' &&
4023 bptr[start_offset + 1] == '\n'
4024 #endif
4025 )
4026 onechar++;
4027 else if (use_utf)
4028 {
4029 while (start_offset + onechar < len)
4030 {
4031 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4032 onechar++;
4033 }
4034 }
4035 use_offsets[1] = start_offset + onechar;
4036 }
4037 else
4038 {
4039 switch(count)
4040 {
4041 case PCRE_ERROR_NOMATCH:
4042 if (gmatched == 0)
4043 {
4044 if (markptr == NULL)
4045 {
4046 fprintf(outfile, "No match\n");
4047 }
4048 else
4049 {
4050 fprintf(outfile, "No match, mark = ");
4051 PCHARSV(markptr, 0, -1, outfile);
4052 putc('\n', outfile);
4053 }
4054 }
4055 break;
4056
4057 case PCRE_ERROR_BADUTF8:
4058 case PCRE_ERROR_SHORTUTF8:
4059 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4060 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4061 use_pcre16? "16" : "8");
4062 if (use_size_offsets >= 2)
4063 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4064 use_offsets[1]);
4065 fprintf(outfile, "\n");
4066 break;
4067
4068 case PCRE_ERROR_BADUTF8_OFFSET:
4069 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4070 use_pcre16? "16" : "8");
4071 break;
4072
4073 default:
4074 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4075 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4076 else
4077 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4078 break;
4079 }
4080
4081 break; /* Out of the /g loop */
4082 }
4083 }
4084
4085 /* If not /g or /G we are done */
4086
4087 if (!do_g && !do_G) break;
4088
4089 /* If we have matched an empty string, first check to see if we are at
4090 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4091 Perl's /g option does. This turns out to be rather cunning. First we set
4092 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4093 same point. If this fails (picked up above) we advance to the next
4094 character. */
4095
4096 g_notempty = 0;
4097
4098 if (use_offsets[0] == use_offsets[1])
4099 {
4100 if (use_offsets[0] == len) break;
4101 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4102 }
4103
4104 /* For /g, update the start offset, leaving the rest alone */
4105
4106 if (do_g) start_offset = use_offsets[1];
4107
4108 /* For /G, update the pointer and length */
4109
4110 else
4111 {
4112 bptr += use_offsets[1] * CHAR_SIZE;
4113 len -= use_offsets[1];
4114 }
4115 } /* End of loop for /g and /G */
4116
4117 NEXT_DATA: continue;
4118 } /* End of loop for data lines */
4119
4120 CONTINUE:
4121
4122 #if !defined NOPOSIX
4123 if (posix || do_posix) regfree(&preg);
4124 #endif
4125
4126 if (re != NULL) new_free(re);
4127 if (extra != NULL)
4128 {
4129 PCRE_FREE_STUDY(extra);
4130 }
4131 if (locale_set)
4132 {
4133 new_free((void *)tables);
4134 setlocale(LC_CTYPE, "C");
4135 locale_set = 0;
4136 }
4137 if (jit_stack != NULL)
4138 {
4139 PCRE_JIT_STACK_FREE(jit_stack);
4140 jit_stack = NULL;
4141 }
4142 }
4143
4144 if (infile == stdin) fprintf(outfile, "\n");
4145
4146 EXIT:
4147
4148 if (infile != NULL && infile != stdin) fclose(infile);
4149 if (outfile != NULL && outfile != stdout) fclose(outfile);
4150
4151 free(buffer);
4152 free(dbuffer);
4153 free(pbuffer);
4154 free(offsets);
4155
4156 #ifdef SUPPORT_PCRE16
4157 if (buffer16 != NULL) free(buffer16);
4158 #endif
4159
4160 return yield;
4161 }
4162
4163 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5