/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 813 - (show annotations)
Tue Dec 20 14:03:16 2011 UTC (7 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 108920 byte(s)
pcretest can now save/restore byte-swapped 16-bit patterns.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
69 #if defined(_WIN32) || defined(WIN32)
70 #include <io.h> /* For _setmode() */
71 #include <fcntl.h> /* For _O_BINARY */
72 #define INPUT_MODE "r"
73 #define OUTPUT_MODE "wb"
74
75 #ifndef isatty
76 #define isatty _isatty /* This is what Windows calls them, I'm told, */
77 #endif /* though in some environments they seem to */
78 /* be already defined, hence the #ifndefs. */
79 #ifndef fileno
80 #define fileno _fileno
81 #endif
82
83 /* A user sent this fix for Borland Builder 5 under Windows. */
84
85 #ifdef __BORLANDC__
86 #define _setmode(handle, mode) setmode(handle, mode)
87 #endif
88
89 /* Not Windows */
90
91 #else
92 #include <sys/time.h> /* These two includes are needed */
93 #include <sys/resource.h> /* for setrlimit(). */
94 #define INPUT_MODE "rb"
95 #define OUTPUT_MODE "wb"
96 #endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
116 #ifdef SUPPORT_PCRE8
117 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118 #endif
119 #ifdef SUPPORT_PCRE16
120 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121 #endif
122
123 /* We need access to some of the data tables that PCRE uses. So as not to have
124 to keep two copies, we include the source file here, changing the names of the
125 external symbols to prevent clashes. */
126
127 #define _pcre_ucp_gentype ucp_gentype
128 #define _pcre_ucp_typerange ucp_typerange
129 #define _pcre_utf8_table1 utf8_table1
130 #define _pcre_utf8_table1_size utf8_table1_size
131 #define _pcre_utf8_table2 utf8_table2
132 #define _pcre_utf8_table3 utf8_table3
133 #define _pcre_utf8_table4 utf8_table4
134 #define _pcre_utt utt
135 #define _pcre_utt_size utt_size
136 #define _pcre_utt_names utt_names
137 #define _pcre_OP_lengths OP_lengths
138
139 #include "pcre_tables.c"
140
141 /* The definition of the macro PRINTABLE, which determines whether to print an
142 output character as-is or as a hex value when showing compiled patterns, is
143 the same as in the printint.src file. We use it here in cases when the locale
144 has not been explicitly changed, so as to get consistent output from systems
145 that differ in their output from isprint() even in the "C" locale. */
146
147 #ifdef EBCDIC
148 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149 #else
150 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151 #endif
152
153 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
163 /* It is also possible, originally for the benefit of a version that was
164 imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165 without the interface to the DFA matcher (NODFA), and without the doublecheck
166 of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167 out the UTF8 support if PCRE is built without it. */
168
169 #ifndef SUPPORT_UTF8
170 #ifndef NOUTF8
171 #define NOUTF8
172 #endif
173 #endif
174
175 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177 only from one place and is handled differently). I couldn't dream up any way of
178 using a single macro to do this in a generic way, because of the many different
179 argument requirements. We know that at least one of SUPPORT_PCRE8 and
180 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181 use these in the definitions of generic macros. */
182
183 #ifdef SUPPORT_PCRE8
184
/* 8-bit forms of the mode-specific call macros. Each one expands to a single
expression with no trailing semicolon - an assignment to its first argument
when a result is wanted (PCHARS8, PCRE_COMPILE8, PCRE_EXEC8, PCRE_STUDY8), or a
bare call otherwise - so the invocation must supply the semicolon. String
arguments are cast to the pointer type that the 8-bit function takes.

NOTE(review): the p, pat and bptr arguments are not parenthesized in the
expansions, so the cast binds only to the first operand of a compound argument
(for example "ptr + n" becomes "((type)ptr) + n"). Whether any caller depends
on that is not visible from this section - check the callers before adding
parentheses. */

185 #define PCHARS8(lv, p, len, f) \
186 lv = pchars((pcre_uint8 *)p, len, f)
187
188 #define PCHARSV8(p, len, f) \
189 (void)pchars((pcre_uint8 *)p, len, f)
190
191 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
192 re = pcre_compile((char *)pat, options, error, erroffset, tables)
193
194 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
195 offsets, size_offsets) \
196 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
197 offsets, size_offsets)
198
199 #define PCRE_FREE_STUDY8(extra) \
200 pcre_free_study(extra)
201
202 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
203 pcre_pattern_to_host_byte_order(re, extra, tables)
204
205 #define PCRE_STUDY8(extra, re, options, error) \
206 extra = pcre_study(re, options, error)
207
208 #endif /* SUPPORT_PCRE8 */
209
210
211 #ifdef SUPPORT_PCRE16
212
/* 16-bit forms of the mode-specific call macros, mirroring the 8-bit set but
calling the pcre16_xxx functions and pchars16(). Each one expands to a single
expression with no trailing semicolon - an assignment to its first argument
when a result is wanted, or a bare call otherwise - so the invocation must
supply the semicolon. String arguments are cast to PCRE_SPTR16.

NOTE(review): the p, pat and bptr arguments are not parenthesized in the
expansions, so the PCRE_SPTR16 cast binds only to the first operand of a
compound argument; any "+ n" that follows is then applied to the cast pointer
rather than to the original one. Whether any caller depends on that is not
visible from this section - check the callers before adding parentheses. */

213 #define PCHARS16(lv, p, len, f) \
214 lv = pchars16((PCRE_SPTR16)p, len, f)
215
216 #define PCHARSV16(p, len, f) \
217 (void)pchars16((PCRE_SPTR16)p, len, f)
218
219 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
220 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
221
222 #define PCRE_FREE_STUDY16(extra) \
223 pcre16_free_study(extra)
224
225 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
226 offsets, size_offsets) \
227 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
228 options, offsets, size_offsets)
229
230 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
231 pcre16_pattern_to_host_byte_order(re, extra, tables)
232
233 #define PCRE_STUDY16(extra, re, options, error) \
234 extra = pcre16_study(re, options, error)
235
236 #endif /* SUPPORT_PCRE16 */
237
238
239 /* ----- Both modes are supported; a runtime test is needed ----- */
240
241 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
242
/* Generic call macros for a build that supports both widths. Each one selects
the 16-bit or the 8-bit form at run time according to use_pcre16, passing its
arguments through unchanged. The bodies are wrapped in do { ... } while (0) so
that an invocation followed by a semicolon is always exactly one statement: it
can be the unbraced body of an if/else or a loop without provoking
dangling-else diagnostics, and nothing written after the invocation can attach
itself to the else arm alone. As before, the invocation supplies the
semicolon. */

#define PCHARS(lv, p, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, len, f); \
    else \
      PCHARS8(lv, p, len, f); \
  } while (0)

#define PCHARSV(p, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, len, f); \
    else \
      PCHARSV8(p, len, f); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)
287
288 /* ----- Only 8-bit mode is supported ----- */
289
290 #elif defined SUPPORT_PCRE8
291 #define PCHARS PCHARS8
292 #define PCHARSV PCHARSV8
293 #define PCRE_COMPILE PCRE_COMPILE8
294 #define PCRE_EXEC PCRE_EXEC8
295 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
296 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
297 #define PCRE_STUDY PCRE_STUDY8
298
299 /* ----- Only 16-bit mode is supported ----- */
300
301 #else
302 #define PCHARS PCHARS16
303 #define PCHARSV PCHARSV16
304 #define PCRE_COMPILE PCRE_COMPILE16
305 #define PCRE_EXEC PCRE_EXEC16
306 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
307 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
308 #define PCRE_STUDY PCRE_STUDY16
309 #endif
310
311 /* ----- End of mode-specific function call macros ----- */
312
313
314 /* Other parameters */
315
316 #ifndef CLOCKS_PER_SEC
317 #ifdef CLK_TCK
318 #define CLOCKS_PER_SEC CLK_TCK
319 #else
320 #define CLOCKS_PER_SEC 100
321 #endif
322 #endif
323
324 /* This is the default loop count for timing. */
325
326 #define LOOPREPEAT 500000
327
328 /* Static variables */
329
330 static FILE *outfile;
331 static int log_store = 0;
332 static int callout_count;
333 static int callout_extra;
334 static int callout_fail_count;
335 static int callout_fail_id;
336 static int debug_lengths;
337 static int first_callout;
338 static int locale_set = 0;
339 static int show_malloc;
340 static int use_utf;
341 static size_t gotten_store;
342 static size_t first_gotten_store = 0;
343 static const unsigned char *last_callout_mark = NULL;
344
345 /* The buffers grow automatically if very long input lines are encountered. */
346
347 static int buffer_size = 50000;
348 static pcre_uint8 *buffer = NULL;
349 static pcre_uint8 *dbuffer = NULL;
350 static pcre_uint8 *pbuffer = NULL;
351
352 /* Another buffer is needed for translation to 16-bit character strings. It
353 will be obtained and extended as required. */
354
355 #ifdef SUPPORT_PCRE16
356 static int buffer16_size = 0;
357 static pcre_uint16 *buffer16 = NULL;
358
359 /* We need the table of operator lengths that is used for 16-bit compiling, in
360 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
361 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
362 appropriately for the 16-bit world. Just as a safety check, make sure that
363 COMPILE_PCRE16 is *not* set. */
364
365 #ifdef COMPILE_PCRE16
366 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
367 #endif
368
369 #if LINK_SIZE == 2
370 #undef LINK_SIZE
371 #define LINK_SIZE 1
372 #elif LINK_SIZE == 3 || LINK_SIZE == 4
373 #undef LINK_SIZE
374 #define LINK_SIZE 2
375 #else
376 #error LINK_SIZE must be either 2, 3, or 4
377 #endif
378
379 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
380
381 #endif /* SUPPORT_PCRE16 */
382
383 /* If we have 8-bit support, default use_pcre16 to false; if there is also
384 16-bit support, it can be changed by an option. If there is no 8-bit support,
385 there must be 16-bit support, so default it to 1. */
386
387 #ifdef SUPPORT_PCRE8
388 static int use_pcre16 = 0;
389 #else
390 static int use_pcre16 = 1;
391 #endif
392
393 /* Textual explanations for runtime error codes */
394
395 static const char *errtexts[] = {
396 NULL, /* 0 is no error */
397 NULL, /* NOMATCH is handled specially */
398 "NULL argument passed",
399 "bad option value",
400 "magic number missing",
401 "unknown opcode - pattern overwritten?",
402 "no more memory",
403 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
404 "match limit exceeded",
405 "callout error code",
406 NULL, /* BADUTF8 is handled specially */
407 "bad UTF-8 offset",
408 NULL, /* PARTIAL is handled specially */
409 "not used - internal error",
410 "internal error - pattern overwritten?",
411 "bad count value",
412 "item unsupported for DFA matching",
413 "backreference condition or recursion test not supported for DFA matching",
414 "match limit not supported for DFA matching",
415 "workspace size exceeded in DFA matching",
416 "too much recursion for DFA matching",
417 "recursion limit exceeded",
418 "not used - internal error",
419 "invalid combination of newline options",
420 "bad offset value",
421 NULL, /* SHORTUTF8 is handled specially */
422 "nested recursion at the same subject position",
423 "JIT stack limit reached",
424 "pattern compiled in wrong mode (8-bit/16-bit error)"
425 };
426
427
428 /*************************************************
429 * Alternate character tables *
430 *************************************************/
431
432 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
433 using the default tables of the library. However, the T option can be used to
434 select alternate sets of tables, for different kinds of testing. Note also that
435 the L (locale) option also adjusts the tables. */
436
437 /* This is the set of tables distributed as default with PCRE. It recognizes
438 only ASCII characters. */
439
440 static const pcre_uint8 tables0[] = {
441
442 /* This table is a lower casing table. */
443
444 0, 1, 2, 3, 4, 5, 6, 7,
445 8, 9, 10, 11, 12, 13, 14, 15,
446 16, 17, 18, 19, 20, 21, 22, 23,
447 24, 25, 26, 27, 28, 29, 30, 31,
448 32, 33, 34, 35, 36, 37, 38, 39,
449 40, 41, 42, 43, 44, 45, 46, 47,
450 48, 49, 50, 51, 52, 53, 54, 55,
451 56, 57, 58, 59, 60, 61, 62, 63,
452 64, 97, 98, 99,100,101,102,103,
453 104,105,106,107,108,109,110,111,
454 112,113,114,115,116,117,118,119,
455 120,121,122, 91, 92, 93, 94, 95,
456 96, 97, 98, 99,100,101,102,103,
457 104,105,106,107,108,109,110,111,
458 112,113,114,115,116,117,118,119,
459 120,121,122,123,124,125,126,127,
460 128,129,130,131,132,133,134,135,
461 136,137,138,139,140,141,142,143,
462 144,145,146,147,148,149,150,151,
463 152,153,154,155,156,157,158,159,
464 160,161,162,163,164,165,166,167,
465 168,169,170,171,172,173,174,175,
466 176,177,178,179,180,181,182,183,
467 184,185,186,187,188,189,190,191,
468 192,193,194,195,196,197,198,199,
469 200,201,202,203,204,205,206,207,
470 208,209,210,211,212,213,214,215,
471 216,217,218,219,220,221,222,223,
472 224,225,226,227,228,229,230,231,
473 232,233,234,235,236,237,238,239,
474 240,241,242,243,244,245,246,247,
475 248,249,250,251,252,253,254,255,
476
477 /* This table is a case flipping table. */
478
479 0, 1, 2, 3, 4, 5, 6, 7,
480 8, 9, 10, 11, 12, 13, 14, 15,
481 16, 17, 18, 19, 20, 21, 22, 23,
482 24, 25, 26, 27, 28, 29, 30, 31,
483 32, 33, 34, 35, 36, 37, 38, 39,
484 40, 41, 42, 43, 44, 45, 46, 47,
485 48, 49, 50, 51, 52, 53, 54, 55,
486 56, 57, 58, 59, 60, 61, 62, 63,
487 64, 97, 98, 99,100,101,102,103,
488 104,105,106,107,108,109,110,111,
489 112,113,114,115,116,117,118,119,
490 120,121,122, 91, 92, 93, 94, 95,
491 96, 65, 66, 67, 68, 69, 70, 71,
492 72, 73, 74, 75, 76, 77, 78, 79,
493 80, 81, 82, 83, 84, 85, 86, 87,
494 88, 89, 90,123,124,125,126,127,
495 128,129,130,131,132,133,134,135,
496 136,137,138,139,140,141,142,143,
497 144,145,146,147,148,149,150,151,
498 152,153,154,155,156,157,158,159,
499 160,161,162,163,164,165,166,167,
500 168,169,170,171,172,173,174,175,
501 176,177,178,179,180,181,182,183,
502 184,185,186,187,188,189,190,191,
503 192,193,194,195,196,197,198,199,
504 200,201,202,203,204,205,206,207,
505 208,209,210,211,212,213,214,215,
506 216,217,218,219,220,221,222,223,
507 224,225,226,227,228,229,230,231,
508 232,233,234,235,236,237,238,239,
509 240,241,242,243,244,245,246,247,
510 248,249,250,251,252,253,254,255,
511
512 /* This table contains bit maps for various character classes. Each map is 32
513 bytes long and the bits run from the least significant end of each byte. The
514 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
515 graph, print, punct, and cntrl. Other classes are built from combinations. */
516
517 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
518 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
521
522 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
523 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
524 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
525 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
526
527 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
528 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
529 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
530 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
531
532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
535 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
536
537 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
538 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
539 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
541
542 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
543 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
544 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
545 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546
547 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
548 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
549 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551
552 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
553 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
554 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556
557 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
558 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
559 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561
562 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
563 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
564 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566
567 /* This table identifies various classes of character by individual bits:
568 0x01 white space character
569 0x02 letter
570 0x04 decimal digit
571 0x08 hexadecimal digit
572 0x10 alphanumeric or '_'
573 0x80 regular expression metacharacter or binary zero
574 */
575
576 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
577 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
580 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
581 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
582 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
583 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
584 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
585 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
586 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
587 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
588 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
589 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
590 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
591 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
608
609 /* This is a set of tables that came originally from a Windows user. It seems to
610 be at least an approximation of ISO 8859. In particular, there are characters
611 greater than 128 that are marked as spaces, letters, etc. */
612
613 static const pcre_uint8 tables1[] = {
614 0,1,2,3,4,5,6,7,
615 8,9,10,11,12,13,14,15,
616 16,17,18,19,20,21,22,23,
617 24,25,26,27,28,29,30,31,
618 32,33,34,35,36,37,38,39,
619 40,41,42,43,44,45,46,47,
620 48,49,50,51,52,53,54,55,
621 56,57,58,59,60,61,62,63,
622 64,97,98,99,100,101,102,103,
623 104,105,106,107,108,109,110,111,
624 112,113,114,115,116,117,118,119,
625 120,121,122,91,92,93,94,95,
626 96,97,98,99,100,101,102,103,
627 104,105,106,107,108,109,110,111,
628 112,113,114,115,116,117,118,119,
629 120,121,122,123,124,125,126,127,
630 128,129,130,131,132,133,134,135,
631 136,137,138,139,140,141,142,143,
632 144,145,146,147,148,149,150,151,
633 152,153,154,155,156,157,158,159,
634 160,161,162,163,164,165,166,167,
635 168,169,170,171,172,173,174,175,
636 176,177,178,179,180,181,182,183,
637 184,185,186,187,188,189,190,191,
638 224,225,226,227,228,229,230,231,
639 232,233,234,235,236,237,238,239,
640 240,241,242,243,244,245,246,215,
641 248,249,250,251,252,253,254,223,
642 224,225,226,227,228,229,230,231,
643 232,233,234,235,236,237,238,239,
644 240,241,242,243,244,245,246,247,
645 248,249,250,251,252,253,254,255,
646 0,1,2,3,4,5,6,7,
647 8,9,10,11,12,13,14,15,
648 16,17,18,19,20,21,22,23,
649 24,25,26,27,28,29,30,31,
650 32,33,34,35,36,37,38,39,
651 40,41,42,43,44,45,46,47,
652 48,49,50,51,52,53,54,55,
653 56,57,58,59,60,61,62,63,
654 64,97,98,99,100,101,102,103,
655 104,105,106,107,108,109,110,111,
656 112,113,114,115,116,117,118,119,
657 120,121,122,91,92,93,94,95,
658 96,65,66,67,68,69,70,71,
659 72,73,74,75,76,77,78,79,
660 80,81,82,83,84,85,86,87,
661 88,89,90,123,124,125,126,127,
662 128,129,130,131,132,133,134,135,
663 136,137,138,139,140,141,142,143,
664 144,145,146,147,148,149,150,151,
665 152,153,154,155,156,157,158,159,
666 160,161,162,163,164,165,166,167,
667 168,169,170,171,172,173,174,175,
668 176,177,178,179,180,181,182,183,
669 184,185,186,187,188,189,190,191,
670 224,225,226,227,228,229,230,231,
671 232,233,234,235,236,237,238,239,
672 240,241,242,243,244,245,246,215,
673 248,249,250,251,252,253,254,223,
674 192,193,194,195,196,197,198,199,
675 200,201,202,203,204,205,206,207,
676 208,209,210,211,212,213,214,247,
677 216,217,218,219,220,221,222,255,
678 0,62,0,0,1,0,0,0,
679 0,0,0,0,0,0,0,0,
680 32,0,0,0,1,0,0,0,
681 0,0,0,0,0,0,0,0,
682 0,0,0,0,0,0,255,3,
683 126,0,0,0,126,0,0,0,
684 0,0,0,0,0,0,0,0,
685 0,0,0,0,0,0,0,0,
686 0,0,0,0,0,0,255,3,
687 0,0,0,0,0,0,0,0,
688 0,0,0,0,0,0,12,2,
689 0,0,0,0,0,0,0,0,
690 0,0,0,0,0,0,0,0,
691 254,255,255,7,0,0,0,0,
692 0,0,0,0,0,0,0,0,
693 255,255,127,127,0,0,0,0,
694 0,0,0,0,0,0,0,0,
695 0,0,0,0,254,255,255,7,
696 0,0,0,0,0,4,32,4,
697 0,0,0,128,255,255,127,255,
698 0,0,0,0,0,0,255,3,
699 254,255,255,135,254,255,255,7,
700 0,0,0,0,0,4,44,6,
701 255,255,127,255,255,255,127,255,
702 0,0,0,0,254,255,255,255,
703 255,255,255,255,255,255,255,127,
704 0,0,0,0,254,255,255,255,
705 255,255,255,255,255,255,255,255,
706 0,2,0,0,255,255,255,255,
707 255,255,255,255,255,255,255,127,
708 0,0,0,0,255,255,255,255,
709 255,255,255,255,255,255,255,255,
710 0,0,0,0,254,255,0,252,
711 1,0,0,248,1,0,0,120,
712 0,0,0,0,254,255,255,255,
713 0,0,128,0,0,0,128,0,
714 255,255,255,255,0,0,0,0,
715 0,0,0,0,0,0,0,128,
716 255,255,255,255,0,0,0,0,
717 0,0,0,0,0,0,0,0,
718 128,0,0,0,0,0,0,0,
719 0,1,1,0,1,1,0,0,
720 0,0,0,0,0,0,0,0,
721 0,0,0,0,0,0,0,0,
722 1,0,0,0,128,0,0,0,
723 128,128,128,128,0,0,128,0,
724 28,28,28,28,28,28,28,28,
725 28,28,0,0,0,0,0,128,
726 0,26,26,26,26,26,26,18,
727 18,18,18,18,18,18,18,18,
728 18,18,18,18,18,18,18,18,
729 18,18,18,128,128,0,128,16,
730 0,26,26,26,26,26,26,18,
731 18,18,18,18,18,18,18,18,
732 18,18,18,18,18,18,18,18,
733 18,18,18,128,128,0,0,0,
734 0,0,0,0,0,1,0,0,
735 0,0,0,0,0,0,0,0,
736 0,0,0,0,0,0,0,0,
737 0,0,0,0,0,0,0,0,
738 1,0,0,0,0,0,0,0,
739 0,0,18,0,0,0,0,0,
740 0,0,20,20,0,18,0,0,
741 0,20,18,0,0,0,0,0,
742 18,18,18,18,18,18,18,18,
743 18,18,18,18,18,18,18,18,
744 18,18,18,18,18,18,18,0,
745 18,18,18,18,18,18,18,18,
746 18,18,18,18,18,18,18,18,
747 18,18,18,18,18,18,18,18,
748 18,18,18,18,18,18,18,0,
749 18,18,18,18,18,18,18,18
750 };
751
752
753
754
755 #ifndef HAVE_STRERROR
756 /*************************************************
757 * Provide strerror() for non-ANSI libraries *
758 *************************************************/
759
760 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
761 in their libraries, but can provide the same facility by this simple
762 alternative function. */
763
/* The traditional error-message table and its length, supplied by the C
library on systems that lack strerror(). */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): looks the error number up in sys_errlist[], giving
a fixed text for numbers that lie outside the table. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
773 #endif /* HAVE_STRERROR */
774
775
776 /*************************************************
777 * JIT memory callback *
778 *************************************************/
779
780 static pcre_jit_stack* jit_callback(void *arg)
781 {
782 return (pcre_jit_stack *)arg;
783 }
784
785
786 /*************************************************
787 * Convert UTF-8 string to value *
788 *************************************************/
789
790 /* This function takes one or more bytes that represents a UTF-8 character,
791 and returns the value of the character.
792
793 Argument:
794 utf8bytes a pointer to the byte vector
795 vptr a pointer to an int to receive the value
796
797 Returns: > 0 => the number of bytes consumed
798 -6 to 0 => malformed UTF-8 character at offset = (-return)
799 */
800
801 #if !defined NOUTF8
802
803 static int
804 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
805 {
806 int c = *utf8bytes++;
807 int d = c;
808 int i, j, s;
809
810 for (i = -1; i < 6; i++) /* i is number of additional bytes */
811 {
812 if ((d & 0x80) == 0) break;
813 d <<= 1;
814 }
815
816 if (i == -1) { *vptr = c; return 1; } /* ascii character */
817 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
818
819 /* i now has a value in the range 1-5 */
820
821 s = 6*i;
822 d = (c & utf8_table3[i]) << s;
823
824 for (j = 0; j < i; j++)
825 {
826 c = *utf8bytes++;
827 if ((c & 0xc0) != 0x80) return -(j+1);
828 s -= 6;
829 d |= (c & 0x3f) << s;
830 }
831
832 /* Check that encoding was the correct unique one */
833
834 for (j = 0; j < utf8_table1_size; j++)
835 if (d <= utf8_table1[j]) break;
836 if (j != i) return -(i+1);
837
838 /* Valid value */
839
840 *vptr = d;
841 return i+1;
842 }
843
844 #endif
845
846
847
848 /*************************************************
849 * Convert character value to UTF-8 *
850 *************************************************/
851
852 /* This function takes an integer value in the range 0 - 0x7fffffff
853 and encodes it as a UTF-8 character in 1 to 6 bytes.
854
855 Arguments:
856 cvalue the character value
857 utf8bytes pointer to buffer for result - at least 6 bytes long
858
859 Returns: number of bytes placed in the buffer
860 */
861
862 #if !defined NOUTF8
863
864 static int
865 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
866 {
867 register int i, j;
868 for (i = 0; i < utf8_table1_size; i++)
869 if (cvalue <= utf8_table1[i]) break;
870 utf8bytes += i;
871 for (j = i; j > 0; j--)
872 {
873 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
874 cvalue >>= 6;
875 }
876 *utf8bytes = utf8_table2[i] | cvalue;
877 return i + 1;
878 }
879
880 #endif
881
882
883
884 #ifdef SUPPORT_PCRE16
885 /*************************************************
886 * Convert a string to 16-bit *
887 *************************************************/
888
889 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
890 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
891 double: values up to 0xffff use 1 to 3 bytes in UTF-8 and 2 bytes in UTF-16;
892 higher values use 4 bytes in UTF-8 and 4 bytes (a surrogate pair) in UTF-16.
893 The result is always left in buffer16.
894
895 Arguments:
896 p points to a byte string
897 utf true if UTF-8 (to be converted to UTF-16)
898 len number of bytes in the string (excluding trailing zero)
899
900 Returns: number of 16-bit data items used (excluding trailing zero)
901 OR -1 if a UTF-8 string is malformed
902 */
903
904 static int
905 to16(pcre_uint8 *p, int utf, int len)
906 {
907 pcre_uint16 *pp;
908
909 if (buffer16_size < 2*len + 2)
910 {
911 if (buffer16 != NULL) free(buffer16);
912 buffer16_size = 2*len + 2;
913 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
914 if (buffer16 == NULL)
915 {
916 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
917 exit(1);
918 }
919 }
920
921 pp = buffer16;
922
923 if (!utf)
924 {
925 while (len-- > 0) *pp++ = *p++;
926 }
927
928 else
929 {
930 int c;
931 while (len > 0)
932 {
933 int chlen = utf82ord(p, &c);
934 if (chlen <= 0) return -1;
935 p += chlen;
936 len -= chlen;
937 if (c < 0x10000) *pp++ = c; else
938 {
939 c -= 0x10000;
940 *pp++ = 0xD800 | (c >> 10);
941 *pp++ = 0xDC00 | (c & 0x3ff);
942 }
943 }
944 }
945
946 *pp = 0;
947 return pp - buffer16;
948 }
949 #endif
950
951
952 /*************************************************
953 * Read or extend an input line *
954 *************************************************/
955
956 /* Input lines are read into buffer, but both patterns and data lines can be
957 continued over multiple input lines. In addition, if the buffer fills up, we
958 want to automatically expand it so as to be able to handle extremely large
959 lines that are needed for certain stress tests. When the input buffer is
960 expanded, the other two buffers must also be expanded likewise, and the
961 contents of pbuffer, which are a copy of the input for callouts, must be
962 preserved (for when expansion happens for a data line). This is not the most
963 optimal way of handling this, but hey, this is just a test program!
964
965 Arguments:
966 f the file to read
967 start where in buffer to start (this *must* be within buffer)
968 prompt for stdin or readline()
969
970 Returns: pointer to the start of new data
971 could be a copy of start, or could be moved
972 NULL if no data read and EOF reached
973 */
974
975 static pcre_uint8 *
976 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
977 {
978 pcre_uint8 *here = start;
979
980 for (;;)
981 {
982 int rlen = (int)(buffer_size - (here - buffer));
983
984 if (rlen > 1000)
985 {
986 int dlen;
987
988 /* If libreadline support is required, use readline() to read a line if the
989 input is a terminal. Note that readline() removes the trailing newline, so
990 we must put it back again, to be compatible with fgets(). */
991
992 #ifdef SUPPORT_LIBREADLINE
993 if (isatty(fileno(f)))
994 {
995 size_t len;
996 char *s = readline(prompt);
997 if (s == NULL) return (here == start)? NULL : start;
998 len = strlen(s);
999 if (len > 0) add_history(s);
1000 if (len > rlen - 1) len = rlen - 1;
1001 memcpy(here, s, len);
1002 here[len] = '\n';
1003 here[len+1] = 0;
1004 free(s);
1005 }
1006 else
1007 #endif
1008
1009 /* Read the next line by normal means, prompting if the file is stdin. */
1010
1011 {
1012 if (f == stdin) printf("%s", prompt);
1013 if (fgets((char *)here, rlen, f) == NULL)
1014 return (here == start)? NULL : start;
1015 }
1016
1017 dlen = (int)strlen((char *)here);
1018 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1019 here += dlen;
1020 }
1021
1022 else
1023 {
1024 int new_buffer_size = 2*buffer_size;
1025 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1026 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1027 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1028
1029 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1030 {
1031 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1032 exit(1);
1033 }
1034
1035 memcpy(new_buffer, buffer, buffer_size);
1036 memcpy(new_pbuffer, pbuffer, buffer_size);
1037
1038 buffer_size = new_buffer_size;
1039
1040 start = new_buffer + (start - buffer);
1041 here = new_buffer + (here - buffer);
1042
1043 free(buffer);
1044 free(dbuffer);
1045 free(pbuffer);
1046
1047 buffer = new_buffer;
1048 dbuffer = new_dbuffer;
1049 pbuffer = new_pbuffer;
1050 }
1051 }
1052
1053 return NULL; /* Control never gets here */
1054 }
1055
1056
1057
1058 /*************************************************
1059 * Read number from string *
1060 *************************************************/
1061
1062 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1063 around with conditional compilation, just do the job by hand. It is only used
1064 for unpicking arguments, so just keep it simple.
1065
1066 Arguments:
1067 str string to be converted
1068 endptr where to put the end pointer
1069
1070 Returns: the unsigned long
1071 */
1072
1073 static int
1074 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1075 {
1076 int result = 0;
1077 while(*str != 0 && isspace(*str)) str++;
1078 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1079 *endptr = str;
1080 return(result);
1081 }
1082
1083
1084
1085 /*************************************************
1086 * Print one character *
1087 *************************************************/
1088
1089 /* Print a single character either literally, or as a hex escape. */
1090
1091 static int pchar(int c, FILE *f)
1092 {
1093 if (PRINTOK(c))
1094 {
1095 if (f != NULL) fprintf(f, "%c", c);
1096 return 1;
1097 }
1098
1099 if (c < 0x100)
1100 {
1101 if (use_utf)
1102 {
1103 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1104 return 6;
1105 }
1106 else
1107 {
1108 if (f != NULL) fprintf(f, "\\x%02x", c);
1109 return 4;
1110 }
1111 }
1112
1113 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1114 return (c <= 0x000000ff)? 6 :
1115 (c <= 0x00000fff)? 7 :
1116 (c <= 0x0000ffff)? 8 :
1117 (c <= 0x000fffff)? 9 : 10;
1118 }
1119
1120
1121
1122 #ifdef SUPPORT_PCRE8
1123 /*************************************************
1124 * Print 8-bit character string *
1125 *************************************************/
1126
1127 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1128 If handed a NULL file, just counts chars without printing. */
1129
1130 static int pchars(pcre_uint8 *p, int length, FILE *f)
1131 {
1132 int c = 0;
1133 int yield = 0;
1134
1135 while (length-- > 0)
1136 {
1137 #if !defined NOUTF8
1138 if (use_utf)
1139 {
1140 int rc = utf82ord(p, &c);
1141 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1142 {
1143 length -= rc - 1;
1144 p += rc;
1145 yield += pchar(c, f);
1146 continue;
1147 }
1148 }
1149 #endif
1150 c = *p++;
1151 yield += pchar(c, f);
1152 }
1153
1154 return yield;
1155 }
1156 #endif
1157
1158
1159
1160 #ifdef SUPPORT_PCRE16
1161 /*************************************************
1162 * Print 16-bit character string *
1163 *************************************************/
1164
1165 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1166 If handed a NULL file, just counts chars without printing. */
1167
1168 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1169 {
1170 int yield = 0;
1171
1172 while (length-- > 0)
1173 {
1174 int c = *p++ & 0xffff;
1175 #if !defined NOUTF8
1176 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1177 {
1178 int d = *p & 0xffff;
1179 if (d >= 0xDC00 && d < 0xDFFF)
1180 {
1181 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1182 length--;
1183 p++;
1184 }
1185 }
1186 #endif
1187 yield += pchar(c, f);
1188 }
1189
1190 return yield;
1191 }
1192 #endif
1193
1194
1195
1196 /*************************************************
1197 * Callout function *
1198 *************************************************/
1199
1200 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1201 the match. Yield zero unless more callouts than the fail count, or the callout
1202 data is not zero. */
1203
1204 static int callout(pcre_callout_block *cb)
1205 {
1206 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1207 int i, pre_start, post_start, subject_length;
1208
1209 if (callout_extra)
1210 {
1211 fprintf(f, "Callout %d: last capture = %d\n",
1212 cb->callout_number, cb->capture_last);
1213
1214 for (i = 0; i < cb->capture_top * 2; i += 2)
1215 {
1216 if (cb->offset_vector[i] < 0)
1217 fprintf(f, "%2d: <unset>\n", i/2);
1218 else
1219 {
1220 fprintf(f, "%2d: ", i/2);
1221 PCHARSV(cb->subject + cb->offset_vector[i],
1222 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1223 fprintf(f, "\n");
1224 }
1225 }
1226 }
1227
1228 /* Re-print the subject in canonical form, the first time or if giving full
1229 datails. On subsequent calls in the same match, we use pchars just to find the
1230 printed lengths of the substrings. */
1231
1232 if (f != NULL) fprintf(f, "--->");
1233
1234 PCHARS(pre_start, cb->subject, cb->start_match, f);
1235 PCHARS(post_start, cb->subject + cb->start_match,
1236 cb->current_position - cb->start_match, f);
1237
1238 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1239
1240 PCHARSV(cb->subject + cb->current_position,
1241 cb->subject_length - cb->current_position, f);
1242
1243 if (f != NULL) fprintf(f, "\n");
1244
1245 /* Always print appropriate indicators, with callout number if not already
1246 shown. For automatic callouts, show the pattern offset. */
1247
1248 if (cb->callout_number == 255)
1249 {
1250 fprintf(outfile, "%+3d ", cb->pattern_position);
1251 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1252 }
1253 else
1254 {
1255 if (callout_extra) fprintf(outfile, " ");
1256 else fprintf(outfile, "%3d ", cb->callout_number);
1257 }
1258
1259 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1260 fprintf(outfile, "^");
1261
1262 if (post_start > 0)
1263 {
1264 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1265 fprintf(outfile, "^");
1266 }
1267
1268 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1269 fprintf(outfile, " ");
1270
1271 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1272 pbuffer + cb->pattern_position);
1273
1274 fprintf(outfile, "\n");
1275 first_callout = 0;
1276
1277 if (cb->mark != last_callout_mark)
1278 {
1279 fprintf(outfile, "Latest Mark: %s\n",
1280 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1281 last_callout_mark = cb->mark;
1282 }
1283
1284 if (cb->callout_data != NULL)
1285 {
1286 int callout_data = *((int *)(cb->callout_data));
1287 if (callout_data != 0)
1288 {
1289 fprintf(outfile, "Callout data = %d\n", callout_data);
1290 return callout_data;
1291 }
1292 }
1293
1294 return (cb->callout_number != callout_fail_id)? 0 :
1295 (++callout_count >= callout_fail_count)? 1 : 0;
1296 }
1297
1298
1299 /*************************************************
1300 * Local malloc functions *
1301 *************************************************/
1302
1303 /* Alternative malloc function, to test functionality and save the size of a
1304 compiled re, which is the first store request that pcre_compile() makes. The
1305 show_malloc variable is set only during matching. */
1306
1307 static void *new_malloc(size_t size)
1308 {
1309 void *block = malloc(size);
1310 gotten_store = size;
1311 if (first_gotten_store == 0) first_gotten_store = size;
1312 if (show_malloc)
1313 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1314 return block;
1315 }
1316
1317 static void new_free(void *block)
1318 {
1319 if (show_malloc)
1320 fprintf(outfile, "free %p\n", block);
1321 free(block);
1322 }
1323
1324 /* For recursion malloc/free, to test stacking calls */
1325
1326 static void *stack_malloc(size_t size)
1327 {
1328 void *block = malloc(size);
1329 if (show_malloc)
1330 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1331 return block;
1332 }
1333
1334 static void stack_free(void *block)
1335 {
1336 if (show_malloc)
1337 fprintf(outfile, "stack_free %p\n", block);
1338 free(block);
1339 }
1340
1341
1342 /*************************************************
1343 * Call pcre_fullinfo() *
1344 *************************************************/
1345
1346 /* Get one piece of information from the pcre_fullinfo() function. When only
1347 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1348 value, but the code is defensive. */
1349
1350 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1351 {
1352 int rc;
1353
1354 if (use_pcre16)
1355 #ifdef SUPPORT_PCRE16
1356 rc = pcre16_fullinfo(re, study, option, ptr);
1357 #else
1358 rc = PCRE_ERROR_BADMODE;
1359 #endif
1360 else
1361 #ifdef SUPPORT_PCRE8
1362 rc = pcre_fullinfo(re, study, option, ptr);
1363 #else
1364 rc = PCRE_ERROR_BADMODE;
1365 #endif
1366
1367 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1368 use_pcre16? "16" : "", option);
1369 }
1370
1371
1372
1373 /*************************************************
1374 * Swap byte functions *
1375 *************************************************/
1376
1377 /* The following functions swap the bytes of a pcre_uint16
1378 and pcre_uint32 value.
1379
1380 Arguments:
1381 value any number
1382
1383 Returns: the byte swapped value
1384 */
1385
1386 static pcre_uint32
1387 swap_uint32(pcre_uint32 value)
1388 {
1389 return ((value & 0x000000ff) << 24) |
1390 ((value & 0x0000ff00) << 8) |
1391 ((value & 0x00ff0000) >> 8) |
1392 (value >> 24);
1393 }
1394
1395 static pcre_uint16
1396 swap_uint16(pcre_uint16 value)
1397 {
1398 return (value >> 8) | (value << 8);
1399 }
1400
1401
1402
1403 /*************************************************
1404 * Flip bytes in a compiled pattern *
1405 *************************************************/
1406
1407 /* This function is called if the 'F' option was present on a pattern that is
1408 to be written to a file. We flip the bytes of all the integer fields in the
1409 regex data block and the study block. In 16-bit mode this also flips relevant
1410 bytes in the pattern itself. This is to make it possible to test PCRE's
1411 ability to reload byte-flipped patterns, e.g. those compiled on a different
1412 architecture. */
1413
1414 static void
1415 regexflip(pcre *ere, pcre_extra *extra)
1416 {
1417 real_pcre *re = (real_pcre *)ere;
1418 int op;
1419
1420 #ifdef SUPPORT_PCRE16
1421 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1422 int length = re->name_count * re->name_entry_size;
1423 #ifdef SUPPORT_UTF
1424 BOOL utf = (re->options & PCRE_UTF16) != 0;
1425 BOOL utf16_char = FALSE;
1426 #endif /* SUPPORT_UTF */
1427 #endif /* SUPPORT_PCRE16 */
1428
1429 /* Always flip the bytes in the main data block and study blocks. */
1430
1431 re->magic_number = REVERSED_MAGIC_NUMBER;
1432 re->size = swap_uint32(re->size);
1433 re->options = swap_uint32(re->options);
1434 re->flags = swap_uint16(re->flags);
1435 re->top_bracket = swap_uint16(re->top_bracket);
1436 re->top_backref = swap_uint16(re->top_backref);
1437 re->first_char = swap_uint16(re->first_char);
1438 re->req_char = swap_uint16(re->req_char);
1439 re->name_table_offset = swap_uint16(re->name_table_offset);
1440 re->name_entry_size = swap_uint16(re->name_entry_size);
1441 re->name_count = swap_uint16(re->name_count);
1442
1443 if (extra != NULL)
1444 {
1445 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1446 rsd->size = swap_uint32(rsd->size);
1447 rsd->flags = swap_uint32(rsd->flags);
1448 rsd->minlength = swap_uint32(rsd->minlength);
1449 }
1450
1451 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1452 in the name table, if present, and then in the pattern itself. */
1453
1454 #ifdef SUPPORT_PCRE16
1455 if (!use_pcre16) return;
1456
1457 while(TRUE)
1458 {
1459 /* Swap previous characters. */
1460 while (length-- > 0)
1461 {
1462 *ptr = swap_uint16(*ptr);
1463 ptr++;
1464 }
1465 #ifdef SUPPORT_UTF
1466 if (utf16_char)
1467 {
1468 if ((ptr[-1] & 0xfc00) == 0xd800)
1469 {
1470 /* We know that there is only one extra character in UTF-16. */
1471 *ptr = swap_uint16(*ptr);
1472 ptr++;
1473 }
1474 }
1475 utf16_char = FALSE;
1476 #endif /* SUPPORT_UTF */
1477
1478 /* Get next opcode. */
1479
1480 length = 0;
1481 op = *ptr;
1482 *ptr++ = swap_uint16(op);
1483
1484 switch (op)
1485 {
1486 case OP_END:
1487 return;
1488
1489 case OP_CHAR:
1490 case OP_CHARI:
1491 case OP_NOT:
1492 case OP_NOTI:
1493 case OP_STAR:
1494 case OP_MINSTAR:
1495 case OP_PLUS:
1496 case OP_MINPLUS:
1497 case OP_QUERY:
1498 case OP_MINQUERY:
1499 case OP_UPTO:
1500 case OP_MINUPTO:
1501 case OP_EXACT:
1502 case OP_POSSTAR:
1503 case OP_POSPLUS:
1504 case OP_POSQUERY:
1505 case OP_POSUPTO:
1506 case OP_STARI:
1507 case OP_MINSTARI:
1508 case OP_PLUSI:
1509 case OP_MINPLUSI:
1510 case OP_QUERYI:
1511 case OP_MINQUERYI:
1512 case OP_UPTOI:
1513 case OP_MINUPTOI:
1514 case OP_EXACTI:
1515 case OP_POSSTARI:
1516 case OP_POSPLUSI:
1517 case OP_POSQUERYI:
1518 case OP_POSUPTOI:
1519 case OP_NOTSTAR:
1520 case OP_NOTMINSTAR:
1521 case OP_NOTPLUS:
1522 case OP_NOTMINPLUS:
1523 case OP_NOTQUERY:
1524 case OP_NOTMINQUERY:
1525 case OP_NOTUPTO:
1526 case OP_NOTMINUPTO:
1527 case OP_NOTEXACT:
1528 case OP_NOTPOSSTAR:
1529 case OP_NOTPOSPLUS:
1530 case OP_NOTPOSQUERY:
1531 case OP_NOTPOSUPTO:
1532 case OP_NOTSTARI:
1533 case OP_NOTMINSTARI:
1534 case OP_NOTPLUSI:
1535 case OP_NOTMINPLUSI:
1536 case OP_NOTQUERYI:
1537 case OP_NOTMINQUERYI:
1538 case OP_NOTUPTOI:
1539 case OP_NOTMINUPTOI:
1540 case OP_NOTEXACTI:
1541 case OP_NOTPOSSTARI:
1542 case OP_NOTPOSPLUSI:
1543 case OP_NOTPOSQUERYI:
1544 case OP_NOTPOSUPTOI:
1545 #ifdef SUPPORT_UTF
1546 if (utf) utf16_char = TRUE;
1547 #endif
1548 length = OP_lengths16[op] - 1;
1549 break;
1550
1551 case OP_CLASS:
1552 case OP_NCLASS:
1553 /* Skip the character bit map. */
1554 ptr += 32/sizeof(pcre_uint16);
1555 length = 0;
1556 break;
1557
1558 case OP_XCLASS:
1559 /* Reverse the size of the XCLASS instance. */
1560 ptr++;
1561 *ptr = swap_uint16(*ptr);
1562 if (LINK_SIZE > 1)
1563 {
1564 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1565 ptr++;
1566 *ptr = swap_uint16(*ptr);
1567 }
1568 ptr++;
1569
1570 if (LINK_SIZE > 1)
1571 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1572 (1 + LINK_SIZE + 1);
1573 else
1574 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1575
1576 op = *ptr;
1577 *ptr = swap_uint16(op);
1578 if ((op & XCL_MAP) != 0)
1579 {
1580 /* Skip the character bit map. */
1581 ptr += 32/sizeof(pcre_uint16);
1582 length -= 32/sizeof(pcre_uint16);
1583 }
1584 break;
1585
1586 default:
1587 length = OP_lengths16[op] - 1;
1588 break;
1589 }
1590 }
1591 /* Control should never reach here in 16 bit mode. */
1592 #endif /* SUPPORT_PCRE16 */
1593 }
1594
1595
1596
1597 /*************************************************
1598 * Check match or recursion limit *
1599 *************************************************/
1600
1601 static int
1602 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1603 int start_offset, int options, int *use_offsets, int use_size_offsets,
1604 int flag, unsigned long int *limit, int errnumber, const char *msg)
1605 {
1606 int count;
1607 int min = 0;
1608 int mid = 64;
1609 int max = -1;
1610
1611 extra->flags |= flag;
1612
1613 for (;;)
1614 {
1615 *limit = mid;
1616
1617 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1618 use_offsets, use_size_offsets);
1619
1620 if (count == errnumber)
1621 {
1622 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1623 min = mid;
1624 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1625 }
1626
1627 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1628 count == PCRE_ERROR_PARTIAL)
1629 {
1630 if (mid == min + 1)
1631 {
1632 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1633 break;
1634 }
1635 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1636 max = mid;
1637 mid = (min + mid)/2;
1638 }
1639 else break; /* Some other error */
1640 }
1641
1642 extra->flags &= ~flag;
1643 return count;
1644 }
1645
1646
1647
1648 /*************************************************
1649 * Case-independent strncmp() function *
1650 *************************************************/
1651
1652 /*
1653 Arguments:
1654 s first string
1655 t second string
1656 n number of characters to compare
1657
1658 Returns: < 0, = 0, or > 0, according to the comparison
1659 */
1660
1661 static int
1662 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1663 {
1664 while (n--)
1665 {
1666 int c = tolower(*s++) - tolower(*t++);
1667 if (c) return c;
1668 }
1669 return 0;
1670 }
1671
1672
1673
1674 /*************************************************
1675 * Check newline indicator *
1676 *************************************************/
1677
1678 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1679 a message and return 0 if there is no match.
1680
1681 Arguments:
1682 p points after the leading '<'
1683 f file for error message
1684
1685 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1686 */
1687
1688 static int
1689 check_newline(pcre_uint8 *p, FILE *f)
1690 {
1691 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1692 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1693 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1694 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1695 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1696 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1697 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1698 fprintf(f, "Unknown newline type at: <%s\n", p);
1699 return 0;
1700 }
1701
1702
1703
1704 /*************************************************
1705 * Usage function *
1706 *************************************************/
1707
/* Write a summary of the command line options to stdout. Which lines appear
depends on the features that were selected when pcretest was built. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif

fputs(" -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1745
1746
1747
1748 /*************************************************
1749 * Main Program *
1750 *************************************************/
1751
1752 /* Read lines from named file or stdin and write to named file or stdout; lines
1753 consist of a regular expression, in delimiters and optionally followed by
1754 options, followed by a set of test data, terminated by an empty line. */
1755
1756 int main(int argc, char **argv)
1757 {
1758 FILE *infile = stdin;
1759 int options = 0;
1760 int study_options = 0;
1761 int default_find_match_limit = FALSE;
1762 int op = 1;
1763 int timeit = 0;
1764 int timeitm = 0;
1765 int showinfo = 0;
1766 int showstore = 0;
1767 int force_study = -1;
1768 int force_study_options = 0;
1769 int quiet = 0;
1770 int size_offsets = 45;
1771 int size_offsets_max;
1772 int *offsets = NULL;
1773 #if !defined NOPOSIX
1774 int posix = 0;
1775 #endif
1776 int debug = 0;
1777 int done = 0;
1778 int all_use_dfa = 0;
1779 int yield = 0;
1780 int stack_size;
1781
1782 pcre_jit_stack *jit_stack = NULL;
1783
1784 /* These vectors store, end-to-end, a list of captured substring names. Assume
1785 that 1024 is plenty long enough for the few names we'll be testing. */
1786
1787 pcre_uchar copynames[1024];
1788 pcre_uchar getnames[1024];
1789
1790 pcre_uchar *copynamesptr;
1791 pcre_uchar *getnamesptr;
1792
1793 /* Get buffers from malloc() so that valgrind will check their misuse when
1794 debugging. They grow automatically when very long lines are read. The 16-bit
1795 buffer (buffer16) is obtained only if needed. */
1796
1797 buffer = (pcre_uint8 *)malloc(buffer_size);
1798 dbuffer = (pcre_uint8 *)malloc(buffer_size);
1799 pbuffer = (pcre_uint8 *)malloc(buffer_size);
1800
1801 /* The outfile variable is static so that new_malloc can use it. */
1802
1803 outfile = stdout;
1804
1805 /* The following _setmode() stuff is some Windows magic that tells its runtime
1806 library to translate CRLF into a single LF character. At least, that's what
1807 I've been told: never having used Windows I take this all on trust. Originally
1808 it set 0x8000, but then I was advised that _O_BINARY was better. */
1809
1810 #if defined(_WIN32) || defined(WIN32)
1811 _setmode( _fileno( stdout ), _O_BINARY );
1812 #endif
1813
1814 /* Scan options */
1815
1816 while (argc > 1 && argv[op][0] == '-')
1817 {
1818 pcre_uint8 *endptr;
1819
1820 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1821 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1822 else if (strcmp(argv[op], "-s+") == 0)
1823 {
1824 force_study = 1;
1825 force_study_options = PCRE_STUDY_JIT_COMPILE;
1826 }
1827 #ifdef SUPPORT_PCRE16
1828 else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1829 #endif
1830
1831 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1832 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1833 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1834 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1835 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1836 #if !defined NODFA
1837 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1838 #endif
1839 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1840 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1841 *endptr == 0))
1842 {
1843 op++;
1844 argc--;
1845 }
1846 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1847 {
1848 int both = argv[op][2] == 0;
1849 int temp;
1850 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1851 *endptr == 0))
1852 {
1853 timeitm = temp;
1854 op++;
1855 argc--;
1856 }
1857 else timeitm = LOOPREPEAT;
1858 if (both) timeit = timeitm;
1859 }
1860 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1861 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1862 *endptr == 0))
1863 {
1864 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1865 printf("PCRE: -S not supported on this OS\n");
1866 exit(1);
1867 #else
1868 int rc;
1869 struct rlimit rlim;
1870 getrlimit(RLIMIT_STACK, &rlim);
1871 rlim.rlim_cur = stack_size * 1024 * 1024;
1872 rc = setrlimit(RLIMIT_STACK, &rlim);
1873 if (rc != 0)
1874 {
1875 printf("PCRE: setrlimit() failed with error %d\n", rc);
1876 exit(1);
1877 }
1878 op++;
1879 argc--;
1880 #endif
1881 }
1882 #if !defined NOPOSIX
1883 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1884 #endif
1885 else if (strcmp(argv[op], "-C") == 0)
1886 {
1887 int rc;
1888 unsigned long int lrc;
1889 printf("PCRE version %s\n", pcre_version());
1890 printf("Compiled with\n");
1891
1892 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1893 are set, either both UTFs are supported or both are not supported. */
1894
1895 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1896 printf(" 8-bit and 16-bit support\n");
1897 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1898 if (rc)
1899 printf(" UTF-8 and UTF-16 support\n");
1900 else
1901 printf(" No UTF-8 or UTF-16 support\n");
1902 #elif defined SUPPORT_PCRE8
1903 printf(" 8-bit support only\n");
1904 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1905 printf(" %sUTF-8 support\n", rc? "" : "No ");
1906 #else
1907 printf(" 16-bit support only\n");
1908 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1909 printf(" %sUTF-16 support\n", rc? "" : "No ");
1910 #endif
1911
1912 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1913 printf(" %sUnicode properties support\n", rc? "" : "No ");
1914 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1915 if (rc)
1916 printf(" Just-in-time compiler support\n");
1917 else
1918 printf(" No just-in-time compiler support\n");
1919 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1920 /* Note that these values are always the ASCII values, even
1921 in EBCDIC environments. CR is 13 and NL is 10. */
1922 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1923 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1924 (rc == -2)? "ANYCRLF" :
1925 (rc == -1)? "ANY" : "???");
1926 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1927 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1928 "all Unicode newlines");
1929 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1930 printf(" Internal link size = %d\n", rc);
1931 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1932 printf(" POSIX malloc threshold = %d\n", rc);
1933 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1934 printf(" Default match limit = %ld\n", lrc);
1935 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1936 printf(" Default recursion depth limit = %ld\n", lrc);
1937 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1938 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1939 goto EXIT;
1940 }
1941 else if (strcmp(argv[op], "-help") == 0 ||
1942 strcmp(argv[op], "--help") == 0)
1943 {
1944 usage();
1945 goto EXIT;
1946 }
1947 else
1948 {
1949 printf("** Unknown or malformed option %s\n", argv[op]);
1950 usage();
1951 yield = 1;
1952 goto EXIT;
1953 }
1954 op++;
1955 argc--;
1956 }
1957
1958 /* Get the store for the offsets vector, and remember what it was */
1959
1960 size_offsets_max = size_offsets;
1961 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1962 if (offsets == NULL)
1963 {
1964 printf("** Failed to get %d bytes of memory for offsets vector\n",
1965 (int)(size_offsets_max * sizeof(int)));
1966 yield = 1;
1967 goto EXIT;
1968 }
1969
1970 /* Sort out the input and output files */
1971
1972 if (argc > 1)
1973 {
1974 infile = fopen(argv[op], INPUT_MODE);
1975 if (infile == NULL)
1976 {
1977 printf("** Failed to open %s\n", argv[op]);
1978 yield = 1;
1979 goto EXIT;
1980 }
1981 }
1982
1983 if (argc > 2)
1984 {
1985 outfile = fopen(argv[op+1], OUTPUT_MODE);
1986 if (outfile == NULL)
1987 {
1988 printf("** Failed to open %s\n", argv[op+1]);
1989 yield = 1;
1990 goto EXIT;
1991 }
1992 }
1993
1994 /* Set alternative malloc function */
1995
1996 #ifdef SUPPORT_PCRE8
1997 pcre_malloc = new_malloc;
1998 pcre_free = new_free;
1999 pcre_stack_malloc = stack_malloc;
2000 pcre_stack_free = stack_free;
2001 #endif
2002
2003 #ifdef SUPPORT_PCRE16
2004 pcre16_malloc = new_malloc;
2005 pcre16_free = new_free;
2006 pcre16_stack_malloc = stack_malloc;
2007 pcre16_stack_free = stack_free;
2008 #endif
2009
2010 /* Heading line unless quiet, then prompt for first regex if stdin */
2011
2012 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
2013
2014 /* Main loop */
2015
2016 while (!done)
2017 {
2018 pcre *re = NULL;
2019 pcre_extra *extra = NULL;
2020
2021 #if !defined NOPOSIX /* There are still compilers that require no indent */
2022 regex_t preg;
2023 int do_posix = 0;
2024 #endif
2025
2026 const char *error;
2027 pcre_uint8 *markptr;
2028 pcre_uint8 *p, *pp, *ppp;
2029 pcre_uint8 *to_file = NULL;
2030 const pcre_uint8 *tables = NULL;
2031 unsigned long int true_size, true_study_size = 0;
2032 size_t size, regex_gotten_store;
2033 int do_allcaps = 0;
2034 int do_mark = 0;
2035 int do_study = 0;
2036 int no_force_study = 0;
2037 int do_debug = debug;
2038 int do_G = 0;
2039 int do_g = 0;
2040 int do_showinfo = showinfo;
2041 int do_showrest = 0;
2042 int do_showcaprest = 0;
2043 int do_flip = 0;
2044 int erroroffset, len, delimiter, poffset;
2045
2046 use_utf = 0;
2047 debug_lengths = 1;
2048
2049 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2050 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2051 fflush(outfile);
2052
2053 p = buffer;
2054 while (isspace(*p)) p++;
2055 if (*p == 0) continue;
2056
2057 /* See if the pattern is to be loaded pre-compiled from a file. */
2058
2059 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2060 {
2061 unsigned long int magic, get_options;
2062 pcre_uint8 sbuf[8];
2063 FILE *f;
2064
2065 p++;
2066 pp = p + (int)strlen((char *)p);
2067 while (isspace(pp[-1])) pp--;
2068 *pp = 0;
2069
2070 f = fopen((char *)p, "rb");
2071 if (f == NULL)
2072 {
2073 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2074 continue;
2075 }
2076
2077 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2078
2079 true_size =
2080 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2081 true_study_size =
2082 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2083
2084 re = (real_pcre *)new_malloc(true_size);
2085 regex_gotten_store = first_gotten_store;
2086
2087 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2088
2089 magic = ((real_pcre *)re)->magic_number;
2090 if (magic != MAGIC_NUMBER)
2091 {
2092 if (swap_uint32(magic) == MAGIC_NUMBER)
2093 {
2094 do_flip = 1;
2095 }
2096 else
2097 {
2098 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2099 fclose(f);
2100 continue;
2101 }
2102 }
2103
2104 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2105 do_flip? " (byte-inverted)" : "", p);
2106
2107 /* Now see if there is any following study data. */
2108
2109 if (true_study_size != 0)
2110 {
2111 pcre_study_data *psd;
2112
2113 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2114 extra->flags = PCRE_EXTRA_STUDY_DATA;
2115
2116 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2117 extra->study_data = psd;
2118
2119 if (fread(psd, 1, true_study_size, f) != true_study_size)
2120 {
2121 FAIL_READ:
2122 fprintf(outfile, "Failed to read data from %s\n", p);
2123 if (extra != NULL)
2124 {
2125 PCRE_FREE_STUDY(extra);
2126 }
2127 if (re != NULL) new_free(re);
2128 fclose(f);
2129 continue;
2130 }
2131 fprintf(outfile, "Study data loaded from %s\n", p);
2132 do_study = 1; /* To get the data output if requested */
2133 }
2134 else fprintf(outfile, "No study data\n");
2135
2136 /* Flip the necessary bytes. */
2137 if (do_flip)
2138 {
2139 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2140 }
2141
2142 /* Need to know if UTF-8 for printing data strings */
2143
2144 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2145 use_utf = (get_options & PCRE_UTF8) != 0;
2146
2147 fclose(f);
2148 goto SHOW_INFO;
2149 }
2150
2151 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2152 the pattern; if it isn't complete, read more. */
2153
2154 delimiter = *p++;
2155
2156 if (isalnum(delimiter) || delimiter == '\\')
2157 {
2158 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2159 goto SKIP_DATA;
2160 }
2161
2162 pp = p;
2163 poffset = (int)(p - buffer);
2164
2165 for(;;)
2166 {
2167 while (*pp != 0)
2168 {
2169 if (*pp == '\\' && pp[1] != 0) pp++;
2170 else if (*pp == delimiter) break;
2171 pp++;
2172 }
2173 if (*pp != 0) break;
2174 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2175 {
2176 fprintf(outfile, "** Unexpected EOF\n");
2177 done = 1;
2178 goto CONTINUE;
2179 }
2180 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2181 }
2182
2183 /* The buffer may have moved while being extended; reset the start of data
2184 pointer to the correct relative point in the buffer. */
2185
2186 p = buffer + poffset;
2187
2188 /* If the first character after the delimiter is backslash, make
2189 the pattern end with backslash. This is purely to provide a way
2190 of testing for the error message when a pattern ends with backslash. */
2191
2192 if (pp[1] == '\\') *pp++ = '\\';
2193
2194 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2195 for callouts. */
2196
2197 *pp++ = 0;
2198 strcpy((char *)pbuffer, (char *)p);
2199
2200 /* Look for options after final delimiter */
2201
2202 options = 0;
2203 study_options = 0;
2204 log_store = showstore; /* default from command line */
2205
2206 while (*pp != 0)
2207 {
2208 switch (*pp++)
2209 {
2210 case 'f': options |= PCRE_FIRSTLINE; break;
2211 case 'g': do_g = 1; break;
2212 case 'i': options |= PCRE_CASELESS; break;
2213 case 'm': options |= PCRE_MULTILINE; break;
2214 case 's': options |= PCRE_DOTALL; break;
2215 case 'x': options |= PCRE_EXTENDED; break;
2216
2217 case '+':
2218 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2219 break;
2220
2221 case '=': do_allcaps = 1; break;
2222 case 'A': options |= PCRE_ANCHORED; break;
2223 case 'B': do_debug = 1; break;
2224 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2225 case 'D': do_debug = do_showinfo = 1; break;
2226 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2227 case 'F': do_flip = 1; break;
2228 case 'G': do_G = 1; break;
2229 case 'I': do_showinfo = 1; break;
2230 case 'J': options |= PCRE_DUPNAMES; break;
2231 case 'K': do_mark = 1; break;
2232 case 'M': log_store = 1; break;
2233 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2234
2235 #if !defined NOPOSIX
2236 case 'P': do_posix = 1; break;
2237 #endif
2238
2239 case 'S':
2240 if (do_study == 0)
2241 {
2242 do_study = 1;
2243 if (*pp == '+')
2244 {
2245 study_options |= PCRE_STUDY_JIT_COMPILE;
2246 pp++;
2247 }
2248 }
2249 else
2250 {
2251 do_study = 0;
2252 no_force_study = 1;
2253 }
2254 break;
2255
2256 case 'U': options |= PCRE_UNGREEDY; break;
2257 case 'W': options |= PCRE_UCP; break;
2258 case 'X': options |= PCRE_EXTRA; break;
2259 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2260 case 'Z': debug_lengths = 0; break;
2261 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2262 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2263
2264 case 'T':
2265 switch (*pp++)
2266 {
2267 case '0': tables = tables0; break;
2268 case '1': tables = tables1; break;
2269
2270 case '\r':
2271 case '\n':
2272 case ' ':
2273 case 0:
2274 fprintf(outfile, "** Missing table number after /T\n");
2275 goto SKIP_DATA;
2276
2277 default:
2278 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2279 goto SKIP_DATA;
2280 }
2281 break;
2282
2283 case 'L':
2284 ppp = pp;
2285 /* The '\r' test here is so that it works on Windows. */
2286 /* The '0' test is just in case this is an unterminated line. */
2287 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2288 *ppp = 0;
2289 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2290 {
2291 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2292 goto SKIP_DATA;
2293 }
2294 locale_set = 1;
2295 tables = pcre_maketables();
2296 pp = ppp;
2297 break;
2298
2299 case '>':
2300 to_file = pp;
2301 while (*pp != 0) pp++;
2302 while (isspace(pp[-1])) pp--;
2303 *pp = 0;
2304 break;
2305
2306 case '<':
2307 {
2308 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2309 {
2310 options |= PCRE_JAVASCRIPT_COMPAT;
2311 pp += 3;
2312 }
2313 else
2314 {
2315 int x = check_newline(pp, outfile);
2316 if (x == 0) goto SKIP_DATA;
2317 options |= x;
2318 while (*pp++ != '>');
2319 }
2320 }
2321 break;
2322
2323 case '\r': /* So that it works in Windows */
2324 case '\n':
2325 case ' ':
2326 break;
2327
2328 default:
2329 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2330 goto SKIP_DATA;
2331 }
2332 }
2333
2334 /* Handle compiling via the POSIX interface, which doesn't support the
2335 timing, showing, or debugging options, nor the ability to pass over
2336 local character tables. Neither does it have 16-bit support. */
2337
2338 #if !defined NOPOSIX
2339 if (posix || do_posix)
2340 {
2341 int rc;
2342 int cflags = 0;
2343
2344 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2345 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2346 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2347 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2348 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2349 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2350 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2351
2352 first_gotten_store = 0;
2353 rc = regcomp(&preg, (char *)p, cflags);
2354
2355 /* Compilation failed; go back for another re, skipping to blank line
2356 if non-interactive. */
2357
2358 if (rc != 0)
2359 {
2360 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2361 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2362 goto SKIP_DATA;
2363 }
2364 }
2365
2366 /* Handle compiling via the native interface */
2367
2368 else
2369 #endif /* !defined NOPOSIX */
2370
2371 {
2372 unsigned long int get_options;
2373
2374 /* In 16-bit mode, convert the input. */
2375
2376 #ifdef SUPPORT_PCRE16
2377 if (use_pcre16)
2378 {
2379 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2380 {
2381 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2382 "converted to UTF-16\n");
2383 goto SKIP_DATA;
2384 }
2385 p = (pcre_uint8 *)buffer16;
2386 }
2387 #endif
2388
2389 /* Compile many times when timing */
2390
2391 if (timeit > 0)
2392 {
2393 register int i;
2394 clock_t time_taken;
2395 clock_t start_time = clock();
2396 for (i = 0; i < timeit; i++)
2397 {
2398 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2399 if (re != NULL) free(re);
2400 }
2401 time_taken = clock() - start_time;
2402 fprintf(outfile, "Compile time %.4f milliseconds\n",
2403 (((double)time_taken * 1000.0) / (double)timeit) /
2404 (double)CLOCKS_PER_SEC);
2405 }
2406
2407 first_gotten_store = 0;
2408 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2409
2410 /* Compilation failed; go back for another re, skipping to blank line
2411 if non-interactive. */
2412
2413 if (re == NULL)
2414 {
2415 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2416 SKIP_DATA:
2417 if (infile != stdin)
2418 {
2419 for (;;)
2420 {
2421 if (extend_inputline(infile, buffer, NULL) == NULL)
2422 {
2423 done = 1;
2424 goto CONTINUE;
2425 }
2426 len = (int)strlen((char *)buffer);
2427 while (len > 0 && isspace(buffer[len-1])) len--;
2428 if (len == 0) break;
2429 }
2430 fprintf(outfile, "\n");
2431 }
2432 goto CONTINUE;
2433 }
2434
2435 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2436 within the regex; check for this so that we know how to process the data
2437 lines. */
2438
2439 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2440 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2441
2442 /* Extract the size for possible writing before possibly flipping it,
2443 and remember the store that was got. */
2444
2445 true_size = ((real_pcre *)re)->size;
2446 regex_gotten_store = first_gotten_store;
2447
2448 /* Output code size information if requested */
2449
2450 if (log_store)
2451 fprintf(outfile, "Memory allocation (code space): %d\n",
2452 (int)(first_gotten_store -
2453 sizeof(real_pcre) -
2454 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2455
2456 /* If -s or /S was present, study the regex to generate additional info to
2457 help with the matching, unless the pattern has the SS option, which
2458 suppresses the effect of /S (used for a few test patterns where studying is
2459 never sensible). */
2460
2461 if (do_study || (force_study >= 0 && !no_force_study))
2462 {
2463 if (timeit > 0)
2464 {
2465 register int i;
2466 clock_t time_taken;
2467 clock_t start_time = clock();
2468 for (i = 0; i < timeit; i++)
2469 {
2470 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2471 }
2472 time_taken = clock() - start_time;
2473 if (extra != NULL)
2474 {
2475 PCRE_FREE_STUDY(extra);
2476 }
2477 fprintf(outfile, " Study time %.4f milliseconds\n",
2478 (((double)time_taken * 1000.0) / (double)timeit) /
2479 (double)CLOCKS_PER_SEC);
2480 }
2481 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2482 if (error != NULL)
2483 fprintf(outfile, "Failed to study: %s\n", error);
2484 else if (extra != NULL)
2485 {
2486 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2487 if (log_store)
2488 {
2489 size_t jitsize;
2490 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2491 if (jitsize != 0)
2492 fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2493 }
2494 }
2495 }
2496
2497 /* If /K was present, we set up for handling MARK data. */
2498
2499 if (do_mark)
2500 {
2501 if (extra == NULL)
2502 {
2503 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2504 extra->flags = 0;
2505 }
2506 extra->mark = &markptr;
2507 extra->flags |= PCRE_EXTRA_MARK;
2508 }
2509
2510 /* Extract and display information from the compiled data if required. */
2511
2512 SHOW_INFO:
2513
2514 if (do_debug)
2515 {
2516 fprintf(outfile, "------------------------------------------------------------------\n");
2517 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2518 if (use_pcre16)
2519 pcre16_printint(re, outfile, debug_lengths);
2520 else
2521 pcre_printint(re, outfile, debug_lengths);
2522 #elif defined SUPPORT_PCRE8
2523 pcre_printint(re, outfile, debug_lengths);
2524 #else
2525 pcre16_printint(re, outfile, debug_lengths);
2526 #endif
2527 }
2528
2529 /* We already have the options in get_options (see above) */
2530
2531 if (do_showinfo)
2532 {
2533 unsigned long int all_options;
2534 #if !defined NOINFOCHECK
2535 int old_first_char, old_options, old_count;
2536 #endif
2537 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2538 hascrorlf;
2539 int nameentrysize, namecount;
2540 const pcre_uchar *nametable;
2541
2542 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2543 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2544 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2545 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2546 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2547 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2548 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2549 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2550 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2551 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2552 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2553
2554 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2555 that it gives the same results as the new function. */
2556
2557 #if !defined NOINFOCHECK
2558 if (!use_pcre16)
2559 {
2560 old_count = pcre_info(re, &old_options, &old_first_char);
2561 if (count < 0) fprintf(outfile,
2562 "Error %d from pcre_info()\n", count);
2563 else
2564 {
2565 if (old_count != count) fprintf(outfile,
2566 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2567 old_count);
2568
2569 if (old_first_char != first_char) fprintf(outfile,
2570 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2571 first_char, old_first_char);
2572
2573 if (old_options != (int)get_options) fprintf(outfile,
2574 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2575 get_options, old_options);
2576 }
2577 }
2578 #endif
2579
2580 if (size != regex_gotten_store) fprintf(outfile,
2581 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2582 (int)size, (int)regex_gotten_store);
2583
2584 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2585 if (backrefmax > 0)
2586 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2587
2588 if (namecount > 0)
2589 {
2590 fprintf(outfile, "Named capturing subpatterns:\n");
2591 while (namecount-- > 0)
2592 {
2593 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2594 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2595 GET2(nametable, 0));
2596 nametable += nameentrysize;
2597 }
2598 }
2599
2600 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2601 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2602
2603 all_options = ((real_pcre *)re)->options;
2604 if (do_flip) all_options = swap_uint32(all_options);
2605
2606 if (get_options == 0) fprintf(outfile, "No options\n");
2607 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2608 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2609 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2610 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2611 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2612 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2613 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2614 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2615 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2616 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2617 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2618 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2619 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2620 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2621 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2622 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2623 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2624 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2625
2626 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2627
2628 switch (get_options & PCRE_NEWLINE_BITS)
2629 {
2630 case PCRE_NEWLINE_CR:
2631 fprintf(outfile, "Forced newline sequence: CR\n");
2632 break;
2633
2634 case PCRE_NEWLINE_LF:
2635 fprintf(outfile, "Forced newline sequence: LF\n");
2636 break;
2637
2638 case PCRE_NEWLINE_CRLF:
2639 fprintf(outfile, "Forced newline sequence: CRLF\n");
2640 break;
2641
2642 case PCRE_NEWLINE_ANYCRLF:
2643 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2644 break;
2645
2646 case PCRE_NEWLINE_ANY:
2647 fprintf(outfile, "Forced newline sequence: ANY\n");
2648 break;
2649
2650 default:
2651 break;
2652 }
2653
2654 if (first_char == -1)
2655 {
2656 fprintf(outfile, "First char at start or follows newline\n");
2657 }
2658 else if (first_char < 0)
2659 {
2660 fprintf(outfile, "No first char\n");
2661 }
2662 else
2663 {
2664 const char *caseless =
2665 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2666 "" : " (caseless)";
2667
2668 if (PRINTOK(first_char))
2669 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2670 else
2671 {
2672 fprintf(outfile, "First char = ");
2673 pchar(first_char, outfile);
2674 fprintf(outfile, "%s\n", caseless);
2675 }
2676 }
2677
2678 if (need_char < 0)
2679 {
2680 fprintf(outfile, "No need char\n");
2681 }
2682 else
2683 {
2684 const char *caseless =
2685 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2686 "" : " (caseless)";
2687
2688 if (PRINTOK(need_char))
2689 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2690 else
2691 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2692 }
2693
2694 /* Don't output study size; at present it is in any case a fixed
2695 value, but it varies, depending on the computer architecture, and
2696 so messes up the test suite. (And with the /F option, it might be
2697 flipped.) If study was forced by an external -s, don't show this
2698 information unless -i or -d was also present. This means that, except
2699 when auto-callouts are involved, the output from runs with and without
2700 -s should be identical. */
2701
2702 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2703 {
2704 if (extra == NULL)
2705 fprintf(outfile, "Study returned NULL\n");
2706 else
2707 {
2708 pcre_uint8 *start_bits = NULL;
2709 int minlength;
2710
2711 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2712 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2713
2714 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2715 if (start_bits == NULL)
2716 fprintf(outfile, "No set of starting bytes\n");
2717 else
2718 {
2719 int i;
2720 int c = 24;
2721 fprintf(outfile, "Starting byte set: ");
2722 for (i = 0; i < 256; i++)
2723 {
2724 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2725 {
2726 if (c > 75)
2727 {
2728 fprintf(outfile, "\n ");
2729 c = 2;
2730 }
2731 if (PRINTOK(i) && i != ' ')
2732 {
2733 fprintf(outfile, "%c ", i);
2734 c += 2;
2735 }
2736 else
2737 {
2738 fprintf(outfile, "\\x%02x ", i);
2739 c += 5;
2740 }
2741 }
2742 }
2743 fprintf(outfile, "\n");
2744 }
2745 }
2746
2747 /* Show this only if the JIT was set by /S, not by -s. */
2748
2749 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2750 {
2751 int jit;
2752 new_info(re, extra, PCRE_INFO_JIT, &jit);
2753 if (jit)
2754 fprintf(outfile, "JIT study was successful\n");
2755 else
2756 #ifdef SUPPORT_JIT
2757 fprintf(outfile, "JIT study was not successful\n");
2758 #else
2759 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2760 #endif
2761 }
2762 }
2763 }
2764
2765 /* If the '>' option was present, we write out the regex to a file, and
2766 that is all. The first 8 bytes of the file are the regex length and then
2767 the study length, in big-endian order. */
2768
2769 if (to_file != NULL)
2770 {
2771 FILE *f = fopen((char *)to_file, "wb");
2772 if (f == NULL)
2773 {
2774 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2775 }
2776 else
2777 {
2778 pcre_uint8 sbuf[8];
2779
2780 if (do_flip) regexflip(re, extra);
2781 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2782 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2783 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2784 sbuf[3] = (pcre_uint8)((true_size) & 255);
2785 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2786 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2787 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2788 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2789
2790 if (fwrite(sbuf, 1, 8, f) < 8 ||
2791 fwrite(re, 1, true_size, f) < true_size)
2792 {
2793 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2794 }
2795 else
2796 {
2797 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2798
2799 /* If there is study data, write it. */
2800
2801 if (extra != NULL)
2802 {
2803 if (fwrite(extra->study_data, 1, true_study_size, f) <
2804 true_study_size)
2805 {
2806 fprintf(outfile, "Write error on %s: %s\n", to_file,
2807 strerror(errno));
2808 }
2809 else fprintf(outfile, "Study data written to %s\n", to_file);
2810 }
2811 }
2812 fclose(f);
2813 }
2814
2815 new_free(re);
2816 if (extra != NULL)
2817 {
2818 PCRE_FREE_STUDY(extra);
2819 }
2820 if (locale_set)
2821 {
2822 new_free((void *)tables);
2823 setlocale(LC_CTYPE, "C");
2824 locale_set = 0;
2825 }
2826 continue; /* With next regex */
2827 }
2828 } /* End of non-POSIX compile */
2829
2830 /* Read data lines and test them */
2831
2832 for (;;)
2833 {
2834 pcre_uint8 *q;
2835 pcre_uint8 *bptr;
2836 int *use_offsets = offsets;
2837 int use_size_offsets = size_offsets;
2838 int callout_data = 0;
2839 int callout_data_set = 0;
2840 int count, c;
2841 int copystrings = 0;
2842 int find_match_limit = default_find_match_limit;
2843 int getstrings = 0;
2844 int getlist = 0;
2845 int gmatched = 0;
2846 int start_offset = 0;
2847 int start_offset_sign = 1;
2848 int g_notempty = 0;
2849 int use_dfa = 0;
2850
2851 options = 0;
2852
2853 *copynames = 0;
2854 *getnames = 0;
2855
2856 copynamesptr = copynames;
2857 getnamesptr = getnames;
2858
2859 pcre_callout = callout;
2860 first_callout = 1;
2861 last_callout_mark = NULL;
2862 callout_extra = 0;
2863 callout_count = 0;
2864 callout_fail_count = 999999;
2865 callout_fail_id = -1;
2866 show_malloc = 0;
2867
2868 if (extra != NULL) extra->flags &=
2869 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2870
2871 len = 0;
2872 for (;;)
2873 {
2874 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2875 {
2876 if (len > 0) /* Reached EOF without hitting a newline */
2877 {
2878 fprintf(outfile, "\n");
2879 break;
2880 }
2881 done = 1;
2882 goto CONTINUE;
2883 }
2884 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2885 len = (int)strlen((char *)buffer);
2886 if (buffer[len-1] == '\n') break;
2887 }
2888
2889 while (len > 0 && isspace(buffer[len-1])) len--;
2890 buffer[len] = 0;
2891 if (len == 0) break;
2892
2893 p = buffer;
2894 while (isspace(*p)) p++;
2895
2896 bptr = q = dbuffer;
2897 while ((c = *p++) != 0)
2898 {
2899 int i = 0;
2900 int n = 0;
2901
2902 if (c == '\\') switch ((c = *p++))
2903 {
2904 case 'a': c = 7; break;
2905 case 'b': c = '\b'; break;
2906 case 'e': c = 27; break;
2907 case 'f': c = '\f'; break;
2908 case 'n': c = '\n'; break;
2909 case 'r': c = '\r'; break;
2910 case 't': c = '\t'; break;
2911 case 'v': c = '\v'; break;
2912
2913 case '0': case '1': case '2': case '3':
2914 case '4': case '5': case '6': case '7':
2915 c -= '0';
2916 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2917 c = c * 8 + *p++ - '0';
2918
2919 #if !defined NOUTF8
2920 if (use_utf && c > 255)
2921 {
2922 pcre_uint8 buff8[8];
2923 int ii, utn;
2924 utn = ord2utf8(c, buff8);
2925 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2926 c = buff8[ii]; /* Last byte */
2927 }
2928 #endif
2929 break;
2930
2931 case 'x':
2932
2933 /* Handle \x{..} specially - new Perl thing for utf8 */
2934
2935 #if !defined NOUTF8
2936 if (*p == '{')
2937 {
2938 pcre_uint8 *pt = p;
2939 c = 0;
2940
2941 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2942 when isxdigit() is a macro that refers to its argument more than
2943 once. This is banned by the C Standard, but apparently happens in at
2944 least one MacOS environment. */
2945
2946 for (pt++; isxdigit(*pt); pt++)
2947 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2948 if (*pt == '}')
2949 {
2950 pcre_uint8 buff8[8];
2951 int ii, utn;
2952 if (use_utf)
2953 {
2954 utn = ord2utf8(c, buff8);
2955 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2956 c = buff8[ii]; /* Last byte */
2957 }
2958 else
2959 {
2960 if (c > 255)
2961 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2962 "UTF-8 mode is not enabled.\n"
2963 "** Truncation will probably give the wrong result.\n", c);
2964 }
2965 p = pt + 1;
2966 break;
2967 }
2968 /* Not correct form; fall through */
2969 }
2970 #endif
2971
2972 /* Ordinary \x */
2973
2974 c = 0;
2975 while (i++ < 2 && isxdigit(*p))
2976 {
2977 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2978 p++;
2979 }
2980 break;
2981
2982 case 0: /* \ followed by EOF allows for an empty line */
2983 p--;
2984 continue;
2985
2986 case '>':
2987 if (*p == '-')
2988 {
2989 start_offset_sign = -1;
2990 p++;
2991 }
2992 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2993 start_offset *= start_offset_sign;
2994 continue;
2995
2996 case 'A': /* Option setting */
2997 options |= PCRE_ANCHORED;
2998 continue;
2999
3000 case 'B':
3001 options |= PCRE_NOTBOL;
3002 continue;
3003
3004 case 'C':
3005 if (isdigit(*p)) /* Set copy string */
3006 {
3007 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3008 copystrings |= 1 << n;
3009 }
3010 else if (isalnum(*p))
3011 {
3012 pcre_uchar *npp = copynamesptr;
3013 while (isalnum(*p)) *npp++ = *p++;
3014 *npp++ = 0;
3015 *npp = 0;
3016 n = pcre_get_stringnumber(re, (char *)copynamesptr);
3017 if (n < 0)
3018 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
3019 copynamesptr = npp;
3020 }
3021 else if (*p == '+')
3022 {
3023 callout_extra = 1;
3024 p++;
3025 }
3026 else if (*p == '-')
3027 {
3028 pcre_callout = NULL;
3029 p++;
3030 }
3031 else if (*p == '!')
3032 {
3033 callout_fail_id = 0;
3034 p++;
3035 while(isdigit(*p))
3036 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3037 callout_fail_count = 0;
3038 if (*p == '!')
3039 {
3040 p++;
3041 while(isdigit(*p))
3042 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3043 }
3044 }
3045 else if (*p == '*')
3046 {
3047 int sign = 1;
3048 callout_data = 0;
3049 if (*(++p) == '-') { sign = -1; p++; }
3050 while(isdigit(*p))
3051 callout_data = callout_data * 10 + *p++ - '0';
3052 callout_data *= sign;
3053 callout_data_set = 1;
3054 }
3055 continue;
3056
3057 #if !defined NODFA
3058 case 'D':
3059 #if !defined NOPOSIX
3060 if (posix || do_posix)
3061 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3062 else
3063 #endif
3064 use_dfa = 1;
3065 continue;
3066 #endif
3067
3068 #if !defined NODFA
3069 case 'F':
3070 options |= PCRE_DFA_SHORTEST;
3071 continue;
3072 #endif
3073
3074 case 'G':
3075 if (isdigit(*p))
3076 {
3077 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3078 getstrings |= 1 << n;
3079 }
3080 else if (isalnum(*p))
3081 {
3082 pcre_uchar *npp = getnamesptr;
3083 while (isalnum(*p)) *npp++ = *p++;
3084 *npp++ = 0;
3085 *npp = 0;
3086 n = pcre_get_stringnumber(re, (char *)getnamesptr);
3087 if (n < 0)
3088 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
3089 getnamesptr = npp;
3090 }
3091 continue;
3092
3093 case 'J':
3094 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3095 if (extra != NULL
3096 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3097 && extra->executable_jit != NULL)
3098 {
3099 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3100 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3101 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3102 }
3103 continue;
3104
3105 case 'L':
3106 getlist = 1;
3107 continue;
3108
3109 case 'M':
3110 find_match_limit = 1;
3111 continue;
3112
3113 case 'N':
3114 if ((options & PCRE_NOTEMPTY) != 0)
3115 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3116 else
3117 options |= PCRE_NOTEMPTY;
3118 continue;
3119
3120 case 'O':
3121 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3122 if (n > size_offsets_max)
3123 {
3124 size_offsets_max = n;
3125 free(offsets);
3126 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3127 if (offsets == NULL)
3128 {
3129 printf("** Failed to get %d bytes of memory for offsets vector\n",
3130 (int)(size_offsets_max * sizeof(int)));
3131 yield = 1;
3132 goto EXIT;
3133 }
3134 }
3135 use_size_offsets = n;
3136 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3137 continue;
3138
3139 case 'P':
3140 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3141 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3142 continue;
3143
3144 case 'Q':
3145 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3146 if (extra == NULL)
3147 {
3148 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3149 extra->flags = 0;
3150 }
3151 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3152 extra->match_limit_recursion = n;
3153 continue;
3154
3155 case 'q':
3156 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3157 if (extra == NULL)
3158 {
3159 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3160 extra->flags = 0;
3161 }
3162 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3163 extra->match_limit = n;
3164 continue;
3165
3166 #if !defined NODFA
3167 case 'R':
3168 options |= PCRE_DFA_RESTART;
3169 continue;
3170 #endif
3171
3172 case 'S':
3173 show_malloc = 1;
3174 continue;
3175
3176 case 'Y':
3177 options |= PCRE_NO_START_OPTIMIZE;
3178 continue;
3179
3180 case 'Z':
3181 options |= PCRE_NOTEOL;
3182 continue;
3183
3184 case '?':
3185 options |= PCRE_NO_UTF8_CHECK;
3186 continue;
3187
3188 case '<':
3189 {
3190 int x = check_newline(p, outfile);
3191 if (x == 0) goto NEXT_DATA;
3192 options |= x;
3193 while (*p++ != '>');
3194 }
3195 continue;
3196 }
3197 *q++ = c;
3198 }
3199 *q = 0;
3200 len = (int)(q - dbuffer);
3201
3202 /* Move the data to the end of the buffer so that a read over the end of
3203 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3204 we are using the POSIX interface, we must include the terminating zero. */
3205
3206 #if !defined NOPOSIX
3207 if (posix || do_posix)
3208 {
3209 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3210 bptr += buffer_size - len - 1;
3211 }
3212 else
3213 #endif
3214 {
3215 memmove(bptr + buffer_size - len, bptr, len);
3216 bptr += buffer_size - len;
3217 }
3218
3219 if ((all_use_dfa || use_dfa) && find_match_limit)
3220 {
3221 printf("**Match limit not relevant for DFA matching: ignored\n");
3222 find_match_limit = 0;
3223 }
3224
3225 /* Handle matching via the POSIX interface, which does not
3226 support timing or playing with the match limit or callout data. */
3227
3228 #if !defined NOPOSIX
3229 if (posix || do_posix)
3230 {
3231 int rc;
3232 int eflags = 0;
3233 regmatch_t *pmatch = NULL;
3234 if (use_size_offsets > 0)
3235 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3236 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3237 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3238 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3239
3240 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3241
3242 if (rc != 0)
3243 {
3244 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3245 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3246 }
3247 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3248 != 0)
3249 {
3250 fprintf(outfile, "Matched with REG_NOSUB\n");
3251 }
3252 else
3253 {
3254 size_t i;
3255 for (i = 0; i < (size_t)use_size_offsets; i++)
3256 {
3257 if (pmatch[i].rm_so >= 0)
3258 {
3259 fprintf(outfile, "%2d: ", (int)i);
3260 PCHARSV(dbuffer + pmatch[i].rm_so,
3261 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3262 fprintf(outfile, "\n");
3263 if (do_showcaprest || (i == 0 && do_showrest))
3264 {
3265 fprintf(outfile, "%2d+ ", (int)i);
3266 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3267 outfile);
3268 fprintf(outfile, "\n");
3269 }
3270 }
3271 }
3272 }
3273 free(pmatch);
3274 goto NEXT_DATA;
3275 }
3276
3277 #endif /* !defined NOPOSIX */
3278
3279 /* Handle matching via the native interface - repeats for /g and /G */
3280
3281 #ifdef SUPPORT_PCRE16
3282 if (use_pcre16)
3283 {
3284 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3285 if (len < 0)
3286 {
3287 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3288 "converted to UTF-16\n");
3289 goto NEXT_DATA;
3290 }
3291 bptr = (pcre_uint8 *)buffer16;
3292 }
3293 #endif
3294
3295 for (;; gmatched++) /* Loop for /g or /G */
3296 {
3297 markptr = NULL;
3298
3299 if (timeitm > 0)
3300 {
3301 register int i;
3302 clock_t time_taken;
3303 clock_t start_time = clock();
3304
3305 #if !defined NODFA
3306 if (all_use_dfa || use_dfa)
3307 {
3308 int workspace[1000];
3309 for (i = 0; i < timeitm; i++)
3310 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3311 options | g_notempty, use_offsets, use_size_offsets, workspace,
3312 sizeof(workspace)/sizeof(int));
3313 }
3314 else
3315 #endif
3316
3317 for (i = 0; i < timeitm; i++)
3318 {
3319 PCRE_EXEC(count, re, extra, bptr, len,
3320 start_offset, options | g_notempty, use_offsets, use_size_offsets);
3321 }
3322 time_taken = clock() - start_time;
3323 fprintf(outfile, "Execute time %.4f milliseconds\n",
3324 (((double)time_taken * 1000.0) / (double)timeitm) /
3325 (double)CLOCKS_PER_SEC);
3326 }
3327
3328 /* If find_match_limit is set, we want to do repeated matches with
3329 varying limits in order to find the minimum value for the match limit and
3330 for the recursion limit. The match limits are relevant only to the normal
3331 running of pcre_exec(), so disable the JIT optimization. This makes it
3332 possible to run the same set of tests with and without JIT externally
3333 requested. */
3334
3335 if (find_match_limit)
3336 {
3337 if (extra == NULL)
3338 {
3339 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3340 extra->flags = 0;
3341 }
3342 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3343
3344 (void)check_match_limit(re, extra, bptr, len, start_offset,
3345 options|g_notempty, use_offsets, use_size_offsets,
3346 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3347 PCRE_ERROR_MATCHLIMIT, "match()");
3348
3349 count = check_match_limit(re, extra, bptr, len, start_offset,
3350 options|g_notempty, use_offsets, use_size_offsets,
3351 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3352 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3353 }
3354
3355 /* If callout_data is set, use the interface with additional data */
3356
3357 else if (callout_data_set)
3358 {
3359 if (extra == NULL)
3360 {
3361 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3362 extra->flags = 0;
3363 }
3364 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3365 extra->callout_data = &callout_data;
3366 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3367 options | g_notempty, use_offsets, use_size_offsets);
3368 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3369 }
3370
3371 /* The normal case is just to do the match once, with the default
3372 value of match_limit. */
3373
3374 #if !defined NODFA
3375 else if (all_use_dfa || use_dfa)
3376 {
3377 int workspace[1000];
3378 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3379 options | g_notempty, use_offsets, use_size_offsets, workspace,
3380 sizeof(workspace)/sizeof(int));
3381 if (count == 0)
3382 {
3383 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3384 count = use_size_offsets/2;
3385 }
3386 }
3387 #endif
3388
3389 else
3390 {
3391 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3392 options | g_notempty, use_offsets, use_size_offsets);
3393 if (count == 0)
3394 {
3395 fprintf(outfile, "Matched, but too many substrings\n");
3396 count = use_size_offsets/3;
3397 }
3398 }
3399
3400 /* Matched */
3401
3402 if (count >= 0)
3403 {
3404 int i, maxcount;
3405
3406 #if !defined NODFA
3407 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3408 #endif
3409 maxcount = use_size_offsets/3;
3410
3411 /* This is a check against a lunatic return value. */
3412
3413 if (count > maxcount)
3414 {
3415 fprintf(outfile,
3416 "** PCRE error: returned count %d is too big for offset size %d\n",
3417 count, use_size_offsets);
3418 count = use_size_offsets/3;
3419 if (do_g || do_G)
3420 {
3421 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3422 do_g = do_G = FALSE; /* Break g/G loop */
3423 }
3424 }
3425
3426 /* do_allcaps requests showing of all captures in the pattern, to check
3427 unset ones at the end. */
3428
3429 if (do_allcaps)
3430 {
3431 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3432 count++; /* Allow for full match */
3433 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3434 }
3435
3436 /* Output the captured substrings */
3437
3438 for (i = 0; i < count * 2; i += 2)
3439 {
3440 if (use_offsets[i] < 0)
3441 {
3442 if (use_offsets[i] != -1)
3443 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3444 use_offsets[i], i);
3445 if (use_offsets[i+1] != -1)
3446 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3447 use_offsets[i+1], i+1);
3448 fprintf(outfile, "%2d: <unset>\n", i/2);
3449 }
3450 else
3451 {
3452 fprintf(outfile, "%2d: ", i/2);
3453 PCHARSV(bptr + use_offsets[i],
3454 use_offsets[i+1] - use_offsets[i], outfile);
3455 fprintf(outfile, "\n");
3456 if (do_showcaprest || (i == 0 && do_showrest))
3457 {
3458 fprintf(outfile, "%2d+ ", i/2);
3459 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3460 outfile);
3461 fprintf(outfile, "\n");
3462 }
3463 }
3464 }
3465
3466 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3467
3468 for (i = 0; i < 32; i++)
3469 {
3470 if ((copystrings & (1 << i)) != 0)
3471 {
3472 char copybuffer[256];
3473 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3474 i, copybuffer, sizeof(copybuffer));
3475 if (rc < 0)
3476 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3477 else
3478 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3479 }
3480 }
3481
3482 for (copynamesptr = copynames;
3483 *copynamesptr != 0;
3484 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3485 {
3486 char copybuffer[256];
3487 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3488 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3489 if (rc < 0)
3490 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3491 else
3492 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3493 }
3494
3495 for (i = 0; i < 32; i++)
3496 {
3497 if ((getstrings & (1 << i)) != 0)
3498 {
3499 const char *substring;
3500 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3501 i, &substring);
3502 if (rc < 0)
3503 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3504 else
3505 {
3506 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3507 pcre_free_substring(substring);
3508 }
3509 }
3510 }
3511
3512 for (getnamesptr = getnames;
3513 *getnamesptr != 0;
3514 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3515 {
3516 const char *substring;
3517 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3518 count, (char *)getnamesptr, &substring);
3519 if (rc < 0)
3520 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3521 else
3522 {
3523 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
3524 pcre_free_substring(substring);
3525 }
3526 }
3527
3528 if (getlist)
3529 {
3530 const char **stringlist;
3531 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
3532 &stringlist);
3533 if (rc < 0)
3534 fprintf(outfile, "get substring list failed %d\n", rc);
3535 else
3536 {
3537 for (i = 0; i < count; i++)
3538 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3539 if (stringlist[i] != NULL)
3540 fprintf(outfile, "string list not terminated by NULL\n");
3541 pcre_free_substring_list(stringlist);
3542 }
3543 }
3544 }
3545
3546 /* There was a partial match */
3547
3548 else if (count == PCRE_ERROR_PARTIAL)
3549 {
3550 if (markptr == NULL) fprintf(outfile, "Partial match");
3551 else fprintf(outfile, "Partial match, mark=%s", markptr);
3552 if (use_size_offsets > 1)
3553 {
3554 fprintf(outfile, ": ");
3555 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3556 outfile);
3557 }
3558 fprintf(outfile, "\n");
3559 break; /* Out of the /g loop */
3560 }
3561
3562 /* Failed to match. If this is a /g or /G loop and we previously set
3563 g_notempty after a null match, this is not necessarily the end. We want
3564 to advance the start offset, and continue. We won't be at the end of the
3565 string - that was checked before setting g_notempty.
3566
3567 Complication arises in the case when the newline convention is "any",
3568 "crlf", or "anycrlf". If the previous match was at the end of a line
3569 terminated by CRLF, an advance of one character just passes the \r,
3570 whereas we should prefer the longer newline sequence, as does the code in
3571 pcre_exec(). Fudge the offset value to achieve this. We check for a
3572 newline setting in the pattern; if none was set, use pcre_config() to
3573 find the default.
3574
3575 Otherwise, in the case of UTF-8 matching, the advance must be one
3576 character, not one byte. */
3577
3578 else
3579 {
3580 if (g_notempty != 0)
3581 {
3582 int onechar = 1;
3583 unsigned int obits = ((real_pcre *)re)->options;
3584 use_offsets[0] = start_offset;
3585 if ((obits & PCRE_NEWLINE_BITS) == 0)
3586 {
3587 int d;
3588 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3589 /* Note that these values are always the ASCII ones, even in
3590 EBCDIC environments. CR = 13, NL = 10. */
3591 obits = (d == 13)? PCRE_NEWLINE_CR :
3592 (d == 10)? PCRE_NEWLINE_LF :
3593 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3594 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3595 (d == -1)? PCRE_NEWLINE_ANY : 0;
3596 }
3597 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3598 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3599 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3600 &&
3601 start_offset < len - 1 &&
3602 bptr[start_offset] == '\r' &&
3603 bptr[start_offset+1] == '\n')
3604 onechar++;
3605 else if (use_utf)
3606 {
3607 while (start_offset + onechar < len)
3608 {
3609 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3610 onechar++;
3611 }
3612 }
3613 use_offsets[1] = start_offset + onechar;
3614 }
3615 else
3616 {
3617 switch(count)
3618 {
3619 case PCRE_ERROR_NOMATCH:
3620 if (gmatched == 0)
3621 {
3622 if (markptr == NULL) fprintf(outfile, "No match\n");
3623 else fprintf(outfile, "No match, mark = %s\n", markptr);
3624 }
3625 break;
3626
3627 case PCRE_ERROR_BADUTF8:
3628 case PCRE_ERROR_SHORTUTF8:
3629 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3630 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3631 if (use_size_offsets >= 2)
3632 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3633 use_offsets[1]);
3634 fprintf(outfile, "\n");
3635 break;
3636
3637 default:
3638 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3639 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3640 else
3641 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3642 break;
3643 }
3644
3645 break; /* Out of the /g loop */
3646 }
3647 }
3648
3649 /* If not /g or /G we are done */
3650
3651 if (!do_g && !do_G) break;
3652
3653 /* If we have matched an empty string, first check to see if we are at
3654 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3655 Perl's /g option does. This turns out to be rather cunning. First we set
3656 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3657 same point. If this fails (picked up above) we advance to the next
3658 character. */
3659
3660 g_notempty = 0;
3661
3662 if (use_offsets[0] == use_offsets[1])
3663 {
3664 if (use_offsets[0] == len) break;
3665 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3666 }
3667
3668 /* For /g, update the start offset, leaving the rest alone */
3669
3670 if (do_g) start_offset = use_offsets[1];
3671
3672 /* For /G, update the pointer and length */
3673
3674 else
3675 {
3676 bptr += use_offsets[1];
3677 len -= use_offsets[1];
3678 }
3679 } /* End of loop for /g and /G */
3680
3681 NEXT_DATA: continue;
3682 } /* End of loop for data lines */
3683
3684 CONTINUE:
3685
3686 #if !defined NOPOSIX
3687 if (posix || do_posix) regfree(&preg);
3688 #endif
3689
3690 if (re != NULL) new_free(re);
3691 if (extra != NULL)
3692 {
3693 PCRE_FREE_STUDY(extra);
3694 }
3695 if (locale_set)
3696 {
3697 new_free((void *)tables);
3698 setlocale(LC_CTYPE, "C");
3699 locale_set = 0;
3700 }
3701 if (jit_stack != NULL)
3702 {
3703 pcre_jit_stack_free(jit_stack);
3704 jit_stack = NULL;
3705 }
3706 }
3707
3708 if (infile == stdin) fprintf(outfile, "\n");
3709
3710 EXIT:
3711
3712 if (infile != NULL && infile != stdin) fclose(infile);
3713 if (outfile != NULL && outfile != stdout) fclose(outfile);
3714
3715 free(buffer);
3716 free(dbuffer);
3717 free(pbuffer);
3718 free(offsets);
3719
3720 #ifdef SUPPORT_PCRE16
3721 if (buffer16 != NULL) free(buffer16);
3722 #endif
3723
3724 return yield;
3725 }
3726
3727 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5