/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 814 - (show annotations)
Wed Dec 21 12:05:24 2011 UTC (8 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 110791 byte(s)
Reorganized the tests and made many more work in 16-bit mode.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
69 #if defined(_WIN32) || defined(WIN32)
70 #include <io.h> /* For _setmode() */
71 #include <fcntl.h> /* For _O_BINARY */
72 #define INPUT_MODE "r"
73 #define OUTPUT_MODE "wb"
74
75 #ifndef isatty
76 #define isatty _isatty /* This is what Windows calls them, I'm told, */
77 #endif /* though in some environments they seem to */
78 /* be already defined, hence the #ifndefs. */
79 #ifndef fileno
80 #define fileno _fileno
81 #endif
82
83 /* A user sent this fix for Borland Builder 5 under Windows. */
84
85 #ifdef __BORLANDC__
86 #define _setmode(handle, mode) setmode(handle, mode)
87 #endif
88
89 /* Not Windows */
90
91 #else
92 #include <sys/time.h> /* These two includes are needed */
93 #include <sys/resource.h> /* for setrlimit(). */
94 #define INPUT_MODE "rb"
95 #define OUTPUT_MODE "wb"
96 #endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
116 #ifdef SUPPORT_PCRE8
117 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118 #endif
119 #ifdef SUPPORT_PCRE16
120 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121 #endif
122
123 /* We need access to some of the data tables that PCRE uses. So as not to have
124 to keep two copies, we include the source file here, changing the names of the
125 external symbols to prevent clashes. */
126
127 #define _pcre_ucp_gentype ucp_gentype
128 #define _pcre_ucp_typerange ucp_typerange
129 #define _pcre_utf8_table1 utf8_table1
130 #define _pcre_utf8_table1_size utf8_table1_size
131 #define _pcre_utf8_table2 utf8_table2
132 #define _pcre_utf8_table3 utf8_table3
133 #define _pcre_utf8_table4 utf8_table4
134 #define _pcre_utt utt
135 #define _pcre_utt_size utt_size
136 #define _pcre_utt_names utt_names
137 #define _pcre_OP_lengths OP_lengths
138
139 #include "pcre_tables.c"
140
141 /* The definition of the macro PRINTABLE, which determines whether to print an
142 output character as-is or as a hex value when showing compiled patterns, is
143 the same as in the printint.src file. We use it here in cases when the locale
144 has not been explicitly changed, so as to get consistent output from systems
145 that differ in their output from isprint() even in the "C" locale. */
146
147 #ifdef EBCDIC
148 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149 #else
150 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151 #endif
152
153 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
163 /* It is also possible, originally for the benefit of a version that was
164 imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165 without the interface to the DFA matcher (NODFA), and without the doublecheck
166 of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167 out the UTF8 support if PCRE is built without it. */
168
169 #ifndef SUPPORT_UTF8
170 #ifndef NOUTF8
171 #define NOUTF8
172 #endif
173 #endif
174
175 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177 only from one place and is handled differently). I couldn't dream up any way of
178 using a single macro to do this in a generic way, because of the many different
179 argument requirements. We know that at least one of SUPPORT_PCRE8 and
180 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181 use these in the definitions of generic macros. */
182
183 #ifdef SUPPORT_PCRE8
184
185 #define PCHARS8(lv, p, len, f) \
186 lv = pchars((pcre_uint8 *)p, len, f)
187
188 #define PCHARSV8(p, len, f) \
189 (void)pchars((pcre_uint8 *)p, len, f)
190
191 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
192 re = pcre_compile((char *)pat, options, error, erroffset, tables)
193
194 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
195 offsets, size_offsets, workspace, size_workspace) \
196 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
197 offsets, size_offsets, workspace, size_workspace)
198
199 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
200 offsets, size_offsets) \
201 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
202 offsets, size_offsets)
203
204 #define PCRE_FREE_STUDY8(extra) \
205 pcre_free_study(extra)
206
207 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
208 pcre_pattern_to_host_byte_order(re, extra, tables)
209
210 #define PCRE_STUDY8(extra, re, options, error) \
211 extra = pcre_study(re, options, error)
212
213 #endif /* SUPPORT_PCRE8 */
214
215
216 #ifdef SUPPORT_PCRE16
217
218 #define PCHARS16(lv, p, len, f) \
219 lv = pchars16((PCRE_SPTR16)p, len, f)
220
221 #define PCHARSV16(p, len, f) \
222 (void)pchars16((PCRE_SPTR16)p, len, f)
223
224 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
225 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
226
227 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
228 offsets, size_offsets, workspace, size_workspace) \
229 count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
230 options, offsets, size_offsets, workspace, size_workspace)
231
232 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
233 offsets, size_offsets) \
234 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
235 options, offsets, size_offsets)
236
237 #define PCRE_FREE_STUDY16(extra) \
238 pcre16_free_study(extra)
239
240 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
241 pcre16_pattern_to_host_byte_order(re, extra, tables)
242
243 #define PCRE_STUDY16(extra, re, options, error) \
244 extra = pcre16_study(re, options, error)
245
246 #endif /* SUPPORT_PCRE16 */
247
248
249 /* ----- Both modes are supported; a runtime test is needed ----- */
250
251 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
252
253 #define PCHARS(lv, p, len, f) \
254 if (use_pcre16) \
255 PCHARS16(lv, p, len, f); \
256 else \
257 PCHARS8(lv, p, len, f)
258
259 #define PCHARSV(p, len, f) \
260 if (use_pcre16) \
261 PCHARSV16(p, len, f); \
262 else \
263 PCHARSV8(p, len, f)
264
265 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
266 if (use_pcre16) \
267 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
268 else \
269 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
270
271 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
272 offsets, size_offsets, workspace, size_workspace) \
273 if (use_pcre16) \
274 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
275 offsets, size_offsets, workspace, size_workspace); \
276 else \
277 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
278 offsets, size_offsets, workspace, size_workspace)
279
280 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
281 offsets, size_offsets) \
282 if (use_pcre16) \
283 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
284 offsets, size_offsets); \
285 else \
286 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
287 offsets, size_offsets)
288
289 #define PCRE_FREE_STUDY(extra) \
290 if (use_pcre16) \
291 PCRE_FREE_STUDY16(extra); \
292 else \
293 PCRE_FREE_STUDY8(extra)
294
295 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
296 if (use_pcre16) \
297 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
298 else \
299 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
300
301 #define PCRE_STUDY(extra, re, options, error) \
302 if (use_pcre16) \
303 PCRE_STUDY16(extra, re, options, error); \
304 else \
305 PCRE_STUDY8(extra, re, options, error)
306
307 /* ----- Only 8-bit mode is supported ----- */
308
309 #elif defined SUPPORT_PCRE8
310 #define PCHARS PCHARS8
311 #define PCHARSV PCHARSV8
312 #define PCRE_COMPILE PCRE_COMPILE8
313 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
314 #define PCRE_EXEC PCRE_EXEC8
315 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
316 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
317 #define PCRE_STUDY PCRE_STUDY8
318
319 /* ----- Only 16-bit mode is supported ----- */
320
321 #else
322 #define PCHARS PCHARS16
323 #define PCHARSV PCHARSV16
324 #define PCRE_COMPILE PCRE_COMPILE16
325 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
326 #define PCRE_EXEC PCRE_EXEC16
327 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
328 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
329 #define PCRE_STUDY PCRE_STUDY16
330 #endif
331
332 /* ----- End of mode-specific function call macros ----- */
333
334
335 /* Other parameters */
336
337 #ifndef CLOCKS_PER_SEC
338 #ifdef CLK_TCK
339 #define CLOCKS_PER_SEC CLK_TCK
340 #else
341 #define CLOCKS_PER_SEC 100
342 #endif
343 #endif
344
345 /* This is the default loop count for timing. */
346
347 #define LOOPREPEAT 500000
348
349 /* Static variables */
350
351 static FILE *outfile;
352 static int log_store = 0;
353 static int callout_count;
354 static int callout_extra;
355 static int callout_fail_count;
356 static int callout_fail_id;
357 static int debug_lengths;
358 static int first_callout;
359 static int locale_set = 0;
360 static int show_malloc;
361 static int use_utf;
362 static size_t gotten_store;
363 static size_t first_gotten_store = 0;
364 static const unsigned char *last_callout_mark = NULL;
365
366 /* The buffers grow automatically if very long input lines are encountered. */
367
368 static int buffer_size = 50000;
369 static pcre_uint8 *buffer = NULL;
370 static pcre_uint8 *dbuffer = NULL;
371 static pcre_uint8 *pbuffer = NULL;
372
373 /* Another buffer is needed for translation to 16-bit character strings. It will
374 be obtained and extended as required. */
375
376 #ifdef SUPPORT_PCRE16
377 static int buffer16_size = 0;
378 static pcre_uint16 *buffer16 = NULL;
379
380 /* We need the table of operator lengths that is used for 16-bit compiling, in
381 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
382 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
383 appropriately for the 16-bit world. Just as a safety check, make sure that
384 COMPILE_PCRE16 is *not* set. */
385
386 #ifdef COMPILE_PCRE16
387 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
388 #endif
389
390 #if LINK_SIZE == 2
391 #undef LINK_SIZE
392 #define LINK_SIZE 1
393 #elif LINK_SIZE == 3 || LINK_SIZE == 4
394 #undef LINK_SIZE
395 #define LINK_SIZE 2
396 #else
397 #error LINK_SIZE must be either 2, 3, or 4
398 #endif
399
400 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
401
402 #endif /* SUPPORT_PCRE16 */
403
404 /* If we have 8-bit support, default use_pcre16 to false; if there is also
405 16-bit support, it can be changed by an option. If there is no 8-bit support,
406 there must be 16-bit support, so default it to 1. */
407
408 #ifdef SUPPORT_PCRE8
409 static int use_pcre16 = 0;
410 #else
411 static int use_pcre16 = 1;
412 #endif
413
414 /* Textual explanations for runtime error codes */
415
416 static const char *errtexts[] = {
417 NULL, /* 0 is no error */
418 NULL, /* NOMATCH is handled specially */
419 "NULL argument passed",
420 "bad option value",
421 "magic number missing",
422 "unknown opcode - pattern overwritten?",
423 "no more memory",
424 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
425 "match limit exceeded",
426 "callout error code",
427 NULL, /* BADUTF8 is handled specially */
428 "bad UTF-8 offset",
429 NULL, /* PARTIAL is handled specially */
430 "not used - internal error",
431 "internal error - pattern overwritten?",
432 "bad count value",
433 "item unsupported for DFA matching",
434 "backreference condition or recursion test not supported for DFA matching",
435 "match limit not supported for DFA matching",
436 "workspace size exceeded in DFA matching",
437 "too much recursion for DFA matching",
438 "recursion limit exceeded",
439 "not used - internal error",
440 "invalid combination of newline options",
441 "bad offset value",
442 NULL, /* SHORTUTF8 is handled specially */
443 "nested recursion at the same subject position",
444 "JIT stack limit reached",
445 "pattern compiled in wrong mode (8-bit/16-bit error)"
446 };
447
448
449 /*************************************************
450 * Alternate character tables *
451 *************************************************/
452
453 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
454 using the default tables of the library. However, the T option can be used to
455 select alternate sets of tables, for different kinds of testing. Note also that
456 the L (locale) option also adjusts the tables. */
457
458 /* This is the set of tables distributed as default with PCRE. It recognizes
459 only ASCII characters. */
460
461 static const pcre_uint8 tables0[] = {
462
463 /* This table is a lower casing table. */
464
465 0, 1, 2, 3, 4, 5, 6, 7,
466 8, 9, 10, 11, 12, 13, 14, 15,
467 16, 17, 18, 19, 20, 21, 22, 23,
468 24, 25, 26, 27, 28, 29, 30, 31,
469 32, 33, 34, 35, 36, 37, 38, 39,
470 40, 41, 42, 43, 44, 45, 46, 47,
471 48, 49, 50, 51, 52, 53, 54, 55,
472 56, 57, 58, 59, 60, 61, 62, 63,
473 64, 97, 98, 99,100,101,102,103,
474 104,105,106,107,108,109,110,111,
475 112,113,114,115,116,117,118,119,
476 120,121,122, 91, 92, 93, 94, 95,
477 96, 97, 98, 99,100,101,102,103,
478 104,105,106,107,108,109,110,111,
479 112,113,114,115,116,117,118,119,
480 120,121,122,123,124,125,126,127,
481 128,129,130,131,132,133,134,135,
482 136,137,138,139,140,141,142,143,
483 144,145,146,147,148,149,150,151,
484 152,153,154,155,156,157,158,159,
485 160,161,162,163,164,165,166,167,
486 168,169,170,171,172,173,174,175,
487 176,177,178,179,180,181,182,183,
488 184,185,186,187,188,189,190,191,
489 192,193,194,195,196,197,198,199,
490 200,201,202,203,204,205,206,207,
491 208,209,210,211,212,213,214,215,
492 216,217,218,219,220,221,222,223,
493 224,225,226,227,228,229,230,231,
494 232,233,234,235,236,237,238,239,
495 240,241,242,243,244,245,246,247,
496 248,249,250,251,252,253,254,255,
497
498 /* This table is a case flipping table. */
499
500 0, 1, 2, 3, 4, 5, 6, 7,
501 8, 9, 10, 11, 12, 13, 14, 15,
502 16, 17, 18, 19, 20, 21, 22, 23,
503 24, 25, 26, 27, 28, 29, 30, 31,
504 32, 33, 34, 35, 36, 37, 38, 39,
505 40, 41, 42, 43, 44, 45, 46, 47,
506 48, 49, 50, 51, 52, 53, 54, 55,
507 56, 57, 58, 59, 60, 61, 62, 63,
508 64, 97, 98, 99,100,101,102,103,
509 104,105,106,107,108,109,110,111,
510 112,113,114,115,116,117,118,119,
511 120,121,122, 91, 92, 93, 94, 95,
512 96, 65, 66, 67, 68, 69, 70, 71,
513 72, 73, 74, 75, 76, 77, 78, 79,
514 80, 81, 82, 83, 84, 85, 86, 87,
515 88, 89, 90,123,124,125,126,127,
516 128,129,130,131,132,133,134,135,
517 136,137,138,139,140,141,142,143,
518 144,145,146,147,148,149,150,151,
519 152,153,154,155,156,157,158,159,
520 160,161,162,163,164,165,166,167,
521 168,169,170,171,172,173,174,175,
522 176,177,178,179,180,181,182,183,
523 184,185,186,187,188,189,190,191,
524 192,193,194,195,196,197,198,199,
525 200,201,202,203,204,205,206,207,
526 208,209,210,211,212,213,214,215,
527 216,217,218,219,220,221,222,223,
528 224,225,226,227,228,229,230,231,
529 232,233,234,235,236,237,238,239,
530 240,241,242,243,244,245,246,247,
531 248,249,250,251,252,253,254,255,
532
533 /* This table contains bit maps for various character classes. Each map is 32
534 bytes long and the bits run from the least significant end of each byte. The
535 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
536 graph, print, punct, and cntrl. Other classes are built from combinations. */
537
538 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
539 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
542
543 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
544 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
545 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
547
548 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
549 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
552
553 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
554 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
557
558 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
559 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
562
563 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
564 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
567
568 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
569 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
570 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
571 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
572
573 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
574 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
575 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577
578 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
579 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582
583 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
584 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
585 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587
588 /* This table identifies various classes of character by individual bits:
589 0x01 white space character
590 0x02 letter
591 0x04 decimal digit
592 0x08 hexadecimal digit
593 0x10 alphanumeric or '_'
594 0x80 regular expression metacharacter or binary zero
595 */
596
597 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
598 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
601 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
602 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
603 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
604 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
605 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
606 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
607 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
608 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
609 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
610 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
611 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
612 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
624 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
625 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
626 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
627 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
628 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
629
630 /* This is a set of tables that came originally from a Windows user. It seems to
631 be at least an approximation of ISO 8859. In particular, there are characters
632 greater than 128 that are marked as spaces, letters, etc. */
633
634 static const pcre_uint8 tables1[] = {
635 0,1,2,3,4,5,6,7,
636 8,9,10,11,12,13,14,15,
637 16,17,18,19,20,21,22,23,
638 24,25,26,27,28,29,30,31,
639 32,33,34,35,36,37,38,39,
640 40,41,42,43,44,45,46,47,
641 48,49,50,51,52,53,54,55,
642 56,57,58,59,60,61,62,63,
643 64,97,98,99,100,101,102,103,
644 104,105,106,107,108,109,110,111,
645 112,113,114,115,116,117,118,119,
646 120,121,122,91,92,93,94,95,
647 96,97,98,99,100,101,102,103,
648 104,105,106,107,108,109,110,111,
649 112,113,114,115,116,117,118,119,
650 120,121,122,123,124,125,126,127,
651 128,129,130,131,132,133,134,135,
652 136,137,138,139,140,141,142,143,
653 144,145,146,147,148,149,150,151,
654 152,153,154,155,156,157,158,159,
655 160,161,162,163,164,165,166,167,
656 168,169,170,171,172,173,174,175,
657 176,177,178,179,180,181,182,183,
658 184,185,186,187,188,189,190,191,
659 224,225,226,227,228,229,230,231,
660 232,233,234,235,236,237,238,239,
661 240,241,242,243,244,245,246,215,
662 248,249,250,251,252,253,254,223,
663 224,225,226,227,228,229,230,231,
664 232,233,234,235,236,237,238,239,
665 240,241,242,243,244,245,246,247,
666 248,249,250,251,252,253,254,255,
667 0,1,2,3,4,5,6,7,
668 8,9,10,11,12,13,14,15,
669 16,17,18,19,20,21,22,23,
670 24,25,26,27,28,29,30,31,
671 32,33,34,35,36,37,38,39,
672 40,41,42,43,44,45,46,47,
673 48,49,50,51,52,53,54,55,
674 56,57,58,59,60,61,62,63,
675 64,97,98,99,100,101,102,103,
676 104,105,106,107,108,109,110,111,
677 112,113,114,115,116,117,118,119,
678 120,121,122,91,92,93,94,95,
679 96,65,66,67,68,69,70,71,
680 72,73,74,75,76,77,78,79,
681 80,81,82,83,84,85,86,87,
682 88,89,90,123,124,125,126,127,
683 128,129,130,131,132,133,134,135,
684 136,137,138,139,140,141,142,143,
685 144,145,146,147,148,149,150,151,
686 152,153,154,155,156,157,158,159,
687 160,161,162,163,164,165,166,167,
688 168,169,170,171,172,173,174,175,
689 176,177,178,179,180,181,182,183,
690 184,185,186,187,188,189,190,191,
691 224,225,226,227,228,229,230,231,
692 232,233,234,235,236,237,238,239,
693 240,241,242,243,244,245,246,215,
694 248,249,250,251,252,253,254,223,
695 192,193,194,195,196,197,198,199,
696 200,201,202,203,204,205,206,207,
697 208,209,210,211,212,213,214,247,
698 216,217,218,219,220,221,222,255,
699 0,62,0,0,1,0,0,0,
700 0,0,0,0,0,0,0,0,
701 32,0,0,0,1,0,0,0,
702 0,0,0,0,0,0,0,0,
703 0,0,0,0,0,0,255,3,
704 126,0,0,0,126,0,0,0,
705 0,0,0,0,0,0,0,0,
706 0,0,0,0,0,0,0,0,
707 0,0,0,0,0,0,255,3,
708 0,0,0,0,0,0,0,0,
709 0,0,0,0,0,0,12,2,
710 0,0,0,0,0,0,0,0,
711 0,0,0,0,0,0,0,0,
712 254,255,255,7,0,0,0,0,
713 0,0,0,0,0,0,0,0,
714 255,255,127,127,0,0,0,0,
715 0,0,0,0,0,0,0,0,
716 0,0,0,0,254,255,255,7,
717 0,0,0,0,0,4,32,4,
718 0,0,0,128,255,255,127,255,
719 0,0,0,0,0,0,255,3,
720 254,255,255,135,254,255,255,7,
721 0,0,0,0,0,4,44,6,
722 255,255,127,255,255,255,127,255,
723 0,0,0,0,254,255,255,255,
724 255,255,255,255,255,255,255,127,
725 0,0,0,0,254,255,255,255,
726 255,255,255,255,255,255,255,255,
727 0,2,0,0,255,255,255,255,
728 255,255,255,255,255,255,255,127,
729 0,0,0,0,255,255,255,255,
730 255,255,255,255,255,255,255,255,
731 0,0,0,0,254,255,0,252,
732 1,0,0,248,1,0,0,120,
733 0,0,0,0,254,255,255,255,
734 0,0,128,0,0,0,128,0,
735 255,255,255,255,0,0,0,0,
736 0,0,0,0,0,0,0,128,
737 255,255,255,255,0,0,0,0,
738 0,0,0,0,0,0,0,0,
739 128,0,0,0,0,0,0,0,
740 0,1,1,0,1,1,0,0,
741 0,0,0,0,0,0,0,0,
742 0,0,0,0,0,0,0,0,
743 1,0,0,0,128,0,0,0,
744 128,128,128,128,0,0,128,0,
745 28,28,28,28,28,28,28,28,
746 28,28,0,0,0,0,0,128,
747 0,26,26,26,26,26,26,18,
748 18,18,18,18,18,18,18,18,
749 18,18,18,18,18,18,18,18,
750 18,18,18,128,128,0,128,16,
751 0,26,26,26,26,26,26,18,
752 18,18,18,18,18,18,18,18,
753 18,18,18,18,18,18,18,18,
754 18,18,18,128,128,0,0,0,
755 0,0,0,0,0,1,0,0,
756 0,0,0,0,0,0,0,0,
757 0,0,0,0,0,0,0,0,
758 0,0,0,0,0,0,0,0,
759 1,0,0,0,0,0,0,0,
760 0,0,18,0,0,0,0,0,
761 0,0,20,20,0,18,0,0,
762 0,20,18,0,0,0,0,0,
763 18,18,18,18,18,18,18,18,
764 18,18,18,18,18,18,18,18,
765 18,18,18,18,18,18,18,0,
766 18,18,18,18,18,18,18,18,
767 18,18,18,18,18,18,18,18,
768 18,18,18,18,18,18,18,18,
769 18,18,18,18,18,18,18,0,
770 18,18,18,18,18,18,18,18
771 };
772
773
774
775
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror(), compiled only when HAVE_STRERROR is not
defined. It looks the message up in the C library's sys_errlist[] table,
whose number of entries is given by sys_nerr.

Argument:
  n          the error number to translate

Returns:     sys_errlist[n] when n is a valid index into the table,
             otherwise the fixed text "unknown error number"
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  /* Only index the table when n lies inside [0, sys_nerr). */
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
#endif  /* HAVE_STRERROR */
795
796
797 /*************************************************
798 * JIT memory callback *
799 *************************************************/
800
801 static pcre_jit_stack* jit_callback(void *arg)
802 {
803 return (pcre_jit_stack *)arg;
804 }
805
806
807 /*************************************************
808 * Convert UTF-8 string to value *
809 *************************************************/
810
811 /* This function takes one or more bytes that represents a UTF-8 character,
812 and returns the value of the character.
813
814 Argument:
815 utf8bytes a pointer to the byte vector
816 vptr a pointer to an int to receive the value
817
818 Returns: > 0 => the number of bytes consumed
819 -6 to 0 => malformed UTF-8 character at offset = (-return)
820 */
821
822 #if !defined NOUTF8
823
824 static int
825 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
826 {
827 int c = *utf8bytes++;
828 int d = c;
829 int i, j, s;
830
831 for (i = -1; i < 6; i++) /* i is number of additional bytes */
832 {
833 if ((d & 0x80) == 0) break;
834 d <<= 1;
835 }
836
837 if (i == -1) { *vptr = c; return 1; } /* ascii character */
838 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
839
840 /* i now has a value in the range 1-5 */
841
842 s = 6*i;
843 d = (c & utf8_table3[i]) << s;
844
845 for (j = 0; j < i; j++)
846 {
847 c = *utf8bytes++;
848 if ((c & 0xc0) != 0x80) return -(j+1);
849 s -= 6;
850 d |= (c & 0x3f) << s;
851 }
852
853 /* Check that encoding was the correct unique one */
854
855 for (j = 0; j < utf8_table1_size; j++)
856 if (d <= utf8_table1[j]) break;
857 if (j != i) return -(i+1);
858
859 /* Valid value */
860
861 *vptr = d;
862 return i+1;
863 }
864
865 #endif
866
867
868
869 /*************************************************
870 * Convert character value to UTF-8 *
871 *************************************************/
872
873 /* This function takes an integer value in the range 0 - 0x7fffffff
874 and encodes it as a UTF-8 character in 0 to 6 bytes.
875
876 Arguments:
877 cvalue the character value
878 utf8bytes pointer to buffer for result - at least 6 bytes long
879
880 Returns: number of characters placed in the buffer
881 */
882
883 #if !defined NOUTF8
884
885 static int
886 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
887 {
888 register int i, j;
889 for (i = 0; i < utf8_table1_size; i++)
890 if (cvalue <= utf8_table1[i]) break;
891 utf8bytes += i;
892 for (j = i; j > 0; j--)
893 {
894 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
895 cvalue >>= 6;
896 }
897 *utf8bytes = utf8_table2[i] | cvalue;
898 return i + 1;
899 }
900
901 #endif
902
903
904
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Converts a byte string into 16-bit units. The result is always left in
buffer16, which is replaced by a larger allocation when it is too small, and
is terminated by a zero unit.

In non-UTF mode each byte becomes one 16-bit unit. In UTF mode a UTF-8
character of 1 to 3 bytes becomes one unit and a 4-byte character becomes two
units (a surrogate pair), so the number of units never exceeds the number of
input bytes. Either way, 2*len + 2 bytes (len units plus the terminating zero)
is always enough.

Code points above 0x10ffff cannot be represented in UTF-16. They are rejected
rather than being pushed through the surrogate arithmetic, which would yield
meaningless 16-bit units.

Arguments:
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if the UTF-8 string is malformed, a character extends
             beyond len, or a code point is greater than 0x10ffff
*/

static int
to16(pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *pp;
  int needed = 2*len + 2;       /* bytes: len units plus terminating zero */

  if (buffer16_size < needed)
    {
    free(buffer16);             /* free(NULL) is a no-op */
    buffer16_size = needed;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
      {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
      }
    }

  pp = buffer16;

  if (!utf)
    {
    while (len-- > 0) *pp++ = *p++;
    }

  else
    {
    while (len > 0)
      {
      int c;
      int chlen = utf82ord(p, &c);

      /* Malformed, or the character runs past the stated length. The second
      test keeps the "units <= bytes" bound above valid even if the bytes
      following the string happen to look like continuation bytes. */

      if (chlen <= 0 || chlen > len) return -1;

      /* Not encodable as UTF-16. */

      if (c > 0x10ffff) return -1;

      p += chlen;
      len -= chlen;

      if (c < 0x10000) *pp++ = c; else
        {
        c -= 0x10000;
        *pp++ = 0xD800 | (c >> 10);      /* high surrogate */
        *pp++ = 0xDC00 | (c & 0x3ff);    /* low surrogate */
        }
      }
    }

  *pp = 0;
  return pp - buffer16;
}
#endif
971
972
973 /*************************************************
974 * Read or extend an input line *
975 *************************************************/
976
977 /* Input lines are read into buffer, but both patterns and data lines can be
978 continued over multiple input lines. In addition, if the buffer fills up, we
979 want to automatically expand it so as to be able to handle extremely large
980 lines that are needed for certain stress tests. When the input buffer is
981 expanded, the other two buffers must also be expanded likewise, and the
982 contents of pbuffer, which are a copy of the input for callouts, must be
983 preserved (for when expansion happens for a data line). This is not the most
984 optimal way of handling this, but hey, this is just a test program!
985
986 Arguments:
987 f the file to read
988 start where in buffer to start (this *must* be within buffer)
989 prompt for stdin or readline()
990
991 Returns: pointer to the start of new data
992 could be a copy of start, or could be moved
993 NULL if no data read and EOF reached
994 */
995
996 static pcre_uint8 *
997 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
998 {
999 pcre_uint8 *here = start;
1000
1001 for (;;)
1002 {
1003 int rlen = (int)(buffer_size - (here - buffer));
1004
1005 if (rlen > 1000)
1006 {
1007 int dlen;
1008
1009 /* If libreadline support is required, use readline() to read a line if the
1010 input is a terminal. Note that readline() removes the trailing newline, so
1011 we must put it back again, to be compatible with fgets(). */
1012
1013 #ifdef SUPPORT_LIBREADLINE
1014 if (isatty(fileno(f)))
1015 {
1016 size_t len;
1017 char *s = readline(prompt);
1018 if (s == NULL) return (here == start)? NULL : start;
1019 len = strlen(s);
1020 if (len > 0) add_history(s);
1021 if (len > rlen - 1) len = rlen - 1;
1022 memcpy(here, s, len);
1023 here[len] = '\n';
1024 here[len+1] = 0;
1025 free(s);
1026 }
1027 else
1028 #endif
1029
1030 /* Read the next line by normal means, prompting if the file is stdin. */
1031
1032 {
1033 if (f == stdin) printf("%s", prompt);
1034 if (fgets((char *)here, rlen, f) == NULL)
1035 return (here == start)? NULL : start;
1036 }
1037
1038 dlen = (int)strlen((char *)here);
1039 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1040 here += dlen;
1041 }
1042
1043 else
1044 {
1045 int new_buffer_size = 2*buffer_size;
1046 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1047 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1048 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1049
1050 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1051 {
1052 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1053 exit(1);
1054 }
1055
1056 memcpy(new_buffer, buffer, buffer_size);
1057 memcpy(new_pbuffer, pbuffer, buffer_size);
1058
1059 buffer_size = new_buffer_size;
1060
1061 start = new_buffer + (start - buffer);
1062 here = new_buffer + (here - buffer);
1063
1064 free(buffer);
1065 free(dbuffer);
1066 free(pbuffer);
1067
1068 buffer = new_buffer;
1069 dbuffer = new_dbuffer;
1070 pbuffer = new_pbuffer;
1071 }
1072 }
1073
1074 return NULL; /* Control never gets here */
1075 }
1076
1077
1078
1079 /*************************************************
1080 * Read number from string *
1081 *************************************************/
1082
1083 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1084 around with conditional compilation, just do the job by hand. It is only used
1085 for unpicking arguments, so just keep it simple.
1086
1087 Arguments:
1088 str string to be converted
1089 endptr where to put the end pointer
1090
1091 Returns: the unsigned long
1092 */
1093
1094 static int
1095 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1096 {
1097 int result = 0;
1098 while(*str != 0 && isspace(*str)) str++;
1099 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1100 *endptr = str;
1101 return(result);
1102 }
1103
1104
1105
1106 /*************************************************
1107 * Print one character *
1108 *************************************************/
1109
1110 /* Print a single character either literally, or as a hex escape. */
1111
1112 static int pchar(int c, FILE *f)
1113 {
1114 if (PRINTOK(c))
1115 {
1116 if (f != NULL) fprintf(f, "%c", c);
1117 return 1;
1118 }
1119
1120 if (c < 0x100)
1121 {
1122 if (use_utf)
1123 {
1124 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1125 return 6;
1126 }
1127 else
1128 {
1129 if (f != NULL) fprintf(f, "\\x%02x", c);
1130 return 4;
1131 }
1132 }
1133
1134 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1135 return (c <= 0x000000ff)? 6 :
1136 (c <= 0x00000fff)? 7 :
1137 (c <= 0x0000ffff)? 8 :
1138 (c <= 0x000fffff)? 9 : 10;
1139 }
1140
1141
1142
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Output a byte string one character at a time by means of pchar(). When
use_utf is set, an attempt is made to decode a UTF-8 character at each
position; if that fails, or if the character would extend beyond the given
length, the single byte at that position is output instead.

Arguments:
  p          points to the string
  length     number of bytes to process
  f          the output file, or NULL just to count without printing

Returns:     the number of characters printed (the sum of pchar() results)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int ch = 0;
int total = 0;

while (length > 0)
  {
  int nbytes = 0;   /* Bytes used by a decoded character; 0 = not decoded */

#if !defined NOUTF8
  if (use_utf)
    {
    nbytes = utf82ord(p, &ch);
    if (nbytes <= 0 || nbytes > length) nbytes = 0;  /* Mustn't run over the end */
    }
#endif

  /* Fall back to treating the current byte as a character on its own. */

  if (nbytes == 0)
    {
    ch = *p;
    nbytes = 1;
    }

  p += nbytes;
  length -= nbytes;
  total += pchar(ch, f);
  }

return total;
}
#endif
1178
1179
1180
#ifdef SUPPORT_PCRE16
/*************************************************
*         Print 16-bit character string          *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Each 16-bit unit is output by means
of pchar(). When use_utf is set, a high surrogate (0xD800-0xDBFF) that is
followed, within the given length, by a low surrogate (0xDC00-0xDFFF) is
combined with it into a single character value before being output. An
unpaired surrogate is output as it stands.

Arguments:
  p          points to the string
  length     number of 16-bit units to process
  f          the output file, or NULL just to count without printing

Returns:     the number of characters printed (the sum of pchar() results)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF8
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range is inclusive of 0xDFFF. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif
1214
1215
1216
1217 /*************************************************
1218 * Callout function *
1219 *************************************************/
1220
1221 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1222 the match. Yield zero unless more callouts than the fail count, or the callout
1223 data is not zero. */
1224
1225 static int callout(pcre_callout_block *cb)
1226 {
1227 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1228 int i, pre_start, post_start, subject_length;
1229
1230 if (callout_extra)
1231 {
1232 fprintf(f, "Callout %d: last capture = %d\n",
1233 cb->callout_number, cb->capture_last);
1234
1235 for (i = 0; i < cb->capture_top * 2; i += 2)
1236 {
1237 if (cb->offset_vector[i] < 0)
1238 fprintf(f, "%2d: <unset>\n", i/2);
1239 else
1240 {
1241 fprintf(f, "%2d: ", i/2);
1242 PCHARSV(cb->subject + cb->offset_vector[i],
1243 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1244 fprintf(f, "\n");
1245 }
1246 }
1247 }
1248
1249 /* Re-print the subject in canonical form, the first time or if giving full
1250 datails. On subsequent calls in the same match, we use pchars just to find the
1251 printed lengths of the substrings. */
1252
1253 if (f != NULL) fprintf(f, "--->");
1254
1255 PCHARS(pre_start, cb->subject, cb->start_match, f);
1256 PCHARS(post_start, cb->subject + cb->start_match,
1257 cb->current_position - cb->start_match, f);
1258
1259 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1260
1261 PCHARSV(cb->subject + cb->current_position,
1262 cb->subject_length - cb->current_position, f);
1263
1264 if (f != NULL) fprintf(f, "\n");
1265
1266 /* Always print appropriate indicators, with callout number if not already
1267 shown. For automatic callouts, show the pattern offset. */
1268
1269 if (cb->callout_number == 255)
1270 {
1271 fprintf(outfile, "%+3d ", cb->pattern_position);
1272 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1273 }
1274 else
1275 {
1276 if (callout_extra) fprintf(outfile, " ");
1277 else fprintf(outfile, "%3d ", cb->callout_number);
1278 }
1279
1280 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1281 fprintf(outfile, "^");
1282
1283 if (post_start > 0)
1284 {
1285 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1286 fprintf(outfile, "^");
1287 }
1288
1289 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1290 fprintf(outfile, " ");
1291
1292 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1293 pbuffer + cb->pattern_position);
1294
1295 fprintf(outfile, "\n");
1296 first_callout = 0;
1297
1298 if (cb->mark != last_callout_mark)
1299 {
1300 fprintf(outfile, "Latest Mark: %s\n",
1301 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1302 last_callout_mark = cb->mark;
1303 }
1304
1305 if (cb->callout_data != NULL)
1306 {
1307 int callout_data = *((int *)(cb->callout_data));
1308 if (callout_data != 0)
1309 {
1310 fprintf(outfile, "Callout data = %d\n", callout_data);
1311 return callout_data;
1312 }
1313 }
1314
1315 return (cb->callout_number != callout_fail_id)? 0 :
1316 (++callout_count >= callout_fail_count)? 1 : 0;
1317 }
1318
1319
1320 /*************************************************
1321 * Local malloc functions *
1322 *************************************************/
1323
1324 /* Alternative malloc function, to test functionality and save the size of a
1325 compiled re, which is the first store request that pcre_compile() makes. The
1326 show_malloc variable is set only during matching. */
1327
1328 static void *new_malloc(size_t size)
1329 {
1330 void *block = malloc(size);
1331 gotten_store = size;
1332 if (first_gotten_store == 0) first_gotten_store = size;
1333 if (show_malloc)
1334 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1335 return block;
1336 }
1337
1338 static void new_free(void *block)
1339 {
1340 if (show_malloc)
1341 fprintf(outfile, "free %p\n", block);
1342 free(block);
1343 }
1344
1345 /* For recursion malloc/free, to test stacking calls */
1346
1347 static void *stack_malloc(size_t size)
1348 {
1349 void *block = malloc(size);
1350 if (show_malloc)
1351 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1352 return block;
1353 }
1354
1355 static void stack_free(void *block)
1356 {
1357 if (show_malloc)
1358 fprintf(outfile, "stack_free %p\n", block);
1359 free(block);
1360 }
1361
1362
1363 /*************************************************
1364 * Call pcre_fullinfo() *
1365 *************************************************/
1366
1367 /* Get one piece of information from the pcre_fullinfo() function. When only
1368 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1369 value, but the code is defensive. */
1370
1371 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1372 {
1373 int rc;
1374
1375 if (use_pcre16)
1376 #ifdef SUPPORT_PCRE16
1377 rc = pcre16_fullinfo(re, study, option, ptr);
1378 #else
1379 rc = PCRE_ERROR_BADMODE;
1380 #endif
1381 else
1382 #ifdef SUPPORT_PCRE8
1383 rc = pcre_fullinfo(re, study, option, ptr);
1384 #else
1385 rc = PCRE_ERROR_BADMODE;
1386 #endif
1387
1388 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1389 use_pcre16? "16" : "", option);
1390 }
1391
1392
1393
1394 /*************************************************
1395 * Swap byte functions *
1396 *************************************************/
1397
1398 /* The following functions swap the bytes of a pcre_uint16
1399 and pcre_uint32 value.
1400
1401 Arguments:
1402 value any number
1403
1404 Returns: the byte swapped value
1405 */
1406
1407 static pcre_uint32
1408 swap_uint32(pcre_uint32 value)
1409 {
1410 return ((value & 0x000000ff) << 24) |
1411 ((value & 0x0000ff00) << 8) |
1412 ((value & 0x00ff0000) >> 8) |
1413 (value >> 24);
1414 }
1415
1416 static pcre_uint16
1417 swap_uint16(pcre_uint16 value)
1418 {
1419 return (value >> 8) | (value << 8);
1420 }
1421
1422
1423
1424 /*************************************************
1425 * Flip bytes in a compiled pattern *
1426 *************************************************/
1427
1428 /* This function is called if the 'F' option was present on a pattern that is
1429 to be written to a file. We flip the bytes of all the integer fields in the
1430 regex data block and the study block. In 16-bit mode this also flips relevant
1431 bytes in the pattern itself. This is to make it possible to test PCRE's
1432 ability to reload byte-flipped patterns, e.g. those compiled on a different
1433 architecture. */
1434
1435 static void
1436 regexflip(pcre *ere, pcre_extra *extra)
1437 {
1438 real_pcre *re = (real_pcre *)ere;
1439 int op;
1440
1441 #ifdef SUPPORT_PCRE16
1442 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1443 int length = re->name_count * re->name_entry_size;
1444 #ifdef SUPPORT_UTF
1445 BOOL utf = (re->options & PCRE_UTF16) != 0;
1446 BOOL utf16_char = FALSE;
1447 #endif /* SUPPORT_UTF */
1448 #endif /* SUPPORT_PCRE16 */
1449
1450 /* Always flip the bytes in the main data block and study blocks. */
1451
1452 re->magic_number = REVERSED_MAGIC_NUMBER;
1453 re->size = swap_uint32(re->size);
1454 re->options = swap_uint32(re->options);
1455 re->flags = swap_uint16(re->flags);
1456 re->top_bracket = swap_uint16(re->top_bracket);
1457 re->top_backref = swap_uint16(re->top_backref);
1458 re->first_char = swap_uint16(re->first_char);
1459 re->req_char = swap_uint16(re->req_char);
1460 re->name_table_offset = swap_uint16(re->name_table_offset);
1461 re->name_entry_size = swap_uint16(re->name_entry_size);
1462 re->name_count = swap_uint16(re->name_count);
1463
1464 if (extra != NULL)
1465 {
1466 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1467 rsd->size = swap_uint32(rsd->size);
1468 rsd->flags = swap_uint32(rsd->flags);
1469 rsd->minlength = swap_uint32(rsd->minlength);
1470 }
1471
1472 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1473 in the name table, if present, and then in the pattern itself. */
1474
1475 #ifdef SUPPORT_PCRE16
1476 if (!use_pcre16) return;
1477
1478 while(TRUE)
1479 {
1480 /* Swap previous characters. */
1481 while (length-- > 0)
1482 {
1483 *ptr = swap_uint16(*ptr);
1484 ptr++;
1485 }
1486 #ifdef SUPPORT_UTF
1487 if (utf16_char)
1488 {
1489 if ((ptr[-1] & 0xfc00) == 0xd800)
1490 {
1491 /* We know that there is only one extra character in UTF-16. */
1492 *ptr = swap_uint16(*ptr);
1493 ptr++;
1494 }
1495 }
1496 utf16_char = FALSE;
1497 #endif /* SUPPORT_UTF */
1498
1499 /* Get next opcode. */
1500
1501 length = 0;
1502 op = *ptr;
1503 *ptr++ = swap_uint16(op);
1504
1505 switch (op)
1506 {
1507 case OP_END:
1508 return;
1509
1510 case OP_CHAR:
1511 case OP_CHARI:
1512 case OP_NOT:
1513 case OP_NOTI:
1514 case OP_STAR:
1515 case OP_MINSTAR:
1516 case OP_PLUS:
1517 case OP_MINPLUS:
1518 case OP_QUERY:
1519 case OP_MINQUERY:
1520 case OP_UPTO:
1521 case OP_MINUPTO:
1522 case OP_EXACT:
1523 case OP_POSSTAR:
1524 case OP_POSPLUS:
1525 case OP_POSQUERY:
1526 case OP_POSUPTO:
1527 case OP_STARI:
1528 case OP_MINSTARI:
1529 case OP_PLUSI:
1530 case OP_MINPLUSI:
1531 case OP_QUERYI:
1532 case OP_MINQUERYI:
1533 case OP_UPTOI:
1534 case OP_MINUPTOI:
1535 case OP_EXACTI:
1536 case OP_POSSTARI:
1537 case OP_POSPLUSI:
1538 case OP_POSQUERYI:
1539 case OP_POSUPTOI:
1540 case OP_NOTSTAR:
1541 case OP_NOTMINSTAR:
1542 case OP_NOTPLUS:
1543 case OP_NOTMINPLUS:
1544 case OP_NOTQUERY:
1545 case OP_NOTMINQUERY:
1546 case OP_NOTUPTO:
1547 case OP_NOTMINUPTO:
1548 case OP_NOTEXACT:
1549 case OP_NOTPOSSTAR:
1550 case OP_NOTPOSPLUS:
1551 case OP_NOTPOSQUERY:
1552 case OP_NOTPOSUPTO:
1553 case OP_NOTSTARI:
1554 case OP_NOTMINSTARI:
1555 case OP_NOTPLUSI:
1556 case OP_NOTMINPLUSI:
1557 case OP_NOTQUERYI:
1558 case OP_NOTMINQUERYI:
1559 case OP_NOTUPTOI:
1560 case OP_NOTMINUPTOI:
1561 case OP_NOTEXACTI:
1562 case OP_NOTPOSSTARI:
1563 case OP_NOTPOSPLUSI:
1564 case OP_NOTPOSQUERYI:
1565 case OP_NOTPOSUPTOI:
1566 #ifdef SUPPORT_UTF
1567 if (utf) utf16_char = TRUE;
1568 #endif
1569 length = OP_lengths16[op] - 1;
1570 break;
1571
1572 case OP_CLASS:
1573 case OP_NCLASS:
1574 /* Skip the character bit map. */
1575 ptr += 32/sizeof(pcre_uint16);
1576 length = 0;
1577 break;
1578
1579 case OP_XCLASS:
1580 /* Reverse the size of the XCLASS instance. */
1581 ptr++;
1582 *ptr = swap_uint16(*ptr);
1583 if (LINK_SIZE > 1)
1584 {
1585 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1586 ptr++;
1587 *ptr = swap_uint16(*ptr);
1588 }
1589 ptr++;
1590
1591 if (LINK_SIZE > 1)
1592 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1593 (1 + LINK_SIZE + 1);
1594 else
1595 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1596
1597 op = *ptr;
1598 *ptr = swap_uint16(op);
1599 if ((op & XCL_MAP) != 0)
1600 {
1601 /* Skip the character bit map. */
1602 ptr += 32/sizeof(pcre_uint16);
1603 length -= 32/sizeof(pcre_uint16);
1604 }
1605 break;
1606
1607 default:
1608 length = OP_lengths16[op] - 1;
1609 break;
1610 }
1611 }
1612 /* Control should never reach here in 16 bit mode. */
1613 #endif /* SUPPORT_PCRE16 */
1614 }
1615
1616
1617
1618 /*************************************************
1619 * Check match or recursion limit *
1620 *************************************************/
1621
1622 static int
1623 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1624 int start_offset, int options, int *use_offsets, int use_size_offsets,
1625 int flag, unsigned long int *limit, int errnumber, const char *msg)
1626 {
1627 int count;
1628 int min = 0;
1629 int mid = 64;
1630 int max = -1;
1631
1632 extra->flags |= flag;
1633
1634 for (;;)
1635 {
1636 *limit = mid;
1637
1638 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1639 use_offsets, use_size_offsets);
1640
1641 if (count == errnumber)
1642 {
1643 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1644 min = mid;
1645 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1646 }
1647
1648 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1649 count == PCRE_ERROR_PARTIAL)
1650 {
1651 if (mid == min + 1)
1652 {
1653 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1654 break;
1655 }
1656 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1657 max = mid;
1658 mid = (min + mid)/2;
1659 }
1660 else break; /* Some other error */
1661 }
1662
1663 extra->flags &= ~flag;
1664 return count;
1665 }
1666
1667
1668
1669 /*************************************************
1670 * Case-independent strncmp() function *
1671 *************************************************/
1672
1673 /*
1674 Arguments:
1675 s first string
1676 t second string
1677 n number of characters to compare
1678
1679 Returns: < 0, = 0, or > 0, according to the comparison
1680 */
1681
1682 static int
1683 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1684 {
1685 while (n--)
1686 {
1687 int c = tolower(*s++) - tolower(*t++);
1688 if (c) return c;
1689 }
1690 return 0;
1691 }
1692
1693
1694
1695 /*************************************************
1696 * Check newline indicator *
1697 *************************************************/
1698
1699 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1700 a message and return 0 if there is no match.
1701
1702 Arguments:
1703 p points after the leading '<'
1704 f file for error message
1705
1706 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1707 */
1708
1709 static int
1710 check_newline(pcre_uint8 *p, FILE *f)
1711 {
1712 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1713 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1714 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1715 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1716 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1717 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1718 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1719 fprintf(f, "Unknown newline type at: <%s\n", p);
1720 return 0;
1721 }
1722
1723
1724
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command line summary to stdout. Which lines appear depends on the
features that were selected when pcretest was compiled. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif

fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n", stdout);
fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1766
1767
1768
1769 /*************************************************
1770 * Main Program *
1771 *************************************************/
1772
1773 /* Read lines from named file or stdin and write to named file or stdout; lines
1774 consist of a regular expression, in delimiters and optionally followed by
1775 options, followed by a set of test data, terminated by an empty line. */
1776
1777 int main(int argc, char **argv)
1778 {
1779 FILE *infile = stdin;
1780 int options = 0;
1781 int study_options = 0;
1782 int default_find_match_limit = FALSE;
1783 int op = 1;
1784 int timeit = 0;
1785 int timeitm = 0;
1786 int showinfo = 0;
1787 int showstore = 0;
1788 int force_study = -1;
1789 int force_study_options = 0;
1790 int quiet = 0;
1791 int size_offsets = 45;
1792 int size_offsets_max;
1793 int *offsets = NULL;
1794 #if !defined NOPOSIX
1795 int posix = 0;
1796 #endif
1797 int debug = 0;
1798 int done = 0;
1799 int all_use_dfa = 0;
1800 int yield = 0;
1801 int stack_size;
1802
1803 pcre_jit_stack *jit_stack = NULL;
1804
1805 /* These vectors store, end-to-end, a list of captured substring names. Assume
1806 that 1024 is plenty long enough for the few names we'll be testing. */
1807
1808 pcre_uchar copynames[1024];
1809 pcre_uchar getnames[1024];
1810
1811 pcre_uchar *copynamesptr;
1812 pcre_uchar *getnamesptr;
1813
1814 /* Get buffers from malloc() so that valgrind will check their misuse when
1815 debugging. They grow automatically when very long lines are read. The 16-bit
1816 buffer (buffer16) is obtained only if needed. */
1817
1818 buffer = (pcre_uint8 *)malloc(buffer_size);
1819 dbuffer = (pcre_uint8 *)malloc(buffer_size);
1820 pbuffer = (pcre_uint8 *)malloc(buffer_size);
1821
1822 /* The outfile variable is static so that new_malloc can use it. */
1823
1824 outfile = stdout;
1825
1826 /* The following _setmode() stuff is some Windows magic that tells its runtime
1827 library to translate CRLF into a single LF character. At least, that's what
1828 I've been told: never having used Windows I take this all on trust. Originally
1829 it set 0x8000, but then I was advised that _O_BINARY was better. */
1830
1831 #if defined(_WIN32) || defined(WIN32)
1832 _setmode( _fileno( stdout ), _O_BINARY );
1833 #endif
1834
1835 /* Scan options */
1836
1837 while (argc > 1 && argv[op][0] == '-')
1838 {
1839 pcre_uint8 *endptr;
1840
1841 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1842 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1843 else if (strcmp(argv[op], "-s+") == 0)
1844 {
1845 force_study = 1;
1846 force_study_options = PCRE_STUDY_JIT_COMPILE;
1847 }
1848 #ifdef SUPPORT_PCRE16
1849 else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1850 #endif
1851
1852 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1853 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1854 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1855 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1856 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1857 #if !defined NODFA
1858 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1859 #endif
1860 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1861 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1862 *endptr == 0))
1863 {
1864 op++;
1865 argc--;
1866 }
1867 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1868 {
1869 int both = argv[op][2] == 0;
1870 int temp;
1871 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1872 *endptr == 0))
1873 {
1874 timeitm = temp;
1875 op++;
1876 argc--;
1877 }
1878 else timeitm = LOOPREPEAT;
1879 if (both) timeit = timeitm;
1880 }
1881 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1882 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1883 *endptr == 0))
1884 {
1885 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1886 printf("PCRE: -S not supported on this OS\n");
1887 exit(1);
1888 #else
1889 int rc;
1890 struct rlimit rlim;
1891 getrlimit(RLIMIT_STACK, &rlim);
1892 rlim.rlim_cur = stack_size * 1024 * 1024;
1893 rc = setrlimit(RLIMIT_STACK, &rlim);
1894 if (rc != 0)
1895 {
1896 printf("PCRE: setrlimit() failed with error %d\n", rc);
1897 exit(1);
1898 }
1899 op++;
1900 argc--;
1901 #endif
1902 }
1903 #if !defined NOPOSIX
1904 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1905 #endif
1906 else if (strcmp(argv[op], "-C") == 0)
1907 {
1908 int rc;
1909 unsigned long int lrc;
1910 printf("PCRE version %s\n", pcre_version());
1911 printf("Compiled with\n");
1912
1913 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1914 are set, either both UTFs are supported or both are not supported. */
1915
1916 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1917 printf(" 8-bit and 16-bit support\n");
1918 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1919 if (rc)
1920 printf(" UTF-8 and UTF-16 support\n");
1921 else
1922 printf(" No UTF-8 or UTF-16 support\n");
1923 #elif defined SUPPORT_PCRE8
1924 printf(" 8-bit support only\n");
1925 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1926 printf(" %sUTF-8 support\n", rc? "" : "No ");
1927 #else
1928 printf(" 16-bit support only\n");
1929 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1930 printf(" %sUTF-16 support\n", rc? "" : "No ");
1931 #endif
1932
1933 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1934 printf(" %sUnicode properties support\n", rc? "" : "No ");
1935 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1936 if (rc)
1937 printf(" Just-in-time compiler support\n");
1938 else
1939 printf(" No just-in-time compiler support\n");
1940 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1941 /* Note that these values are always the ASCII values, even
1942 in EBCDIC environments. CR is 13 and NL is 10. */
1943 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1944 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1945 (rc == -2)? "ANYCRLF" :
1946 (rc == -1)? "ANY" : "???");
1947 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1948 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1949 "all Unicode newlines");
1950 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1951 printf(" Internal link size = %d\n", rc);
1952 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1953 printf(" POSIX malloc threshold = %d\n", rc);
1954 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1955 printf(" Default match limit = %ld\n", lrc);
1956 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1957 printf(" Default recursion depth limit = %ld\n", lrc);
1958 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1959 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1960 goto EXIT;
1961 }
1962 else if (strcmp(argv[op], "-help") == 0 ||
1963 strcmp(argv[op], "--help") == 0)
1964 {
1965 usage();
1966 goto EXIT;
1967 }
1968 else
1969 {
1970 printf("** Unknown or malformed option %s\n", argv[op]);
1971 usage();
1972 yield = 1;
1973 goto EXIT;
1974 }
1975 op++;
1976 argc--;
1977 }
1978
1979 /* Get the store for the offsets vector, and remember what it was */
1980
1981 size_offsets_max = size_offsets;
1982 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1983 if (offsets == NULL)
1984 {
1985 printf("** Failed to get %d bytes of memory for offsets vector\n",
1986 (int)(size_offsets_max * sizeof(int)));
1987 yield = 1;
1988 goto EXIT;
1989 }
1990
1991 /* Sort out the input and output files */
1992
1993 if (argc > 1)
1994 {
1995 infile = fopen(argv[op], INPUT_MODE);
1996 if (infile == NULL)
1997 {
1998 printf("** Failed to open %s\n", argv[op]);
1999 yield = 1;
2000 goto EXIT;
2001 }
2002 }
2003
2004 if (argc > 2)
2005 {
2006 outfile = fopen(argv[op+1], OUTPUT_MODE);
2007 if (outfile == NULL)
2008 {
2009 printf("** Failed to open %s\n", argv[op+1]);
2010 yield = 1;
2011 goto EXIT;
2012 }
2013 }
2014
2015 /* Set alternative malloc function */
2016
2017 #ifdef SUPPORT_PCRE8
2018 pcre_malloc = new_malloc;
2019 pcre_free = new_free;
2020 pcre_stack_malloc = stack_malloc;
2021 pcre_stack_free = stack_free;
2022 #endif
2023
2024 #ifdef SUPPORT_PCRE16
2025 pcre16_malloc = new_malloc;
2026 pcre16_free = new_free;
2027 pcre16_stack_malloc = stack_malloc;
2028 pcre16_stack_free = stack_free;
2029 #endif
2030
2031 /* Heading line unless quiet, then prompt for first regex if stdin */
2032
2033 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
2034
2035 /* Main loop */
2036
2037 while (!done)
2038 {
2039 pcre *re = NULL;
2040 pcre_extra *extra = NULL;
2041
2042 #if !defined NOPOSIX /* There are still compilers that require no indent */
2043 regex_t preg;
2044 int do_posix = 0;
2045 #endif
2046
2047 const char *error;
2048 pcre_uint8 *markptr;
2049 pcre_uint8 *p, *pp, *ppp;
2050 pcre_uint8 *to_file = NULL;
2051 const pcre_uint8 *tables = NULL;
2052 unsigned long int true_size, true_study_size = 0;
2053 size_t size, regex_gotten_store;
2054 int do_allcaps = 0;
2055 int do_mark = 0;
2056 int do_study = 0;
2057 int no_force_study = 0;
2058 int do_debug = debug;
2059 int do_G = 0;
2060 int do_g = 0;
2061 int do_showinfo = showinfo;
2062 int do_showrest = 0;
2063 int do_showcaprest = 0;
2064 int do_flip = 0;
2065 int erroroffset, len, delimiter, poffset;
2066
2067 use_utf = 0;
2068 debug_lengths = 1;
2069
2070 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2071 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2072 fflush(outfile);
2073
2074 p = buffer;
2075 while (isspace(*p)) p++;
2076 if (*p == 0) continue;
2077
2078 /* See if the pattern is to be loaded pre-compiled from a file. */
2079
2080 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2081 {
2082 unsigned long int magic, get_options;
2083 pcre_uint8 sbuf[8];
2084 FILE *f;
2085
2086 p++;
2087 pp = p + (int)strlen((char *)p);
2088 while (isspace(pp[-1])) pp--;
2089 *pp = 0;
2090
2091 f = fopen((char *)p, "rb");
2092 if (f == NULL)
2093 {
2094 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2095 continue;
2096 }
2097
2098 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2099
2100 true_size =
2101 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2102 true_study_size =
2103 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2104
2105 re = (real_pcre *)new_malloc(true_size);
2106 regex_gotten_store = first_gotten_store;
2107
2108 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2109
2110 magic = ((real_pcre *)re)->magic_number;
2111 if (magic != MAGIC_NUMBER)
2112 {
2113 if (swap_uint32(magic) == MAGIC_NUMBER)
2114 {
2115 do_flip = 1;
2116 }
2117 else
2118 {
2119 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2120 fclose(f);
2121 continue;
2122 }
2123 }
2124
2125 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2126 do_flip? " (byte-inverted)" : "", p);
2127
2128 /* Now see if there is any following study data. */
2129
2130 if (true_study_size != 0)
2131 {
2132 pcre_study_data *psd;
2133
2134 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2135 extra->flags = PCRE_EXTRA_STUDY_DATA;
2136
2137 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2138 extra->study_data = psd;
2139
2140 if (fread(psd, 1, true_study_size, f) != true_study_size)
2141 {
2142 FAIL_READ:
2143 fprintf(outfile, "Failed to read data from %s\n", p);
2144 if (extra != NULL)
2145 {
2146 PCRE_FREE_STUDY(extra);
2147 }
2148 if (re != NULL) new_free(re);
2149 fclose(f);
2150 continue;
2151 }
2152 fprintf(outfile, "Study data loaded from %s\n", p);
2153 do_study = 1; /* To get the data output if requested */
2154 }
2155 else fprintf(outfile, "No study data\n");
2156
2157 /* Flip the necessary bytes. */
2158 if (do_flip)
2159 {
2160 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2161 }
2162
2163 /* Need to know if UTF-8 for printing data strings */
2164
2165 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2166 use_utf = (get_options & PCRE_UTF8) != 0;
2167
2168 fclose(f);
2169 goto SHOW_INFO;
2170 }
2171
2172 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2173 the pattern; if it isn't complete, read more. */
2174
2175 delimiter = *p++;
2176
2177 if (isalnum(delimiter) || delimiter == '\\')
2178 {
2179 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2180 goto SKIP_DATA;
2181 }
2182
2183 pp = p;
2184 poffset = (int)(p - buffer);
2185
2186 for(;;)
2187 {
2188 while (*pp != 0)
2189 {
2190 if (*pp == '\\' && pp[1] != 0) pp++;
2191 else if (*pp == delimiter) break;
2192 pp++;
2193 }
2194 if (*pp != 0) break;
2195 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2196 {
2197 fprintf(outfile, "** Unexpected EOF\n");
2198 done = 1;
2199 goto CONTINUE;
2200 }
2201 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2202 }
2203
2204 /* The buffer may have moved while being extended; reset the start of data
2205 pointer to the correct relative point in the buffer. */
2206
2207 p = buffer + poffset;
2208
2209 /* If the first character after the delimiter is backslash, make
2210 the pattern end with backslash. This is purely to provide a way
2211 of testing for the error message when a pattern ends with backslash. */
2212
2213 if (pp[1] == '\\') *pp++ = '\\';
2214
2215 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2216 for callouts. */
2217
2218 *pp++ = 0;
2219 strcpy((char *)pbuffer, (char *)p);
2220
2221 /* Look for options after final delimiter */
2222
2223 options = 0;
2224 study_options = 0;
2225 log_store = showstore; /* default from command line */
2226
2227 while (*pp != 0)
2228 {
2229 switch (*pp++)
2230 {
2231 case 'f': options |= PCRE_FIRSTLINE; break;
2232 case 'g': do_g = 1; break;
2233 case 'i': options |= PCRE_CASELESS; break;
2234 case 'm': options |= PCRE_MULTILINE; break;
2235 case 's': options |= PCRE_DOTALL; break;
2236 case 'x': options |= PCRE_EXTENDED; break;
2237
2238 case '+':
2239 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2240 break;
2241
2242 case '=': do_allcaps = 1; break;
2243 case 'A': options |= PCRE_ANCHORED; break;
2244 case 'B': do_debug = 1; break;
2245 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2246 case 'D': do_debug = do_showinfo = 1; break;
2247 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2248 case 'F': do_flip = 1; break;
2249 case 'G': do_G = 1; break;
2250 case 'I': do_showinfo = 1; break;
2251 case 'J': options |= PCRE_DUPNAMES; break;
2252 case 'K': do_mark = 1; break;
2253 case 'M': log_store = 1; break;
2254 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2255
2256 #if !defined NOPOSIX
2257 case 'P': do_posix = 1; break;
2258 #endif
2259
2260 case 'S':
2261 if (do_study == 0)
2262 {
2263 do_study = 1;
2264 if (*pp == '+')
2265 {
2266 study_options |= PCRE_STUDY_JIT_COMPILE;
2267 pp++;
2268 }
2269 }
2270 else
2271 {
2272 do_study = 0;
2273 no_force_study = 1;
2274 }
2275 break;
2276
2277 case 'U': options |= PCRE_UNGREEDY; break;
2278 case 'W': options |= PCRE_UCP; break;
2279 case 'X': options |= PCRE_EXTRA; break;
2280 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2281 case 'Z': debug_lengths = 0; break;
2282 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2283 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2284
2285 case 'T':
2286 switch (*pp++)
2287 {
2288 case '0': tables = tables0; break;
2289 case '1': tables = tables1; break;
2290
2291 case '\r':
2292 case '\n':
2293 case ' ':
2294 case 0:
2295 fprintf(outfile, "** Missing table number after /T\n");
2296 goto SKIP_DATA;
2297
2298 default:
2299 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2300 goto SKIP_DATA;
2301 }
2302 break;
2303
2304 case 'L':
2305 ppp = pp;
2306 /* The '\r' test here is so that it works on Windows. */
2307 /* The 0 (NUL) test is just in case this is an unterminated line. */
2308 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2309 *ppp = 0;
2310 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2311 {
2312 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2313 goto SKIP_DATA;
2314 }
2315 locale_set = 1;
2316 tables = pcre_maketables();
2317 pp = ppp;
2318 break;
2319
2320 case '>':
2321 to_file = pp;
2322 while (*pp != 0) pp++;
2323 while (isspace(pp[-1])) pp--;
2324 *pp = 0;
2325 break;
2326
2327 case '<':
2328 {
2329 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2330 {
2331 options |= PCRE_JAVASCRIPT_COMPAT;
2332 pp += 3;
2333 }
2334 else
2335 {
2336 int x = check_newline(pp, outfile);
2337 if (x == 0) goto SKIP_DATA;
2338 options |= x;
2339 while (*pp++ != '>');
2340 }
2341 }
2342 break;
2343
2344 case '\r': /* So that it works in Windows */
2345 case '\n':
2346 case ' ':
2347 break;
2348
2349 default:
2350 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2351 goto SKIP_DATA;
2352 }
2353 }
2354
2355 /* Handle compiling via the POSIX interface, which doesn't support the
2356 timing, showing, or debugging options, nor the ability to pass over
2357 local character tables. Neither does it have 16-bit support. */
2358
2359 #if !defined NOPOSIX
2360 if (posix || do_posix)
2361 {
2362 int rc;
2363 int cflags = 0;
2364
2365 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2366 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2367 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2368 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2369 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2370 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2371 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2372
2373 first_gotten_store = 0;
2374 rc = regcomp(&preg, (char *)p, cflags);
2375
2376 /* Compilation failed; go back for another re, skipping to blank line
2377 if non-interactive. */
2378
2379 if (rc != 0)
2380 {
2381 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2382 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2383 goto SKIP_DATA;
2384 }
2385 }
2386
2387 /* Handle compiling via the native interface */
2388
2389 else
2390 #endif /* !defined NOPOSIX */
2391
2392 {
2393 unsigned long int get_options;
2394
2395 /* In 16-bit mode, convert the input. */
2396
2397 #ifdef SUPPORT_PCRE16
2398 if (use_pcre16)
2399 {
2400 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2401 {
2402 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2403 "converted to UTF-16\n");
2404 goto SKIP_DATA;
2405 }
2406 p = (pcre_uint8 *)buffer16;
2407 }
2408 #endif
2409
2410 /* Compile many times when timing */
2411
2412 if (timeit > 0)
2413 {
2414 register int i;
2415 clock_t time_taken;
2416 clock_t start_time = clock();
2417 for (i = 0; i < timeit; i++)
2418 {
2419 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2420 if (re != NULL) free(re);
2421 }
2422 time_taken = clock() - start_time;
2423 fprintf(outfile, "Compile time %.4f milliseconds\n",
2424 (((double)time_taken * 1000.0) / (double)timeit) /
2425 (double)CLOCKS_PER_SEC);
2426 }
2427
2428 first_gotten_store = 0;
2429 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2430
2431 /* Compilation failed; go back for another re, skipping to blank line
2432 if non-interactive. */
2433
2434 if (re == NULL)
2435 {
2436 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2437 SKIP_DATA:
2438 if (infile != stdin)
2439 {
2440 for (;;)
2441 {
2442 if (extend_inputline(infile, buffer, NULL) == NULL)
2443 {
2444 done = 1;
2445 goto CONTINUE;
2446 }
2447 len = (int)strlen((char *)buffer);
2448 while (len > 0 && isspace(buffer[len-1])) len--;
2449 if (len == 0) break;
2450 }
2451 fprintf(outfile, "\n");
2452 }
2453 goto CONTINUE;
2454 }
2455
2456 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2457 within the regex; check for this so that we know how to process the data
2458 lines. */
2459
2460 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2461 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2462
2463 /* Extract the size for possible writing before possibly flipping it,
2464 and remember the store that was got. */
2465
2466 true_size = ((real_pcre *)re)->size;
2467 regex_gotten_store = first_gotten_store;
2468
2469 /* Output code size information if requested */
2470
2471 if (log_store)
2472 fprintf(outfile, "Memory allocation (code space): %d\n",
2473 (int)(first_gotten_store -
2474 sizeof(real_pcre) -
2475 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2476
2477 /* If -s or /S was present, study the regex to generate additional info to
2478 help with the matching, unless the pattern has the SS option, which
2479 suppresses the effect of /S (used for a few test patterns where studying is
2480 never sensible). */
2481
2482 if (do_study || (force_study >= 0 && !no_force_study))
2483 {
2484 if (timeit > 0)
2485 {
2486 register int i;
2487 clock_t time_taken;
2488 clock_t start_time = clock();
2489 for (i = 0; i < timeit; i++)
2490 {
2491 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2492 }
2493 time_taken = clock() - start_time;
2494 if (extra != NULL)
2495 {
2496 PCRE_FREE_STUDY(extra);
2497 }
2498 fprintf(outfile, " Study time %.4f milliseconds\n",
2499 (((double)time_taken * 1000.0) / (double)timeit) /
2500 (double)CLOCKS_PER_SEC);
2501 }
2502 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2503 if (error != NULL)
2504 fprintf(outfile, "Failed to study: %s\n", error);
2505 else if (extra != NULL)
2506 {
2507 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2508 if (log_store)
2509 {
2510 size_t jitsize;
2511 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2512 if (jitsize != 0)
2513 fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2514 }
2515 }
2516 }
2517
2518 /* If /K was present, we set up for handling MARK data. */
2519
2520 if (do_mark)
2521 {
2522 if (extra == NULL)
2523 {
2524 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2525 extra->flags = 0;
2526 }
2527 extra->mark = &markptr;
2528 extra->flags |= PCRE_EXTRA_MARK;
2529 }
2530
2531 /* Extract and display information from the compiled data if required. */
2532
2533 SHOW_INFO:
2534
2535 if (do_debug)
2536 {
2537 fprintf(outfile, "------------------------------------------------------------------\n");
2538 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2539 if (use_pcre16)
2540 pcre16_printint(re, outfile, debug_lengths);
2541 else
2542 pcre_printint(re, outfile, debug_lengths);
2543 #elif defined SUPPORT_PCRE8
2544 pcre_printint(re, outfile, debug_lengths);
2545 #else
2546 pcre16_printint(re, outfile, debug_lengths);
2547 #endif
2548 }
2549
2550 /* We already have the options in get_options (see above) */
2551
2552 if (do_showinfo)
2553 {
2554 unsigned long int all_options;
2555 #if !defined NOINFOCHECK
2556 int old_first_char, old_options, old_count;
2557 #endif
2558 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2559 hascrorlf;
2560 int nameentrysize, namecount;
2561 const pcre_uchar *nametable;
2562
2563 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2564 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2565 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2566 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2567 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2568 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2569 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2570 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2571 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2572 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2573 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2574
2575 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2576 that it gives the same results as the new function. */
2577
2578 #if !defined NOINFOCHECK
2579 if (!use_pcre16)
2580 {
2581 old_count = pcre_info(re, &old_options, &old_first_char);
2582 if (count < 0) fprintf(outfile,
2583 "Error %d from pcre_info()\n", count);
2584 else
2585 {
2586 if (old_count != count) fprintf(outfile,
2587 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2588 old_count);
2589
2590 if (old_first_char != first_char) fprintf(outfile,
2591 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2592 first_char, old_first_char);
2593
2594 if (old_options != (int)get_options) fprintf(outfile,
2595 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2596 get_options, old_options);
2597 }
2598 }
2599 #endif
2600
2601 if (size != regex_gotten_store) fprintf(outfile,
2602 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2603 (int)size, (int)regex_gotten_store);
2604
2605 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2606 if (backrefmax > 0)
2607 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2608
2609 if (namecount > 0)
2610 {
2611 fprintf(outfile, "Named capturing subpatterns:\n");
2612 while (namecount-- > 0)
2613 {
2614 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2615 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2616 GET2(nametable, 0));
2617 nametable += nameentrysize;
2618 }
2619 }
2620
2621 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2622 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2623
2624 all_options = ((real_pcre *)re)->options;
2625 if (do_flip) all_options = swap_uint32(all_options);
2626
2627 if (get_options == 0) fprintf(outfile, "No options\n");
2628 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2629 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2630 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2631 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2632 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2633 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2634 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2635 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2636 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2637 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2638 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2639 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2640 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2641 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2642 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2643 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2644 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2645 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2646
2647 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2648
2649 switch (get_options & PCRE_NEWLINE_BITS)
2650 {
2651 case PCRE_NEWLINE_CR:
2652 fprintf(outfile, "Forced newline sequence: CR\n");
2653 break;
2654
2655 case PCRE_NEWLINE_LF:
2656 fprintf(outfile, "Forced newline sequence: LF\n");
2657 break;
2658
2659 case PCRE_NEWLINE_CRLF:
2660 fprintf(outfile, "Forced newline sequence: CRLF\n");
2661 break;
2662
2663 case PCRE_NEWLINE_ANYCRLF:
2664 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2665 break;
2666
2667 case PCRE_NEWLINE_ANY:
2668 fprintf(outfile, "Forced newline sequence: ANY\n");
2669 break;
2670
2671 default:
2672 break;
2673 }
2674
2675 if (first_char == -1)
2676 {
2677 fprintf(outfile, "First char at start or follows newline\n");
2678 }
2679 else if (first_char < 0)
2680 {
2681 fprintf(outfile, "No first char\n");
2682 }
2683 else
2684 {
2685 const char *caseless =
2686 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2687 "" : " (caseless)";
2688
2689 if (PRINTOK(first_char))
2690 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2691 else
2692 {
2693 fprintf(outfile, "First char = ");
2694 pchar(first_char, outfile);
2695 fprintf(outfile, "%s\n", caseless);
2696 }
2697 }
2698
2699 if (need_char < 0)
2700 {
2701 fprintf(outfile, "No need char\n");
2702 }
2703 else
2704 {
2705 const char *caseless =
2706 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2707 "" : " (caseless)";
2708
2709 if (PRINTOK(need_char))
2710 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2711 else
2712 {
2713 fprintf(outfile, "Need char = ");
2714 pchar(need_char, outfile);
2715 fprintf(outfile, "%s\n", caseless);
2716 }
2717 }
2718
2719 /* Don't output study size; at present it is in any case a fixed
2720 value, but it varies, depending on the computer architecture, and
2721 so messes up the test suite. (And with the /F option, it might be
2722 flipped.) If study was forced by an external -s, don't show this
2723 information unless -i or -d was also present. This means that, except
2724 when auto-callouts are involved, the output from runs with and without
2725 -s should be identical. */
2726
2727 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2728 {
2729 if (extra == NULL)
2730 fprintf(outfile, "Study returned NULL\n");
2731 else
2732 {
2733 pcre_uint8 *start_bits = NULL;
2734 int minlength;
2735
2736 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2737 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2738
2739 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2740 if (start_bits == NULL)
2741 fprintf(outfile, "No set of starting bytes\n");
2742 else
2743 {
2744 int i;
2745 int c = 24;
2746 fprintf(outfile, "Starting byte set: ");
2747 for (i = 0; i < 256; i++)
2748 {
2749 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2750 {
2751 if (c > 75)
2752 {
2753 fprintf(outfile, "\n ");
2754 c = 2;
2755 }
2756 if (PRINTOK(i) && i != ' ')
2757 {
2758 fprintf(outfile, "%c ", i);
2759 c += 2;
2760 }
2761 else
2762 {
2763 fprintf(outfile, "\\x%02x ", i);
2764 c += 5;
2765 }
2766 }
2767 }
2768 fprintf(outfile, "\n");
2769 }
2770 }
2771
2772 /* Show this only if the JIT was set by /S, not by -s. */
2773
2774 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2775 {
2776 int jit;
2777 new_info(re, extra, PCRE_INFO_JIT, &jit);
2778 if (jit)
2779 fprintf(outfile, "JIT study was successful\n");
2780 else
2781 #ifdef SUPPORT_JIT
2782 fprintf(outfile, "JIT study was not successful\n");
2783 #else
2784 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2785 #endif
2786 }
2787 }
2788 }
2789
2790 /* If the '>' option was present, we write out the regex to a file, and
2791 that is all. The first 8 bytes of the file are the regex length and then
2792 the study length, in big-endian order. */
2793
2794 if (to_file != NULL)
2795 {
2796 FILE *f = fopen((char *)to_file, "wb");
2797 if (f == NULL)
2798 {
2799 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2800 }
2801 else
2802 {
2803 pcre_uint8 sbuf[8];
2804
2805 if (do_flip) regexflip(re, extra);
2806 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2807 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2808 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2809 sbuf[3] = (pcre_uint8)((true_size) & 255);
2810 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2811 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2812 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2813 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2814
2815 if (fwrite(sbuf, 1, 8, f) < 8 ||
2816 fwrite(re, 1, true_size, f) < true_size)
2817 {
2818 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2819 }
2820 else
2821 {
2822 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2823
2824 /* If there is study data, write it. */
2825
2826 if (extra != NULL)
2827 {
2828 if (fwrite(extra->study_data, 1, true_study_size, f) <
2829 true_study_size)
2830 {
2831 fprintf(outfile, "Write error on %s: %s\n", to_file,
2832 strerror(errno));
2833 }
2834 else fprintf(outfile, "Study data written to %s\n", to_file);
2835 }
2836 }
2837 fclose(f);
2838 }
2839
2840 new_free(re);
2841 if (extra != NULL)
2842 {
2843 PCRE_FREE_STUDY(extra);
2844 }
2845 if (locale_set)
2846 {
2847 new_free((void *)tables);
2848 setlocale(LC_CTYPE, "C");
2849 locale_set = 0;
2850 }
2851 continue; /* With next regex */
2852 }
2853 } /* End of non-POSIX compile */
2854
2855 /* Read data lines and test them */
2856
2857 for (;;)
2858 {
2859 pcre_uint8 *q;
2860 pcre_uint8 *bptr;
2861 int *use_offsets = offsets;
2862 int use_size_offsets = size_offsets;
2863 int callout_data = 0;
2864 int callout_data_set = 0;
2865 int count, c;
2866 int copystrings = 0;
2867 int find_match_limit = default_find_match_limit;
2868 int getstrings = 0;
2869 int getlist = 0;
2870 int gmatched = 0;
2871 int start_offset = 0;
2872 int start_offset_sign = 1;
2873 int g_notempty = 0;
2874 int use_dfa = 0;
2875
2876 options = 0;
2877
2878 *copynames = 0;
2879 *getnames = 0;
2880
2881 copynamesptr = copynames;
2882 getnamesptr = getnames;
2883
2884 pcre_callout = callout;
2885 first_callout = 1;
2886 last_callout_mark = NULL;
2887 callout_extra = 0;
2888 callout_count = 0;
2889 callout_fail_count = 999999;
2890 callout_fail_id = -1;
2891 show_malloc = 0;
2892
2893 if (extra != NULL) extra->flags &=
2894 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2895
2896 len = 0;
2897 for (;;)
2898 {
2899 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2900 {
2901 if (len > 0) /* Reached EOF without hitting a newline */
2902 {
2903 fprintf(outfile, "\n");
2904 break;
2905 }
2906 done = 1;
2907 goto CONTINUE;
2908 }
2909 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2910 len = (int)strlen((char *)buffer);
2911 if (buffer[len-1] == '\n') break;
2912 }
2913
2914 while (len > 0 && isspace(buffer[len-1])) len--;
2915 buffer[len] = 0;
2916 if (len == 0) break;
2917
2918 p = buffer;
2919 while (isspace(*p)) p++;
2920
2921 bptr = q = dbuffer;
2922 while ((c = *p++) != 0)
2923 {
2924 int i = 0;
2925 int n = 0;
2926
2927 if (c == '\\') switch ((c = *p++))
2928 {
2929 case 'a': c = 7; break;
2930 case 'b': c = '\b'; break;
2931 case 'e': c = 27; break;
2932 case 'f': c = '\f'; break;
2933 case 'n': c = '\n'; break;
2934 case 'r': c = '\r'; break;
2935 case 't': c = '\t'; break;
2936 case 'v': c = '\v'; break;
2937
2938 case '0': case '1': case '2': case '3':
2939 case '4': case '5': case '6': case '7':
2940 c -= '0';
2941 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2942 c = c * 8 + *p++ - '0';
2943
2944 #if !defined NOUTF8
2945 if (use_utf && c > 255)
2946 {
2947 pcre_uint8 buff8[8];
2948 int ii, utn;
2949 utn = ord2utf8(c, buff8);
2950 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2951 c = buff8[ii]; /* Last byte */
2952 }
2953 #endif
2954 break;
2955
2956 case 'x':
2957
2958 /* Handle \x{..} specially - new Perl thing for utf8 */
2959
2960 #if !defined NOUTF8
2961 if (*p == '{')
2962 {
2963 pcre_uint8 *pt = p;
2964 c = 0;
2965
2966 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2967 when isxdigit() is a macro that refers to its argument more than
2968 once. This is banned by the C Standard, but apparently happens in at
2969 least one MacOS environment. */
2970
2971 for (pt++; isxdigit(*pt); pt++)
2972 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2973 if (*pt == '}')
2974 {
2975 pcre_uint8 buff8[8];
2976 int ii, utn;
2977 if (use_utf)
2978 {
2979 utn = ord2utf8(c, buff8);
2980 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2981 c = buff8[ii]; /* Last byte */
2982 }
2983 else
2984 {
2985 if (c > 255)
2986 {
2987 if (use_pcre16)
2988 fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
2989 "** Because its input is first processed as 8-bit, pcretest "
2990 "does not\n** support such characters in 16-bit mode when "
2991 "UTF-16 is not set.\n", c);
2992 else
2993 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
2994 "and UTF-8 mode is not enabled.\n", c);
2995
2996 fprintf(outfile, "** Truncation will probably give the wrong "
2997 "result.\n");
2998 }
2999 }
3000 p = pt + 1;
3001 break;
3002 }
3003 /* Not correct form; fall through */
3004 }
3005 #endif
3006
3007 /* Ordinary \x */
3008
3009 c = 0;
3010 while (i++ < 2 && isxdigit(*p))
3011 {
3012 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3013 p++;
3014 }
3015 break;
3016
3017 case 0: /* \ followed by EOF allows for an empty line */
3018 p--;
3019 continue;
3020
3021 case '>':
3022 if (*p == '-')
3023 {
3024 start_offset_sign = -1;
3025 p++;
3026 }
3027 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3028 start_offset *= start_offset_sign;
3029 continue;
3030
3031 case 'A': /* Option setting */
3032 options |= PCRE_ANCHORED;
3033 continue;
3034
3035 case 'B':
3036 options |= PCRE_NOTBOL;
3037 continue;
3038
3039 case 'C':
3040 if (isdigit(*p)) /* Set copy string */
3041 {
3042 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3043 copystrings |= 1 << n;
3044 }
3045 else if (isalnum(*p))
3046 {
3047 pcre_uchar *npp = copynamesptr;
3048 while (isalnum(*p)) *npp++ = *p++;
3049 *npp++ = 0;
3050 *npp = 0;
3051 n = pcre_get_stringnumber(re, (char *)copynamesptr);
3052 if (n < 0)
3053 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
3054 copynamesptr = npp;
3055 }
3056 else if (*p == '+')
3057 {
3058 callout_extra = 1;
3059 p++;
3060 }
3061 else if (*p == '-')
3062 {
3063 pcre_callout = NULL;
3064 p++;
3065 }
3066 else if (*p == '!')
3067 {
3068 callout_fail_id = 0;
3069 p++;
3070 while(isdigit(*p))
3071 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3072 callout_fail_count = 0;
3073 if (*p == '!')
3074 {
3075 p++;
3076 while(isdigit(*p))
3077 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3078 }
3079 }
3080 else if (*p == '*')
3081 {
3082 int sign = 1;
3083 callout_data = 0;
3084 if (*(++p) == '-') { sign = -1; p++; }
3085 while(isdigit(*p))
3086 callout_data = callout_data * 10 + *p++ - '0';
3087 callout_data *= sign;
3088 callout_data_set = 1;
3089 }
3090 continue;
3091
3092 #if !defined NODFA
3093 case 'D':
3094 #if !defined NOPOSIX
3095 if (posix || do_posix)
3096 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3097 else
3098 #endif
3099 use_dfa = 1;
3100 continue;
3101 #endif
3102
3103 #if !defined NODFA
3104 case 'F':
3105 options |= PCRE_DFA_SHORTEST;
3106 continue;
3107 #endif
3108
3109 case 'G':
3110 if (isdigit(*p))
3111 {
3112 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3113 getstrings |= 1 << n;
3114 }
3115 else if (isalnum(*p))
3116 {
3117 pcre_uchar *npp = getnamesptr;
3118 while (isalnum(*p)) *npp++ = *p++;
3119 *npp++ = 0;
3120 *npp = 0;
3121 n = pcre_get_stringnumber(re, (char *)getnamesptr);
3122 if (n < 0)
3123 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
3124 getnamesptr = npp;
3125 }
3126 continue;
3127
3128 case 'J':
3129 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3130 if (extra != NULL
3131 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3132 && extra->executable_jit != NULL)
3133 {
3134 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3135 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3136 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3137 }
3138 continue;
3139
3140 case 'L':
3141 getlist = 1;
3142 continue;
3143
3144 case 'M':
3145 find_match_limit = 1;
3146 continue;
3147
3148 case 'N':
3149 if ((options & PCRE_NOTEMPTY) != 0)
3150 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3151 else
3152 options |= PCRE_NOTEMPTY;
3153 continue;
3154
3155 case 'O':
3156 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3157 if (n > size_offsets_max)
3158 {
3159 size_offsets_max = n;
3160 free(offsets);
3161 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3162 if (offsets == NULL)
3163 {
3164 printf("** Failed to get %d bytes of memory for offsets vector\n",
3165 (int)(size_offsets_max * sizeof(int)));
3166 yield = 1;
3167 goto EXIT;
3168 }
3169 }
3170 use_size_offsets = n;
3171 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3172 continue;
3173
3174 case 'P':
3175 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3176 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3177 continue;
3178
3179 case 'Q':
3180 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3181 if (extra == NULL)
3182 {
3183 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3184 extra->flags = 0;
3185 }
3186 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3187 extra->match_limit_recursion = n;
3188 continue;
3189
3190 case 'q':
3191 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3192 if (extra == NULL)
3193 {
3194 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3195 extra->flags = 0;
3196 }
3197 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3198 extra->match_limit = n;
3199 continue;
3200
3201 #if !defined NODFA
3202 case 'R':
3203 options |= PCRE_DFA_RESTART;
3204 continue;
3205 #endif
3206
3207 case 'S':
3208 show_malloc = 1;
3209 continue;
3210
3211 case 'Y':
3212 options |= PCRE_NO_START_OPTIMIZE;
3213 continue;
3214
3215 case 'Z':
3216 options |= PCRE_NOTEOL;
3217 continue;
3218
3219 case '?':
3220 options |= PCRE_NO_UTF8_CHECK;
3221 continue;
3222
3223 case '<':
3224 {
3225 int x = check_newline(p, outfile);
3226 if (x == 0) goto NEXT_DATA;
3227 options |= x;
3228 while (*p++ != '>');
3229 }
3230 continue;
3231 }
3232 *q++ = c;
3233 }
3234 *q = 0;
3235 len = (int)(q - dbuffer);
3236
3237 /* Move the data to the end of the buffer so that a read over the end of
3238 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3239 we are using the POSIX interface, we must include the terminating zero. */
3240
3241 #if !defined NOPOSIX
3242 if (posix || do_posix)
3243 {
3244 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3245 bptr += buffer_size - len - 1;
3246 }
3247 else
3248 #endif
3249 {
3250 memmove(bptr + buffer_size - len, bptr, len);
3251 bptr += buffer_size - len;
3252 }
3253
3254 if ((all_use_dfa || use_dfa) && find_match_limit)
3255 {
3256 printf("**Match limit not relevant for DFA matching: ignored\n");
3257 find_match_limit = 0;
3258 }
3259
3260 /* Handle matching via the POSIX interface, which does not
3261 support timing or playing with the match limit or callout data. */
3262
3263 #if !defined NOPOSIX
3264 if (posix || do_posix)
3265 {
3266 int rc;
3267 int eflags = 0;
3268 regmatch_t *pmatch = NULL;
3269 if (use_size_offsets > 0)
3270 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3271 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3272 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3273 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3274
3275 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3276
3277 if (rc != 0)
3278 {
3279 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3280 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3281 }
3282 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3283 != 0)
3284 {
3285 fprintf(outfile, "Matched with REG_NOSUB\n");
3286 }
3287 else
3288 {
3289 size_t i;
3290 for (i = 0; i < (size_t)use_size_offsets; i++)
3291 {
3292 if (pmatch[i].rm_so >= 0)
3293 {
3294 fprintf(outfile, "%2d: ", (int)i);
3295 PCHARSV(dbuffer + pmatch[i].rm_so,
3296 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3297 fprintf(outfile, "\n");
3298 if (do_showcaprest || (i == 0 && do_showrest))
3299 {
3300 fprintf(outfile, "%2d+ ", (int)i);
3301 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3302 outfile);
3303 fprintf(outfile, "\n");
3304 }
3305 }
3306 }
3307 }
3308 free(pmatch);
3309 goto NEXT_DATA;
3310 }
3311
3312 #endif /* !defined NOPOSIX */
3313
3314 /* Handle matching via the native interface - repeats for /g and /G */
3315
3316 #ifdef SUPPORT_PCRE16
3317 if (use_pcre16)
3318 {
3319 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3320 if (len < 0)
3321 {
3322 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3323 "converted to UTF-16\n");
3324 goto NEXT_DATA;
3325 }
3326 bptr = (pcre_uint8 *)buffer16;
3327 }
3328 #endif
3329
3330 for (;; gmatched++) /* Loop for /g or /G */
3331 {
3332 markptr = NULL;
3333
3334 if (timeitm > 0)
3335 {
3336 register int i;
3337 clock_t time_taken;
3338 clock_t start_time = clock();
3339
3340 #if !defined NODFA
3341 if (all_use_dfa || use_dfa)
3342 {
3343 int workspace[1000];
3344 for (i = 0; i < timeitm; i++)
3345 {
3346 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3347 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3348 (sizeof(workspace)/sizeof(int)));
3349 }
3350 }
3351 else
3352 #endif
3353
3354 for (i = 0; i < timeitm; i++)
3355 {
3356 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3357 (options | g_notempty), use_offsets, use_size_offsets);
3358 }
3359 time_taken = clock() - start_time;
3360 fprintf(outfile, "Execute time %.4f milliseconds\n",
3361 (((double)time_taken * 1000.0) / (double)timeitm) /
3362 (double)CLOCKS_PER_SEC);
3363 }
3364
3365 /* If find_match_limit is set, we want to do repeated matches with
3366 varying limits in order to find the minimum value for the match limit and
3367 for the recursion limit. The match limits are relevant only to the normal
3368 running of pcre_exec(), so disable the JIT optimization. This makes it
3369 possible to run the same set of tests with and without JIT externally
3370 requested. */
3371
3372 if (find_match_limit)
3373 {
3374 if (extra == NULL)
3375 {
3376 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3377 extra->flags = 0;
3378 }
3379 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3380
3381 (void)check_match_limit(re, extra, bptr, len, start_offset,
3382 options|g_notempty, use_offsets, use_size_offsets,
3383 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3384 PCRE_ERROR_MATCHLIMIT, "match()");
3385
3386 count = check_match_limit(re, extra, bptr, len, start_offset,
3387 options|g_notempty, use_offsets, use_size_offsets,
3388 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3389 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3390 }
3391
3392 /* If callout_data is set, use the interface with additional data */
3393
3394 else if (callout_data_set)
3395 {
3396 if (extra == NULL)
3397 {
3398 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3399 extra->flags = 0;
3400 }
3401 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3402 extra->callout_data = &callout_data;
3403 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3404 options | g_notempty, use_offsets, use_size_offsets);
3405 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3406 }
3407
3408 /* The normal case is just to do the match once, with the default
3409 value of match_limit. */
3410
3411 #if !defined NODFA
3412 else if (all_use_dfa || use_dfa)
3413 {
3414 int workspace[1000];
3415 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3416 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3417 (sizeof(workspace)/sizeof(int)));
3418 if (count == 0)
3419 {
3420 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3421 count = use_size_offsets/2;
3422 }
3423 }
3424 #endif
3425
3426 else
3427 {
3428 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3429 options | g_notempty, use_offsets, use_size_offsets);
3430 if (count == 0)
3431 {
3432 fprintf(outfile, "Matched, but too many substrings\n");
3433 count = use_size_offsets/3;
3434 }
3435 }
3436
3437 /* Matched */
3438
3439 if (count >= 0)
3440 {
3441 int i, maxcount;
3442
3443 #if !defined NODFA
3444 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3445 #endif
3446 maxcount = use_size_offsets/3;
3447
3448 /* This is a check against a lunatic return value. */
3449
3450 if (count > maxcount)
3451 {
3452 fprintf(outfile,
3453 "** PCRE error: returned count %d is too big for offset size %d\n",
3454 count, use_size_offsets);
3455 count = use_size_offsets/3;
3456 if (do_g || do_G)
3457 {
3458 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3459 do_g = do_G = FALSE; /* Break g/G loop */
3460 }
3461 }
3462
3463 /* do_allcaps requests showing of all captures in the pattern, to check
3464 unset ones at the end. */
3465
3466 if (do_allcaps)
3467 {
3468 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3469 count++; /* Allow for full match */
3470 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3471 }
3472
3473 /* Output the captured substrings */
3474
3475 for (i = 0; i < count * 2; i += 2)
3476 {
3477 if (use_offsets[i] < 0)
3478 {
3479 if (use_offsets[i] != -1)
3480 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3481 use_offsets[i], i);
3482 if (use_offsets[i+1] != -1)
3483 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3484 use_offsets[i+1], i+1);
3485 fprintf(outfile, "%2d: <unset>\n", i/2);
3486 }
3487 else
3488 {
3489 fprintf(outfile, "%2d: ", i/2);
3490 PCHARSV(bptr + use_offsets[i],
3491 use_offsets[i+1] - use_offsets[i], outfile);
3492 fprintf(outfile, "\n");
3493 if (do_showcaprest || (i == 0 && do_showrest))
3494 {
3495 fprintf(outfile, "%2d+ ", i/2);
3496 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3497 outfile);
3498 fprintf(outfile, "\n");
3499 }
3500 }
3501 }
3502
3503 if (markptr != NULL)
3504 {
3505 int mplen;
3506 if (use_pcre16)
3507 {
3508 pcre_uint16 *mp = (pcre_uint16 *)markptr;
3509 mplen = 0;
3510 while (*mp++ != 0) mplen++;
3511 }
3512 else mplen = (int)strlen((char *)markptr);
3513 fprintf(outfile, "MK: ");
3514 PCHARSV(markptr, mplen, outfile);
3515 fprintf(outfile, "\n");
3516 }
3517
3518 for (i = 0; i < 32; i++)
3519 {
3520 if ((copystrings & (1 << i)) != 0)
3521 {
3522 char copybuffer[256];
3523 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3524 i, copybuffer, sizeof(copybuffer));
3525 if (rc < 0)
3526 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3527 else
3528 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3529 }
3530 }
3531
3532 for (copynamesptr = copynames;
3533 *copynamesptr != 0;
3534 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3535 {
3536 char copybuffer[256];
3537 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3538 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3539 if (rc < 0)
3540 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3541 else
3542 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3543 }
3544
3545 for (i = 0; i < 32; i++)
3546 {
3547 if ((getstrings & (1 << i)) != 0)
3548 {
3549 const char *substring;
3550 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3551 i, &substring);
3552 if (rc < 0)
3553 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3554 else
3555 {
3556 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3557 pcre_free_substring(substring);
3558 }
3559 }
3560 }
3561
3562 for (getnamesptr = getnames;
3563 *getnamesptr != 0;
3564 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3565 {
3566 const char *substring;
3567 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3568 count, (char *)getnamesptr, &substring);
3569 if (rc < 0)
3570 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3571 else
3572 {
3573 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
3574 pcre_free_substring(substring);
3575 }
3576 }
3577
3578 if (getlist)
3579 {
3580 const char **stringlist;
3581 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
3582 &stringlist);
3583 if (rc < 0)
3584 fprintf(outfile, "get substring list failed %d\n", rc);
3585 else
3586 {
3587 for (i = 0; i < count; i++)
3588 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3589 if (stringlist[i] != NULL)
3590 fprintf(outfile, "string list not terminated by NULL\n");
3591 pcre_free_substring_list(stringlist);
3592 }
3593 }
3594 }
3595
3596 /* There was a partial match */
3597
3598 else if (count == PCRE_ERROR_PARTIAL)
3599 {
3600 if (markptr == NULL) fprintf(outfile, "Partial match");
3601 else fprintf(outfile, "Partial match, mark=%s", markptr);
3602 if (use_size_offsets > 1)
3603 {
3604 fprintf(outfile, ": ");
3605 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3606 outfile);
3607 }
3608 fprintf(outfile, "\n");
3609 break; /* Out of the /g loop */
3610 }
3611
3612 /* Failed to match. If this is a /g or /G loop and we previously set
3613 g_notempty after a null match, this is not necessarily the end. We want
3614 to advance the start offset, and continue. We won't be at the end of the
3615 string - that was checked before setting g_notempty.
3616
3617 Complication arises in the case when the newline convention is "any",
3618 "crlf", or "anycrlf". If the previous match was at the end of a line
3619 terminated by CRLF, an advance of one character just passes the \r,
3620 whereas we should prefer the longer newline sequence, as does the code in
3621 pcre_exec(). Fudge the offset value to achieve this. We check for a
3622 newline setting in the pattern; if none was set, use pcre_config() to
3623 find the default.
3624
3625 Otherwise, in the case of UTF-8 matching, the advance must be one
3626 character, not one byte. */
3627
3628 else
3629 {
3630 if (g_notempty != 0)
3631 {
3632 int onechar = 1;
3633 unsigned int obits = ((real_pcre *)re)->options;
3634 use_offsets[0] = start_offset;
3635 if ((obits & PCRE_NEWLINE_BITS) == 0)
3636 {
3637 int d;
3638 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3639 /* Note that these values are always the ASCII ones, even in
3640 EBCDIC environments. CR = 13, NL = 10. */
3641 obits = (d == 13)? PCRE_NEWLINE_CR :
3642 (d == 10)? PCRE_NEWLINE_LF :
3643 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3644 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3645 (d == -1)? PCRE_NEWLINE_ANY : 0;
3646 }
3647 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3648 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3649 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3650 &&
3651 start_offset < len - 1 &&
3652 bptr[start_offset] == '\r' &&
3653 bptr[start_offset+1] == '\n')
3654 onechar++;
3655 else if (use_utf)
3656 {
3657 while (start_offset + onechar < len)
3658 {
3659 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3660 onechar++;
3661 }
3662 }
3663 use_offsets[1] = start_offset + onechar;
3664 }
3665 else
3666 {
3667 switch(count)
3668 {
3669 case PCRE_ERROR_NOMATCH:
3670 if (gmatched == 0)
3671 {
3672 if (markptr == NULL) fprintf(outfile, "No match\n");
3673 else fprintf(outfile, "No match, mark = %s\n", markptr);
3674 }
3675 break;
3676
3677 case PCRE_ERROR_BADUTF8:
3678 case PCRE_ERROR_SHORTUTF8:
3679 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3680 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3681 if (use_size_offsets >= 2)
3682 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3683 use_offsets[1]);
3684 fprintf(outfile, "\n");
3685 break;
3686
3687 default:
3688 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3689 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3690 else
3691 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3692 break;
3693 }
3694
3695 break; /* Out of the /g loop */
3696 }
3697 }
3698
3699 /* If not /g or /G we are done */
3700
3701 if (!do_g && !do_G) break;
3702
3703 /* If we have matched an empty string, first check to see if we are at
3704 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3705 Perl's /g option does. This turns out to be rather cunning. First we set
3706 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3707 same point. If this fails (picked up above) we advance to the next
3708 character. */
3709
3710 g_notempty = 0;
3711
3712 if (use_offsets[0] == use_offsets[1])
3713 {
3714 if (use_offsets[0] == len) break;
3715 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3716 }
3717
3718 /* For /g, update the start offset, leaving the rest alone */
3719
3720 if (do_g) start_offset = use_offsets[1];
3721
3722 /* For /G, update the pointer and length */
3723
3724 else
3725 {
3726 bptr += use_offsets[1];
3727 len -= use_offsets[1];
3728 }
3729 } /* End of loop for /g and /G */
3730
3731 NEXT_DATA: continue;
3732 } /* End of loop for data lines */
3733
3734 CONTINUE:
3735
3736 #if !defined NOPOSIX
3737 if (posix || do_posix) regfree(&preg);
3738 #endif
3739
3740 if (re != NULL) new_free(re);
3741 if (extra != NULL)
3742 {
3743 PCRE_FREE_STUDY(extra);
3744 }
3745 if (locale_set)
3746 {
3747 new_free((void *)tables);
3748 setlocale(LC_CTYPE, "C");
3749 locale_set = 0;
3750 }
3751 if (jit_stack != NULL)
3752 {
3753 pcre_jit_stack_free(jit_stack);
3754 jit_stack = NULL;
3755 }
3756 }
3757
3758 if (infile == stdin) fprintf(outfile, "\n");
3759
3760 EXIT:
3761
3762 if (infile != NULL && infile != stdin) fclose(infile);
3763 if (outfile != NULL && outfile != stdout) fclose(outfile);
3764
3765 free(buffer);
3766 free(dbuffer);
3767 free(pbuffer);
3768 free(offsets);
3769
3770 #ifdef SUPPORT_PCRE16
3771 if (buffer16 != NULL) free(buffer16);
3772 #endif
3773
3774 return yield;
3775 }
3776
3777 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5