/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 810 - (show annotations)
Mon Dec 19 13:34:10 2011 UTC (7 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 104181 byte(s)
Error occurred while calculating annotation data.
A lot more work on pcretest; now runs many (but not all) tests.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
69 #if defined(_WIN32) || defined(WIN32)
70 #include <io.h> /* For _setmode() */
71 #include <fcntl.h> /* For _O_BINARY */
72 #define INPUT_MODE "r"
73 #define OUTPUT_MODE "wb"
74
75 #ifndef isatty
76 #define isatty _isatty /* This is what Windows calls them, I'm told, */
77 #endif /* though in some environments they seem to */
78 /* be already defined, hence the #ifndefs. */
79 #ifndef fileno
80 #define fileno _fileno
81 #endif
82
83 /* A user sent this fix for Borland Builder 5 under Windows. */
84
85 #ifdef __BORLANDC__
86 #define _setmode(handle, mode) setmode(handle, mode)
87 #endif
88
89 /* Not Windows */
90
91 #else
92 #include <sys/time.h> /* These two includes are needed */
93 #include <sys/resource.h> /* for setrlimit(). */
94 #define INPUT_MODE "rb"
95 #define OUTPUT_MODE "wb"
96 #endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
116 #ifdef SUPPORT_PCRE8
117 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118 #endif
119 #ifdef SUPPORT_PCRE16
120 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121 #endif
122
123 /* We need access to some of the data tables that PCRE uses. So as not to have
124 to keep two copies, we include the source file here, changing the names of the
125 external symbols to prevent clashes. */
126
127 #define _pcre_ucp_gentype ucp_gentype
128 #define _pcre_ucp_typerange ucp_typerange
129 #define _pcre_utf8_table1 utf8_table1
130 #define _pcre_utf8_table1_size utf8_table1_size
131 #define _pcre_utf8_table2 utf8_table2
132 #define _pcre_utf8_table3 utf8_table3
133 #define _pcre_utf8_table4 utf8_table4
134 #define _pcre_utt utt
135 #define _pcre_utt_size utt_size
136 #define _pcre_utt_names utt_names
137 #define _pcre_OP_lengths OP_lengths
138
139 #include "pcre_tables.c"
140
141 /* The definition of the macro PRINTABLE, which determines whether to print an
142 output character as-is or as a hex value when showing compiled patterns, is
143 the same as in the printint.src file. We use it here in cases when the locale
144 has not been explicitly changed, so as to get consistent output from systems
145 that differ in their output from isprint() even in the "C" locale. */
146
147 #ifdef EBCDIC
148 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149 #else
150 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151 #endif
152
153 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
163 /* It is also possible, originally for the benefit of a version that was
164 imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165 without the interface to the DFA matcher (NODFA), and without the doublecheck
166 of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167 out the UTF8 support if PCRE is built without it. */
168
169 #ifndef SUPPORT_UTF8
170 #ifndef NOUTF8
171 #define NOUTF8
172 #endif
173 #endif
174
175 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177 only from one place and is handled differently). I couldn't dream up any way of
178 using a single macro to do this in a generic way, because of the many different
179 argument requirements. We know that at least one of SUPPORT_PCRE8 and
180 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181 use these in the definitions of generic macros. */
182
183 #ifdef SUPPORT_PCRE8
184 #define PCHARS8(lv, p, len, f) \
185 lv = pchars((pcre_uint8 *)p, len, f)
186
187 #define PCHARSV8(p, len, f) \
188 (void)pchars((pcre_uint8 *)p, len, f)
189
190 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191 re = pcre_compile((char *)pat, options, error, erroffset, tables)
192
193 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194 offsets, size_offsets) \
195 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196 offsets, size_offsets)
197
198 #define PCRE_STUDY8(extra, re, options, error) \
199 extra = pcre_study(re, options, error)
200
201 #define PCRE_FREE_STUDY8(extra) \
202 pcre_free_study(extra)
203
204 #endif /* SUPPORT_PCRE8 */
205
206
207 #ifdef SUPPORT_PCRE16
208 #define PCHARS16(lv, p, len, f) \
209 lv = pchars16((PCRE_SPTR16)p, len, f)
210
211 #define PCHARSV16(p, len, f) \
212 (void)pchars16((PCRE_SPTR16)p, len, f)
213
214 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
215 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
216
217 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
218 offsets, size_offsets) \
219 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
220 options, offsets, size_offsets)
221
222 #define PCRE_FREE_STUDY16(extra) \
223 pcre16_free_study(extra)
224
225 #define PCRE_STUDY16(extra, re, options, error) \
226 extra = pcre16_study(re, options, error)
227
228 #endif /* SUPPORT_PCRE16 */
229
230
231 /* ----- Both modes are supported; a runtime test is needed ----- */
232
233 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
234
235 #define PCHARS(lv, p, len, f) \
236 if (use_pcre16) \
237 PCHARS16(lv, p, len, f); \
238 else \
239 PCHARS8(lv, p, len, f)
240
241 #define PCHARSV(p, len, f) \
242 if (use_pcre16) \
243 PCHARSV16(p, len, f); \
244 else \
245 PCHARSV8(p, len, f)
246
247 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
248 if (use_pcre16) \
249 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
250 else \
251 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
252
253 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
254 offsets, size_offsets) \
255 if (use_pcre16) \
256 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
257 offsets, size_offsets); \
258 else \
259 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
260 offsets, size_offsets)
261
262 #define PCRE_FREE_STUDY(extra) \
263 if (use_pcre16) \
264 PCRE_FREE_STUDY16(extra); \
265 else \
266 PCRE_FREE_STUDY8(extra)
267
268 #define PCRE_STUDY(extra, re, options, error) \
269 if (use_pcre16) \
270 PCRE_STUDY16(extra, re, options, error); \
271 else \
272 PCRE_STUDY8(extra, re, options, error)
273
274 /* ----- Only 8-bit mode is supported ----- */
275
276 #elif defined SUPPORT_PCRE8
277 #define PCHARS PCHARS8
278 #define PCHARSV PCHARSV8
279 #define PCRE_COMPILE PCRE_COMPILE8
280 #define PCRE_EXEC PCRE_EXEC8
281 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
282 #define PCRE_STUDY PCRE_STUDY8
283
284 /* ----- Only 16-bit mode is supported ----- */
285
286 #else
287 #define PCHARS PCHARS16
288 #define PCHARSV PCHARSV16
289 #define PCRE_COMPILE PCRE_COMPILE16
290 #define PCRE_EXEC PCRE_EXEC16
291 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
292 #define PCRE_STUDY PCRE_STUDY16
293 #endif
294
295 /* ----- End of mode-specific function call macros ----- */
296
297
298 /* Other parameters */
299
300 #ifndef CLOCKS_PER_SEC
301 #ifdef CLK_TCK
302 #define CLOCKS_PER_SEC CLK_TCK
303 #else
304 #define CLOCKS_PER_SEC 100
305 #endif
306 #endif
307
308 /* This is the default loop count for timing. */
309
310 #define LOOPREPEAT 500000
311
312 /* Static variables */
313
314 static FILE *outfile;
315 static int log_store = 0;
316 static int callout_count;
317 static int callout_extra;
318 static int callout_fail_count;
319 static int callout_fail_id;
320 static int debug_lengths;
321 static int first_callout;
322 static int locale_set = 0;
323 static int show_malloc;
324 static int use_utf;
325 static size_t gotten_store;
326 static size_t first_gotten_store = 0;
327 static const unsigned char *last_callout_mark = NULL;
328
329 /* The buffers grow automatically if very long input lines are encountered. */
330
331 static int buffer_size = 50000;
332 static pcre_uint8 *buffer = NULL;
333 static pcre_uint8 *dbuffer = NULL;
334 static pcre_uint8 *pbuffer = NULL;
335
336 #ifdef SUPPORT_PCRE16
337 static int buffer16_size = 0;
338 static pcre_uint16 *buffer16 = NULL;
339 #endif
340
341 /* If we have 8-bit support, default use_pcre16 to false; if there is also
342 16-bit support, it can be changed by an option. If there is no 8-bit support,
343 there must be 16-bit support, so default it to 1. */
344
345 #ifdef SUPPORT_PCRE8
346 static int use_pcre16 = 0;
347 #else
348 static int use_pcre16 = 1;
349 #endif
350
351 /* Textual explanations for runtime error codes */
352
353 static const char *errtexts[] = {
354 NULL, /* 0 is no error */
355 NULL, /* NOMATCH is handled specially */
356 "NULL argument passed",
357 "bad option value",
358 "magic number missing",
359 "unknown opcode - pattern overwritten?",
360 "no more memory",
361 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
362 "match limit exceeded",
363 "callout error code",
364 NULL, /* BADUTF8 is handled specially */
365 "bad UTF-8 offset",
366 NULL, /* PARTIAL is handled specially */
367 "not used - internal error",
368 "internal error - pattern overwritten?",
369 "bad count value",
370 "item unsupported for DFA matching",
371 "backreference condition or recursion test not supported for DFA matching",
372 "match limit not supported for DFA matching",
373 "workspace size exceeded in DFA matching",
374 "too much recursion for DFA matching",
375 "recursion limit exceeded",
376 "not used - internal error",
377 "invalid combination of newline options",
378 "bad offset value",
379 NULL, /* SHORTUTF8 is handled specially */
380 "nested recursion at the same subject position",
381 "JIT stack limit reached",
382 "pattern compiled in wrong mode (8-bit/16-bit error)"
383 };
384
385
386 /*************************************************
387 * Alternate character tables *
388 *************************************************/
389
390 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
391 using the default tables of the library. However, the T option can be used to
392 select alternate sets of tables, for different kinds of testing. Note also that
393 the L (locale) option also adjusts the tables. */
394
395 /* This is the set of tables distributed as default with PCRE. It recognizes
396 only ASCII characters. */
397
398 static const pcre_uint8 tables0[] = {
399
400 /* This table is a lower casing table. */
401
402 0, 1, 2, 3, 4, 5, 6, 7,
403 8, 9, 10, 11, 12, 13, 14, 15,
404 16, 17, 18, 19, 20, 21, 22, 23,
405 24, 25, 26, 27, 28, 29, 30, 31,
406 32, 33, 34, 35, 36, 37, 38, 39,
407 40, 41, 42, 43, 44, 45, 46, 47,
408 48, 49, 50, 51, 52, 53, 54, 55,
409 56, 57, 58, 59, 60, 61, 62, 63,
410 64, 97, 98, 99,100,101,102,103,
411 104,105,106,107,108,109,110,111,
412 112,113,114,115,116,117,118,119,
413 120,121,122, 91, 92, 93, 94, 95,
414 96, 97, 98, 99,100,101,102,103,
415 104,105,106,107,108,109,110,111,
416 112,113,114,115,116,117,118,119,
417 120,121,122,123,124,125,126,127,
418 128,129,130,131,132,133,134,135,
419 136,137,138,139,140,141,142,143,
420 144,145,146,147,148,149,150,151,
421 152,153,154,155,156,157,158,159,
422 160,161,162,163,164,165,166,167,
423 168,169,170,171,172,173,174,175,
424 176,177,178,179,180,181,182,183,
425 184,185,186,187,188,189,190,191,
426 192,193,194,195,196,197,198,199,
427 200,201,202,203,204,205,206,207,
428 208,209,210,211,212,213,214,215,
429 216,217,218,219,220,221,222,223,
430 224,225,226,227,228,229,230,231,
431 232,233,234,235,236,237,238,239,
432 240,241,242,243,244,245,246,247,
433 248,249,250,251,252,253,254,255,
434
435 /* This table is a case flipping table. */
436
437 0, 1, 2, 3, 4, 5, 6, 7,
438 8, 9, 10, 11, 12, 13, 14, 15,
439 16, 17, 18, 19, 20, 21, 22, 23,
440 24, 25, 26, 27, 28, 29, 30, 31,
441 32, 33, 34, 35, 36, 37, 38, 39,
442 40, 41, 42, 43, 44, 45, 46, 47,
443 48, 49, 50, 51, 52, 53, 54, 55,
444 56, 57, 58, 59, 60, 61, 62, 63,
445 64, 97, 98, 99,100,101,102,103,
446 104,105,106,107,108,109,110,111,
447 112,113,114,115,116,117,118,119,
448 120,121,122, 91, 92, 93, 94, 95,
449 96, 65, 66, 67, 68, 69, 70, 71,
450 72, 73, 74, 75, 76, 77, 78, 79,
451 80, 81, 82, 83, 84, 85, 86, 87,
452 88, 89, 90,123,124,125,126,127,
453 128,129,130,131,132,133,134,135,
454 136,137,138,139,140,141,142,143,
455 144,145,146,147,148,149,150,151,
456 152,153,154,155,156,157,158,159,
457 160,161,162,163,164,165,166,167,
458 168,169,170,171,172,173,174,175,
459 176,177,178,179,180,181,182,183,
460 184,185,186,187,188,189,190,191,
461 192,193,194,195,196,197,198,199,
462 200,201,202,203,204,205,206,207,
463 208,209,210,211,212,213,214,215,
464 216,217,218,219,220,221,222,223,
465 224,225,226,227,228,229,230,231,
466 232,233,234,235,236,237,238,239,
467 240,241,242,243,244,245,246,247,
468 248,249,250,251,252,253,254,255,
469
470 /* This table contains bit maps for various character classes. Each map is 32
471 bytes long and the bits run from the least significant end of each byte. The
472 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
473 graph, print, punct, and cntrl. Other classes are built from combinations. */
474
475 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
476 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
477 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
479
480 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
481 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
482 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
484
485 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
486 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
489
490 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
492 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
494
495 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
497 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499
500 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
501 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
502 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
504
505 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
506 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
507 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509
510 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
511 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
512 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
514
515 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
516 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519
520 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
522 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524
525 /* This table identifies various classes of character by individual bits:
526 0x01 white space character
527 0x02 letter
528 0x04 decimal digit
529 0x08 hexadecimal digit
530 0x10 alphanumeric or '_'
531 0x80 regular expression metacharacter or binary zero
532 */
533
534 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
535 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
537 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
538 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
539 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
540 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
541 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
542 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
543 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
544 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
545 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
546 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
547 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
548 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
549 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
551 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
552 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
553 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
554 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
557 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
558 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
559 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
561 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
562 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
563 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
564 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
566
567 /* This is a set of tables that came originally from a Windows user. It seems to
568 be at least an approximation of ISO 8859. In particular, there are characters
569 greater than 128 that are marked as spaces, letters, etc. */
570
571 static const pcre_uint8 tables1[] = {
572 0,1,2,3,4,5,6,7,
573 8,9,10,11,12,13,14,15,
574 16,17,18,19,20,21,22,23,
575 24,25,26,27,28,29,30,31,
576 32,33,34,35,36,37,38,39,
577 40,41,42,43,44,45,46,47,
578 48,49,50,51,52,53,54,55,
579 56,57,58,59,60,61,62,63,
580 64,97,98,99,100,101,102,103,
581 104,105,106,107,108,109,110,111,
582 112,113,114,115,116,117,118,119,
583 120,121,122,91,92,93,94,95,
584 96,97,98,99,100,101,102,103,
585 104,105,106,107,108,109,110,111,
586 112,113,114,115,116,117,118,119,
587 120,121,122,123,124,125,126,127,
588 128,129,130,131,132,133,134,135,
589 136,137,138,139,140,141,142,143,
590 144,145,146,147,148,149,150,151,
591 152,153,154,155,156,157,158,159,
592 160,161,162,163,164,165,166,167,
593 168,169,170,171,172,173,174,175,
594 176,177,178,179,180,181,182,183,
595 184,185,186,187,188,189,190,191,
596 224,225,226,227,228,229,230,231,
597 232,233,234,235,236,237,238,239,
598 240,241,242,243,244,245,246,215,
599 248,249,250,251,252,253,254,223,
600 224,225,226,227,228,229,230,231,
601 232,233,234,235,236,237,238,239,
602 240,241,242,243,244,245,246,247,
603 248,249,250,251,252,253,254,255,
604 0,1,2,3,4,5,6,7,
605 8,9,10,11,12,13,14,15,
606 16,17,18,19,20,21,22,23,
607 24,25,26,27,28,29,30,31,
608 32,33,34,35,36,37,38,39,
609 40,41,42,43,44,45,46,47,
610 48,49,50,51,52,53,54,55,
611 56,57,58,59,60,61,62,63,
612 64,97,98,99,100,101,102,103,
613 104,105,106,107,108,109,110,111,
614 112,113,114,115,116,117,118,119,
615 120,121,122,91,92,93,94,95,
616 96,65,66,67,68,69,70,71,
617 72,73,74,75,76,77,78,79,
618 80,81,82,83,84,85,86,87,
619 88,89,90,123,124,125,126,127,
620 128,129,130,131,132,133,134,135,
621 136,137,138,139,140,141,142,143,
622 144,145,146,147,148,149,150,151,
623 152,153,154,155,156,157,158,159,
624 160,161,162,163,164,165,166,167,
625 168,169,170,171,172,173,174,175,
626 176,177,178,179,180,181,182,183,
627 184,185,186,187,188,189,190,191,
628 224,225,226,227,228,229,230,231,
629 232,233,234,235,236,237,238,239,
630 240,241,242,243,244,245,246,215,
631 248,249,250,251,252,253,254,223,
632 192,193,194,195,196,197,198,199,
633 200,201,202,203,204,205,206,207,
634 208,209,210,211,212,213,214,247,
635 216,217,218,219,220,221,222,255,
636 0,62,0,0,1,0,0,0,
637 0,0,0,0,0,0,0,0,
638 32,0,0,0,1,0,0,0,
639 0,0,0,0,0,0,0,0,
640 0,0,0,0,0,0,255,3,
641 126,0,0,0,126,0,0,0,
642 0,0,0,0,0,0,0,0,
643 0,0,0,0,0,0,0,0,
644 0,0,0,0,0,0,255,3,
645 0,0,0,0,0,0,0,0,
646 0,0,0,0,0,0,12,2,
647 0,0,0,0,0,0,0,0,
648 0,0,0,0,0,0,0,0,
649 254,255,255,7,0,0,0,0,
650 0,0,0,0,0,0,0,0,
651 255,255,127,127,0,0,0,0,
652 0,0,0,0,0,0,0,0,
653 0,0,0,0,254,255,255,7,
654 0,0,0,0,0,4,32,4,
655 0,0,0,128,255,255,127,255,
656 0,0,0,0,0,0,255,3,
657 254,255,255,135,254,255,255,7,
658 0,0,0,0,0,4,44,6,
659 255,255,127,255,255,255,127,255,
660 0,0,0,0,254,255,255,255,
661 255,255,255,255,255,255,255,127,
662 0,0,0,0,254,255,255,255,
663 255,255,255,255,255,255,255,255,
664 0,2,0,0,255,255,255,255,
665 255,255,255,255,255,255,255,127,
666 0,0,0,0,255,255,255,255,
667 255,255,255,255,255,255,255,255,
668 0,0,0,0,254,255,0,252,
669 1,0,0,248,1,0,0,120,
670 0,0,0,0,254,255,255,255,
671 0,0,128,0,0,0,128,0,
672 255,255,255,255,0,0,0,0,
673 0,0,0,0,0,0,0,128,
674 255,255,255,255,0,0,0,0,
675 0,0,0,0,0,0,0,0,
676 128,0,0,0,0,0,0,0,
677 0,1,1,0,1,1,0,0,
678 0,0,0,0,0,0,0,0,
679 0,0,0,0,0,0,0,0,
680 1,0,0,0,128,0,0,0,
681 128,128,128,128,0,0,128,0,
682 28,28,28,28,28,28,28,28,
683 28,28,0,0,0,0,0,128,
684 0,26,26,26,26,26,26,18,
685 18,18,18,18,18,18,18,18,
686 18,18,18,18,18,18,18,18,
687 18,18,18,128,128,0,128,16,
688 0,26,26,26,26,26,26,18,
689 18,18,18,18,18,18,18,18,
690 18,18,18,18,18,18,18,18,
691 18,18,18,128,128,0,0,0,
692 0,0,0,0,0,1,0,0,
693 0,0,0,0,0,0,0,0,
694 0,0,0,0,0,0,0,0,
695 0,0,0,0,0,0,0,0,
696 1,0,0,0,0,0,0,0,
697 0,0,18,0,0,0,0,0,
698 0,0,20,20,0,18,0,0,
699 0,20,18,0,0,0,0,0,
700 18,18,18,18,18,18,18,18,
701 18,18,18,18,18,18,18,18,
702 18,18,18,18,18,18,18,0,
703 18,18,18,18,18,18,18,18,
704 18,18,18,18,18,18,18,18,
705 18,18,18,18,18,18,18,18,
706 18,18,18,18,18,18,18,0,
707 18,18,18,18,18,18,18,18
708 };
709
710
711
712
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback used only when the C library has no strerror() of its own (SunOS4
is one such old-fashioned system). The message is looked up in the
traditional sys_errlist[] table, which holds sys_nerr entries.

Argument:
  n           the error number

Returns:      the sys_errlist[] entry for n, or a fixed "unknown" message when
              n lies outside the table
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
#endif  /* HAVE_STRERROR */
732
733
734 /*************************************************
735 * JIT memory callback *
736 *************************************************/
737
738 static pcre_jit_stack* jit_callback(void *arg)
739 {
740 return (pcre_jit_stack *)arg;
741 }
742
743
744 /*************************************************
745 * Convert UTF-8 string to value *
746 *************************************************/
747
748 /* This function takes one or more bytes that represents a UTF-8 character,
749 and returns the value of the character.
750
751 Argument:
752 utf8bytes a pointer to the byte vector
753 vptr a pointer to an int to receive the value
754
755 Returns: > 0 => the number of bytes consumed
756 -6 to 0 => malformed UTF-8 character at offset = (-return)
757 */
758
759 #if !defined NOUTF8
760
761 static int
762 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
763 {
764 int c = *utf8bytes++;
765 int d = c;
766 int i, j, s;
767
768 for (i = -1; i < 6; i++) /* i is number of additional bytes */
769 {
770 if ((d & 0x80) == 0) break;
771 d <<= 1;
772 }
773
774 if (i == -1) { *vptr = c; return 1; } /* ascii character */
775 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
776
777 /* i now has a value in the range 1-5 */
778
779 s = 6*i;
780 d = (c & utf8_table3[i]) << s;
781
782 for (j = 0; j < i; j++)
783 {
784 c = *utf8bytes++;
785 if ((c & 0xc0) != 0x80) return -(j+1);
786 s -= 6;
787 d |= (c & 0x3f) << s;
788 }
789
790 /* Check that encoding was the correct unique one */
791
792 for (j = 0; j < utf8_table1_size; j++)
793 if (d <= utf8_table1[j]) break;
794 if (j != i) return -(i+1);
795
796 /* Valid value */
797
798 *vptr = d;
799 return i+1;
800 }
801
802 #endif
803
804
805
806 /*************************************************
807 * Convert character value to UTF-8 *
808 *************************************************/
809
810 /* This function takes an integer value in the range 0 - 0x7fffffff
811 and encodes it as a UTF-8 character in 0 to 6 bytes.
812
813 Arguments:
814 cvalue the character value
815 utf8bytes pointer to buffer for result - at least 6 bytes long
816
817 Returns: number of characters placed in the buffer
818 */
819
820 #if !defined NOUTF8
821
822 static int
823 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
824 {
825 register int i, j;
826 for (i = 0; i < utf8_table1_size; i++)
827 if (cvalue <= utf8_table1[i]) break;
828 utf8bytes += i;
829 for (j = i; j > 0; j--)
830 {
831 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
832 cvalue >>= 6;
833 }
834 *utf8bytes = utf8_table2[i] | cvalue;
835 return i + 1;
836 }
837
838 #endif
839
840
841
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Converts a byte string into 16-bit units in buffer16, which is (re)allocated
here whenever it is smaller than 2*len + 2 bytes, that is, len + 1 16-bit
units. In non-UTF mode each byte becomes one unit. In UTF mode each decoded
character below 0x10000 becomes one unit and each character from 0x10000 to
0x10ffff becomes a surrogate pair. Such characters occupy at least four bytes
of UTF-8, so the output never needs more units than there are input bytes.

Characters above 0x10ffff cannot be represented in UTF-16. They are treated as
an error here instead of being stored as a corrupt surrogate pair.

Arguments:
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed or contains a character
               greater than 0x10ffff
*/

static int
to16(pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *pp;

  /* buffer16_size is measured in bytes, not in 16-bit units. The old contents
  are never needed, so the buffer is replaced instead of being reallocated. */

  if (buffer16_size < 2*len + 2)
    {
    if (buffer16 != NULL) free(buffer16);
    buffer16_size = 2*len + 2;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
      {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
      }
    }

  pp = buffer16;

  if (!utf)
    {
    while (len-- > 0) *pp++ = *p++;
    }

  else
    {
    int c;
    while (len > 0)
      {
      int chlen = utf82ord(p, &c);
      if (chlen <= 0) return -1;

      /* utf82ord() accepts 5- and 6-byte sequences, whose values lie beyond
      the UTF-16 range. The surrogate arithmetic below is valid only up to
      0x10ffff. */

      if (c > 0x10ffff) return -1;

      p += chlen;
      len -= chlen;
      if (c < 0x10000) *pp++ = c; else
        {
        c -= 0x10000;
        *pp++ = 0xD800 | (c >> 10);
        *pp++ = 0xDC00 | (c & 0x3ff);
        }
      }
    }

  *pp = 0;
  return pp - buffer16;
}
#endif
908
909
910 /*************************************************
911 * Read or extend an input line *
912 *************************************************/
913
914 /* Input lines are read into buffer, but both patterns and data lines can be
915 continued over multiple input lines. In addition, if the buffer fills up, we
916 want to automatically expand it so as to be able to handle extremely large
917 lines that are needed for certain stress tests. When the input buffer is
918 expanded, the other two buffers must also be expanded likewise, and the
919 contents of pbuffer, which are a copy of the input for callouts, must be
920 preserved (for when expansion happens for a data line). This is not the most
921 optimal way of handling this, but hey, this is just a test program!
922
923 Arguments:
924 f the file to read
925 start where in buffer to start (this *must* be within buffer)
926 prompt for stdin or readline()
927
928 Returns: pointer to the start of new data
929 could be a copy of start, or could be moved
930 NULL if no data read and EOF reached
931 */
932
933 static pcre_uint8 *
934 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
935 {
936 pcre_uint8 *here = start;
937
938 for (;;)
939 {
940 int rlen = (int)(buffer_size - (here - buffer));
941
942 if (rlen > 1000)
943 {
944 int dlen;
945
946 /* If libreadline support is required, use readline() to read a line if the
947 input is a terminal. Note that readline() removes the trailing newline, so
948 we must put it back again, to be compatible with fgets(). */
949
950 #ifdef SUPPORT_LIBREADLINE
951 if (isatty(fileno(f)))
952 {
953 size_t len;
954 char *s = readline(prompt);
955 if (s == NULL) return (here == start)? NULL : start;
956 len = strlen(s);
957 if (len > 0) add_history(s);
958 if (len > rlen - 1) len = rlen - 1;
959 memcpy(here, s, len);
960 here[len] = '\n';
961 here[len+1] = 0;
962 free(s);
963 }
964 else
965 #endif
966
967 /* Read the next line by normal means, prompting if the file is stdin. */
968
969 {
970 if (f == stdin) printf("%s", prompt);
971 if (fgets((char *)here, rlen, f) == NULL)
972 return (here == start)? NULL : start;
973 }
974
975 dlen = (int)strlen((char *)here);
976 if (dlen > 0 && here[dlen - 1] == '\n') return start;
977 here += dlen;
978 }
979
980 else
981 {
982 int new_buffer_size = 2*buffer_size;
983 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
984 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
985 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
986
987 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
988 {
989 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
990 exit(1);
991 }
992
993 memcpy(new_buffer, buffer, buffer_size);
994 memcpy(new_pbuffer, pbuffer, buffer_size);
995
996 buffer_size = new_buffer_size;
997
998 start = new_buffer + (start - buffer);
999 here = new_buffer + (here - buffer);
1000
1001 free(buffer);
1002 free(dbuffer);
1003 free(pbuffer);
1004
1005 buffer = new_buffer;
1006 dbuffer = new_dbuffer;
1007 pbuffer = new_pbuffer;
1008 }
1009 }
1010
1011 return NULL; /* Control never gets here */
1012 }
1013
1014
1015
1016 /*************************************************
1017 * Read number from string *
1018 *************************************************/
1019
1020 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1021 around with conditional compilation, just do the job by hand. It is only used
1022 for unpicking arguments, so just keep it simple.
1023
1024 Arguments:
1025 str string to be converted
1026 endptr where to put the end pointer
1027
1028 Returns: the converted value, as an int
1029 */
1030
1031 static int
1032 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1033 {
1034 int result = 0;
1035 while(*str != 0 && isspace(*str)) str++;
1036 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1037 *endptr = str;
1038 return(result);
1039 }
1040
1041
1042
1043 /*************************************************
1044 * Print one character *
1045 *************************************************/
1046
1047 /* Print a single character either literally, or as a hex escape. */
1048
1049 static int pchar(int c, FILE *f)
1050 {
1051 if (PRINTOK(c))
1052 {
1053 if (f != NULL) fprintf(f, "%c", c);
1054 return 1;
1055 }
1056
1057 if (c < 0x100)
1058 {
1059 if (use_utf)
1060 {
1061 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1062 return 6;
1063 }
1064 else
1065 {
1066 if (f != NULL) fprintf(f, "\\x%02x", c);
1067 return 4;
1068 }
1069 }
1070
1071 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1072 return (c <= 0x000000ff)? 6 :
1073 (c <= 0x00000fff)? 7 :
1074 (c <= 0x0000ffff)? 8 :
1075 (c <= 0x000fffff)? 9 : 10;
1076 }
1077
1078
1079
1080 #ifdef SUPPORT_PCRE8
1081 /*************************************************
1082 * Print 8-bit character string *
1083 *************************************************/
1084
1085 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1086 If handed a NULL file, just counts chars without printing. */
1087
1088 static int pchars(pcre_uint8 *p, int length, FILE *f)
1089 {
1090 int c = 0;
1091 int yield = 0;
1092
1093 while (length-- > 0)
1094 {
1095 #if !defined NOUTF8
1096 if (use_utf)
1097 {
1098 int rc = utf82ord(p, &c);
1099 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1100 {
1101 length -= rc - 1;
1102 p += rc;
1103 yield += pchar(c, f);
1104 continue;
1105 }
1106 }
1107 #endif
1108 c = *p++;
1109 yield += pchar(c, f);
1110 }
1111
1112 return yield;
1113 }
1114 #endif
1115
1116
1117
1118 #ifdef SUPPORT_PCRE16
1119 /*************************************************
1120 * Print 16-bit character string *
1121 *************************************************/
1122
1123 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1124 If handed a NULL file, just counts chars without printing. */
1125
1126 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1127 {
1128 int yield = 0;
1129
1130 while (length-- > 0)
1131 {
1132 int c = *p++ & 0xffff;
1133 #if !defined NOUTF8
1134 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1135 {
1136 int d = *p & 0xffff;
1137 if (d >= 0xDC00 && d < 0xDFFF)
1138 {
1139 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1140 length--;
1141 p++;
1142 }
1143 }
1144 #endif
1145 yield += pchar(c, f);
1146 }
1147
1148 return yield;
1149 }
1150 #endif
1151
1152
1153
1154 /*************************************************
1155 * Callout function *
1156 *************************************************/
1157
1158 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1159 the match. Yield zero unless more callouts than the fail count, or the callout
1160 data is not zero. */
1161
/* Callout handler: shows the current match position against the subject and
the next pattern item, then decides the value to hand back. The yield is the
integer pointed to by cb->callout_data if that is non-zero; otherwise 1 when
this callout's number equals callout_fail_id and the running callout_count has
reached callout_fail_count; otherwise 0. */

static int callout(pcre_callout_block *cb)
{
/* f is the output file only for the first callout of a match, or when extra
detail was requested; otherwise it is NULL and the subject is not re-printed. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, pre_start, post_start, subject_length;

/* With callout_extra set, f is never NULL here, so it can be used directly to
list the captured substrings seen so far. */

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  for (i = 0; i < cb->capture_top * 2; i += 2)
    {
    if (cb->offset_vector[i] < 0)
      fprintf(f, "%2d: <unset>\n", i/2);
    else
      {
      fprintf(f, "%2d: ", i/2);
      PCHARSV(cb->subject + cb->offset_vector[i],
        cb->offset_vector[i+1] - cb->offset_vector[i], f);
      fprintf(f, "\n");
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

/* NOTE(review): PCHARS and PCHARSV are macros defined elsewhere; presumably
PCHARS stores the printed length in its first argument and PCHARSV discards it
- confirm against their definitions. */

if (f != NULL) fprintf(f, "--->");

PCHARS(pre_start, cb->subject, cb->start_match, f);
PCHARS(post_start, cb->subject + cb->start_match,
  cb->current_position - cb->start_match, f);

PCHARS(subject_length, cb->subject, cb->subject_length, NULL);

PCHARSV(cb->subject + cb->current_position,
  cb->subject_length - cb->current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
    else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* Mark the start of the match with a circumflex, and the current position
with a second one when it lies beyond the start. */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad out beyond the printed width of the subject before showing the next
pattern item. */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

/* The item text is taken from pbuffer at the pattern offset; at least one
character is shown even when next_item_length is zero. */

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Report the mark only when the pointer differs from the one last reported. */

if (cb->mark != last_callout_mark)
  {
  fprintf(outfile, "Latest Mark: %s\n",
    (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
  last_callout_mark = cb->mark;
  }

/* Non-zero callout data is printed and becomes the yield. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

/* callout_count is incremented only for callouts whose number matches
callout_fail_id. */

return (cb->callout_number != callout_fail_id)? 0 :
       (++callout_count >= callout_fail_count)? 1 : 0;
}
1255
1256
1257 /*************************************************
1258 * Local malloc functions *
1259 *************************************************/
1260
1261 /* Alternative malloc function, to test functionality and save the size of a
1262 compiled re, which is the first store request that pcre_compile() makes. The
1263 show_malloc variable is set only during matching. */
1264
1265 static void *new_malloc(size_t size)
1266 {
1267 void *block = malloc(size);
1268 gotten_store = size;
1269 if (first_gotten_store == 0) first_gotten_store = size;
1270 if (show_malloc)
1271 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1272 return block;
1273 }
1274
1275 static void new_free(void *block)
1276 {
1277 if (show_malloc)
1278 fprintf(outfile, "free %p\n", block);
1279 free(block);
1280 }
1281
1282 /* For recursion malloc/free, to test stacking calls */
1283
1284 static void *stack_malloc(size_t size)
1285 {
1286 void *block = malloc(size);
1287 if (show_malloc)
1288 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1289 return block;
1290 }
1291
1292 static void stack_free(void *block)
1293 {
1294 if (show_malloc)
1295 fprintf(outfile, "stack_free %p\n", block);
1296 free(block);
1297 }
1298
1299
1300 /*************************************************
1301 * Call pcre_fullinfo() *
1302 *************************************************/
1303
1304 /* Get one piece of information from the pcre_fullinfo() function. When only
1305 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1306 value, but the code is defensive. */
1307
1308 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1309 {
1310 int rc;
1311
1312 if (use_pcre16)
1313 #ifdef SUPPORT_PCRE16
1314 rc = pcre16_fullinfo(re, study, option, ptr);
1315 #else
1316 rc = PCRE_ERROR_BADMODE;
1317 #endif
1318 else
1319 #ifdef SUPPORT_PCRE8
1320 rc = pcre_fullinfo(re, study, option, ptr);
1321 #else
1322 rc = PCRE_ERROR_BADMODE;
1323 #endif
1324
1325 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1326 use_pcre16? "16" : "", option);
1327 }
1328
1329
1330
1331 /*************************************************
1332 * Byte flipping function *
1333 *************************************************/
1334
/* Reverse the byte order of the low-order bytes of value: the bottom two
bytes when n is 2, and the bottom four bytes for any other n. Bits above those
bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1344
1345
1346
1347
1348 /*************************************************
1349 * Check match or recursion limit *
1350 *************************************************/
1351
/* Search for the smallest value of *limit at which a match attempt no longer
returns errnumber. While the search runs, flag is set in extra->flags; it is
cleared again before returning. The trial value starts at 64 and is doubled
for as long as the limit error keeps occurring and no upper bound is known;
once a trial succeeds, the range between the largest failing value (min) and
the smallest succeeding value (max) is bisected. When found, the minimum is
printed on outfile using msg as the name of the limit.

NOTE(review): PCRE_EXEC is a macro defined elsewhere; presumably it runs the
match with the given arguments and stores the result in count - confirm.

Returns:  the result of the last match attempt */

static int
check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
  int start_offset, int options, int *use_offsets, int use_size_offsets,
  int flag, unsigned long int *limit, int errnumber, const char *msg)
{
int count;
int min = 0;     /* Largest value known to hit the limit */
int mid = 64;    /* Value being tried */
int max = -1;    /* Smallest value known to work; -1 until one is found */

extra->flags |= flag;

for (;;)
  {
  *limit = mid;

  PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
    use_offsets, use_size_offsets);

  /* The limit was hit: raise the lower bound. Step straight to max when it is
  adjacent, bisect when an upper bound is known, otherwise keep doubling. */

  if (count == errnumber)
    {
    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
    min = mid;
    mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
    }

  /* The match ran to completion (match, no match, or partial match): mid is
  large enough. If it is one above the lower bound it is the minimum. */

  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
                         count == PCRE_ERROR_PARTIAL)
    {
    if (mid == min + 1)
      {
      fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
      break;
      }
    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
    max = mid;
    mid = (min + mid)/2;
    }
  else break;    /* Some other error */
  }

extra->flags &= ~flag;
return count;
}
1396
1397
1398
1399 /*************************************************
1400 * Case-independent strncmp() function *
1401 *************************************************/
1402
1403 /*
1404 Arguments:
1405 s first string
1406 t second string
1407 n number of characters to compare
1408
1409 Returns: < 0, = 0, or > 0, according to the comparison
1410 */
1411
1412 static int
1413 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1414 {
1415 while (n--)
1416 {
1417 int c = tolower(*s++) - tolower(*t++);
1418 if (c) return c;
1419 }
1420 return 0;
1421 }
1422
1423
1424
1425 /*************************************************
1426 * Check newline indicator *
1427 *************************************************/
1428
1429 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1430 a message and return 0 if there is no match.
1431
1432 Arguments:
1433 p points after the leading '<'
1434 f file for error message
1435
1436 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1437 */
1438
1439 static int
1440 check_newline(pcre_uint8 *p, FILE *f)
1441 {
1442 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1443 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1444 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1445 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1446 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1447 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1448 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1449 fprintf(f, "Unknown newline type at: <%s\n", p);
1450 return 0;
1451 }
1452
1453
1454
1455 /*************************************************
1456 * Usage function *
1457 *************************************************/
1458
/* Write the command-line help text to stdout. Lines for features that are
compiled out (16-bit library, DFA matching, POSIX interface) are omitted, and
the readline line reflects whether readline support was built in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif

fputs(" -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1496
1497
1498
1499 /*************************************************
1500 * Main Program *
1501 *************************************************/
1502
1503 /* Read lines from named file or stdin and write to named file or stdout; lines
1504 consist of a regular expression, in delimiters and optionally followed by
1505 options, followed by a set of test data, terminated by an empty line. */
1506
1507 int main(int argc, char **argv)
1508 {
1509 FILE *infile = stdin;
1510 int options = 0;
1511 int study_options = 0;
1512 int default_find_match_limit = FALSE;
1513 int op = 1;
1514 int timeit = 0;
1515 int timeitm = 0;
1516 int showinfo = 0;
1517 int showstore = 0;
1518 int force_study = -1;
1519 int force_study_options = 0;
1520 int quiet = 0;
1521 int size_offsets = 45;
1522 int size_offsets_max;
1523 int *offsets = NULL;
1524 #if !defined NOPOSIX
1525 int posix = 0;
1526 #endif
1527 int debug = 0;
1528 int done = 0;
1529 int all_use_dfa = 0;
1530 int yield = 0;
1531 int stack_size;
1532
1533 pcre_jit_stack *jit_stack = NULL;
1534
1535 /* These vectors store, end-to-end, a list of captured substring names. Assume
1536 that 1024 is plenty long enough for the few names we'll be testing. */
1537
1538 pcre_uchar copynames[1024];
1539 pcre_uchar getnames[1024];
1540
1541 pcre_uchar *copynamesptr;
1542 pcre_uchar *getnamesptr;
1543
1544 /* Get buffers from malloc() so that valgrind will check their misuse when
1545 debugging. They grow automatically when very long lines are read. The 16-bit
1546 buffer (buffer16) is obtained only if needed. */
1547
1548 buffer = (pcre_uint8 *)malloc(buffer_size);
1549 dbuffer = (pcre_uint8 *)malloc(buffer_size);
1550 pbuffer = (pcre_uint8 *)malloc(buffer_size);
1551
1552 /* The outfile variable is static so that new_malloc can use it. */
1553
1554 outfile = stdout;
1555
1556 /* The following _setmode() stuff is some Windows magic that tells its runtime
1557 library to translate CRLF into a single LF character. At least, that's what
1558 I've been told: never having used Windows I take this all on trust. Originally
1559 it set 0x8000, but then I was advised that _O_BINARY was better. */
1560
1561 #if defined(_WIN32) || defined(WIN32)
1562 _setmode( _fileno( stdout ), _O_BINARY );
1563 #endif
1564
1565 /* Scan options */
1566
1567 while (argc > 1 && argv[op][0] == '-')
1568 {
1569 pcre_uint8 *endptr;
1570
1571 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1572 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1573 else if (strcmp(argv[op], "-s+") == 0)
1574 {
1575 force_study = 1;
1576 force_study_options = PCRE_STUDY_JIT_COMPILE;
1577 }
1578 #ifdef SUPPORT_PCRE16
1579 else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1580 #endif
1581
1582 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1583 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1584 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1585 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1586 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1587 #if !defined NODFA
1588 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1589 #endif
1590 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1591 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1592 *endptr == 0))
1593 {
1594 op++;
1595 argc--;
1596 }
1597 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1598 {
1599 int both = argv[op][2] == 0;
1600 int temp;
1601 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1602 *endptr == 0))
1603 {
1604 timeitm = temp;
1605 op++;
1606 argc--;
1607 }
1608 else timeitm = LOOPREPEAT;
1609 if (both) timeit = timeitm;
1610 }
1611 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1612 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1613 *endptr == 0))
1614 {
1615 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1616 printf("PCRE: -S not supported on this OS\n");
1617 exit(1);
1618 #else
1619 int rc;
1620 struct rlimit rlim;
1621 getrlimit(RLIMIT_STACK, &rlim);
1622 rlim.rlim_cur = stack_size * 1024 * 1024;
1623 rc = setrlimit(RLIMIT_STACK, &rlim);
1624 if (rc != 0)
1625 {
1626 printf("PCRE: setrlimit() failed with error %d\n", rc);
1627 exit(1);
1628 }
1629 op++;
1630 argc--;
1631 #endif
1632 }
1633 #if !defined NOPOSIX
1634 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1635 #endif
1636 else if (strcmp(argv[op], "-C") == 0)
1637 {
1638 int rc;
1639 unsigned long int lrc;
1640 printf("PCRE version %s\n", pcre_version());
1641 printf("Compiled with\n");
1642
1643 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1644 are set, either both UTFs are supported or both are not supported. */
1645
1646 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1647 printf(" 8-bit and 16-bit support\n");
1648 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1649 if (rc)
1650 printf(" UTF-8 and UTF-16 support\n");
1651 else
1652 printf(" No UTF-8 or UTF-16 support\n");
1653 #elif defined SUPPORT_PCRE8
1654 printf(" 8-bit support only\n");
1655 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1656 printf(" %sUTF-8 support\n", rc? "" : "No ");
1657 #else
1658 printf(" 16-bit support only\n");
1659 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1660 printf(" %sUTF-16 support\n", rc? "" : "No ");
1661 #endif
1662
1663 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1664 printf(" %sUnicode properties support\n", rc? "" : "No ");
1665 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1666 if (rc)
1667 printf(" Just-in-time compiler support\n");
1668 else
1669 printf(" No just-in-time compiler support\n");
1670 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1671 /* Note that these values are always the ASCII values, even
1672 in EBCDIC environments. CR is 13 and NL is 10. */
1673 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1674 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1675 (rc == -2)? "ANYCRLF" :
1676 (rc == -1)? "ANY" : "???");
1677 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1678 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1679 "all Unicode newlines");
1680 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1681 printf(" Internal link size = %d\n", rc);
1682 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1683 printf(" POSIX malloc threshold = %d\n", rc);
1684 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1685 printf(" Default match limit = %ld\n", lrc);
1686 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1687 printf(" Default recursion depth limit = %ld\n", lrc);
1688 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1689 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1690 goto EXIT;
1691 }
1692 else if (strcmp(argv[op], "-help") == 0 ||
1693 strcmp(argv[op], "--help") == 0)
1694 {
1695 usage();
1696 goto EXIT;
1697 }
1698 else
1699 {
1700 printf("** Unknown or malformed option %s\n", argv[op]);
1701 usage();
1702 yield = 1;
1703 goto EXIT;
1704 }
1705 op++;
1706 argc--;
1707 }
1708
1709 /* Get the store for the offsets vector, and remember what it was */
1710
1711 size_offsets_max = size_offsets;
1712 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1713 if (offsets == NULL)
1714 {
1715 printf("** Failed to get %d bytes of memory for offsets vector\n",
1716 (int)(size_offsets_max * sizeof(int)));
1717 yield = 1;
1718 goto EXIT;
1719 }
1720
1721 /* Sort out the input and output files */
1722
1723 if (argc > 1)
1724 {
1725 infile = fopen(argv[op], INPUT_MODE);
1726 if (infile == NULL)
1727 {
1728 printf("** Failed to open %s\n", argv[op]);
1729 yield = 1;
1730 goto EXIT;
1731 }
1732 }
1733
1734 if (argc > 2)
1735 {
1736 outfile = fopen(argv[op+1], OUTPUT_MODE);
1737 if (outfile == NULL)
1738 {
1739 printf("** Failed to open %s\n", argv[op+1]);
1740 yield = 1;
1741 goto EXIT;
1742 }
1743 }
1744
1745 /* Set alternative malloc function */
1746
1747 #ifdef SUPPORT_PCRE8
1748 pcre_malloc = new_malloc;
1749 pcre_free = new_free;
1750 pcre_stack_malloc = stack_malloc;
1751 pcre_stack_free = stack_free;
1752 #endif
1753
1754 #ifdef SUPPORT_PCRE16
1755 pcre16_malloc = new_malloc;
1756 pcre16_free = new_free;
1757 pcre16_stack_malloc = stack_malloc;
1758 pcre16_stack_free = stack_free;
1759 #endif
1760
1761 /* Heading line unless quiet, then prompt for first regex if stdin */
1762
1763 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1764
1765 /* Main loop */
1766
1767 while (!done)
1768 {
1769 pcre *re = NULL;
1770 pcre_extra *extra = NULL;
1771
1772 #if !defined NOPOSIX /* There are still compilers that require no indent */
1773 regex_t preg;
1774 int do_posix = 0;
1775 #endif
1776
1777 const char *error;
1778 pcre_uint8 *markptr;
1779 pcre_uint8 *p, *pp, *ppp;
1780 pcre_uint8 *to_file = NULL;
1781 const pcre_uint8 *tables = NULL;
1782 unsigned long int true_size, true_study_size = 0;
1783 size_t size, regex_gotten_store;
1784 int do_allcaps = 0;
1785 int do_mark = 0;
1786 int do_study = 0;
1787 int no_force_study = 0;
1788 int do_debug = debug;
1789 int do_G = 0;
1790 int do_g = 0;
1791 int do_showinfo = showinfo;
1792 int do_showrest = 0;
1793 int do_showcaprest = 0;
1794 int do_flip = 0;
1795 int erroroffset, len, delimiter, poffset;
1796
1797 use_utf = 0;
1798 debug_lengths = 1;
1799
1800 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1801 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1802 fflush(outfile);
1803
1804 p = buffer;
1805 while (isspace(*p)) p++;
1806 if (*p == 0) continue;
1807
1808 /* See if the pattern is to be loaded pre-compiled from a file. */
1809
1810 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1811 {
1812 unsigned long int magic, get_options;
1813 pcre_uint8 sbuf[8];
1814 FILE *f;
1815
1816 p++;
1817 pp = p + (int)strlen((char *)p);
1818 while (isspace(pp[-1])) pp--;
1819 *pp = 0;
1820
1821 f = fopen((char *)p, "rb");
1822 if (f == NULL)
1823 {
1824 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1825 continue;
1826 }
1827
1828 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1829
1830 true_size =
1831 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1832 true_study_size =
1833 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1834
1835 re = (real_pcre *)new_malloc(true_size);
1836 regex_gotten_store = first_gotten_store;
1837
1838 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1839
1840 magic = ((real_pcre *)re)->magic_number;
1841 if (magic != MAGIC_NUMBER)
1842 {
1843 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1844 {
1845 do_flip = 1;
1846 }
1847 else
1848 {
1849 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1850 fclose(f);
1851 continue;
1852 }
1853 }
1854
1855 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1856 do_flip? " (byte-inverted)" : "", p);
1857
1858 /* Need to know if UTF-8 for printing data strings */
1859
1860 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1861 use_utf = (get_options & PCRE_UTF8) != 0;
1862
1863 /* Now see if there is any following study data. */
1864
1865 if (true_study_size != 0)
1866 {
1867 pcre_study_data *psd;
1868
1869 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1870 extra->flags = PCRE_EXTRA_STUDY_DATA;
1871
1872 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1873 extra->study_data = psd;
1874
1875 if (fread(psd, 1, true_study_size, f) != true_study_size)
1876 {
1877 FAIL_READ:
1878 fprintf(outfile, "Failed to read data from %s\n", p);
1879 if (extra != NULL)
1880 {
1881 PCRE_FREE_STUDY(extra);
1882 }
1883 if (re != NULL) new_free(re);
1884 fclose(f);
1885 continue;
1886 }
1887 fprintf(outfile, "Study data loaded from %s\n", p);
1888 do_study = 1; /* To get the data output if requested */
1889 }
1890 else fprintf(outfile, "No study data\n");
1891
1892 fclose(f);
1893 goto SHOW_INFO;
1894 }
1895
1896 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1897 the pattern; if is isn't complete, read more. */
1898
1899 delimiter = *p++;
1900
1901 if (isalnum(delimiter) || delimiter == '\\')
1902 {
1903 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1904 goto SKIP_DATA;
1905 }
1906
1907 pp = p;
1908 poffset = (int)(p - buffer);
1909
1910 for(;;)
1911 {
1912 while (*pp != 0)
1913 {
1914 if (*pp == '\\' && pp[1] != 0) pp++;
1915 else if (*pp == delimiter) break;
1916 pp++;
1917 }
1918 if (*pp != 0) break;
1919 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1920 {
1921 fprintf(outfile, "** Unexpected EOF\n");
1922 done = 1;
1923 goto CONTINUE;
1924 }
1925 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1926 }
1927
1928 /* The buffer may have moved while being extended; reset the start of data
1929 pointer to the correct relative point in the buffer. */
1930
1931 p = buffer + poffset;
1932
1933 /* If the first character after the delimiter is backslash, make
1934 the pattern end with backslash. This is purely to provide a way
1935 of testing for the error message when a pattern ends with backslash. */
1936
1937 if (pp[1] == '\\') *pp++ = '\\';
1938
1939 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1940 for callouts. */
1941
1942 *pp++ = 0;
1943 strcpy((char *)pbuffer, (char *)p);
1944
1945 /* Look for options after final delimiter */
1946
1947 options = 0;
1948 study_options = 0;
1949 log_store = showstore; /* default from command line */
1950
1951 while (*pp != 0)
1952 {
1953 switch (*pp++)
1954 {
1955 case 'f': options |= PCRE_FIRSTLINE; break;
1956 case 'g': do_g = 1; break;
1957 case 'i': options |= PCRE_CASELESS; break;
1958 case 'm': options |= PCRE_MULTILINE; break;
1959 case 's': options |= PCRE_DOTALL; break;
1960 case 'x': options |= PCRE_EXTENDED; break;
1961
1962 case '+':
1963 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1964 break;
1965
1966 case '=': do_allcaps = 1; break;
1967 case 'A': options |= PCRE_ANCHORED; break;
1968 case 'B': do_debug = 1; break;
1969 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1970 case 'D': do_debug = do_showinfo = 1; break;
1971 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1972 case 'F': do_flip = 1; break;
1973 case 'G': do_G = 1; break;
1974 case 'I': do_showinfo = 1; break;
1975 case 'J': options |= PCRE_DUPNAMES; break;
1976 case 'K': do_mark = 1; break;
1977 case 'M': log_store = 1; break;
1978 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1979
1980 #if !defined NOPOSIX
1981 case 'P': do_posix = 1; break;
1982 #endif
1983
1984 case 'S':
1985 if (do_study == 0)
1986 {
1987 do_study = 1;
1988 if (*pp == '+')
1989 {
1990 study_options |= PCRE_STUDY_JIT_COMPILE;
1991 pp++;
1992 }
1993 }
1994 else
1995 {
1996 do_study = 0;
1997 no_force_study = 1;
1998 }
1999 break;
2000
2001 case 'U': options |= PCRE_UNGREEDY; break;
2002 case 'W': options |= PCRE_UCP; break;
2003 case 'X': options |= PCRE_EXTRA; break;
2004 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2005 case 'Z': debug_lengths = 0; break;
2006 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2007 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2008
2009 case 'T':
2010 switch (*pp++)
2011 {
2012 case '0': tables = tables0; break;
2013 case '1': tables = tables1; break;
2014
2015 case '\r':
2016 case '\n':
2017 case ' ':
2018 case 0:
2019 fprintf(outfile, "** Missing table number after /T\n");
2020 goto SKIP_DATA;
2021
2022 default:
2023 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2024 goto SKIP_DATA;
2025 }
2026 break;
2027
2028 case 'L':
2029 ppp = pp;
2030 /* The '\r' test here is so that it works on Windows. */
2031 /* The '0' test is just in case this is an unterminated line. */
2032 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2033 *ppp = 0;
2034 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2035 {
2036 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2037 goto SKIP_DATA;
2038 }
2039 locale_set = 1;
2040 tables = pcre_maketables();
2041 pp = ppp;
2042 break;
2043
2044 case '>':
2045 to_file = pp;
2046 while (*pp != 0) pp++;
2047 while (isspace(pp[-1])) pp--;
2048 *pp = 0;
2049 break;
2050
2051 case '<':
2052 {
2053 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2054 {
2055 options |= PCRE_JAVASCRIPT_COMPAT;
2056 pp += 3;
2057 }
2058 else
2059 {
2060 int x = check_newline(pp, outfile);
2061 if (x == 0) goto SKIP_DATA;
2062 options |= x;
2063 while (*pp++ != '>');
2064 }
2065 }
2066 break;
2067
2068 case '\r': /* So that it works in Windows */
2069 case '\n':
2070 case ' ':
2071 break;
2072
2073 default:
2074 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2075 goto SKIP_DATA;
2076 }
2077 }
2078
2079 /* Handle compiling via the POSIX interface, which doesn't support the
2080 timing, showing, or debugging options, nor the ability to pass over
2081 local character tables. Neither does it have 16-bit support. */
2082
2083 #if !defined NOPOSIX
2084 if (posix || do_posix)
2085 {
2086 int rc;
2087 int cflags = 0;
2088
2089 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2090 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2091 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2092 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2093 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2094 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2095 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2096
2097 first_gotten_store = 0;
2098 rc = regcomp(&preg, (char *)p, cflags);
2099
2100 /* Compilation failed; go back for another re, skipping to blank line
2101 if non-interactive. */
2102
2103 if (rc != 0)
2104 {
2105 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2106 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2107 goto SKIP_DATA;
2108 }
2109 }
2110
2111 /* Handle compiling via the native interface */
2112
2113 else
2114 #endif /* !defined NOPOSIX */
2115
2116 {
2117 unsigned long int get_options;
2118
2119 /* In 16-bit mode, convert the input. */
2120
2121 #ifdef SUPPORT_PCRE16
2122 if (use_pcre16)
2123 {
2124 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2125 {
2126 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2127 "converted to UTF-16\n");
2128 goto SKIP_DATA;
2129 }
2130 p = (pcre_uint8 *)buffer16;
2131 }
2132 #endif
2133
2134 /* Compile many times when timing */
2135
2136 if (timeit > 0)
2137 {
2138 register int i;
2139 clock_t time_taken;
2140 clock_t start_time = clock();
2141 for (i = 0; i < timeit; i++)
2142 {
2143 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2144 if (re != NULL) free(re);
2145 }
2146 time_taken = clock() - start_time;
2147 fprintf(outfile, "Compile time %.4f milliseconds\n",
2148 (((double)time_taken * 1000.0) / (double)timeit) /
2149 (double)CLOCKS_PER_SEC);
2150 }
2151
2152 first_gotten_store = 0;
2153 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2154
2155 /* Compilation failed; go back for another re, skipping to blank line
2156 if non-interactive. */
2157
2158 if (re == NULL)
2159 {
2160 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2161 SKIP_DATA:
2162 if (infile != stdin)
2163 {
2164 for (;;)
2165 {
2166 if (extend_inputline(infile, buffer, NULL) == NULL)
2167 {
2168 done = 1;
2169 goto CONTINUE;
2170 }
2171 len = (int)strlen((char *)buffer);
2172 while (len > 0 && isspace(buffer[len-1])) len--;
2173 if (len == 0) break;
2174 }
2175 fprintf(outfile, "\n");
2176 }
2177 goto CONTINUE;
2178 }
2179
2180 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2181 within the regex; check for this so that we know how to process the data
2182 lines. */
2183
2184 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2185 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2186
2187 /* Extract the size for possible writing before possibly flipping it,
2188 and remember the store that was got. */
2189
2190 true_size = ((real_pcre *)re)->size;
2191 regex_gotten_store = first_gotten_store;
2192
2193 /* Output code size information if requested */
2194
2195 if (log_store)
2196 fprintf(outfile, "Memory allocation (code space): %d\n",
2197 (int)(first_gotten_store -
2198 sizeof(real_pcre) -
2199 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2200
2201 /* If -s or /S was present, study the regex to generate additional info to
2202 help with the matching, unless the pattern has the SS option, which
2203 suppresses the effect of /S (used for a few test patterns where studying is
2204 never sensible). */
2205
2206 if (do_study || (force_study >= 0 && !no_force_study))
2207 {
2208 if (timeit > 0)
2209 {
2210 register int i;
2211 clock_t time_taken;
2212 clock_t start_time = clock();
2213 for (i = 0; i < timeit; i++)
2214 {
2215 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2216 }
2217 time_taken = clock() - start_time;
2218 if (extra != NULL)
2219 {
2220 PCRE_FREE_STUDY(extra);
2221 }
2222 fprintf(outfile, " Study time %.4f milliseconds\n",
2223 (((double)time_taken * 1000.0) / (double)timeit) /
2224 (double)CLOCKS_PER_SEC);
2225 }
2226 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2227 if (error != NULL)
2228 fprintf(outfile, "Failed to study: %s\n", error);
2229 else if (extra != NULL)
2230 {
2231 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2232 if (log_store)
2233 {
2234 size_t jitsize;
2235 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2236 if (jitsize != 0)
2237 fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2238 }
2239 }
2240 }
2241
2242 /* If /K was present, we set up for handling MARK data. */
2243
2244 if (do_mark)
2245 {
2246 if (extra == NULL)
2247 {
2248 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2249 extra->flags = 0;
2250 }
2251 extra->mark = &markptr;
2252 extra->flags |= PCRE_EXTRA_MARK;
2253 }
2254
2255 /* If the 'F' option was present, we flip the bytes of all the integer
2256 fields in the regex data block and the study block. This is to make it
2257 possible to test PCRE's handling of byte-flipped patterns, e.g. those
2258 compiled on a different architecture. */
2259
2260 if (do_flip)
2261 {
2262 real_pcre *rre = (real_pcre *)re;
2263 rre->magic_number =
2264 byteflip(rre->magic_number, sizeof(rre->magic_number));
2265 rre->size = byteflip(rre->size, sizeof(rre->size));
2266 rre->options = byteflip(rre->options, sizeof(rre->options));
2267 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2268 rre->top_bracket =
2269 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2270 rre->top_backref =
2271 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2272 rre->first_char =
2273 (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2274 rre->req_char =
2275 (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2276 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2277 sizeof(rre->name_table_offset));
2278 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2279 sizeof(rre->name_entry_size));
2280 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2281 sizeof(rre->name_count));
2282
2283 if (extra != NULL)
2284 {
2285 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2286 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2287 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2288 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2289 }
2290 }
2291
2292 /* Extract and display information from the compiled data if required. */
2293
2294 SHOW_INFO:
2295
2296 if (do_debug)
2297 {
2298 fprintf(outfile, "------------------------------------------------------------------\n");
2299 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2300 if (use_pcre16)
2301 pcre16_printint(re, outfile, debug_lengths);
2302 else
2303 pcre_printint(re, outfile, debug_lengths);
2304 #elif defined SUPPORT_PCRE8
2305 pcre_printint(re, outfile, debug_lengths);
2306 #else
2307 pcre16_printint(re, outfile, debug_lengths);
2308 #endif
2309 }
2310
2311 /* We already have the options in get_options (see above) */
2312
2313 if (do_showinfo)
2314 {
2315 unsigned long int all_options;
2316 #if !defined NOINFOCHECK
2317 int old_first_char, old_options, old_count;
2318 #endif
2319 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2320 hascrorlf;
2321 int nameentrysize, namecount;
2322 const pcre_uchar *nametable;
2323
2324 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2325 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2326 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2327 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2328 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2329 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2330 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2331 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2332 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2333 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2334 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2335
2336 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2337 that it gives the same results as the new function. */
2338
2339 #if !defined NOINFOCHECK
2340 if (!use_pcre16)
2341 {
2342 old_count = pcre_info(re, &old_options, &old_first_char);
2343 if (count < 0) fprintf(outfile,
2344 "Error %d from pcre_info()\n", count);
2345 else
2346 {
2347 if (old_count != count) fprintf(outfile,
2348 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2349 old_count);
2350
2351 if (old_first_char != first_char) fprintf(outfile,
2352 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2353 first_char, old_first_char);
2354
2355 if (old_options != (int)get_options) fprintf(outfile,
2356 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2357 get_options, old_options);
2358 }
2359 }
2360 #endif
2361
2362 if (size != regex_gotten_store) fprintf(outfile,
2363 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2364 (int)size, (int)regex_gotten_store);
2365
2366 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2367 if (backrefmax > 0)
2368 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2369
2370 if (namecount > 0)
2371 {
2372 fprintf(outfile, "Named capturing subpatterns:\n");
2373 while (namecount-- > 0)
2374 {
2375 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2376 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2377 GET2(nametable, 0));
2378 nametable += nameentrysize;
2379 }
2380 }
2381
2382 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2383 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2384
2385 all_options = ((real_pcre *)re)->options;
2386 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2387
2388 if (get_options == 0) fprintf(outfile, "No options\n");
2389 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2390 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2391 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2392 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2393 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2394 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2395 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2396 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2397 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2398 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2399 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2400 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2401 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2402 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2403 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2404 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2405 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2406 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2407
2408 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2409
2410 switch (get_options & PCRE_NEWLINE_BITS)
2411 {
2412 case PCRE_NEWLINE_CR:
2413 fprintf(outfile, "Forced newline sequence: CR\n");
2414 break;
2415
2416 case PCRE_NEWLINE_LF:
2417 fprintf(outfile, "Forced newline sequence: LF\n");
2418 break;
2419
2420 case PCRE_NEWLINE_CRLF:
2421 fprintf(outfile, "Forced newline sequence: CRLF\n");
2422 break;
2423
2424 case PCRE_NEWLINE_ANYCRLF:
2425 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2426 break;
2427
2428 case PCRE_NEWLINE_ANY:
2429 fprintf(outfile, "Forced newline sequence: ANY\n");
2430 break;
2431
2432 default:
2433 break;
2434 }
2435
2436 if (first_char == -1)
2437 {
2438 fprintf(outfile, "First char at start or follows newline\n");
2439 }
2440 else if (first_char < 0)
2441 {
2442 fprintf(outfile, "No first char\n");
2443 }
2444 else
2445 {
2446 const char *caseless =
2447 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2448 "" : " (caseless)";
2449
2450 if (PRINTOK(first_char))
2451 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2452 else
2453 {
2454 fprintf(outfile, "First char = ");
2455 pchar(first_char, outfile);
2456 fprintf(outfile, "%s\n", caseless);
2457 }
2458 }
2459
2460 if (need_char < 0)
2461 {
2462 fprintf(outfile, "No need char\n");
2463 }
2464 else
2465 {
2466 const char *caseless =
2467 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2468 "" : " (caseless)";
2469
2470 if (PRINTOK(need_char))
2471 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2472 else
2473 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2474 }
2475
2476 /* Don't output study size; at present it is in any case a fixed
2477 value, but it varies, depending on the computer architecture, and
2478 so messes up the test suite. (And with the /F option, it might be
2479 flipped.) If study was forced by an external -s, don't show this
2480 information unless -i or -d was also present. This means that, except
2481 when auto-callouts are involved, the output from runs with and without
2482 -s should be identical. */
2483
2484 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2485 {
2486 if (extra == NULL)
2487 fprintf(outfile, "Study returned NULL\n");
2488 else
2489 {
2490 pcre_uint8 *start_bits = NULL;
2491 int minlength;
2492
2493 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2494 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2495
2496 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2497 if (start_bits == NULL)
2498 fprintf(outfile, "No set of starting bytes\n");
2499 else
2500 {
2501 int i;
2502 int c = 24;
2503 fprintf(outfile, "Starting byte set: ");
2504 for (i = 0; i < 256; i++)
2505 {
2506 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2507 {
2508 if (c > 75)
2509 {
2510 fprintf(outfile, "\n ");
2511 c = 2;
2512 }
2513 if (PRINTOK(i) && i != ' ')
2514 {
2515 fprintf(outfile, "%c ", i);
2516 c += 2;
2517 }
2518 else
2519 {
2520 fprintf(outfile, "\\x%02x ", i);
2521 c += 5;
2522 }
2523 }
2524 }
2525 fprintf(outfile, "\n");
2526 }
2527 }
2528
2529 /* Show this only if the JIT was set by /S, not by -s. */
2530
2531 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2532 {
2533 int jit;
2534 new_info(re, extra, PCRE_INFO_JIT, &jit);
2535 if (jit)
2536 fprintf(outfile, "JIT study was successful\n");
2537 else
2538 #ifdef SUPPORT_JIT
2539 fprintf(outfile, "JIT study was not successful\n");
2540 #else
2541 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2542 #endif
2543 }
2544 }
2545 }
2546
2547 /* If the '>' option was present, we write out the regex to a file, and
2548 that is all. The first 8 bytes of the file are the regex length and then
2549 the study length, in big-endian order. */
2550
2551 if (to_file != NULL)
2552 {
2553 FILE *f = fopen((char *)to_file, "wb");
2554 if (f == NULL)
2555 {
2556 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2557 }
2558 else
2559 {
2560 pcre_uint8 sbuf[8];
2561 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2562 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2563 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2564 sbuf[3] = (pcre_uint8)((true_size) & 255);
2565
2566 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2567 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2568 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2569 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2570
2571 if (fwrite(sbuf, 1, 8, f) < 8 ||
2572 fwrite(re, 1, true_size, f) < true_size)
2573 {
2574 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2575 }
2576 else
2577 {
2578 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2579
2580 /* If there is study data, write it. */
2581
2582 if (extra != NULL)
2583 {
2584 if (fwrite(extra->study_data, 1, true_study_size, f) <
2585 true_study_size)
2586 {
2587 fprintf(outfile, "Write error on %s: %s\n", to_file,
2588 strerror(errno));
2589 }
2590 else fprintf(outfile, "Study data written to %s\n", to_file);
2591 }
2592 }
2593 fclose(f);
2594 }
2595
2596 new_free(re);
2597 if (extra != NULL)
2598 {
2599 PCRE_FREE_STUDY(extra);
2600 }
2601 if (locale_set)
2602 {
2603 new_free((void *)tables);
2604 setlocale(LC_CTYPE, "C");
2605 locale_set = 0;
2606 }
2607 continue; /* With next regex */
2608 }
2609 } /* End of non-POSIX compile */
2610
2611 /* Read data lines and test them */
2612
2613 for (;;)
2614 {
2615 pcre_uint8 *q;
2616 pcre_uint8 *bptr;
2617 int *use_offsets = offsets;
2618 int use_size_offsets = size_offsets;
2619 int callout_data = 0;
2620 int callout_data_set = 0;
2621 int count, c;
2622 int copystrings = 0;
2623 int find_match_limit = default_find_match_limit;
2624 int getstrings = 0;
2625 int getlist = 0;
2626 int gmatched = 0;
2627 int start_offset = 0;
2628 int start_offset_sign = 1;
2629 int g_notempty = 0;
2630 int use_dfa = 0;
2631
2632 options = 0;
2633
2634 *copynames = 0;
2635 *getnames = 0;
2636
2637 copynamesptr = copynames;
2638 getnamesptr = getnames;
2639
2640 pcre_callout = callout;
2641 first_callout = 1;
2642 last_callout_mark = NULL;
2643 callout_extra = 0;
2644 callout_count = 0;
2645 callout_fail_count = 999999;
2646 callout_fail_id = -1;
2647 show_malloc = 0;
2648
2649 if (extra != NULL) extra->flags &=
2650 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2651
2652 len = 0;
2653 for (;;)
2654 {
2655 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2656 {
2657 if (len > 0) /* Reached EOF without hitting a newline */
2658 {
2659 fprintf(outfile, "\n");
2660 break;
2661 }
2662 done = 1;
2663 goto CONTINUE;
2664 }
2665 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2666 len = (int)strlen((char *)buffer);
2667 if (buffer[len-1] == '\n') break;
2668 }
2669
2670 while (len > 0 && isspace(buffer[len-1])) len--;
2671 buffer[len] = 0;
2672 if (len == 0) break;
2673
2674 p = buffer;
2675 while (isspace(*p)) p++;
2676
2677 bptr = q = dbuffer;
2678 while ((c = *p++) != 0)
2679 {
2680 int i = 0;
2681 int n = 0;
2682
2683 if (c == '\\') switch ((c = *p++))
2684 {
2685 case 'a': c = 7; break;
2686 case 'b': c = '\b'; break;
2687 case 'e': c = 27; break;
2688 case 'f': c = '\f'; break;
2689 case 'n': c = '\n'; break;
2690 case 'r': c = '\r'; break;
2691 case 't': c = '\t'; break;
2692 case 'v': c = '\v'; break;
2693
2694 case '0': case '1': case '2': case '3':
2695 case '4': case '5': case '6': case '7':
2696 c -= '0';
2697 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2698 c = c * 8 + *p++ - '0';
2699
2700 #if !defined NOUTF8
2701 if (use_utf && c > 255)
2702 {
2703 pcre_uint8 buff8[8];
2704 int ii, utn;
2705 utn = ord2utf8(c, buff8);
2706 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2707 c = buff8[ii]; /* Last byte */
2708 }
2709 #endif
2710 break;
2711
2712 case 'x':
2713
2714 /* Handle \x{..} specially - new Perl thing for utf8 */
2715
2716 #if !defined NOUTF8
2717 if (*p == '{')
2718 {
2719 pcre_uint8 *pt = p;
2720 c = 0;
2721
2722 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2723 when isxdigit() is a macro that refers to its argument more than
2724 once. This is banned by the C Standard, but apparently happens in at
2725 least one MacOS environment. */
2726
2727 for (pt++; isxdigit(*pt); pt++)
2728 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2729 if (*pt == '}')
2730 {
2731 pcre_uint8 buff8[8];
2732 int ii, utn;
2733 if (use_utf)
2734 {
2735 utn = ord2utf8(c, buff8);
2736 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2737 c = buff8[ii]; /* Last byte */
2738 }
2739 else
2740 {
2741 if (c > 255)
2742 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2743 "UTF-8 mode is not enabled.\n"
2744 "** Truncation will probably give the wrong result.\n", c);
2745 }
2746 p = pt + 1;
2747 break;
2748 }
2749 /* Not correct form; fall through */
2750 }
2751 #endif
2752
2753 /* Ordinary \x */
2754
2755 c = 0;
2756 while (i++ < 2 && isxdigit(*p))
2757 {
2758 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2759 p++;
2760 }
2761 break;
2762
2763 case 0: /* \ followed by EOF allows for an empty line */
2764 p--;
2765 continue;
2766
2767 case '>':
2768 if (*p == '-')
2769 {
2770 start_offset_sign = -1;
2771 p++;
2772 }
2773 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2774 start_offset *= start_offset_sign;
2775 continue;
2776
2777 case 'A': /* Option setting */
2778 options |= PCRE_ANCHORED;
2779 continue;
2780
2781 case 'B':
2782 options |= PCRE_NOTBOL;
2783 continue;
2784
2785 case 'C':
2786 if (isdigit(*p)) /* Set copy string */
2787 {
2788 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2789 copystrings |= 1 << n;
2790 }
2791 else if (isalnum(*p))
2792 {
2793 pcre_uchar *npp = copynamesptr;
2794 while (isalnum(*p)) *npp++ = *p++;
2795 *npp++ = 0;
2796 *npp = 0;
2797 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2798 if (n < 0)
2799 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2800 copynamesptr = npp;
2801 }
2802 else if (*p == '+')
2803 {
2804 callout_extra = 1;
2805 p++;
2806 }
2807 else if (*p == '-')
2808 {
2809 pcre_callout = NULL;
2810 p++;
2811 }
2812 else if (*p == '!')
2813 {
2814 callout_fail_id = 0;
2815 p++;
2816 while(isdigit(*p))
2817 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2818 callout_fail_count = 0;
2819 if (*p == '!')
2820 {
2821 p++;
2822 while(isdigit(*p))
2823 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2824 }
2825 }
2826 else if (*p == '*')
2827 {
2828 int sign = 1;
2829 callout_data = 0;
2830 if (*(++p) == '-') { sign = -1; p++; }
2831 while(isdigit(*p))
2832 callout_data = callout_data * 10 + *p++ - '0';
2833 callout_data *= sign;
2834 callout_data_set = 1;
2835 }
2836 continue;
2837
2838 #if !defined NODFA
2839 case 'D':
2840 #if !defined NOPOSIX
2841 if (posix || do_posix)
2842 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2843 else
2844 #endif
2845 use_dfa = 1;
2846 continue;
2847 #endif
2848
2849 #if !defined NODFA
2850 case 'F':
2851 options |= PCRE_DFA_SHORTEST;
2852 continue;
2853 #endif
2854
2855 case 'G':
2856 if (isdigit(*p))
2857 {
2858 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2859 getstrings |= 1 << n;
2860 }
2861 else if (isalnum(*p))
2862 {
2863 pcre_uchar *npp = getnamesptr;
2864 while (isalnum(*p)) *npp++ = *p++;
2865 *npp++ = 0;
2866 *npp = 0;
2867 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2868 if (n < 0)
2869 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2870 getnamesptr = npp;
2871 }
2872 continue;
2873
2874 case 'J':
2875 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2876 if (extra != NULL
2877 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2878 && extra->executable_jit != NULL)
2879 {
2880 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2881 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2882 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2883 }
2884 continue;
2885
2886 case 'L':
2887 getlist = 1;
2888 continue;
2889
2890 case 'M':
2891 find_match_limit = 1;
2892 continue;
2893
2894 case 'N':
2895 if ((options & PCRE_NOTEMPTY) != 0)
2896 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2897 else
2898 options |= PCRE_NOTEMPTY;
2899 continue;
2900
2901 case 'O':
2902 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2903 if (n > size_offsets_max)
2904 {
2905 size_offsets_max = n;
2906 free(offsets);
2907 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2908 if (offsets == NULL)
2909 {
2910 printf("** Failed to get %d bytes of memory for offsets vector\n",
2911 (int)(size_offsets_max * sizeof(int)));
2912 yield = 1;
2913 goto EXIT;
2914 }
2915 }
2916 use_size_offsets = n;
2917 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2918 continue;
2919
2920 case 'P':
2921 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2922 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2923 continue;
2924
2925 case 'Q':
2926 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2927 if (extra == NULL)
2928 {
2929 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2930 extra->flags = 0;
2931 }
2932 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2933 extra->match_limit_recursion = n;
2934 continue;
2935
2936 case 'q':
2937 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2938 if (extra == NULL)
2939 {
2940 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2941 extra->flags = 0;
2942 }
2943 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2944 extra->match_limit = n;
2945 continue;
2946
2947 #if !defined NODFA
2948 case 'R':
2949 options |= PCRE_DFA_RESTART;
2950 continue;
2951 #endif
2952
2953 case 'S':
2954 show_malloc = 1;
2955 continue;
2956
2957 case 'Y':
2958 options |= PCRE_NO_START_OPTIMIZE;
2959 continue;
2960
2961 case 'Z':
2962 options |= PCRE_NOTEOL;
2963 continue;
2964
2965 case '?':
2966 options |= PCRE_NO_UTF8_CHECK;
2967 continue;
2968
2969 case '<':
2970 {
2971 int x = check_newline(p, outfile);
2972 if (x == 0) goto NEXT_DATA;
2973 options |= x;
2974 while (*p++ != '>');
2975 }
2976 continue;
2977 }
2978 *q++ = c;
2979 }
2980 *q = 0;
2981 len = (int)(q - dbuffer);
2982
2983 /* Move the data to the end of the buffer so that a read over the end of
2984 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2985 we are using the POSIX interface, we must include the terminating zero. */
2986
2987 #if !defined NOPOSIX
2988 if (posix || do_posix)
2989 {
2990 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2991 bptr += buffer_size - len - 1;
2992 }
2993 else
2994 #endif
2995 {
2996 memmove(bptr + buffer_size - len, bptr, len);
2997 bptr += buffer_size - len;
2998 }
2999
3000 if ((all_use_dfa || use_dfa) && find_match_limit)
3001 {
3002 printf("**Match limit not relevant for DFA matching: ignored\n");
3003 find_match_limit = 0;
3004 }
3005
3006 /* Handle matching via the POSIX interface, which does not
3007 support timing or playing with the match limit or callout data. */
3008
3009 #if !defined NOPOSIX
3010 if (posix || do_posix)
3011 {
3012 int rc;
3013 int eflags = 0;
3014 regmatch_t *pmatch = NULL;
3015 if (use_size_offsets > 0)
3016 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3017 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3018 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3019 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3020
3021 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3022
3023 if (rc != 0)
3024 {
3025 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3026 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3027 }
3028 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3029 != 0)
3030 {
3031 fprintf(outfile, "Matched with REG_NOSUB\n");
3032 }
3033 else
3034 {
3035 size_t i;
3036 for (i = 0; i < (size_t)use_size_offsets; i++)
3037 {
3038 if (pmatch[i].rm_so >= 0)
3039 {
3040 fprintf(outfile, "%2d: ", (int)i);
3041 PCHARSV(dbuffer + pmatch[i].rm_so,
3042 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3043 fprintf(outfile, "\n");
3044 if (do_showcaprest || (i == 0 && do_showrest))
3045 {
3046 fprintf(outfile, "%2d+ ", (int)i);
3047 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3048 outfile);
3049 fprintf(outfile, "\n");
3050 }
3051 }
3052 }
3053 }
3054 free(pmatch);
3055 goto NEXT_DATA;
3056 }
3057
3058 #endif /* !defined NOPOSIX */
3059
3060 /* Handle matching via the native interface - repeats for /g and /G */
3061
3062 #ifdef SUPPORT_PCRE16
3063 if (use_pcre16)
3064 {
3065 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3066 if (len < 0)
3067 {
3068 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3069 "converted to UTF-16\n");
3070 goto NEXT_DATA;
3071 }
3072 bptr = (pcre_uint8 *)buffer16;
3073 }
3074 #endif
3075
3076 for (;; gmatched++) /* Loop for /g or /G */
3077 {
3078 markptr = NULL;
3079
3080 if (timeitm > 0)
3081 {
3082 register int i;
3083 clock_t time_taken;
3084 clock_t start_time = clock();
3085
3086 #if !defined NODFA
3087 if (all_use_dfa || use_dfa)
3088 {
3089 int workspace[1000];
3090 for (i = 0; i < timeitm; i++)
3091 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3092 options | g_notempty, use_offsets, use_size_offsets, workspace,
3093 sizeof(workspace)/sizeof(int));
3094 }
3095 else
3096 #endif
3097
3098 for (i = 0; i < timeitm; i++)
3099 {
3100 PCRE_EXEC(count, re, extra, bptr, len,
3101 start_offset, options | g_notempty, use_offsets, use_size_offsets);
3102 }
3103 time_taken = clock() - start_time;
3104 fprintf(outfile, "Execute time %.4f milliseconds\n",
3105 (((double)time_taken * 1000.0) / (double)timeitm) /
3106 (double)CLOCKS_PER_SEC);
3107 }
3108
3109 /* If find_match_limit is set, we want to do repeated matches with
3110 varying limits in order to find the minimum value for the match limit and
3111 for the recursion limit. The match limits are relevant only to the normal
3112 running of pcre_exec(), so disable the JIT optimization. This makes it
3113 possible to run the same set of tests with and without JIT externally
3114 requested. */
3115
3116 if (find_match_limit)
3117 {
3118 if (extra == NULL)
3119 {
3120 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3121 extra->flags = 0;
3122 }
3123 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3124
3125 (void)check_match_limit(re, extra, bptr, len, start_offset,
3126 options|g_notempty, use_offsets, use_size_offsets,
3127 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3128 PCRE_ERROR_MATCHLIMIT, "match()");
3129
3130 count = check_match_limit(re, extra, bptr, len, start_offset,
3131 options|g_notempty, use_offsets, use_size_offsets,
3132 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3133 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3134 }
3135
3136 /* If callout_data is set, use the interface with additional data */
3137
3138 else if (callout_data_set)
3139 {
3140 if (extra == NULL)
3141 {
3142 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3143 extra->flags = 0;
3144 }
3145 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3146 extra->callout_data = &callout_data;
3147 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3148 options | g_notempty, use_offsets, use_size_offsets);
3149 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3150 }
3151
3152 /* The normal case is just to do the match once, with the default
3153 value of match_limit. */
3154
3155 #if !defined NODFA
3156 else if (all_use_dfa || use_dfa)
3157 {
3158 int workspace[1000];
3159 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3160 options | g_notempty, use_offsets, use_size_offsets, workspace,
3161 sizeof(workspace)/sizeof(int));
3162 if (count == 0)
3163 {
3164 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3165 count = use_size_offsets/2;
3166 }
3167 }
3168 #endif
3169
3170 else
3171 {
3172 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3173 options | g_notempty, use_offsets, use_size_offsets);
3174 if (count == 0)
3175 {
3176 fprintf(outfile, "Matched, but too many substrings\n");
3177 count = use_size_offsets/3;
3178 }
3179 }
3180
3181 /* Matched */
3182
3183 if (count >= 0)
3184 {
3185 int i, maxcount;
3186
3187 #if !defined NODFA
3188 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3189 #endif
3190 maxcount = use_size_offsets/3;
3191
3192 /* This is a check against a lunatic return value. */
3193
3194 if (count > maxcount)
3195 {
3196 fprintf(outfile,
3197 "** PCRE error: returned count %d is too big for offset size %d\n",
3198 count, use_size_offsets);
3199 count = use_size_offsets/3;
3200 if (do_g || do_G)
3201 {
3202 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3203 do_g = do_G = FALSE; /* Break g/G loop */
3204 }
3205 }
3206
3207 /* do_allcaps requests showing of all captures in the pattern, to check
3208 unset ones at the end. */
3209
3210 if (do_allcaps)
3211 {
3212 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3213 count++; /* Allow for full match */
3214 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3215 }
3216
3217 /* Output the captured substrings */
3218
3219 for (i = 0; i < count * 2; i += 2)
3220 {
3221 if (use_offsets[i] < 0)
3222 {
3223 if (use_offsets[i] != -1)
3224 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3225 use_offsets[i], i);
3226 if (use_offsets[i+1] != -1)
3227 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3228 use_offsets[i+1], i+1);
3229 fprintf(outfile, "%2d: <unset>\n", i/2);
3230 }
3231 else
3232 {
3233 fprintf(outfile, "%2d: ", i/2);
3234 PCHARSV(bptr + use_offsets[i],
3235 use_offsets[i+1] - use_offsets[i], outfile);
3236 fprintf(outfile, "\n");
3237 if (do_showcaprest || (i == 0 && do_showrest))
3238 {
3239 fprintf(outfile, "%2d+ ", i/2);
3240 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3241 outfile);
3242 fprintf(outfile, "\n");
3243 }
3244 }
3245 }
3246
3247 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3248
3249 for (i = 0; i < 32; i++)
3250 {
3251 if ((copystrings & (1 << i)) != 0)
3252 {
3253 char copybuffer[256];
3254 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3255 i, copybuffer, sizeof(copybuffer));
3256 if (rc < 0)
3257 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3258 else
3259 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3260 }
3261 }
3262
3263 for (copynamesptr = copynames;
3264 *copynamesptr != 0;
3265 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3266 {
3267 char copybuffer[256];
3268 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3269 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3270 if (rc < 0)
3271 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3272 else
3273 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3274 }
3275
3276 for (i = 0; i < 32; i++)
3277 {
3278 if ((getstrings & (1 << i)) != 0)
3279 {
3280 const char *substring;
3281 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3282 i, &substring);
3283 if (rc < 0)
3284 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3285 else
3286 {
3287 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3288 pcre_free_substring(substring);
3289 }
3290 }
3291 }
3292
3293 for (getnamesptr = getnames;
3294 *getnamesptr != 0;
3295 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3296 {
3297 const char *substring;
3298 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3299 count, (char *)getnamesptr, &substring);
3300 if (rc < 0)
3301 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3302 else
3303 {
3304 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
3305 pcre_free_substring(substring);
3306 }
3307 }
3308
3309 if (getlist)
3310 {
3311 const char **stringlist;
3312 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
3313 &stringlist);
3314 if (rc < 0)
3315 fprintf(outfile, "get substring list failed %d\n", rc);
3316 else
3317 {
3318 for (i = 0; i < count; i++)
3319 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3320 if (stringlist[i] != NULL)
3321 fprintf(outfile, "string list not terminated by NULL\n");
3322 pcre_free_substring_list(stringlist);
3323 }
3324 }
3325 }
3326
3327 /* There was a partial match */
3328
3329 else if (count == PCRE_ERROR_PARTIAL)
3330 {
3331 if (markptr == NULL) fprintf(outfile, "Partial match");
3332 else fprintf(outfile, "Partial match, mark=%s", markptr);
3333 if (use_size_offsets > 1)
3334 {
3335 fprintf(outfile, ": ");
3336 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3337 outfile);
3338 }
3339 fprintf(outfile, "\n");
3340 break; /* Out of the /g loop */
3341 }
3342
3343 /* Failed to match. If this is a /g or /G loop and we previously set
3344 g_notempty after a null match, this is not necessarily the end. We want
3345 to advance the start offset, and continue. We won't be at the end of the
3346 string - that was checked before setting g_notempty.
3347
3348 Complication arises in the case when the newline convention is "any",
3349 "crlf", or "anycrlf". If the previous match was at the end of a line
3350 terminated by CRLF, an advance of one character just passes the \r,
3351 whereas we should prefer the longer newline sequence, as does the code in
3352 pcre_exec(). Fudge the offset value to achieve this. We check for a
3353 newline setting in the pattern; if none was set, use pcre_config() to
3354 find the default.
3355
3356 Otherwise, in the case of UTF-8 matching, the advance must be one
3357 character, not one byte. */
3358
3359 else
3360 {
3361 if (g_notempty != 0)
3362 {
3363 int onechar = 1;
3364 unsigned int obits = ((real_pcre *)re)->options;
3365 use_offsets[0] = start_offset;
3366 if ((obits & PCRE_NEWLINE_BITS) == 0)
3367 {
3368 int d;
3369 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3370 /* Note that these values are always the ASCII ones, even in
3371 EBCDIC environments. CR = 13, NL = 10. */
3372 obits = (d == 13)? PCRE_NEWLINE_CR :
3373 (d == 10)? PCRE_NEWLINE_LF :
3374 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3375 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3376 (d == -1)? PCRE_NEWLINE_ANY : 0;
3377 }
3378 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3379 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3380 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3381 &&
3382 start_offset < len - 1 &&
3383 bptr[start_offset] == '\r' &&
3384 bptr[start_offset+1] == '\n')
3385 onechar++;
3386 else if (use_utf)
3387 {
3388 while (start_offset + onechar < len)
3389 {
3390 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3391 onechar++;
3392 }
3393 }
3394 use_offsets[1] = start_offset + onechar;
3395 }
3396 else
3397 {
3398 switch(count)
3399 {
3400 case PCRE_ERROR_NOMATCH:
3401 if (gmatched == 0)
3402 {
3403 if (markptr == NULL) fprintf(outfile, "No match\n");
3404 else fprintf(outfile, "No match, mark = %s\n", markptr);
3405 }
3406 break;
3407
3408 case PCRE_ERROR_BADUTF8:
3409 case PCRE_ERROR_SHORTUTF8:
3410 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3411 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3412 if (use_size_offsets >= 2)
3413 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3414 use_offsets[1]);
3415 fprintf(outfile, "\n");
3416 break;
3417
3418 default:
3419 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3420 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3421 else
3422 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3423 break;
3424 }
3425
3426 break; /* Out of the /g loop */
3427 }
3428 }
3429
3430 /* If not /g or /G we are done */
3431
3432 if (!do_g && !do_G) break;
3433
3434 /* If we have matched an empty string, first check to see if we are at
3435 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3436 Perl's /g option does. This turns out to be rather cunning. First we set
3437 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3438 same point. If this fails (picked up above) we advance to the next
3439 character. */
3440
3441 g_notempty = 0;
3442
3443 if (use_offsets[0] == use_offsets[1])
3444 {
3445 if (use_offsets[0] == len) break;
3446 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3447 }
3448
3449 /* For /g, update the start offset, leaving the rest alone */
3450
3451 if (do_g) start_offset = use_offsets[1];
3452
3453 /* For /G, update the pointer and length */
3454
3455 else
3456 {
3457 bptr += use_offsets[1];
3458 len -= use_offsets[1];
3459 }
3460 } /* End of loop for /g and /G */
3461
3462 NEXT_DATA: continue;
3463 } /* End of loop for data lines */
3464
3465 CONTINUE:
3466
3467 #if !defined NOPOSIX
3468 if (posix || do_posix) regfree(&preg);
3469 #endif
3470
3471 if (re != NULL) new_free(re);
3472 if (extra != NULL)
3473 {
3474 PCRE_FREE_STUDY(extra);
3475 }
3476 if (locale_set)
3477 {
3478 new_free((void *)tables);
3479 setlocale(LC_CTYPE, "C");
3480 locale_set = 0;
3481 }
3482 if (jit_stack != NULL)
3483 {
3484 pcre_jit_stack_free(jit_stack);
3485 jit_stack = NULL;
3486 }
3487 }
3488
3489 if (infile == stdin) fprintf(outfile, "\n");
3490
3491 EXIT:
3492
3493 if (infile != NULL && infile != stdin) fclose(infile);
3494 if (outfile != NULL && outfile != stdout) fclose(outfile);
3495
3496 free(buffer);
3497 free(dbuffer);
3498 free(pbuffer);
3499 free(offsets);
3500
3501 #ifdef SUPPORT_PCRE16
3502 if (buffer16 != NULL) free(buffer16);
3503 #endif
3504
3505 return yield;
3506 }
3507
3508 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5