/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 801 - (show annotations)
Mon Dec 12 16:23:37 2011 UTC (3 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 94524 byte(s)
Error occurred while calculating annotation data.
Merge changes from trunk r755 to r800 into the 16-bit branch.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utt utt
122 #define _pcre_utt_size utt_size
123 #define _pcre_utt_names utt_names
124 #define _pcre_OP_lengths OP_lengths
125
126 #include "pcre_tables.c"
127
128 /* We also need the pcre_printint() function for printing out compiled
129 patterns. This function is in a separate file so that it can be included in
130 pcre_compile.c when that module is compiled with debugging enabled. It needs to
131 know which case is being compiled. */
132
133 #define COMPILING_PCRETEST
134 #include "pcre_printint.src"
135
136 /* The definition of the macro PRINTABLE, which determines whether to print an
137 output character as-is or as a hex value when showing compiled patterns, is
138 contained in the printint.src file. We use it here also, in cases when the
139 locale has not been explicitly changed, so as to get consistent output from
140 systems that differ in their output from isprint() even in the "C" locale. */
141
142 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
143
144 /* It is possible to compile this test program without including support for
145 testing the POSIX interface, though this is not available via the standard
146 Makefile. */
147
148 #if !defined NOPOSIX
149 #include "pcreposix.h"
150 #endif
151
152 /* It is also possible, for the benefit of the version currently imported into
153 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
154 interface to the DFA matcher (NODFA), and without the doublecheck of the old
155 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
156 UTF8 support if PCRE is built without it. */
157
158 #ifndef SUPPORT_UTF8
159 #ifndef NOUTF8
160 #define NOUTF8
161 #endif
162 #endif
163
164
165 /* Other parameters */
166
167 #ifndef CLOCKS_PER_SEC
168 #ifdef CLK_TCK
169 #define CLOCKS_PER_SEC CLK_TCK
170 #else
171 #define CLOCKS_PER_SEC 100
172 #endif
173 #endif
174
175 /* This is the default loop count for timing. */
176
177 #define LOOPREPEAT 500000
178
179 /* Static variables */
180
/* File-scope state shared by the helper functions below. The notes describe
only what the functions visible in this part of the file do with each
variable. */
181 static FILE *outfile; /* stream that callout() and the malloc wrappers write to */
182 static int log_store = 0;
183 static int callout_count; /* incremented by callout() for callouts numbered callout_fail_id */
184 static int callout_extra; /* non-zero: callout() also lists the captured substrings */
185 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this */
186 static int callout_fail_id; /* callout number that is counted towards the failure */
187 static int debug_lengths;
188 static int first_callout; /* non-zero until callout() has run once; it then clears it */
189 static int locale_set = 0; /* selects isprint() instead of PRINTABLE() in PRINTHEX */
190 static int show_malloc; /* non-zero: the malloc/free wrappers log each call */
191 static int use_utf8; /* non-zero: pchars() decodes its input as UTF-8 */
192 static size_t gotten_store; /* size of the most recent new_malloc() request */
193 static size_t first_gotten_store = 0; /* size of the first new_malloc() request seen while this was 0 */
194 static const unsigned char *last_callout_mark = NULL; /* mark most recently reported by callout() */
195
196 /* The buffers grow automatically if very long input lines are encountered. */
197
198 static int buffer_size = 50000; /* current size of each of the three buffers; doubled by extend_inputline() */
199 static pcre_uint8 *buffer = NULL; /* input lines are read into this buffer */
200 static pcre_uint8 *dbuffer = NULL; /* reallocated with the others; its contents are not copied on growth */
201 static pcre_uint8 *pbuffer = NULL; /* copy of the input used by callout(); contents preserved on growth */
202
203 /* Textual explanations for runtime error codes */
204
/* One message per runtime error code. NOTE(review): the entries appear to be
indexed by the negated PCRE error code (entry 1 is annotated NOMATCH) - confirm
against the lookup code, which is not visible in this part of the file. NULL
entries are codes that are never returned or that are reported by dedicated
code, as the per-entry comments say. */
205 static const char *errtexts[] = {
206 NULL, /* 0 is no error */
207 NULL, /* NOMATCH is handled specially */
208 "NULL argument passed",
209 "bad option value",
210 "magic number missing",
211 "unknown opcode - pattern overwritten?",
212 "no more memory",
213 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
214 "match limit exceeded",
215 "callout error code",
216 NULL, /* BADUTF8 is handled specially */
217 "bad UTF-8 offset",
218 NULL, /* PARTIAL is handled specially */
219 "not used - internal error",
220 "internal error - pattern overwritten?",
221 "bad count value",
222 "item unsupported for DFA matching",
223 "backreference condition or recursion test not supported for DFA matching",
224 "match limit not supported for DFA matching",
225 "workspace size exceeded in DFA matching",
226 "too much recursion for DFA matching",
227 "recursion limit exceeded",
228 "not used - internal error",
229 "invalid combination of newline options",
230 "bad offset value",
231 NULL, /* SHORTUTF8 is handled specially */
232 "nested recursion at the same subject position",
233 "JIT stack limit reached"
234 };
235
236
237 /*************************************************
238 * Alternate character tables *
239 *************************************************/
240
241 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242 using the default tables of the library. However, the T option can be used to
243 select alternate sets of tables, for different kinds of testing. Note also that
244 the L (locale) option also adjusts the tables. */
245
246 /* This is the set of tables distributed as default with PCRE. It recognizes
247 only ASCII characters. */
248
/* Layout of tables0, in order: a 256-byte lower-casing table, a 256-byte
case-flipping table, ten 32-byte character-class bit maps (320 bytes), and a
256-byte table of per-character type flags: 1088 bytes in total. */
249 static const unsigned char tables0[] = {
250
251 /* This table is a lower casing table. */
252
253 0, 1, 2, 3, 4, 5, 6, 7,
254 8, 9, 10, 11, 12, 13, 14, 15,
255 16, 17, 18, 19, 20, 21, 22, 23,
256 24, 25, 26, 27, 28, 29, 30, 31,
257 32, 33, 34, 35, 36, 37, 38, 39,
258 40, 41, 42, 43, 44, 45, 46, 47,
259 48, 49, 50, 51, 52, 53, 54, 55,
260 56, 57, 58, 59, 60, 61, 62, 63,
261 64, 97, 98, 99,100,101,102,103,
262 104,105,106,107,108,109,110,111,
263 112,113,114,115,116,117,118,119,
264 120,121,122, 91, 92, 93, 94, 95,
265 96, 97, 98, 99,100,101,102,103,
266 104,105,106,107,108,109,110,111,
267 112,113,114,115,116,117,118,119,
268 120,121,122,123,124,125,126,127,
269 128,129,130,131,132,133,134,135,
270 136,137,138,139,140,141,142,143,
271 144,145,146,147,148,149,150,151,
272 152,153,154,155,156,157,158,159,
273 160,161,162,163,164,165,166,167,
274 168,169,170,171,172,173,174,175,
275 176,177,178,179,180,181,182,183,
276 184,185,186,187,188,189,190,191,
277 192,193,194,195,196,197,198,199,
278 200,201,202,203,204,205,206,207,
279 208,209,210,211,212,213,214,215,
280 216,217,218,219,220,221,222,223,
281 224,225,226,227,228,229,230,231,
282 232,233,234,235,236,237,238,239,
283 240,241,242,243,244,245,246,247,
284 248,249,250,251,252,253,254,255,
285
286 /* This table is a case flipping table. */
287
288 0, 1, 2, 3, 4, 5, 6, 7,
289 8, 9, 10, 11, 12, 13, 14, 15,
290 16, 17, 18, 19, 20, 21, 22, 23,
291 24, 25, 26, 27, 28, 29, 30, 31,
292 32, 33, 34, 35, 36, 37, 38, 39,
293 40, 41, 42, 43, 44, 45, 46, 47,
294 48, 49, 50, 51, 52, 53, 54, 55,
295 56, 57, 58, 59, 60, 61, 62, 63,
296 64, 97, 98, 99,100,101,102,103,
297 104,105,106,107,108,109,110,111,
298 112,113,114,115,116,117,118,119,
299 120,121,122, 91, 92, 93, 94, 95,
300 96, 65, 66, 67, 68, 69, 70, 71,
301 72, 73, 74, 75, 76, 77, 78, 79,
302 80, 81, 82, 83, 84, 85, 86, 87,
303 88, 89, 90,123,124,125,126,127,
304 128,129,130,131,132,133,134,135,
305 136,137,138,139,140,141,142,143,
306 144,145,146,147,148,149,150,151,
307 152,153,154,155,156,157,158,159,
308 160,161,162,163,164,165,166,167,
309 168,169,170,171,172,173,174,175,
310 176,177,178,179,180,181,182,183,
311 184,185,186,187,188,189,190,191,
312 192,193,194,195,196,197,198,199,
313 200,201,202,203,204,205,206,207,
314 208,209,210,211,212,213,214,215,
315 216,217,218,219,220,221,222,223,
316 224,225,226,227,228,229,230,231,
317 232,233,234,235,236,237,238,239,
318 240,241,242,243,244,245,246,247,
319 248,249,250,251,252,253,254,255,
320
321 /* This table contains bit maps for various character classes. Each map is 32
322 bytes long and the bits run from the least significant end of each byte. The
323 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
324 graph, print, punct, and cntrl. Other classes are built from combinations. */
325
326 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space: characters 9-13 and 32 */
327 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330
331 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit: 0-9, A-F, a-f */
332 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335
336 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit: 0-9 */
337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper: A-Z */
342 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345
346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower: a-z */
347 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350
351 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word: 0-9, A-Z, '_', a-z */
352 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355
356 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph: characters 33-126 */
357 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360
361 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print: characters 32-126 */
362 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365
366 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
367 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370
371 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl: characters 0-31 and 127 */
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375
376 /* This table identifies various classes of character by individual bits:
377 0x01 white space character
378 0x02 letter
379 0x04 decimal digit
380 0x08 hexadecimal digit
381 0x10 alphanumeric or '_'
382 0x80 regular expression metacharacter or binary zero
383 */
384
385 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
386 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
387 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
388 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
389 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
390 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
391 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
392 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
393 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
395 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
396 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
397 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
398 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
399 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
400 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
414 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
415 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
416 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
417
418 /* This is a set of tables that came originally from a Windows user. It seems to
419 be at least an approximation of ISO 8859. In particular, there are characters
420 greater than 128 that are marked as spaces, letters, etc. */
421
/* tables1 holds the same number of bytes in the same grouping as tables0
(256 + 256 + 320 + 256), written in decimal and without section markers. The
section comments below were added by matching that layout; the class names on
the bit maps assume the same order as in tables0. */
422 static const unsigned char tables1[] = {
/* Lower-casing table: 65-90 map to 97-122, and 192-222 (except 215) map to
224-254. */
423 0,1,2,3,4,5,6,7,
424 8,9,10,11,12,13,14,15,
425 16,17,18,19,20,21,22,23,
426 24,25,26,27,28,29,30,31,
427 32,33,34,35,36,37,38,39,
428 40,41,42,43,44,45,46,47,
429 48,49,50,51,52,53,54,55,
430 56,57,58,59,60,61,62,63,
431 64,97,98,99,100,101,102,103,
432 104,105,106,107,108,109,110,111,
433 112,113,114,115,116,117,118,119,
434 120,121,122,91,92,93,94,95,
435 96,97,98,99,100,101,102,103,
436 104,105,106,107,108,109,110,111,
437 112,113,114,115,116,117,118,119,
438 120,121,122,123,124,125,126,127,
439 128,129,130,131,132,133,134,135,
440 136,137,138,139,140,141,142,143,
441 144,145,146,147,148,149,150,151,
442 152,153,154,155,156,157,158,159,
443 160,161,162,163,164,165,166,167,
444 168,169,170,171,172,173,174,175,
445 176,177,178,179,180,181,182,183,
446 184,185,186,187,188,189,190,191,
447 224,225,226,227,228,229,230,231,
448 232,233,234,235,236,237,238,239,
449 240,241,242,243,244,245,246,215,
450 248,249,250,251,252,253,254,223,
451 224,225,226,227,228,229,230,231,
452 232,233,234,235,236,237,238,239,
453 240,241,242,243,244,245,246,247,
454 248,249,250,251,252,253,254,255,
/* Case-flipping table: as above for upper case, and lower-case values map
back to their upper-case partners. */
455 0,1,2,3,4,5,6,7,
456 8,9,10,11,12,13,14,15,
457 16,17,18,19,20,21,22,23,
458 24,25,26,27,28,29,30,31,
459 32,33,34,35,36,37,38,39,
460 40,41,42,43,44,45,46,47,
461 48,49,50,51,52,53,54,55,
462 56,57,58,59,60,61,62,63,
463 64,97,98,99,100,101,102,103,
464 104,105,106,107,108,109,110,111,
465 112,113,114,115,116,117,118,119,
466 120,121,122,91,92,93,94,95,
467 96,65,66,67,68,69,70,71,
468 72,73,74,75,76,77,78,79,
469 80,81,82,83,84,85,86,87,
470 88,89,90,123,124,125,126,127,
471 128,129,130,131,132,133,134,135,
472 136,137,138,139,140,141,142,143,
473 144,145,146,147,148,149,150,151,
474 152,153,154,155,156,157,158,159,
475 160,161,162,163,164,165,166,167,
476 168,169,170,171,172,173,174,175,
477 176,177,178,179,180,181,182,183,
478 184,185,186,187,188,189,190,191,
479 224,225,226,227,228,229,230,231,
480 232,233,234,235,236,237,238,239,
481 240,241,242,243,244,245,246,215,
482 248,249,250,251,252,253,254,223,
483 192,193,194,195,196,197,198,199,
484 200,201,202,203,204,205,206,207,
485 208,209,210,211,212,213,214,247,
486 216,217,218,219,220,221,222,255,
/* Character-class bit maps: ten maps of 32 bytes (four lines) each. */
/* space */
487 0,62,0,0,1,0,0,0,
488 0,0,0,0,0,0,0,0,
489 32,0,0,0,1,0,0,0,
490 0,0,0,0,0,0,0,0,
/* xdigit */
491 0,0,0,0,0,0,255,3,
492 126,0,0,0,126,0,0,0,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0,
/* digit */
495 0,0,0,0,0,0,255,3,
496 0,0,0,0,0,0,0,0,
497 0,0,0,0,0,0,12,2,
498 0,0,0,0,0,0,0,0,
/* upper */
499 0,0,0,0,0,0,0,0,
500 254,255,255,7,0,0,0,0,
501 0,0,0,0,0,0,0,0,
502 255,255,127,127,0,0,0,0,
/* lower */
503 0,0,0,0,0,0,0,0,
504 0,0,0,0,254,255,255,7,
505 0,0,0,0,0,4,32,4,
506 0,0,0,128,255,255,127,255,
/* word */
507 0,0,0,0,0,0,255,3,
508 254,255,255,135,254,255,255,7,
509 0,0,0,0,0,4,44,6,
510 255,255,127,255,255,255,127,255,
/* graph */
511 0,0,0,0,254,255,255,255,
512 255,255,255,255,255,255,255,127,
513 0,0,0,0,254,255,255,255,
514 255,255,255,255,255,255,255,255,
/* print */
515 0,2,0,0,255,255,255,255,
516 255,255,255,255,255,255,255,127,
517 0,0,0,0,255,255,255,255,
518 255,255,255,255,255,255,255,255,
/* punct */
519 0,0,0,0,254,255,0,252,
520 1,0,0,248,1,0,0,120,
521 0,0,0,0,254,255,255,255,
522 0,0,128,0,0,0,128,0,
/* cntrl */
523 255,255,255,255,0,0,0,0,
524 0,0,0,0,0,0,0,128,
525 255,255,255,255,0,0,0,0,
526 0,0,0,0,0,0,0,0,
/* Character type flags, one byte per character, eight characters per line;
the flag values follow the pattern of the tables0 type table (for example 28
= 0x1c for the digits 0-9). */
527 128,0,0,0,0,0,0,0,
528 0,1,1,0,1,1,0,0,
529 0,0,0,0,0,0,0,0,
530 0,0,0,0,0,0,0,0,
531 1,0,0,0,128,0,0,0,
532 128,128,128,128,0,0,128,0,
533 28,28,28,28,28,28,28,28,
534 28,28,0,0,0,0,0,128,
535 0,26,26,26,26,26,26,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,18,18,18,18,18,
538 18,18,18,128,128,0,128,16,
539 0,26,26,26,26,26,26,18,
540 18,18,18,18,18,18,18,18,
541 18,18,18,18,18,18,18,18,
542 18,18,18,128,128,0,0,0,
543 0,0,0,0,0,1,0,0,
544 0,0,0,0,0,0,0,0,
545 0,0,0,0,0,0,0,0,
546 0,0,0,0,0,0,0,0,
547 1,0,0,0,0,0,0,0,
548 0,0,18,0,0,0,0,0,
549 0,0,20,20,0,18,0,0,
550 0,20,18,0,0,0,0,0,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,18,
553 18,18,18,18,18,18,18,0,
554 18,18,18,18,18,18,18,18,
555 18,18,18,18,18,18,18,18,
556 18,18,18,18,18,18,18,18,
557 18,18,18,18,18,18,18,0,
558 18,18,18,18,18,18,18,18
559 };
560
561
562
563
564 #ifndef HAVE_STRERROR
565 /*************************************************
566 * Provide strerror() for non-ANSI libraries *
567 *************************************************/
568
569 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
570 in their libraries, but can provide the same facility by this simple
571 alternative function. */
572
/* System-provided table of error messages and the number of entries in it. */
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror() for libraries that lack one.

Argument:
  n         an errno value

Returns:    the matching sys_errlist[] entry when n is a valid index,
            otherwise a fixed "unknown" message
*/
char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    {
    return sys_errlist[n];
    }
  return "unknown error number";
}
582 #endif /* HAVE_STRERROR */
583
584
585 /*************************************************
586 * JIT memory callback *
587 *************************************************/
588
589 static pcre_jit_stack* jit_callback(void *arg)
590 {
591 return (pcre_jit_stack *)arg;
592 }
593
594
595 /*************************************************
596 * Read or extend an input line *
597 *************************************************/
598
599 /* Input lines are read into buffer, but both patterns and data lines can be
600 continued over multiple input lines. In addition, if the buffer fills up, we
601 want to automatically expand it so as to be able to handle extremely large
602 lines that are needed for certain stress tests. When the input buffer is
603 expanded, the other two buffers must also be expanded likewise, and the
604 contents of pbuffer, which are a copy of the input for callouts, must be
605 preserved (for when expansion happens for a data line). This is not the most
606 optimal way of handling this, but hey, this is just a test program!
607
608 Arguments:
609 f the file to read
610 start where in buffer to start (this *must* be within buffer)
611 prompt for stdin or readline()
612
613 Returns: pointer to the start of new data
614 could be a copy of start, or could be moved
615 NULL if no data read and EOF reached
616 */
617
618 static pcre_uint8 *
619 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
620 {
621 pcre_uint8 *here = start;
622
623 for (;;)
624 {
625 int rlen = (int)(buffer_size - (here - buffer));
626
627 if (rlen > 1000)
628 {
629 int dlen;
630
631 /* If libreadline support is required, use readline() to read a line if the
632 input is a terminal. Note that readline() removes the trailing newline, so
633 we must put it back again, to be compatible with fgets(). */
634
635 #ifdef SUPPORT_LIBREADLINE
636 if (isatty(fileno(f)))
637 {
638 size_t len;
639 char *s = readline(prompt);
640 if (s == NULL) return (here == start)? NULL : start;
641 len = strlen(s);
642 if (len > 0) add_history(s);
643 if (len > rlen - 1) len = rlen - 1;
644 memcpy(here, s, len);
645 here[len] = '\n';
646 here[len+1] = 0;
647 free(s);
648 }
649 else
650 #endif
651
652 /* Read the next line by normal means, prompting if the file is stdin. */
653
654 {
655 if (f == stdin) printf("%s", prompt);
656 if (fgets((char *)here, rlen, f) == NULL)
657 return (here == start)? NULL : start;
658 }
659
660 dlen = (int)strlen((char *)here);
661 if (dlen > 0 && here[dlen - 1] == '\n') return start;
662 here += dlen;
663 }
664
665 else
666 {
667 int new_buffer_size = 2*buffer_size;
668 pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
669 pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670 pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671
672 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673 {
674 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
675 exit(1);
676 }
677
678 memcpy(new_buffer, buffer, buffer_size);
679 memcpy(new_pbuffer, pbuffer, buffer_size);
680
681 buffer_size = new_buffer_size;
682
683 start = new_buffer + (start - buffer);
684 here = new_buffer + (here - buffer);
685
686 free(buffer);
687 free(dbuffer);
688 free(pbuffer);
689
690 buffer = new_buffer;
691 dbuffer = new_dbuffer;
692 pbuffer = new_pbuffer;
693 }
694 }
695
696 return NULL; /* Control never gets here */
697 }
698
699
700
701
702
703
704
705 /*************************************************
706 * Read number from string *
707 *************************************************/
708
709 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
710 around with conditional compilation, just do the job by hand. It is only used
711 for unpicking arguments, so just keep it simple.
712
713 Arguments:
714 str string to be converted
715 endptr where to put the end pointer
716
717 Returns: the converted value, as an int
718 */
719
/* Convert a run of decimal digits, optionally preceded by white space, to an
int.

Arguments:
  str       string to be converted
  endptr    receives a pointer to the first character after the digits (or
              after the leading white space if there are no digits)

Returns:    the value; 0 if there are no digits. Values too large for an int
            saturate at the largest int instead of overflowing, but all the
            digits are still consumed.
*/
static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Largest int value, obtained without needing <limits.h>. */
  const int max_value = (int)(~0u >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit would exceed max_value exactly when result is
    above this quotient; signed overflow is undefined, so test first. */

    if (result > (max_value - digit) / 10)
      result = max_value;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
729
730
731
732
733 /*************************************************
734 * Convert UTF-8 string to value *
735 *************************************************/
736
737 /* This function takes one or more bytes that represents a UTF-8 character,
738 and returns the value of the character.
739
740 Argument:
741 utf8bytes a pointer to the byte vector
742 vptr a pointer to an int to receive the value
743
744 Returns: > 0 => the number of bytes consumed
745 -6 to 0 => malformed UTF-8 character at offset = (-return)
746 */
747
748 #if !defined NOUTF8
749
750 static int
751 utf82ord(unsigned char *utf8bytes, int *vptr)
752 {
753 int c = *utf8bytes++;
754 int d = c;
755 int i, j, s;
756
757 for (i = -1; i < 6; i++) /* i is number of additional bytes */
758 {
759 if ((d & 0x80) == 0) break;
760 d <<= 1;
761 }
762
763 if (i == -1) { *vptr = c; return 1; } /* ascii character */
764 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
765
766 /* i now has a value in the range 1-5 */
767
768 s = 6*i;
769 d = (c & utf8_table3[i]) << s;
770
771 for (j = 0; j < i; j++)
772 {
773 c = *utf8bytes++;
774 if ((c & 0xc0) != 0x80) return -(j+1);
775 s -= 6;
776 d |= (c & 0x3f) << s;
777 }
778
779 /* Check that encoding was the correct unique one */
780
781 for (j = 0; j < utf8_table1_size; j++)
782 if (d <= utf8_table1[j]) break;
783 if (j != i) return -(i+1);
784
785 /* Valid value */
786
787 *vptr = d;
788 return i+1;
789 }
790
791 #endif
792
793
794
795 /*************************************************
796 * Convert character value to UTF-8 *
797 *************************************************/
798
799 /* This function takes an integer value in the range 0 - 0x7fffffff
800 and encodes it as a UTF-8 character in 1 to 6 bytes.
801
802 Arguments:
803 cvalue the character value
804 utf8bytes pointer to buffer for result - at least 6 bytes long
805
806 Returns: number of bytes placed in the buffer
807 */
808
809 #if !defined NOUTF8
810
811 static int
812 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
813 {
814 register int i, j;
815 for (i = 0; i < utf8_table1_size; i++)
816 if (cvalue <= utf8_table1[i]) break;
817 utf8bytes += i;
818 for (j = i; j > 0; j--)
819 {
820 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
821 cvalue >>= 6;
822 }
823 *utf8bytes = utf8_table2[i] | cvalue;
824 return i + 1;
825 }
826
827 #endif
828
829
830
831 /*************************************************
832 * Print character string *
833 *************************************************/
834
835 /* Character string printing function. Must handle UTF-8 strings in utf8
836 mode. Yields number of characters printed. If handed a NULL file, just counts
837 chars without printing. */
838
839 static int pchars(unsigned char *p, int length, FILE *f)
840 {
841 int c = 0;
842 int yield = 0;
843
844 while (length-- > 0)
845 {
846 #if !defined NOUTF8
847 if (use_utf8)
848 {
849 int rc = utf82ord(p, &c);
850
851 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
852 {
853 length -= rc - 1;
854 p += rc;
855 if (PRINTHEX(c))
856 {
857 if (f != NULL) fprintf(f, "%c", c);
858 yield++;
859 }
860 else
861 {
862 int n = 4;
863 if (f != NULL) fprintf(f, "\\x{%02x}", c);
864 yield += (n <= 0x000000ff)? 2 :
865 (n <= 0x00000fff)? 3 :
866 (n <= 0x0000ffff)? 4 :
867 (n <= 0x000fffff)? 5 : 6;
868 }
869 continue;
870 }
871 }
872 #endif
873
874 /* Not UTF-8, or malformed UTF-8 */
875
876 c = *p++;
877 if (PRINTHEX(c))
878 {
879 if (f != NULL) fprintf(f, "%c", c);
880 yield++;
881 }
882 else
883 {
884 if (f != NULL) fprintf(f, "\\x%02x", c);
885 yield += 4;
886 }
887 }
888
889 return yield;
890 }
891
892
893
894 /*************************************************
895 * Callout function *
896 *************************************************/
897
898 /* Called from PCRE as a result of the (?C) item. We print out where we are in
899 the match. Yield zero unless more callouts than the fail count, or the callout
900 data is not zero. */
901
902 static int callout(pcre_callout_block *cb)
903 {
904 FILE *f = (first_callout | callout_extra)? outfile : NULL;
905 int i, pre_start, post_start, subject_length;
906
907 if (callout_extra)
908 {
909 fprintf(f, "Callout %d: last capture = %d\n",
910 cb->callout_number, cb->capture_last);
911
912 for (i = 0; i < cb->capture_top * 2; i += 2)
913 {
914 if (cb->offset_vector[i] < 0)
915 fprintf(f, "%2d: <unset>\n", i/2);
916 else
917 {
918 fprintf(f, "%2d: ", i/2);
919 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
920 cb->offset_vector[i+1] - cb->offset_vector[i], f);
921 fprintf(f, "\n");
922 }
923 }
924 }
925
926 /* Re-print the subject in canonical form, the first time or if giving full
927 datails. On subsequent calls in the same match, we use pchars just to find the
928 printed lengths of the substrings. */
929
930 if (f != NULL) fprintf(f, "--->");
931
932 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
933 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
934 cb->current_position - cb->start_match, f);
935
936 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
937
938 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
939 cb->subject_length - cb->current_position, f);
940
941 if (f != NULL) fprintf(f, "\n");
942
943 /* Always print appropriate indicators, with callout number if not already
944 shown. For automatic callouts, show the pattern offset. */
945
946 if (cb->callout_number == 255)
947 {
948 fprintf(outfile, "%+3d ", cb->pattern_position);
949 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
950 }
951 else
952 {
953 if (callout_extra) fprintf(outfile, " ");
954 else fprintf(outfile, "%3d ", cb->callout_number);
955 }
956
957 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
958 fprintf(outfile, "^");
959
960 if (post_start > 0)
961 {
962 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
963 fprintf(outfile, "^");
964 }
965
966 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
967 fprintf(outfile, " ");
968
969 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
970 pbuffer + cb->pattern_position);
971
972 fprintf(outfile, "\n");
973 first_callout = 0;
974
975 if (cb->mark != last_callout_mark)
976 {
977 fprintf(outfile, "Latest Mark: %s\n",
978 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979 last_callout_mark = cb->mark;
980 }
981
982 if (cb->callout_data != NULL)
983 {
984 int callout_data = *((int *)(cb->callout_data));
985 if (callout_data != 0)
986 {
987 fprintf(outfile, "Callout data = %d\n", callout_data);
988 return callout_data;
989 }
990 }
991
992 return (cb->callout_number != callout_fail_id)? 0 :
993 (++callout_count >= callout_fail_count)? 1 : 0;
994 }
995
996
997 /*************************************************
998 * Local malloc functions *
999 *************************************************/
1000
1001 /* Alternative malloc function, to test functionality and save the size of a
1002 compiled re, which is the first store request that pcre_compile() makes. The
1003 show_malloc variable is set only during matching. */
1004
1005 static void *new_malloc(size_t size)
1006 {
1007 void *block = malloc(size);
1008 gotten_store = size;
1009 if (first_gotten_store == 0) first_gotten_store = size;
1010 if (show_malloc)
1011 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1012 return block;
1013 }
1014
1015 static void new_free(void *block)
1016 {
1017 if (show_malloc)
1018 fprintf(outfile, "free %p\n", block);
1019 free(block);
1020 }
1021
1022 /* For recursion malloc/free, to test stacking calls */
1023
1024 static void *stack_malloc(size_t size)
1025 {
1026 void *block = malloc(size);
1027 if (show_malloc)
1028 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1029 return block;
1030 }
1031
1032 static void stack_free(void *block)
1033 {
1034 if (show_malloc)
1035 fprintf(outfile, "stack_free %p\n", block);
1036 free(block);
1037 }
1038
1039
1040 /*************************************************
1041 * Call pcre_fullinfo() *
1042 *************************************************/
1043
1044 /* Get one piece of information from the pcre_fullinfo() function */
1045
1046 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1047 {
1048 int rc;
1049 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1050 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1051 }
1052
1053
1054
1055 /*************************************************
1056 * Byte flipping function *
1057 *************************************************/
1058
/* Reverse the byte order of an integer field.

Arguments:
  value     the value to flip
  n         field width in bytes; 2 selects a 16-bit flip, and every other
            value is treated as a 32-bit flip

Returns:    the low 16 (n == 2) or low 32 bits of value with their bytes
            reversed; any higher bits of value are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Pick the bytes out individually, least significant first. */

  const unsigned long int byte0 = value & 0xff;
  const unsigned long int byte1 = (value >> 8) & 0xff;
  const unsigned long int byte2 = (value >> 16) & 0xff;
  const unsigned long int byte3 = (value >> 24) & 0xff;

  if (n == 2)
    {
    return (byte0 << 8) | byte1;
    }

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1068
1069
1070
1071
1072 /*************************************************
1073 * Check match or recursion limit *
1074 *************************************************/
1075
1076 static int
1077 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1078 int start_offset, int options, int *use_offsets, int use_size_offsets,
1079 int flag, unsigned long int *limit, int errnumber, const char *msg)
1080 {
1081 int count;
1082 int min = 0;
1083 int mid = 64;
1084 int max = -1;
1085
1086 extra->flags |= flag;
1087
1088 for (;;)
1089 {
1090 *limit = mid;
1091
1092 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1093 use_offsets, use_size_offsets);
1094
1095 if (count == errnumber)
1096 {
1097 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1098 min = mid;
1099 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1100 }
1101
1102 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1103 count == PCRE_ERROR_PARTIAL)
1104 {
1105 if (mid == min + 1)
1106 {
1107 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1108 break;
1109 }
1110 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1111 max = mid;
1112 mid = (min + mid)/2;
1113 }
1114 else break; /* Some other error */
1115 }
1116
1117 extra->flags &= ~flag;
1118 return count;
1119 }
1120
1121
1122
1123 /*************************************************
1124 * Case-independent strncmp() function *
1125 *************************************************/
1126
1127 /*
1128 Arguments:
1129 s first string
1130 t second string
1131 n number of characters to compare
1132
1133 Returns: < 0, = 0, or > 0, according to the comparison
1134 */
1135
1136 static int
1137 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1138 {
1139 while (n--)
1140 {
1141 int c = tolower(*s++) - tolower(*t++);
1142 if (c) return c;
1143 }
1144 return 0;
1145 }
1146
1147
1148
1149 /*************************************************
1150 * Check newline indicator *
1151 *************************************************/
1152
1153 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1154 a message and return 0 if there is no match.
1155
1156 Arguments:
1157 p points after the leading '<'
1158 f file for error message
1159
1160 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1161 */
1162
1163 static int
1164 check_newline(pcre_uint8 *p, FILE *f)
1165 {
1166 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1167 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1168 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1169 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1170 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1171 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1172 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1173 fprintf(f, "Unknown newline type at: <%s\n", p);
1174 return 0;
1175 }
1176
1177
1178
1179 /*************************************************
1180 * Usage function *
1181 *************************************************/
1182
/* Write the command-line help text to stdout. Which lines appear depends on
the build: the readline note on SUPPORT_LIBREADLINE, the -dfa line on NODFA,
and the -p line on NOPOSIX. None of the text contains a conversion
specification, so it is written with fputs(). */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
1217
1218
1219
1220 /*************************************************
1221 * Main Program *
1222 *************************************************/
1223
1224 /* Read lines from named file or stdin and write to named file or stdout; lines
1225 consist of a regular expression, in delimiters and optionally followed by
1226 options, followed by a set of test data, terminated by an empty line. */
1227
1228 int main(int argc, char **argv)
1229 {
1230 FILE *infile = stdin;
1231 int options = 0;
1232 int study_options = 0;
1233 int default_find_match_limit = FALSE;
1234 int op = 1;
1235 int timeit = 0;
1236 int timeitm = 0;
1237 int showinfo = 0;
1238 int showstore = 0;
1239 int force_study = -1;
1240 int force_study_options = 0;
1241 int quiet = 0;
1242 int size_offsets = 45;
1243 int size_offsets_max;
1244 int *offsets = NULL;
1245 #if !defined NOPOSIX
1246 int posix = 0;
1247 #endif
1248 int debug = 0;
1249 int done = 0;
1250 int all_use_dfa = 0;
1251 int yield = 0;
1252 int stack_size;
1253
1254 pcre_jit_stack *jit_stack = NULL;
1255
1256
1257 /* These vectors store, end-to-end, a list of captured substring names. Assume
1258 that 1024 is plenty long enough for the few names we'll be testing. */
1259
1260 pcre_uchar copynames[1024];
1261 pcre_uchar getnames[1024];
1262
1263 pcre_uchar *copynamesptr;
1264 pcre_uchar *getnamesptr;
1265
1266 /* Get buffers from malloc() so that Electric Fence will check their misuse
1267 when I am debugging. They grow automatically when very long lines are read. */
1268
1269 buffer = (pcre_uint8 *)malloc(buffer_size);
1270 dbuffer = (pcre_uint8 *)malloc(buffer_size);
1271 pbuffer = (pcre_uint8 *)malloc(buffer_size);
1272
1273 /* The outfile variable is static so that new_malloc can use it. */
1274
1275 outfile = stdout;
1276
1277 /* The following _setmode() stuff is some Windows magic that tells its runtime
1278 library to translate CRLF into a single LF character. At least, that's what
1279 I've been told: never having used Windows I take this all on trust. Originally
1280 it set 0x8000, but then I was advised that _O_BINARY was better. */
1281
1282 #if defined(_WIN32) || defined(WIN32)
1283 _setmode( _fileno( stdout ), _O_BINARY );
1284 #endif
1285
1286 /* Scan options */
1287
1288 while (argc > 1 && argv[op][0] == '-')
1289 {
1290 unsigned char *endptr;
1291
1292 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1293 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1294 else if (strcmp(argv[op], "-s+") == 0)
1295 {
1296 force_study = 1;
1297 force_study_options = PCRE_STUDY_JIT_COMPILE;
1298 }
1299 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1300 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1301 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1302 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1303 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1304 #if !defined NODFA
1305 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1306 #endif
1307 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1308 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1309 *endptr == 0))
1310 {
1311 op++;
1312 argc--;
1313 }
1314 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1315 {
1316 int both = argv[op][2] == 0;
1317 int temp;
1318 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1319 *endptr == 0))
1320 {
1321 timeitm = temp;
1322 op++;
1323 argc--;
1324 }
1325 else timeitm = LOOPREPEAT;
1326 if (both) timeit = timeitm;
1327 }
1328 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1329 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1330 *endptr == 0))
1331 {
1332 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1333 printf("PCRE: -S not supported on this OS\n");
1334 exit(1);
1335 #else
1336 int rc;
1337 struct rlimit rlim;
1338 getrlimit(RLIMIT_STACK, &rlim);
1339 rlim.rlim_cur = stack_size * 1024 * 1024;
1340 rc = setrlimit(RLIMIT_STACK, &rlim);
1341 if (rc != 0)
1342 {
1343 printf("PCRE: setrlimit() failed with error %d\n", rc);
1344 exit(1);
1345 }
1346 op++;
1347 argc--;
1348 #endif
1349 }
1350 #if !defined NOPOSIX
1351 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1352 #endif
1353 else if (strcmp(argv[op], "-C") == 0)
1354 {
1355 int rc;
1356 unsigned long int lrc;
1357 printf("PCRE version %s\n", pcre_version());
1358 printf("Compiled with\n");
1359 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1360 printf(" %sUTF-8 support\n", rc? "" : "No ");
1361 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1362 printf(" %sUnicode properties support\n", rc? "" : "No ");
1363 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1364 if (rc)
1365 printf(" Just-in-time compiler support\n");
1366 else
1367 printf(" No just-in-time compiler support\n");
1368 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1369 /* Note that these values are always the ASCII values, even
1370 in EBCDIC environments. CR is 13 and NL is 10. */
1371 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1372 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1373 (rc == -2)? "ANYCRLF" :
1374 (rc == -1)? "ANY" : "???");
1375 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1376 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1377 "all Unicode newlines");
1378 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1379 printf(" Internal link size = %d\n", rc);
1380 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1381 printf(" POSIX malloc threshold = %d\n", rc);
1382 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1383 printf(" Default match limit = %ld\n", lrc);
1384 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1385 printf(" Default recursion depth limit = %ld\n", lrc);
1386 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1387 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1388 goto EXIT;
1389 }
1390 else if (strcmp(argv[op], "-help") == 0 ||
1391 strcmp(argv[op], "--help") == 0)
1392 {
1393 usage();
1394 goto EXIT;
1395 }
1396 else
1397 {
1398 printf("** Unknown or malformed option %s\n", argv[op]);
1399 usage();
1400 yield = 1;
1401 goto EXIT;
1402 }
1403 op++;
1404 argc--;
1405 }
1406
1407 /* Get the store for the offsets vector, and remember what it was */
1408
1409 size_offsets_max = size_offsets;
1410 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1411 if (offsets == NULL)
1412 {
1413 printf("** Failed to get %d bytes of memory for offsets vector\n",
1414 (int)(size_offsets_max * sizeof(int)));
1415 yield = 1;
1416 goto EXIT;
1417 }
1418
1419 /* Sort out the input and output files */
1420
1421 if (argc > 1)
1422 {
1423 infile = fopen(argv[op], INPUT_MODE);
1424 if (infile == NULL)
1425 {
1426 printf("** Failed to open %s\n", argv[op]);
1427 yield = 1;
1428 goto EXIT;
1429 }
1430 }
1431
1432 if (argc > 2)
1433 {
1434 outfile = fopen(argv[op+1], OUTPUT_MODE);
1435 if (outfile == NULL)
1436 {
1437 printf("** Failed to open %s\n", argv[op+1]);
1438 yield = 1;
1439 goto EXIT;
1440 }
1441 }
1442
1443 /* Set alternative malloc function */
1444
1445 pcre_malloc = new_malloc;
1446 pcre_free = new_free;
1447 pcre_stack_malloc = stack_malloc;
1448 pcre_stack_free = stack_free;
1449
1450 /* Heading line unless quiet, then prompt for first regex if stdin */
1451
1452 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1453
1454 /* Main loop */
1455
1456 while (!done)
1457 {
1458 pcre *re = NULL;
1459 pcre_extra *extra = NULL;
1460
1461 #if !defined NOPOSIX /* There are still compilers that require no indent */
1462 regex_t preg;
1463 int do_posix = 0;
1464 #endif
1465
1466 const char *error;
1467 unsigned char *markptr;
1468 unsigned char *p, *pp, *ppp;
1469 unsigned char *to_file = NULL;
1470 const unsigned char *tables = NULL;
1471 unsigned long int true_size, true_study_size = 0;
1472 size_t size, regex_gotten_store;
1473 int do_allcaps = 0;
1474 int do_mark = 0;
1475 int do_study = 0;
1476 int no_force_study = 0;
1477 int do_debug = debug;
1478 int do_G = 0;
1479 int do_g = 0;
1480 int do_showinfo = showinfo;
1481 int do_showrest = 0;
1482 int do_showcaprest = 0;
1483 int do_flip = 0;
1484 int erroroffset, len, delimiter, poffset;
1485
1486 use_utf8 = 0;
1487 debug_lengths = 1;
1488
1489 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1490 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1491 fflush(outfile);
1492
1493 p = buffer;
1494 while (isspace(*p)) p++;
1495 if (*p == 0) continue;
1496
1497 /* See if the pattern is to be loaded pre-compiled from a file. */
1498
1499 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1500 {
1501 unsigned long int magic, get_options;
1502 pcre_uint8 sbuf[8];
1503 FILE *f;
1504
1505 p++;
1506 pp = p + (int)strlen((char *)p);
1507 while (isspace(pp[-1])) pp--;
1508 *pp = 0;
1509
1510 f = fopen((char *)p, "rb");
1511 if (f == NULL)
1512 {
1513 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1514 continue;
1515 }
1516
1517 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1518
1519 true_size =
1520 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1521 true_study_size =
1522 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1523
1524 re = (real_pcre *)new_malloc(true_size);
1525 regex_gotten_store = first_gotten_store;
1526
1527 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1528
1529 magic = ((real_pcre *)re)->magic_number;
1530 if (magic != MAGIC_NUMBER)
1531 {
1532 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1533 {
1534 do_flip = 1;
1535 }
1536 else
1537 {
1538 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1539 fclose(f);
1540 continue;
1541 }
1542 }
1543
1544 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1545 do_flip? " (byte-inverted)" : "", p);
1546
1547 /* Need to know if UTF-8 for printing data strings */
1548
1549 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1550 use_utf8 = (get_options & PCRE_UTF8) != 0;
1551
1552 /* Now see if there is any following study data. */
1553
1554 if (true_study_size != 0)
1555 {
1556 pcre_study_data *psd;
1557
1558 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1559 extra->flags = PCRE_EXTRA_STUDY_DATA;
1560
1561 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1562 extra->study_data = psd;
1563
1564 if (fread(psd, 1, true_study_size, f) != true_study_size)
1565 {
1566 FAIL_READ:
1567 fprintf(outfile, "Failed to read data from %s\n", p);
1568 if (extra != NULL) pcre_free_study(extra);
1569 if (re != NULL) new_free(re);
1570 fclose(f);
1571 continue;
1572 }
1573 fprintf(outfile, "Study data loaded from %s\n", p);
1574 do_study = 1; /* To get the data output if requested */
1575 }
1576 else fprintf(outfile, "No study data\n");
1577
1578 fclose(f);
1579 goto SHOW_INFO;
1580 }
1581
1582 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1583 the pattern; if it isn't complete, read more. */
1584
1585 delimiter = *p++;
1586
1587 if (isalnum(delimiter) || delimiter == '\\')
1588 {
1589 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1590 goto SKIP_DATA;
1591 }
1592
1593 pp = p;
1594 poffset = (int)(p - buffer);
1595
1596 for(;;)
1597 {
1598 while (*pp != 0)
1599 {
1600 if (*pp == '\\' && pp[1] != 0) pp++;
1601 else if (*pp == delimiter) break;
1602 pp++;
1603 }
1604 if (*pp != 0) break;
1605 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1606 {
1607 fprintf(outfile, "** Unexpected EOF\n");
1608 done = 1;
1609 goto CONTINUE;
1610 }
1611 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1612 }
1613
1614 /* The buffer may have moved while being extended; reset the start of data
1615 pointer to the correct relative point in the buffer. */
1616
1617 p = buffer + poffset;
1618
1619 /* If the first character after the delimiter is backslash, make
1620 the pattern end with backslash. This is purely to provide a way
1621 of testing for the error message when a pattern ends with backslash. */
1622
1623 if (pp[1] == '\\') *pp++ = '\\';
1624
1625 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1626 for callouts. */
1627
1628 *pp++ = 0;
1629 strcpy((char *)pbuffer, (char *)p);
1630
1631 /* Look for options after final delimiter */
1632
1633 options = 0;
1634 study_options = 0;
1635 log_store = showstore; /* default from command line */
1636
1637 while (*pp != 0)
1638 {
1639 switch (*pp++)
1640 {
1641 case 'f': options |= PCRE_FIRSTLINE; break;
1642 case 'g': do_g = 1; break;
1643 case 'i': options |= PCRE_CASELESS; break;
1644 case 'm': options |= PCRE_MULTILINE; break;
1645 case 's': options |= PCRE_DOTALL; break;
1646 case 'x': options |= PCRE_EXTENDED; break;
1647
1648 case '+':
1649 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1650 break;
1651
1652 case '=': do_allcaps = 1; break;
1653 case 'A': options |= PCRE_ANCHORED; break;
1654 case 'B': do_debug = 1; break;
1655 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1656 case 'D': do_debug = do_showinfo = 1; break;
1657 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1658 case 'F': do_flip = 1; break;
1659 case 'G': do_G = 1; break;
1660 case 'I': do_showinfo = 1; break;
1661 case 'J': options |= PCRE_DUPNAMES; break;
1662 case 'K': do_mark = 1; break;
1663 case 'M': log_store = 1; break;
1664 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1665
1666 #if !defined NOPOSIX
1667 case 'P': do_posix = 1; break;
1668 #endif
1669
1670 case 'S':
1671 if (do_study == 0)
1672 {
1673 do_study = 1;
1674 if (*pp == '+')
1675 {
1676 study_options |= PCRE_STUDY_JIT_COMPILE;
1677 pp++;
1678 }
1679 }
1680 else
1681 {
1682 do_study = 0;
1683 no_force_study = 1;
1684 }
1685 break;
1686
1687 case 'U': options |= PCRE_UNGREEDY; break;
1688 case 'W': options |= PCRE_UCP; break;
1689 case 'X': options |= PCRE_EXTRA; break;
1690 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1691 case 'Z': debug_lengths = 0; break;
1692 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1693 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1694
1695 case 'T':
1696 switch (*pp++)
1697 {
1698 case '0': tables = tables0; break;
1699 case '1': tables = tables1; break;
1700
1701 case '\r':
1702 case '\n':
1703 case ' ':
1704 case 0:
1705 fprintf(outfile, "** Missing table number after /T\n");
1706 goto SKIP_DATA;
1707
1708 default:
1709 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1710 goto SKIP_DATA;
1711 }
1712 break;
1713
1714 case 'L':
1715 ppp = pp;
1716 /* The '\r' test here is so that it works on Windows. */
1717 /* The '0' test is just in case this is an unterminated line. */
1718 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1719 *ppp = 0;
1720 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1721 {
1722 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1723 goto SKIP_DATA;
1724 }
1725 locale_set = 1;
1726 tables = pcre_maketables();
1727 pp = ppp;
1728 break;
1729
1730 case '>':
1731 to_file = pp;
1732 while (*pp != 0) pp++;
1733 while (isspace(pp[-1])) pp--;
1734 *pp = 0;
1735 break;
1736
1737 case '<':
1738 {
1739 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1740 {
1741 options |= PCRE_JAVASCRIPT_COMPAT;
1742 pp += 3;
1743 }
1744 else
1745 {
1746 int x = check_newline(pp, outfile);
1747 if (x == 0) goto SKIP_DATA;
1748 options |= x;
1749 while (*pp++ != '>');
1750 }
1751 }
1752 break;
1753
1754 case '\r': /* So that it works in Windows */
1755 case '\n':
1756 case ' ':
1757 break;
1758
1759 default:
1760 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1761 goto SKIP_DATA;
1762 }
1763 }
1764
1765 /* Handle compiling via the POSIX interface, which doesn't support the
1766 timing, showing, or debugging options, nor the ability to pass over
1767 local character tables. */
1768
1769 #if !defined NOPOSIX
1770 if (posix || do_posix)
1771 {
1772 int rc;
1773 int cflags = 0;
1774
1775 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1776 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1777 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1778 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1779 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1780 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1781 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1782
1783 first_gotten_store = 0;
1784 rc = regcomp(&preg, (char *)p, cflags);
1785
1786 /* Compilation failed; go back for another re, skipping to blank line
1787 if non-interactive. */
1788
1789 if (rc != 0)
1790 {
1791 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1792 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1793 goto SKIP_DATA;
1794 }
1795 }
1796
1797 /* Handle compiling via the native interface */
1798
1799 else
1800 #endif /* !defined NOPOSIX */
1801
1802 {
1803 unsigned long int get_options;
1804
1805 if (timeit > 0)
1806 {
1807 register int i;
1808 clock_t time_taken;
1809 clock_t start_time = clock();
1810 for (i = 0; i < timeit; i++)
1811 {
1812 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1813 if (re != NULL) free(re);
1814 }
1815 time_taken = clock() - start_time;
1816 fprintf(outfile, "Compile time %.4f milliseconds\n",
1817 (((double)time_taken * 1000.0) / (double)timeit) /
1818 (double)CLOCKS_PER_SEC);
1819 }
1820
1821 first_gotten_store = 0;
1822 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1823
1824 /* Compilation failed; go back for another re, skipping to blank line
1825 if non-interactive. */
1826
1827 if (re == NULL)
1828 {
1829 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1830 SKIP_DATA:
1831 if (infile != stdin)
1832 {
1833 for (;;)
1834 {
1835 if (extend_inputline(infile, buffer, NULL) == NULL)
1836 {
1837 done = 1;
1838 goto CONTINUE;
1839 }
1840 len = (int)strlen((char *)buffer);
1841 while (len > 0 && isspace(buffer[len-1])) len--;
1842 if (len == 0) break;
1843 }
1844 fprintf(outfile, "\n");
1845 }
1846 goto CONTINUE;
1847 }
1848
1849 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1850 within the regex; check for this so that we know how to process the data
1851 lines. */
1852
1853 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1854 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1855
1856 /* Extract the size for possible writing before possibly flipping it,
1857 and remember the store that was got. */
1858
1859 true_size = ((real_pcre *)re)->size;
1860 regex_gotten_store = first_gotten_store;
1861
1862 /* Output code size information if requested */
1863
1864 if (log_store)
1865 fprintf(outfile, "Memory allocation (code space): %d\n",
1866 (int)(first_gotten_store -
1867 sizeof(real_pcre) -
1868 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1869
1870 /* If -s or /S was present, study the regex to generate additional info to
1871 help with the matching, unless the pattern has the SS option, which
1872 suppresses the effect of /S (used for a few test patterns where studying is
1873 never sensible). */
1874
1875 if (do_study || (force_study >= 0 && !no_force_study))
1876 {
1877 if (timeit > 0)
1878 {
1879 register int i;
1880 clock_t time_taken;
1881 clock_t start_time = clock();
1882 for (i = 0; i < timeit; i++)
1883 extra = pcre_study(re, study_options | force_study_options, &error);
1884 time_taken = clock() - start_time;
1885 if (extra != NULL) pcre_free_study(extra);
1886 fprintf(outfile, " Study time %.4f milliseconds\n",
1887 (((double)time_taken * 1000.0) / (double)timeit) /
1888 (double)CLOCKS_PER_SEC);
1889 }
1890 extra = pcre_study(re, study_options | force_study_options, &error);
1891 if (error != NULL)
1892 fprintf(outfile, "Failed to study: %s\n", error);
1893 else if (extra != NULL)
1894 {
1895 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1896 if (log_store)
1897 {
1898 size_t jitsize;
1899 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
1900 if (jitsize != 0)
1901 fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
1902 }
1903 }
1904 }
1905
1906 /* If /K was present, we set up for handling MARK data. */
1907
1908 if (do_mark)
1909 {
1910 if (extra == NULL)
1911 {
1912 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1913 extra->flags = 0;
1914 }
1915 extra->mark = &markptr;
1916 extra->flags |= PCRE_EXTRA_MARK;
1917 }
1918
1919 /* If the 'F' option was present, we flip the bytes of all the integer
1920 fields in the regex data block and the study block. This is to make it
1921 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1922 compiled on a different architecture. */
1923
1924 if (do_flip)
1925 {
1926 real_pcre *rre = (real_pcre *)re;
1927 rre->magic_number =
1928 byteflip(rre->magic_number, sizeof(rre->magic_number));
1929 rre->size = byteflip(rre->size, sizeof(rre->size));
1930 rre->options = byteflip(rre->options, sizeof(rre->options));
1931 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1932 rre->top_bracket =
1933 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1934 rre->top_backref =
1935 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1936 rre->first_char =
1937 (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
1938 rre->req_char =
1939 (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
1940 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1941 sizeof(rre->name_table_offset));
1942 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1943 sizeof(rre->name_entry_size));
1944 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1945 sizeof(rre->name_count));
1946
1947 if (extra != NULL)
1948 {
1949 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1950 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1951 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1952 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1953 }
1954 }
1955
1956 /* Extract information from the compiled data if required. There are now
1957 two info-returning functions. The old one has a limited interface and
1958 returns only limited data. Check that it agrees with the newer one. */
1959
1960 SHOW_INFO:
1961
1962 if (do_debug)
1963 {
1964 fprintf(outfile, "------------------------------------------------------------------\n");
1965 pcre_printint(re, outfile, debug_lengths);
1966 }
1967
1968 /* We already have the options in get_options (see above) */
1969
1970 if (do_showinfo)
1971 {
1972 unsigned long int all_options;
1973 #if !defined NOINFOCHECK
1974 int old_first_char, old_options, old_count;
1975 #endif
1976 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1977 hascrorlf;
1978 int nameentrysize, namecount;
1979 const pcre_uchar *nametable;
1980
1981 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1982 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1983 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1984 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1985 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1986 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1987 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1988 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1989 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1990 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1991 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1992
1993 #if !defined NOINFOCHECK
1994 old_count = pcre_info(re, &old_options, &old_first_char);
1995 if (count < 0) fprintf(outfile,
1996 "Error %d from pcre_info()\n", count);
1997 else
1998 {
1999 if (old_count != count) fprintf(outfile,
2000 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2001 old_count);
2002
2003 if (old_first_char != first_char) fprintf(outfile,
2004 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2005 first_char, old_first_char);
2006
2007 if (old_options != (int)get_options) fprintf(outfile,
2008 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2009 get_options, old_options);
2010 }
2011 #endif
2012
2013 if (size != regex_gotten_store) fprintf(outfile,
2014 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2015 (int)size, (int)regex_gotten_store);
2016
2017 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2018 if (backrefmax > 0)
2019 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2020
2021 if (namecount > 0)
2022 {
2023 fprintf(outfile, "Named capturing subpatterns:\n");
2024 while (namecount-- > 0)
2025 {
2026 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2027 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2028 GET2(nametable, 0));
2029 nametable += nameentrysize;
2030 }
2031 }
2032
2033 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2034 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2035
2036 all_options = ((real_pcre *)re)->options;
2037 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2038
2039 if (get_options == 0) fprintf(outfile, "No options\n");
2040 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2041 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2042 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2043 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2044 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2045 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2046 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2047 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2048 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2049 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2050 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2051 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2052 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2053 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2054 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2055 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2056 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2057 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2058
2059 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2060
2061 switch (get_options & PCRE_NEWLINE_BITS)
2062 {
2063 case PCRE_NEWLINE_CR:
2064 fprintf(outfile, "Forced newline sequence: CR\n");
2065 break;
2066
2067 case PCRE_NEWLINE_LF:
2068 fprintf(outfile, "Forced newline sequence: LF\n");
2069 break;
2070
2071 case PCRE_NEWLINE_CRLF:
2072 fprintf(outfile, "Forced newline sequence: CRLF\n");
2073 break;
2074
2075 case PCRE_NEWLINE_ANYCRLF:
2076 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2077 break;
2078
2079 case PCRE_NEWLINE_ANY:
2080 fprintf(outfile, "Forced newline sequence: ANY\n");
2081 break;
2082
2083 default:
2084 break;
2085 }
2086
2087 if (first_char == -1)
2088 {
2089 fprintf(outfile, "First char at start or follows newline\n");
2090 }
2091 else if (first_char < 0)
2092 {
2093 fprintf(outfile, "No first char\n");
2094 }
2095 else
2096 {
2097 const char *caseless =
2098 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2099 "" : " (caseless)";
2100
2101 if (PRINTHEX(first_char))
2102 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2103 else
2104 fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2105 }
2106
2107 if (need_char < 0)
2108 {
2109 fprintf(outfile, "No need char\n");
2110 }
2111 else
2112 {
2113 const char *caseless =
2114 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2115 "" : " (caseless)";
2116
2117 if (PRINTHEX(need_char))
2118 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2119 else
2120 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2121 }
2122
2123 /* Don't output study size; at present it is in any case a fixed
2124 value, but it varies, depending on the computer architecture, and
2125 so messes up the test suite. (And with the /F option, it might be
2126 flipped.) If study was forced by an external -s, don't show this
2127 information unless -i or -d was also present. This means that, except
2128 when auto-callouts are involved, the output from runs with and without
2129 -s should be identical. */
2130
2131 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2132 {
2133 if (extra == NULL)
2134 fprintf(outfile, "Study returned NULL\n");
2135 else
2136 {
2137 pcre_uint8 *start_bits = NULL;
2138 int minlength;
2139
2140 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2141 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2142
2143 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2144 if (start_bits == NULL)
2145 fprintf(outfile, "No set of starting bytes\n");
2146 else
2147 {
2148 int i;
2149 int c = 24;
2150 fprintf(outfile, "Starting byte set: ");
2151 for (i = 0; i < 256; i++)
2152 {
2153 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2154 {
2155 if (c > 75)
2156 {
2157 fprintf(outfile, "\n ");
2158 c = 2;
2159 }
2160 if (PRINTHEX(i) && i != ' ')
2161 {
2162 fprintf(outfile, "%c ", i);
2163 c += 2;
2164 }
2165 else
2166 {
2167 fprintf(outfile, "\\x%02x ", i);
2168 c += 5;
2169 }
2170 }
2171 }
2172 fprintf(outfile, "\n");
2173 }
2174 }
2175
2176 /* Show this only if the JIT was set by /S, not by -s. */
2177
2178 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2179 {
2180 int jit;
2181 new_info(re, extra, PCRE_INFO_JIT, &jit);
2182 if (jit)
2183 fprintf(outfile, "JIT study was successful\n");
2184 else
2185 #ifdef SUPPORT_JIT
2186 fprintf(outfile, "JIT study was not successful\n");
2187 #else
2188 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2189 #endif
2190 }
2191 }
2192 }
2193
2194 /* If the '>' option was present, we write out the regex to a file, and
2195 that is all. The first 8 bytes of the file are the regex length and then
2196 the study length, in big-endian order. */
2197
2198 if (to_file != NULL)
2199 {
2200 FILE *f = fopen((char *)to_file, "wb");
2201 if (f == NULL)
2202 {
2203 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2204 }
2205 else
2206 {
2207 pcre_uint8 sbuf[8];
2208 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2209 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2210 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2211 sbuf[3] = (pcre_uint8)((true_size) & 255);
2212
2213 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2214 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2215 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2216 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2217
2218 if (fwrite(sbuf, 1, 8, f) < 8 ||
2219 fwrite(re, 1, true_size, f) < true_size)
2220 {
2221 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2222 }
2223 else
2224 {
2225 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2226
2227 /* If there is study data, write it. */
2228
2229 if (extra != NULL)
2230 {
2231 if (fwrite(extra->study_data, 1, true_study_size, f) <
2232 true_study_size)
2233 {
2234 fprintf(outfile, "Write error on %s: %s\n", to_file,
2235 strerror(errno));
2236 }
2237 else fprintf(outfile, "Study data written to %s\n", to_file);
2238 }
2239 }
2240 fclose(f);
2241 }
2242
2243 new_free(re);
2244 if (extra != NULL) pcre_free_study(extra);
2245 if (locale_set)
2246 {
2247 new_free((void *)tables);
2248 setlocale(LC_CTYPE, "C");
2249 locale_set = 0;
2250 }
2251 continue; /* With next regex */
2252 }
2253 } /* End of non-POSIX compile */
2254
2255 /* Read data lines and test them */
2256
2257 for (;;)
2258 {
2259 pcre_uint8 *q;
2260 pcre_uint8 *bptr;
2261 int *use_offsets = offsets;
2262 int use_size_offsets = size_offsets;
2263 int callout_data = 0;
2264 int callout_data_set = 0;
2265 int count, c;
2266 int copystrings = 0;
2267 int find_match_limit = default_find_match_limit;
2268 int getstrings = 0;
2269 int getlist = 0;
2270 int gmatched = 0;
2271 int start_offset = 0;
2272 int start_offset_sign = 1;
2273 int g_notempty = 0;
2274 int use_dfa = 0;
2275
2276 options = 0;
2277
2278 *copynames = 0;
2279 *getnames = 0;
2280
2281 copynamesptr = copynames;
2282 getnamesptr = getnames;
2283
2284 pcre_callout = callout;
2285 first_callout = 1;
2286 last_callout_mark = NULL;
2287 callout_extra = 0;
2288 callout_count = 0;
2289 callout_fail_count = 999999;
2290 callout_fail_id = -1;
2291 show_malloc = 0;
2292
2293 if (extra != NULL) extra->flags &=
2294 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2295
2296 len = 0;
2297 for (;;)
2298 {
2299 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2300 {
2301 if (len > 0) /* Reached EOF without hitting a newline */
2302 {
2303 fprintf(outfile, "\n");
2304 break;
2305 }
2306 done = 1;
2307 goto CONTINUE;
2308 }
2309 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2310 len = (int)strlen((char *)buffer);
2311 if (buffer[len-1] == '\n') break;
2312 }
2313
2314 while (len > 0 && isspace(buffer[len-1])) len--;
2315 buffer[len] = 0;
2316 if (len == 0) break;
2317
2318 p = buffer;
2319 while (isspace(*p)) p++;
2320
2321 bptr = q = dbuffer;
2322 while ((c = *p++) != 0)
2323 {
2324 int i = 0;
2325 int n = 0;
2326
2327 if (c == '\\') switch ((c = *p++))
2328 {
2329 case 'a': c = 7; break;
2330 case 'b': c = '\b'; break;
2331 case 'e': c = 27; break;
2332 case 'f': c = '\f'; break;
2333 case 'n': c = '\n'; break;
2334 case 'r': c = '\r'; break;
2335 case 't': c = '\t'; break;
2336 case 'v': c = '\v'; break;
2337
2338 case '0': case '1': case '2': case '3':
2339 case '4': case '5': case '6': case '7':
2340 c -= '0';
2341 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2342 c = c * 8 + *p++ - '0';
2343
2344 #if !defined NOUTF8
2345 if (use_utf8 && c > 255)
2346 {
2347 unsigned char buff8[8];
2348 int ii, utn;
2349 utn = ord2utf8(c, buff8);
2350 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2351 c = buff8[ii]; /* Last byte */
2352 }
2353 #endif
2354 break;
2355
2356 case 'x':
2357
2358 /* Handle \x{..} specially - new Perl thing for utf8 */
2359
2360 #if !defined NOUTF8
2361 if (*p == '{')
2362 {
2363 unsigned char *pt = p;
2364 c = 0;
2365
2366 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2367 when isxdigit() is a macro that refers to its argument more than
2368 once. This is banned by the C Standard, but apparently happens in at
2369 least one MacOS environment. */
2370
2371 for (pt++; isxdigit(*pt); pt++)
2372 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2373 if (*pt == '}')
2374 {
2375 unsigned char buff8[8];
2376 int ii, utn;
2377 if (use_utf8)
2378 {
2379 utn = ord2utf8(c, buff8);
2380 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2381 c = buff8[ii]; /* Last byte */
2382 }
2383 else
2384 {
2385 if (c > 255)
2386 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2387 "UTF-8 mode is not enabled.\n"
2388 "** Truncation will probably give the wrong result.\n", c);
2389 }
2390 p = pt + 1;
2391 break;
2392 }
2393 /* Not correct form; fall through */
2394 }
2395 #endif
2396
2397 /* Ordinary \x */
2398
2399 c = 0;
2400 while (i++ < 2 && isxdigit(*p))
2401 {
2402 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2403 p++;
2404 }
2405 break;
2406
2407 case 0: /* \ followed by EOF allows for an empty line */
2408 p--;
2409 continue;
2410
2411 case '>':
2412 if (*p == '-')
2413 {
2414 start_offset_sign = -1;
2415 p++;
2416 }
2417 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2418 start_offset *= start_offset_sign;
2419 continue;
2420
2421 case 'A': /* Option setting */
2422 options |= PCRE_ANCHORED;
2423 continue;
2424
2425 case 'B':
2426 options |= PCRE_NOTBOL;
2427 continue;
2428
2429 case 'C':
2430 if (isdigit(*p)) /* Set copy string */
2431 {
2432 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2433 copystrings |= 1 << n;
2434 }
2435 else if (isalnum(*p))
2436 {
2437 pcre_uchar *npp = copynamesptr;
2438 while (isalnum(*p)) *npp++ = *p++;
2439 *npp++ = 0;
2440 *npp = 0;
2441 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2442 if (n < 0)
2443 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2444 copynamesptr = npp;
2445 }
2446 else if (*p == '+')
2447 {
2448 callout_extra = 1;
2449 p++;
2450 }
2451 else if (*p == '-')
2452 {
2453 pcre_callout = NULL;
2454 p++;
2455 }
2456 else if (*p == '!')
2457 {
2458 callout_fail_id = 0;
2459 p++;
2460 while(isdigit(*p))
2461 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2462 callout_fail_count = 0;
2463 if (*p == '!')
2464 {
2465 p++;
2466 while(isdigit(*p))
2467 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2468 }
2469 }
2470 else if (*p == '*')
2471 {
2472 int sign = 1;
2473 callout_data = 0;
2474 if (*(++p) == '-') { sign = -1; p++; }
2475 while(isdigit(*p))
2476 callout_data = callout_data * 10 + *p++ - '0';
2477 callout_data *= sign;
2478 callout_data_set = 1;
2479 }
2480 continue;
2481
2482 #if !defined NODFA
2483 case 'D':
2484 #if !defined NOPOSIX
2485 if (posix || do_posix)
2486 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2487 else
2488 #endif
2489 use_dfa = 1;
2490 continue;
2491 #endif
2492
2493 #if !defined NODFA
2494 case 'F':
2495 options |= PCRE_DFA_SHORTEST;
2496 continue;
2497 #endif
2498
2499 case 'G':
2500 if (isdigit(*p))
2501 {
2502 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2503 getstrings |= 1 << n;
2504 }
2505 else if (isalnum(*p))
2506 {
2507 pcre_uchar *npp = getnamesptr;
2508 while (isalnum(*p)) *npp++ = *p++;
2509 *npp++ = 0;
2510 *npp = 0;
2511 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2512 if (n < 0)
2513 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2514 getnamesptr = npp;
2515 }
2516 continue;
2517
2518 case 'J':
2519 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2520 if (extra != NULL
2521 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2522 && extra->executable_jit != NULL)
2523 {
2524 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2525 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2526 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2527 }
2528 continue;
2529
2530 case 'L':
2531 getlist = 1;
2532 continue;
2533
2534 case 'M':
2535 find_match_limit = 1;
2536 continue;
2537
2538 case 'N':
2539 if ((options & PCRE_NOTEMPTY) != 0)
2540 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2541 else
2542 options |= PCRE_NOTEMPTY;
2543 continue;
2544
2545 case 'O':
2546 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2547 if (n > size_offsets_max)
2548 {
2549 size_offsets_max = n;
2550 free(offsets);
2551 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2552 if (offsets == NULL)
2553 {
2554 printf("** Failed to get %d bytes of memory for offsets vector\n",
2555 (int)(size_offsets_max * sizeof(int)));
2556 yield = 1;
2557 goto EXIT;
2558 }
2559 }
2560 use_size_offsets = n;
2561 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2562 continue;
2563
2564 case 'P':
2565 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2566 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2567 continue;
2568
2569 case 'Q':
2570 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2571 if (extra == NULL)
2572 {
2573 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2574 extra->flags = 0;
2575 }
2576 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2577 extra->match_limit_recursion = n;
2578 continue;
2579
2580 case 'q':
2581 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2582 if (extra == NULL)
2583 {
2584 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2585 extra->flags = 0;
2586 }
2587 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2588 extra->match_limit = n;
2589 continue;
2590
2591 #if !defined NODFA
2592 case 'R':
2593 options |= PCRE_DFA_RESTART;
2594 continue;
2595 #endif
2596
2597 case 'S':
2598 show_malloc = 1;
2599 continue;
2600
2601 case 'Y':
2602 options |= PCRE_NO_START_OPTIMIZE;
2603 continue;
2604
2605 case 'Z':
2606 options |= PCRE_NOTEOL;
2607 continue;
2608
2609 case '?':
2610 options |= PCRE_NO_UTF8_CHECK;
2611 continue;
2612
2613 case '<':
2614 {
2615 int x = check_newline(p, outfile);
2616 if (x == 0) goto NEXT_DATA;
2617 options |= x;
2618 while (*p++ != '>');
2619 }
2620 continue;
2621 }
2622 *q++ = c;
2623 }
2624 *q = 0;
2625 len = (int)(q - dbuffer);
2626
2627 /* Move the data to the end of the buffer so that a read over the end of
2628 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2629 we are using the POSIX interface, we must include the terminating zero. */
2630
2631 #if !defined NOPOSIX
2632 if (posix || do_posix)
2633 {
2634 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2635 bptr += buffer_size - len - 1;
2636 }
2637 else
2638 #endif
2639 {
2640 memmove(bptr + buffer_size - len, bptr, len);
2641 bptr += buffer_size - len;
2642 }
2643
2644 if ((all_use_dfa || use_dfa) && find_match_limit)
2645 {
2646 printf("**Match limit not relevant for DFA matching: ignored\n");
2647 find_match_limit = 0;
2648 }
2649
2650 /* Handle matching via the POSIX interface, which does not
2651 support timing or playing with the match limit or callout data. */
2652
2653 #if !defined NOPOSIX
2654 if (posix || do_posix)
2655 {
2656 int rc;
2657 int eflags = 0;
2658 regmatch_t *pmatch = NULL;
2659 if (use_size_offsets > 0)
2660 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2661 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2662 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2663 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2664
2665 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2666
2667 if (rc != 0)
2668 {
2669 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2670 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2671 }
2672 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2673 != 0)
2674 {
2675 fprintf(outfile, "Matched with REG_NOSUB\n");
2676 }
2677 else
2678 {
2679 size_t i;
2680 for (i = 0; i < (size_t)use_size_offsets; i++)
2681 {
2682 if (pmatch[i].rm_so >= 0)
2683 {
2684 fprintf(outfile, "%2d: ", (int)i);
2685 (void)pchars(dbuffer + pmatch[i].rm_so,
2686 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2687 fprintf(outfile, "\n");
2688 if (do_showcaprest || (i == 0 && do_showrest))
2689 {
2690 fprintf(outfile, "%2d+ ", (int)i);
2691 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2692 outfile);
2693 fprintf(outfile, "\n");
2694 }
2695 }
2696 }
2697 }
2698 free(pmatch);
2699 }
2700
2701 /* Handle matching via the native interface - repeats for /g and /G */
2702
2703 else
2704 #endif /* !defined NOPOSIX */
2705
2706 for (;; gmatched++) /* Loop for /g or /G */
2707 {
2708 markptr = NULL;
2709
2710 if (timeitm > 0)
2711 {
2712 register int i;
2713 clock_t time_taken;
2714 clock_t start_time = clock();
2715
2716 #if !defined NODFA
2717 if (all_use_dfa || use_dfa)
2718 {
2719 int workspace[1000];
2720 for (i = 0; i < timeitm; i++)
2721 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2722 options | g_notempty, use_offsets, use_size_offsets, workspace,
2723 sizeof(workspace)/sizeof(int));
2724 }
2725 else
2726 #endif
2727
2728 for (i = 0; i < timeitm; i++)
2729 count = pcre_exec(re, extra, (char *)bptr, len,
2730 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2731
2732 time_taken = clock() - start_time;
2733 fprintf(outfile, "Execute time %.4f milliseconds\n",
2734 (((double)time_taken * 1000.0) / (double)timeitm) /
2735 (double)CLOCKS_PER_SEC);
2736 }
2737
2738 /* If find_match_limit is set, we want to do repeated matches with
2739 varying limits in order to find the minimum value for the match limit and
2740 for the recursion limit. The match limits are relevant only to the normal
2741 running of pcre_exec(), so disable the JIT optimization. This makes it
2742 possible to run the same set of tests with and without JIT externally
2743 requested. */
2744
2745 if (find_match_limit)
2746 {
2747 if (extra == NULL)
2748 {
2749 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2750 extra->flags = 0;
2751 }
2752 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2753
2754 (void)check_match_limit(re, extra, bptr, len, start_offset,
2755 options|g_notempty, use_offsets, use_size_offsets,
2756 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2757 PCRE_ERROR_MATCHLIMIT, "match()");
2758
2759 count = check_match_limit(re, extra, bptr, len, start_offset,
2760 options|g_notempty, use_offsets, use_size_offsets,
2761 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2762 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2763 }
2764
2765 /* If callout_data is set, use the interface with additional data */
2766
2767 else if (callout_data_set)
2768 {
2769 if (extra == NULL)
2770 {
2771 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2772 extra->flags = 0;
2773 }
2774 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2775 extra->callout_data = &callout_data;
2776 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2777 options | g_notempty, use_offsets, use_size_offsets);
2778 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2779 }
2780
2781 /* The normal case is just to do the match once, with the default
2782 value of match_limit. */
2783
2784 #if !defined NODFA
2785 else if (all_use_dfa || use_dfa)
2786 {
2787 int workspace[1000];
2788 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2789 options | g_notempty, use_offsets, use_size_offsets, workspace,
2790 sizeof(workspace)/sizeof(int));
2791 if (count == 0)
2792 {
2793 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2794 count = use_size_offsets/2;
2795 }
2796 }
2797 #endif
2798
2799 else
2800 {
2801 count = pcre_exec(re, extra, (char *)bptr, len,
2802 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2803 if (count == 0)
2804 {
2805 fprintf(outfile, "Matched, but too many substrings\n");
2806 count = use_size_offsets/3;
2807 }
2808 }
2809
2810 /* Matched */
2811
2812 if (count >= 0)
2813 {
2814 int i, maxcount;
2815
2816 #if !defined NODFA
2817 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2818 #endif
2819 maxcount = use_size_offsets/3;
2820
2821 /* This is a check against a lunatic return value. */
2822
2823 if (count > maxcount)
2824 {
2825 fprintf(outfile,
2826 "** PCRE error: returned count %d is too big for offset size %d\n",
2827 count, use_size_offsets);
2828 count = use_size_offsets/3;
2829 if (do_g || do_G)
2830 {
2831 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2832 do_g = do_G = FALSE; /* Break g/G loop */
2833 }
2834 }
2835
2836 /* do_allcaps requests showing of all captures in the pattern, to check
2837 unset ones at the end. */
2838
2839 if (do_allcaps)
2840 {
2841 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2842 count++; /* Allow for full match */
2843 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2844 }
2845
2846 /* Output the captured substrings */
2847
2848 for (i = 0; i < count * 2; i += 2)
2849 {
2850 if (use_offsets[i] < 0)
2851 {
2852 if (use_offsets[i] != -1)
2853 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2854 use_offsets[i], i);
2855 if (use_offsets[i+1] != -1)
2856 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2857 use_offsets[i+1], i+1);
2858 fprintf(outfile, "%2d: <unset>\n", i/2);
2859 }
2860 else
2861 {
2862 fprintf(outfile, "%2d: ", i/2);
2863 (void)pchars(bptr + use_offsets[i],
2864 use_offsets[i+1] - use_offsets[i], outfile);
2865 fprintf(outfile, "\n");
2866 if (do_showcaprest || (i == 0 && do_showrest))
2867 {
2868 fprintf(outfile, "%2d+ ", i/2);
2869 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2870 outfile);
2871 fprintf(outfile, "\n");
2872 }
2873 }
2874 }
2875
2876 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2877
2878 for (i = 0; i < 32; i++)
2879 {
2880 if ((copystrings & (1 << i)) != 0)
2881 {
2882 char copybuffer[256];
2883 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2884 i, copybuffer, sizeof(copybuffer));
2885 if (rc < 0)
2886 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2887 else
2888 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2889 }
2890 }
2891
2892 for (copynamesptr = copynames;
2893 *copynamesptr != 0;
2894 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2895 {
2896 char copybuffer[256];
2897 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2898 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2899 if (rc < 0)
2900 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2901 else
2902 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2903 }
2904
2905 for (i = 0; i < 32; i++)
2906 {
2907 if ((getstrings & (1 << i)) != 0)
2908 {
2909 const char *substring;
2910 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2911 i, &substring);
2912 if (rc < 0)
2913 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2914 else
2915 {
2916 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2917 pcre_free_substring(substring);
2918 }
2919 }
2920 }
2921
2922 for (getnamesptr = getnames;
2923 *getnamesptr != 0;
2924 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2925 {
2926 const char *substring;
2927 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2928 count, (char *)getnamesptr, &substring);
2929 if (rc < 0)
2930 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2931 else
2932 {
2933 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2934 pcre_free_substring(substring);
2935 }
2936 }
2937
2938 if (getlist)
2939 {
2940 const char **stringlist;
2941 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2942 &stringlist);
2943 if (rc < 0)
2944 fprintf(outfile, "get substring list failed %d\n", rc);
2945 else
2946 {
2947 for (i = 0; i < count; i++)
2948 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2949 if (stringlist[i] != NULL)
2950 fprintf(outfile, "string list not terminated by NULL\n");
2951 pcre_free_substring_list(stringlist);
2952 }
2953 }
2954 }
2955
2956 /* There was a partial match */
2957
2958 else if (count == PCRE_ERROR_PARTIAL)
2959 {
2960 if (markptr == NULL) fprintf(outfile, "Partial match");
2961 else fprintf(outfile, "Partial match, mark=%s", markptr);
2962 if (use_size_offsets > 1)
2963 {
2964 fprintf(outfile, ": ");
2965 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2966 outfile);
2967 }
2968 fprintf(outfile, "\n");
2969 break; /* Out of the /g loop */
2970 }
2971
2972 /* Failed to match. If this is a /g or /G loop and we previously set
2973 g_notempty after a null match, this is not necessarily the end. We want
2974 to advance the start offset, and continue. We won't be at the end of the
2975 string - that was checked before setting g_notempty.
2976
2977 Complication arises in the case when the newline convention is "any",
2978 "crlf", or "anycrlf". If the previous match was at the end of a line
2979 terminated by CRLF, an advance of one character just passes the \r,
2980 whereas we should prefer the longer newline sequence, as does the code in
2981 pcre_exec(). Fudge the offset value to achieve this. We check for a
2982 newline setting in the pattern; if none was set, use pcre_config() to
2983 find the default.
2984
2985 Otherwise, in the case of UTF-8 matching, the advance must be one
2986 character, not one byte. */
2987
2988 else
2989 {
2990 if (g_notempty != 0)
2991 {
2992 int onechar = 1;
2993 unsigned int obits = ((real_pcre *)re)->options;
2994 use_offsets[0] = start_offset;
2995 if ((obits & PCRE_NEWLINE_BITS) == 0)
2996 {
2997 int d;
2998 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2999 /* Note that these values are always the ASCII ones, even in
3000 EBCDIC environments. CR = 13, NL = 10. */
3001 obits = (d == 13)? PCRE_NEWLINE_CR :
3002 (d == 10)? PCRE_NEWLINE_LF :
3003 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3004 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3005 (d == -1)? PCRE_NEWLINE_ANY : 0;
3006 }
3007 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3008 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3009 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3010 &&
3011 start_offset < len - 1 &&
3012 bptr[start_offset] == '\r' &&
3013 bptr[start_offset+1] == '\n')
3014 onechar++;
3015 else if (use_utf8)
3016 {
3017 while (start_offset + onechar < len)
3018 {
3019 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3020 onechar++;
3021 }
3022 }
3023 use_offsets[1] = start_offset + onechar;
3024 }
3025 else
3026 {
3027 switch(count)
3028 {
3029 case PCRE_ERROR_NOMATCH:
3030 if (gmatched == 0)
3031 {
3032 if (markptr == NULL) fprintf(outfile, "No match\n");
3033 else fprintf(outfile, "No match, mark = %s\n", markptr);
3034 }
3035 break;
3036
3037 case PCRE_ERROR_BADUTF8:
3038 case PCRE_ERROR_SHORTUTF8:
3039 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3040 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3041 if (use_size_offsets >= 2)
3042 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3043 use_offsets[1]);
3044 fprintf(outfile, "\n");
3045 break;
3046
3047 default:
3048 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3049 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3050 else
3051 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3052 break;
3053 }
3054
3055 break; /* Out of the /g loop */
3056 }
3057 }
3058
3059 /* If not /g or /G we are done */
3060
3061 if (!do_g && !do_G) break;
3062
3063 /* If we have matched an empty string, first check to see if we are at
3064 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3065 Perl's /g option does. This turns out to be rather cunning. First we set
3066 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3067 same point. If this fails (picked up above) we advance to the next
3068 character. */
3069
3070 g_notempty = 0;
3071
3072 if (use_offsets[0] == use_offsets[1])
3073 {
3074 if (use_offsets[0] == len) break;
3075 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3076 }
3077
3078 /* For /g, update the start offset, leaving the rest alone */
3079
3080 if (do_g) start_offset = use_offsets[1];
3081
3082 /* For /G, update the pointer and length */
3083
3084 else
3085 {
3086 bptr += use_offsets[1];
3087 len -= use_offsets[1];
3088 }
3089 } /* End of loop for /g and /G */
3090
3091 NEXT_DATA: continue;
3092 } /* End of loop for data lines */
3093
3094 CONTINUE:
3095
3096 #if !defined NOPOSIX
3097 if (posix || do_posix) regfree(&preg);
3098 #endif
3099
3100 if (re != NULL) new_free(re);
3101 if (extra != NULL) pcre_free_study(extra);
3102 if (locale_set)
3103 {
3104 new_free((void *)tables);
3105 setlocale(LC_CTYPE, "C");
3106 locale_set = 0;
3107 }
3108 if (jit_stack != NULL)
3109 {
3110 pcre_jit_stack_free(jit_stack);
3111 jit_stack = NULL;
3112 }
3113 }
3114
3115 if (infile == stdin) fprintf(outfile, "\n");
3116
3117 EXIT:
3118
3119 if (infile != NULL && infile != stdin) fclose(infile);
3120 if (outfile != NULL && outfile != stdout) fclose(outfile);
3121
3122 free(buffer);
3123 free(dbuffer);
3124 free(pbuffer);
3125 free(offsets);
3126
3127 return yield;
3128 }
3129
3130 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5