/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 676 - (show annotations)
Sat Aug 27 15:53:04 2011 UTC (4 years ago) by ph10
File MIME type: text/plain
File size: 94287 byte(s)
Error occurred while calculating annotation data.
Give PCRE_ERROR_JIT_STACKLIMIT when JIT runs out of stack.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utf8_char_sizes utf8_char_sizes
122 #define _pcre_utt utt
123 #define _pcre_utt_size utt_size
124 #define _pcre_utt_names utt_names
125 #define _pcre_OP_lengths OP_lengths
126
127 #include "pcre_tables.c"
128
129 /* We also need the pcre_printint() function for printing out compiled
130 patterns. This function is in a separate file so that it can be included in
131 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 know which case is being compiled. */
133
134 #define COMPILING_PCRETEST
135 #include "pcre_printint.src"
136
137 /* The definition of the macro PRINTABLE, which determines whether to print an
138 output character as-is or as a hex value when showing compiled patterns, is
139 contained in the printint.src file. We use it here also, in cases when the
140 locale has not been explicitly changed, so as to get consistent output from
141 systems that differ in their output from isprint() even in the "C" locale. */
142
143 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144
145 /* It is possible to compile this test program without including support for
146 testing the POSIX interface, though this is not available via the standard
147 Makefile. */
148
149 #if !defined NOPOSIX
150 #include "pcreposix.h"
151 #endif
152
153 /* It is also possible, for the benefit of the version currently imported into
154 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155 interface to the DFA matcher (NODFA), and without the doublecheck of the old
156 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157 UTF8 support if PCRE is built without it. */
158
159 #ifndef SUPPORT_UTF8
160 #ifndef NOUTF8
161 #define NOUTF8
162 #endif
163 #endif
164
165
166 /* Other parameters */
167
168 #ifndef CLOCKS_PER_SEC
169 #ifdef CLK_TCK
170 #define CLOCKS_PER_SEC CLK_TCK
171 #else
172 #define CLOCKS_PER_SEC 100
173 #endif
174 #endif
175
176 /* This is the default loop count for timing. */
177
178 #define LOOPREPEAT 500000
179
180 /* Static variables */
181
182 static FILE *outfile;
183 static int log_store = 0;
184 static int callout_count;
185 static int callout_extra;
186 static int callout_fail_count;
187 static int callout_fail_id;
188 static int debug_lengths;
189 static int first_callout;
190 static int locale_set = 0;
191 static int show_malloc;
192 static int use_utf8;
193 static size_t gotten_store;
194 static const unsigned char *last_callout_mark = NULL;
195
196 /* The buffers grow automatically if very long input lines are encountered. */
197
198 static int buffer_size = 50000;
199 static uschar *buffer = NULL;
200 static uschar *dbuffer = NULL;
201 static uschar *pbuffer = NULL;
202
203 /* Textual explanations for runtime error codes */
204
/* Human-readable texts for run-time error codes. The position of each entry
is given in the trailing comment; presumably the table is indexed by the
negated (positive) error code - confirm against the code that reads it. NULL
entries are codes that are either never returned or reported by dedicated
code elsewhere. */

static const char *errtexts[] = {
  NULL,                                      /*  0: no error */
  NULL,                                      /*  1: NOMATCH is handled specially */
  "NULL argument passed",                    /*  2 */
  "bad option value",                        /*  3 */
  "magic number missing",                    /*  4 */
  "unknown opcode - pattern overwritten?",   /*  5 */
  "no more memory",                          /*  6 */
  NULL,                                      /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                    /*  8 */
  "callout error code",                      /*  9 */
  NULL,                                      /* 10: BADUTF8 is handled specially */
  "bad UTF-8 offset",                        /* 11 */
  NULL,                                      /* 12: PARTIAL is handled specially */
  "not used - internal error",               /* 13 */
  "internal error - pattern overwritten?",   /* 14 */
  "bad count value",                         /* 15 */
  "item unsupported for DFA matching",       /* 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* 17 */
  "match limit not supported for DFA matching",  /* 18 */
  "workspace size exceeded in DFA matching", /* 19 */
  "too much recursion for DFA matching",     /* 20 */
  "recursion limit exceeded",                /* 21 */
  "not used - internal error",               /* 22 */
  "invalid combination of newline options",  /* 23 */
  "bad offset value",                        /* 24 */
  NULL,                                      /* 25: SHORTUTF8 is handled specially */
  "nested recursion at the same subject position",  /* 26 */
  "JIT stack limit reached"                  /* 27 */
};
235
236
237 /*************************************************
238 * Alternate character tables *
239 *************************************************/
240
241 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242 using the default tables of the library. However, the T option can be used to
243 select alternate sets of tables, for different kinds of testing. Note also that
244 the L (locale) option also adjusts the tables. */
245
246 /* This is the set of tables distributed as default with PCRE. It recognizes
247 only ASCII characters. */
248
static const unsigned char tables0[] = {

  /* Section 1 (bytes 0-255): lower-casing table. Only 'A'-'Z' are changed;
  every other byte maps to itself. */

  0, 1, 2, 3, 4, 5, 6, 7,
  8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23,
  24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
  96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

  /* Section 2 (bytes 256-511): case-flipping table. 'A'-'Z' and 'a'-'z'
  swap; every other byte maps to itself. */

  0, 1, 2, 3, 4, 5, 6, 7,
  8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23,
  24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
  96, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

  /* Section 3 (bytes 512-831): character-class bit maps. Each map occupies 32
  bytes, with bits running from the least significant end of each byte. The
  ten maps appear in this order: space, xdigit, digit, upper, lower, word,
  graph, print, punct, cntrl. Other classes are built from combinations. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* Section 4 (bytes 832-1087): per-character type flags, one byte per
  character, combining these bits:
    0x01   white space character
    0x02   letter
    0x04   decimal digit
    0x08   hexadecimal digit
    0x10   alphanumeric or '_'
    0x80   regular expression metacharacter or binary zero
  */

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  /* 248-255 */
};
417
418 /* This is a set of tables that came originally from a Windows user. It seems to
419 be at least an approximation of ISO 8859. In particular, there are characters
420 greater than 128 that are marked as spaces, letters, etc. */
421
/* NOTE(review): the section and class labels below assume this table uses the
same four-part layout as tables0 (256 + 256 + 320 + 256 bytes). The byte
counts match; confirm the class order against the table generator. */

static const unsigned char tables1[] = {

  /* Lower-casing table (bytes 0-255). Besides 'A'-'Z', bytes 192-222 map to
  224-254, except 215 which maps to itself. */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

  /* Case-flipping table (bytes 256-511). */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,65,66,67,68,69,70,71,
  72,73,74,75,76,77,78,79,
  80,81,82,83,84,85,86,87,
  88,89,90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,247,
  216,217,218,219,220,221,222,255,

  /* Character-class bit maps (bytes 512-831), 32 bytes per class. */

  /* space */
  0,62,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,
  32,0,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,

  /* xdigit */
  0,0,0,0,0,0,255,3,
  126,0,0,0,126,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,

  /* digit */
  0,0,0,0,0,0,255,3,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,12,2,
  0,0,0,0,0,0,0,0,

  /* upper */
  0,0,0,0,0,0,0,0,
  254,255,255,7,0,0,0,0,
  0,0,0,0,0,0,0,0,
  255,255,127,127,0,0,0,0,

  /* lower */
  0,0,0,0,0,0,0,0,
  0,0,0,0,254,255,255,7,
  0,0,0,0,0,4,32,4,
  0,0,0,128,255,255,127,255,

  /* word */
  0,0,0,0,0,0,255,3,
  254,255,255,135,254,255,255,7,
  0,0,0,0,0,4,44,6,
  255,255,127,255,255,255,127,255,

  /* graph */
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,255,

  /* print */
  0,2,0,0,255,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,255,255,255,255,
  255,255,255,255,255,255,255,255,

  /* punct */
  0,0,0,0,254,255,0,252,
  1,0,0,248,1,0,0,120,
  0,0,0,0,254,255,255,255,
  0,0,128,0,0,0,128,0,

  /* cntrl */
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,128,
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,0,

  /* Per-character type flags (bytes 832-1087), one byte per character. */

  128,0,0,0,0,0,0,0,
  0,1,1,0,1,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,128,0,0,0,
  128,128,128,128,0,0,128,0,
  28,28,28,28,28,28,28,28,
  28,28,0,0,0,0,0,128,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,128,16,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,0,0,
  0,0,0,0,0,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,0,0,0,0,
  0,0,18,0,0,0,0,0,
  0,0,20,20,0,18,0,0,
  0,20,18,0,0,0,0,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18
};
560
561
562
563
564 #ifndef HAVE_STRERROR
565 /*************************************************
566 * Provide strerror() for non-ANSI libraries *
567 *************************************************/
568
569 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
570 in their libraries, but can provide the same facility by this simple
571 alternative function. */
572
/* Error-message table and its entry count, supplied by the C library on the
old systems this fallback is meant for. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): returns the sys_errlist entry for n, or a fixed
string when n lies outside 0 .. sys_nerr-1. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
582 #endif /* HAVE_STRERROR */
583
584
585 /*************************************************
586 * JIT memory callback *
587 *************************************************/
588
589 static pcre_jit_stack* jit_callback(void *arg)
590 {
591 return (pcre_jit_stack *)arg;
592 }
593
594
595 /*************************************************
596 * Read or extend an input line *
597 *************************************************/
598
599 /* Input lines are read into buffer, but both patterns and data lines can be
600 continued over multiple input lines. In addition, if the buffer fills up, we
601 want to automatically expand it so as to be able to handle extremely large
602 lines that are needed for certain stress tests. When the input buffer is
603 expanded, the other two buffers must also be expanded likewise, and the
604 contents of pbuffer, which are a copy of the input for callouts, must be
605 preserved (for when expansion happens for a data line). This is not the most
606 optimal way of handling this, but hey, this is just a test program!
607
608 Arguments:
609 f the file to read
610 start where in buffer to start (this *must* be within buffer)
611 prompt for stdin or readline()
612
613 Returns: pointer to the start of new data
614 could be a copy of start, or could be moved
615 NULL if no data read and EOF reached
616 */
617
618 static uschar *
619 extend_inputline(FILE *f, uschar *start, const char *prompt)
620 {
621 uschar *here = start;
622
623 for (;;)
624 {
625 int rlen = (int)(buffer_size - (here - buffer));
626
627 if (rlen > 1000)
628 {
629 int dlen;
630
631 /* If libreadline support is required, use readline() to read a line if the
632 input is a terminal. Note that readline() removes the trailing newline, so
633 we must put it back again, to be compatible with fgets(). */
634
635 #ifdef SUPPORT_LIBREADLINE
636 if (isatty(fileno(f)))
637 {
638 size_t len;
639 char *s = readline(prompt);
640 if (s == NULL) return (here == start)? NULL : start;
641 len = strlen(s);
642 if (len > 0) add_history(s);
643 if (len > rlen - 1) len = rlen - 1;
644 memcpy(here, s, len);
645 here[len] = '\n';
646 here[len+1] = 0;
647 free(s);
648 }
649 else
650 #endif
651
652 /* Read the next line by normal means, prompting if the file is stdin. */
653
654 {
655 if (f == stdin) printf("%s", prompt);
656 if (fgets((char *)here, rlen, f) == NULL)
657 return (here == start)? NULL : start;
658 }
659
660 dlen = (int)strlen((char *)here);
661 if (dlen > 0 && here[dlen - 1] == '\n') return start;
662 here += dlen;
663 }
664
665 else
666 {
667 int new_buffer_size = 2*buffer_size;
668 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
669 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671
672 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673 {
674 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
675 exit(1);
676 }
677
678 memcpy(new_buffer, buffer, buffer_size);
679 memcpy(new_pbuffer, pbuffer, buffer_size);
680
681 buffer_size = new_buffer_size;
682
683 start = new_buffer + (start - buffer);
684 here = new_buffer + (here - buffer);
685
686 free(buffer);
687 free(dbuffer);
688 free(pbuffer);
689
690 buffer = new_buffer;
691 dbuffer = new_dbuffer;
692 pbuffer = new_pbuffer;
693 }
694 }
695
696 return NULL; /* Control never gets here */
697 }
698
699
700
701
702
703
704
705 /*************************************************
706 * Read number from string *
707 *************************************************/
708
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A digit
string too large for an int saturates at the largest int value instead of
overflowing (signed overflow is undefined behaviour); all of its digits are
still consumed.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  const int int_max = (int)(~0u >> 1);   /* largest int, without <limits.h> */
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = *str++ - '0';

    /* result*10 + digit fits exactly when result <= (int_max - digit)/10. */
    if (result > (int_max - digit) / 10)
      result = int_max;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
729
730
731
732
733 /*************************************************
734 * Convert UTF-8 string to value *
735 *************************************************/
736
737 /* This function takes one or more bytes that represents a UTF-8 character,
738 and returns the value of the character.
739
740 Argument:
741 utf8bytes a pointer to the byte vector
742 vptr a pointer to an int to receive the value
743
744 Returns: > 0 => the number of bytes consumed
745 -6 to 0 => malformed UTF-8 character at offset = (-return)
746 */
747
748 #if !defined NOUTF8
749
750 static int
751 utf82ord(unsigned char *utf8bytes, int *vptr)
752 {
753 int c = *utf8bytes++;
754 int d = c;
755 int i, j, s;
756
757 for (i = -1; i < 6; i++) /* i is number of additional bytes */
758 {
759 if ((d & 0x80) == 0) break;
760 d <<= 1;
761 }
762
763 if (i == -1) { *vptr = c; return 1; } /* ascii character */
764 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
765
766 /* i now has a value in the range 1-5 */
767
768 s = 6*i;
769 d = (c & utf8_table3[i]) << s;
770
771 for (j = 0; j < i; j++)
772 {
773 c = *utf8bytes++;
774 if ((c & 0xc0) != 0x80) return -(j+1);
775 s -= 6;
776 d |= (c & 0x3f) << s;
777 }
778
779 /* Check that encoding was the correct unique one */
780
781 for (j = 0; j < utf8_table1_size; j++)
782 if (d <= utf8_table1[j]) break;
783 if (j != i) return -(i+1);
784
785 /* Valid value */
786
787 *vptr = d;
788 return i+1;
789 }
790
791 #endif
792
793
794
795 /*************************************************
796 * Convert character value to UTF-8 *
797 *************************************************/
798
799 /* This function takes an integer value in the range 0 - 0x7fffffff
800 and encodes it as a UTF-8 character in 1 to 6 bytes.
801
802 Arguments:
803 cvalue the character value
804 utf8bytes pointer to buffer for result - at least 6 bytes long
805
806 Returns: number of bytes placed in the buffer
807 */
808
809 #if !defined NOUTF8
810
811 static int
812 ord2utf8(int cvalue, uschar *utf8bytes)
813 {
814 register int i, j;
815 for (i = 0; i < utf8_table1_size; i++)
816 if (cvalue <= utf8_table1[i]) break;
817 utf8bytes += i;
818 for (j = i; j > 0; j--)
819 {
820 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
821 cvalue >>= 6;
822 }
823 *utf8bytes = utf8_table2[i] | cvalue;
824 return i + 1;
825 }
826
827 #endif
828
829
830
831 /*************************************************
832 * Print character string *
833 *************************************************/
834
835 /* Character string printing function. Must handle UTF-8 strings in utf8
836 mode. Yields number of characters printed. If handed a NULL file, just counts
837 chars without printing. */
838
839 static int pchars(unsigned char *p, int length, FILE *f)
840 {
841 int c = 0;
842 int yield = 0;
843
844 while (length-- > 0)
845 {
846 #if !defined NOUTF8
847 if (use_utf8)
848 {
849 int rc = utf82ord(p, &c);
850
851 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
852 {
853 length -= rc - 1;
854 p += rc;
855 if (PRINTHEX(c))
856 {
857 if (f != NULL) fprintf(f, "%c", c);
858 yield++;
859 }
860 else
861 {
862 int n = 4;
863 if (f != NULL) fprintf(f, "\\x{%02x}", c);
864 yield += (n <= 0x000000ff)? 2 :
865 (n <= 0x00000fff)? 3 :
866 (n <= 0x0000ffff)? 4 :
867 (n <= 0x000fffff)? 5 : 6;
868 }
869 continue;
870 }
871 }
872 #endif
873
874 /* Not UTF-8, or malformed UTF-8 */
875
876 c = *p++;
877 if (PRINTHEX(c))
878 {
879 if (f != NULL) fprintf(f, "%c", c);
880 yield++;
881 }
882 else
883 {
884 if (f != NULL) fprintf(f, "\\x%02x", c);
885 yield += 4;
886 }
887 }
888
889 return yield;
890 }
891
892
893
894 /*************************************************
895 * Callout function *
896 *************************************************/
897
898 /* Called from PCRE as a result of the (?C) item. We print out where we are in
899 the match. Yield zero unless more callouts than the fail count, or the callout
900 data is not zero. */
901
902 static int callout(pcre_callout_block *cb)
903 {
904 FILE *f = (first_callout | callout_extra)? outfile : NULL;
905 int i, pre_start, post_start, subject_length;
906
907 if (callout_extra)
908 {
909 fprintf(f, "Callout %d: last capture = %d\n",
910 cb->callout_number, cb->capture_last);
911
912 for (i = 0; i < cb->capture_top * 2; i += 2)
913 {
914 if (cb->offset_vector[i] < 0)
915 fprintf(f, "%2d: <unset>\n", i/2);
916 else
917 {
918 fprintf(f, "%2d: ", i/2);
919 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
920 cb->offset_vector[i+1] - cb->offset_vector[i], f);
921 fprintf(f, "\n");
922 }
923 }
924 }
925
926 /* Re-print the subject in canonical form, the first time or if giving full
927 datails. On subsequent calls in the same match, we use pchars just to find the
928 printed lengths of the substrings. */
929
930 if (f != NULL) fprintf(f, "--->");
931
932 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
933 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
934 cb->current_position - cb->start_match, f);
935
936 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
937
938 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
939 cb->subject_length - cb->current_position, f);
940
941 if (f != NULL) fprintf(f, "\n");
942
943 /* Always print appropriate indicators, with callout number if not already
944 shown. For automatic callouts, show the pattern offset. */
945
946 if (cb->callout_number == 255)
947 {
948 fprintf(outfile, "%+3d ", cb->pattern_position);
949 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
950 }
951 else
952 {
953 if (callout_extra) fprintf(outfile, " ");
954 else fprintf(outfile, "%3d ", cb->callout_number);
955 }
956
957 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
958 fprintf(outfile, "^");
959
960 if (post_start > 0)
961 {
962 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
963 fprintf(outfile, "^");
964 }
965
966 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
967 fprintf(outfile, " ");
968
969 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
970 pbuffer + cb->pattern_position);
971
972 fprintf(outfile, "\n");
973 first_callout = 0;
974
975 if (cb->mark != last_callout_mark)
976 {
977 fprintf(outfile, "Latest Mark: %s\n",
978 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979 last_callout_mark = cb->mark;
980 }
981
982 if (cb->callout_data != NULL)
983 {
984 int callout_data = *((int *)(cb->callout_data));
985 if (callout_data != 0)
986 {
987 fprintf(outfile, "Callout data = %d\n", callout_data);
988 return callout_data;
989 }
990 }
991
992 return (cb->callout_number != callout_fail_id)? 0 :
993 (++callout_count >= callout_fail_count)? 1 : 0;
994 }
995
996
997 /*************************************************
998 * Local malloc functions *
999 *************************************************/
1000
1001 /* Alternative malloc function, to test functionality and save the size of a
1002 compiled re. The show_malloc variable is set only during matching. */
1003
1004 static void *new_malloc(size_t size)
1005 {
1006 void *block = malloc(size);
1007 gotten_store = size;
1008 if (show_malloc)
1009 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1010 return block;
1011 }
1012
1013 static void new_free(void *block)
1014 {
1015 if (show_malloc)
1016 fprintf(outfile, "free %p\n", block);
1017 free(block);
1018 }
1019
1020 /* For recursion malloc/free, to test stacking calls */
1021
1022 static void *stack_malloc(size_t size)
1023 {
1024 void *block = malloc(size);
1025 if (show_malloc)
1026 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1027 return block;
1028 }
1029
1030 static void stack_free(void *block)
1031 {
1032 if (show_malloc)
1033 fprintf(outfile, "stack_free %p\n", block);
1034 free(block);
1035 }
1036
1037
1038 /*************************************************
1039 * Call pcre_fullinfo() *
1040 *************************************************/
1041
1042 /* Get one piece of information from the pcre_fullinfo() function */
1043
1044 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1045 {
1046 int rc;
1047 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1048 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1049 }
1050
1051
1052
1053 /*************************************************
1054 * Check for supported JIT architecture *
1055 *************************************************/
1056
1057 /* If it won't JIT-compile a very simple regex, return FALSE. */
1058
1059 static int check_jit_arch(void)
1060 {
1061 const char *error;
1062 int erroffset, rc;
1063 pcre *re = pcre_compile("abc", 0, &error, &erroffset, NULL);
1064 pcre_extra *extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
1065 rc = extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1066 extra->executable_jit != NULL;
1067 pcre_free_study(extra);
1068 free(re);
1069 return rc;
1070 }
1071
1072
1073 /*************************************************
1074 * Byte flipping function *
1075 *************************************************/
1076
/* Reverse the byte order of a 2-byte or 4-byte quantity held in the low-order
bytes of an unsigned long.

Arguments:
  value     the quantity to flip
  n         2 to swap the two low-order bytes; any other value reverses the
            four low-order bytes

Returns:    the flipped quantity; bits of value above those being flipped do
            not appear in the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int byte0 = value & 0xff;
  unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2;
  unsigned long int byte3;

  if (n == 2)
    {
    return (byte0 << 8) | byte1;
    }

  byte2 = (value >> 16) & 0xff;
  byte3 = (value >> 24) & 0xff;

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1086
1087
1088
1089
1090 /*************************************************
1091 * Check match or recursion limit *
1092 *************************************************/
1093
1094 static int
1095 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1096 int start_offset, int options, int *use_offsets, int use_size_offsets,
1097 int flag, unsigned long int *limit, int errnumber, const char *msg)
1098 {
1099 int count;
1100 int min = 0;
1101 int mid = 64;
1102 int max = -1;
1103
1104 extra->flags |= flag;
1105
1106 for (;;)
1107 {
1108 *limit = mid;
1109
1110 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1111 use_offsets, use_size_offsets);
1112
1113 if (count == errnumber)
1114 {
1115 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1116 min = mid;
1117 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1118 }
1119
1120 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1121 count == PCRE_ERROR_PARTIAL)
1122 {
1123 if (mid == min + 1)
1124 {
1125 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1126 break;
1127 }
1128 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1129 max = mid;
1130 mid = (min + mid)/2;
1131 }
1132 else break; /* Some other error */
1133 }
1134
1135 extra->flags &= ~flag;
1136 return count;
1137 }
1138
1139
1140
1141 /*************************************************
1142 * Case-independent strncmp() function *
1143 *************************************************/
1144
1145 /*
1146 Arguments:
1147 s first string
1148 t second string
1149 n number of characters to compare
1150
1151 Returns: < 0, = 0, or > 0, according to the comparison
1152 */
1153
1154 static int
1155 strncmpic(uschar *s, uschar *t, int n)
1156 {
1157 while (n--)
1158 {
1159 int c = tolower(*s++) - tolower(*t++);
1160 if (c) return c;
1161 }
1162 return 0;
1163 }
1164
1165
1166
1167 /*************************************************
1168 * Check newline indicator *
1169 *************************************************/
1170
1171 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1172 a message and return 0 if there is no match.
1173
1174 Arguments:
1175 p points after the leading '<'
1176 f file for error message
1177
1178 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1179 */
1180
1181 static int
1182 check_newline(uschar *p, FILE *f)
1183 {
1184 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1185 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1186 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1187 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1188 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1189 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1190 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1191 fprintf(f, "Unknown newline type at: <%s\n", p);
1192 return 0;
1193 }
1194
1195
1196
1197 /*************************************************
1198 * Usage function *
1199 *************************************************/
1200
/* Write the command-line help text to stdout. Which lines appear depends on
the build: the readline line is chosen by SUPPORT_LIBREADLINE, and the -dfa
and -p entries are omitted when NODFA or NOPOSIX, respectively, is defined. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
    stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
    stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n",
    stdout);

  fputs(" -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
    stdout);
}
1235
1236
1237
1238 /*************************************************
1239 * Main Program *
1240 *************************************************/
1241
1242 /* Read lines from named file or stdin and write to named file or stdout; lines
1243 consist of a regular expression, in delimiters and optionally followed by
1244 options, followed by a set of test data, terminated by an empty line. */
1245
1246 int main(int argc, char **argv)
1247 {
1248 FILE *infile = stdin;
1249 int options = 0;
1250 int study_options = 0;
1251 int default_find_match_limit = FALSE;
1252 int op = 1;
1253 int timeit = 0;
1254 int timeitm = 0;
1255 int showinfo = 0;
1256 int showstore = 0;
1257 int force_study = -1;
1258 int force_study_options = 0;
1259 int quiet = 0;
1260 int size_offsets = 45;
1261 int size_offsets_max;
1262 int *offsets = NULL;
1263 #if !defined NOPOSIX
1264 int posix = 0;
1265 #endif
1266 int debug = 0;
1267 int done = 0;
1268 int all_use_dfa = 0;
1269 int yield = 0;
1270 int stack_size;
1271
1272 pcre_jit_stack *jit_stack = NULL;
1273
1274
1275 /* These vectors store, end-to-end, a list of captured substring names. Assume
1276 that 1024 is plenty long enough for the few names we'll be testing. */
1277
1278 uschar copynames[1024];
1279 uschar getnames[1024];
1280
1281 uschar *copynamesptr;
1282 uschar *getnamesptr;
1283
1284 /* Get buffers from malloc() so that Electric Fence will check their misuse
1285 when I am debugging. They grow automatically when very long lines are read. */
1286
1287 buffer = (unsigned char *)malloc(buffer_size);
1288 dbuffer = (unsigned char *)malloc(buffer_size);
1289 pbuffer = (unsigned char *)malloc(buffer_size);
1290
1291 /* The outfile variable is static so that new_malloc can use it. */
1292
1293 outfile = stdout;
1294
1295 /* The following _setmode() stuff is some Windows magic that tells its runtime
1296 library to translate CRLF into a single LF character. At least, that's what
1297 I've been told: never having used Windows I take this all on trust. Originally
1298 it set 0x8000, but then I was advised that _O_BINARY was better. */
1299
1300 #if defined(_WIN32) || defined(WIN32)
1301 _setmode( _fileno( stdout ), _O_BINARY );
1302 #endif
1303
1304 /* Scan options */
1305
1306 while (argc > 1 && argv[op][0] == '-')
1307 {
1308 unsigned char *endptr;
1309
1310 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1311 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1312 else if (strcmp(argv[op], "-s+") == 0)
1313 {
1314 force_study = 1;
1315 force_study_options = PCRE_STUDY_JIT_COMPILE;
1316 }
1317 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1318 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1319 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1320 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1321 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1322 #if !defined NODFA
1323 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1324 #endif
1325 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1326 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1327 *endptr == 0))
1328 {
1329 op++;
1330 argc--;
1331 }
1332 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1333 {
1334 int both = argv[op][2] == 0;
1335 int temp;
1336 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1337 *endptr == 0))
1338 {
1339 timeitm = temp;
1340 op++;
1341 argc--;
1342 }
1343 else timeitm = LOOPREPEAT;
1344 if (both) timeit = timeitm;
1345 }
1346 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1347 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1348 *endptr == 0))
1349 {
1350 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1351 printf("PCRE: -S not supported on this OS\n");
1352 exit(1);
1353 #else
1354 int rc;
1355 struct rlimit rlim;
1356 getrlimit(RLIMIT_STACK, &rlim);
1357 rlim.rlim_cur = stack_size * 1024 * 1024;
1358 rc = setrlimit(RLIMIT_STACK, &rlim);
1359 if (rc != 0)
1360 {
1361 printf("PCRE: setrlimit() failed with error %d\n", rc);
1362 exit(1);
1363 }
1364 op++;
1365 argc--;
1366 #endif
1367 }
1368 #if !defined NOPOSIX
1369 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1370 #endif
1371 else if (strcmp(argv[op], "-C") == 0)
1372 {
1373 int rc;
1374 unsigned long int lrc;
1375 printf("PCRE version %s\n", pcre_version());
1376 printf("Compiled with\n");
1377 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1378 printf(" %sUTF-8 support\n", rc? "" : "No ");
1379 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1380 printf(" %sUnicode properties support\n", rc? "" : "No ");
1381 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1382 if (rc)
1383 printf(" Just-in-time compiler support%s\n", check_jit_arch()?
1384 "" : " (but this architecture is unsupported)");
1385 else
1386 printf(" No just-in-time compiler support\n");
1387 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1388 /* Note that these values are always the ASCII values, even
1389 in EBCDIC environments. CR is 13 and NL is 10. */
1390 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1391 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1392 (rc == -2)? "ANYCRLF" :
1393 (rc == -1)? "ANY" : "???");
1394 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1395 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1396 "all Unicode newlines");
1397 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1398 printf(" Internal link size = %d\n", rc);
1399 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1400 printf(" POSIX malloc threshold = %d\n", rc);
1401 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1402 printf(" Default match limit = %ld\n", lrc);
1403 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1404 printf(" Default recursion depth limit = %ld\n", lrc);
1405 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1406 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1407 goto EXIT;
1408 }
1409 else if (strcmp(argv[op], "-help") == 0 ||
1410 strcmp(argv[op], "--help") == 0)
1411 {
1412 usage();
1413 goto EXIT;
1414 }
1415 else
1416 {
1417 printf("** Unknown or malformed option %s\n", argv[op]);
1418 usage();
1419 yield = 1;
1420 goto EXIT;
1421 }
1422 op++;
1423 argc--;
1424 }
1425
1426 /* Get the store for the offsets vector, and remember what it was */
1427
1428 size_offsets_max = size_offsets;
1429 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1430 if (offsets == NULL)
1431 {
1432 printf("** Failed to get %d bytes of memory for offsets vector\n",
1433 (int)(size_offsets_max * sizeof(int)));
1434 yield = 1;
1435 goto EXIT;
1436 }
1437
1438 /* Sort out the input and output files */
1439
1440 if (argc > 1)
1441 {
1442 infile = fopen(argv[op], INPUT_MODE);
1443 if (infile == NULL)
1444 {
1445 printf("** Failed to open %s\n", argv[op]);
1446 yield = 1;
1447 goto EXIT;
1448 }
1449 }
1450
1451 if (argc > 2)
1452 {
1453 outfile = fopen(argv[op+1], OUTPUT_MODE);
1454 if (outfile == NULL)
1455 {
1456 printf("** Failed to open %s\n", argv[op+1]);
1457 yield = 1;
1458 goto EXIT;
1459 }
1460 }
1461
1462 /* Set alternative malloc function */
1463
1464 pcre_malloc = new_malloc;
1465 pcre_free = new_free;
1466 pcre_stack_malloc = stack_malloc;
1467 pcre_stack_free = stack_free;
1468
1469 /* Heading line unless quiet, then prompt for first regex if stdin */
1470
1471 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1472
1473 /* Main loop */
1474
1475 while (!done)
1476 {
1477 pcre *re = NULL;
1478 pcre_extra *extra = NULL;
1479
1480 #if !defined NOPOSIX /* There are still compilers that require no indent */
1481 regex_t preg;
1482 int do_posix = 0;
1483 #endif
1484
1485 const char *error;
1486 unsigned char *markptr;
1487 unsigned char *p, *pp, *ppp;
1488 unsigned char *to_file = NULL;
1489 const unsigned char *tables = NULL;
1490 unsigned long int true_size, true_study_size = 0;
1491 size_t size, regex_gotten_store;
1492 int do_allcaps = 0;
1493 int do_mark = 0;
1494 int do_study = 0;
1495 int no_force_study = 0;
1496 int do_debug = debug;
1497 int do_G = 0;
1498 int do_g = 0;
1499 int do_showinfo = showinfo;
1500 int do_showrest = 0;
1501 int do_showcaprest = 0;
1502 int do_flip = 0;
1503 int erroroffset, len, delimiter, poffset;
1504
1505 use_utf8 = 0;
1506 debug_lengths = 1;
1507
1508 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1509 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1510 fflush(outfile);
1511
1512 p = buffer;
1513 while (isspace(*p)) p++;
1514 if (*p == 0) continue;
1515
1516 /* See if the pattern is to be loaded pre-compiled from a file. */
1517
1518 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1519 {
1520 unsigned long int magic, get_options;
1521 uschar sbuf[8];
1522 FILE *f;
1523
1524 p++;
1525 pp = p + (int)strlen((char *)p);
1526 while (isspace(pp[-1])) pp--;
1527 *pp = 0;
1528
1529 f = fopen((char *)p, "rb");
1530 if (f == NULL)
1531 {
1532 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1533 continue;
1534 }
1535
1536 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1537
1538 true_size =
1539 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1540 true_study_size =
1541 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1542
1543 re = (real_pcre *)new_malloc(true_size);
1544 regex_gotten_store = gotten_store;
1545
1546 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1547
1548 magic = ((real_pcre *)re)->magic_number;
1549 if (magic != MAGIC_NUMBER)
1550 {
1551 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1552 {
1553 do_flip = 1;
1554 }
1555 else
1556 {
1557 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1558 fclose(f);
1559 continue;
1560 }
1561 }
1562
1563 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1564 do_flip? " (byte-inverted)" : "", p);
1565
1566 /* Need to know if UTF-8 for printing data strings */
1567
1568 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1569 use_utf8 = (get_options & PCRE_UTF8) != 0;
1570
1571 /* Now see if there is any following study data. */
1572
1573 if (true_study_size != 0)
1574 {
1575 pcre_study_data *psd;
1576
1577 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1578 extra->flags = PCRE_EXTRA_STUDY_DATA;
1579
1580 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1581 extra->study_data = psd;
1582
1583 if (fread(psd, 1, true_study_size, f) != true_study_size)
1584 {
1585 FAIL_READ:
1586 fprintf(outfile, "Failed to read data from %s\n", p);
1587 if (extra != NULL) pcre_free_study(extra);
1588 if (re != NULL) new_free(re);
1589 fclose(f);
1590 continue;
1591 }
1592 fprintf(outfile, "Study data loaded from %s\n", p);
1593 do_study = 1; /* To get the data output if requested */
1594 }
1595 else fprintf(outfile, "No study data\n");
1596
1597 fclose(f);
1598 goto SHOW_INFO;
1599 }
1600
1601 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1602 the pattern; if it isn't complete, read more. */
1603
1604 delimiter = *p++;
1605
1606 if (isalnum(delimiter) || delimiter == '\\')
1607 {
1608 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1609 goto SKIP_DATA;
1610 }
1611
1612 pp = p;
1613 poffset = (int)(p - buffer);
1614
1615 for(;;)
1616 {
1617 while (*pp != 0)
1618 {
1619 if (*pp == '\\' && pp[1] != 0) pp++;
1620 else if (*pp == delimiter) break;
1621 pp++;
1622 }
1623 if (*pp != 0) break;
1624 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1625 {
1626 fprintf(outfile, "** Unexpected EOF\n");
1627 done = 1;
1628 goto CONTINUE;
1629 }
1630 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1631 }
1632
1633 /* The buffer may have moved while being extended; reset the start of data
1634 pointer to the correct relative point in the buffer. */
1635
1636 p = buffer + poffset;
1637
1638 /* If the first character after the delimiter is backslash, make
1639 the pattern end with backslash. This is purely to provide a way
1640 of testing for the error message when a pattern ends with backslash. */
1641
1642 if (pp[1] == '\\') *pp++ = '\\';
1643
1644 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1645 for callouts. */
1646
1647 *pp++ = 0;
1648 strcpy((char *)pbuffer, (char *)p);
1649
1650 /* Look for options after final delimiter */
1651
1652 options = 0;
1653 log_store = showstore; /* default from command line */
1654
1655 while (*pp != 0)
1656 {
1657 switch (*pp++)
1658 {
1659 case 'f': options |= PCRE_FIRSTLINE; break;
1660 case 'g': do_g = 1; break;
1661 case 'i': options |= PCRE_CASELESS; break;
1662 case 'm': options |= PCRE_MULTILINE; break;
1663 case 's': options |= PCRE_DOTALL; break;
1664 case 'x': options |= PCRE_EXTENDED; break;
1665
1666 case '+':
1667 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1668 break;
1669
1670 case '=': do_allcaps = 1; break;
1671 case 'A': options |= PCRE_ANCHORED; break;
1672 case 'B': do_debug = 1; break;
1673 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1674 case 'D': do_debug = do_showinfo = 1; break;
1675 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1676 case 'F': do_flip = 1; break;
1677 case 'G': do_G = 1; break;
1678 case 'I': do_showinfo = 1; break;
1679 case 'J': options |= PCRE_DUPNAMES; break;
1680 case 'K': do_mark = 1; break;
1681 case 'M': log_store = 1; break;
1682 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1683
1684 #if !defined NOPOSIX
1685 case 'P': do_posix = 1; break;
1686 #endif
1687
1688 case 'S':
1689 if (do_study == 0)
1690 {
1691 do_study = 1;
1692 if (*pp == '+')
1693 {
1694 study_options |= PCRE_STUDY_JIT_COMPILE;
1695 pp++;
1696 }
1697 }
1698 else
1699 {
1700 do_study = 0;
1701 no_force_study = 1;
1702 }
1703 break;
1704
1705 case 'U': options |= PCRE_UNGREEDY; break;
1706 case 'W': options |= PCRE_UCP; break;
1707 case 'X': options |= PCRE_EXTRA; break;
1708 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1709 case 'Z': debug_lengths = 0; break;
1710 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1711 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1712
1713 case 'T':
1714 switch (*pp++)
1715 {
1716 case '0': tables = tables0; break;
1717 case '1': tables = tables1; break;
1718
1719 case '\r':
1720 case '\n':
1721 case ' ':
1722 case 0:
1723 fprintf(outfile, "** Missing table number after /T\n");
1724 goto SKIP_DATA;
1725
1726 default:
1727 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1728 goto SKIP_DATA;
1729 }
1730 break;
1731
1732 case 'L':
1733 ppp = pp;
1734 /* The '\r' test here is so that it works on Windows. */
1735 /* The '0' test is just in case this is an unterminated line. */
1736 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1737 *ppp = 0;
1738 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1739 {
1740 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1741 goto SKIP_DATA;
1742 }
1743 locale_set = 1;
1744 tables = pcre_maketables();
1745 pp = ppp;
1746 break;
1747
1748 case '>':
1749 to_file = pp;
1750 while (*pp != 0) pp++;
1751 while (isspace(pp[-1])) pp--;
1752 *pp = 0;
1753 break;
1754
1755 case '<':
1756 {
1757 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1758 {
1759 options |= PCRE_JAVASCRIPT_COMPAT;
1760 pp += 3;
1761 }
1762 else
1763 {
1764 int x = check_newline(pp, outfile);
1765 if (x == 0) goto SKIP_DATA;
1766 options |= x;
1767 while (*pp++ != '>');
1768 }
1769 }
1770 break;
1771
1772 case '\r': /* So that it works in Windows */
1773 case '\n':
1774 case ' ':
1775 break;
1776
1777 default:
1778 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1779 goto SKIP_DATA;
1780 }
1781 }
1782
1783 /* Handle compiling via the POSIX interface, which doesn't support the
1784 timing, showing, or debugging options, nor the ability to pass over
1785 local character tables. */
1786
1787 #if !defined NOPOSIX
1788 if (posix || do_posix)
1789 {
1790 int rc;
1791 int cflags = 0;
1792
1793 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1794 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1795 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1796 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1797 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1798 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1799 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1800
1801 rc = regcomp(&preg, (char *)p, cflags);
1802
1803 /* Compilation failed; go back for another re, skipping to blank line
1804 if non-interactive. */
1805
1806 if (rc != 0)
1807 {
1808 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1809 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1810 goto SKIP_DATA;
1811 }
1812 }
1813
1814 /* Handle compiling via the native interface */
1815
1816 else
1817 #endif /* !defined NOPOSIX */
1818
1819 {
1820 unsigned long int get_options;
1821
1822 if (timeit > 0)
1823 {
1824 register int i;
1825 clock_t time_taken;
1826 clock_t start_time = clock();
1827 for (i = 0; i < timeit; i++)
1828 {
1829 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1830 if (re != NULL) free(re);
1831 }
1832 time_taken = clock() - start_time;
1833 fprintf(outfile, "Compile time %.4f milliseconds\n",
1834 (((double)time_taken * 1000.0) / (double)timeit) /
1835 (double)CLOCKS_PER_SEC);
1836 }
1837
1838 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1839
1840 /* Compilation failed; go back for another re, skipping to blank line
1841 if non-interactive. */
1842
1843 if (re == NULL)
1844 {
1845 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1846 SKIP_DATA:
1847 if (infile != stdin)
1848 {
1849 for (;;)
1850 {
1851 if (extend_inputline(infile, buffer, NULL) == NULL)
1852 {
1853 done = 1;
1854 goto CONTINUE;
1855 }
1856 len = (int)strlen((char *)buffer);
1857 while (len > 0 && isspace(buffer[len-1])) len--;
1858 if (len == 0) break;
1859 }
1860 fprintf(outfile, "\n");
1861 }
1862 goto CONTINUE;
1863 }
1864
1865 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1866 within the regex; check for this so that we know how to process the data
1867 lines. */
1868
1869 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1870 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1871
1872 /* Print information if required. There are now two info-returning
1873 functions. The old one has a limited interface and returns only limited
1874 data. Check that it agrees with the newer one. */
1875
1876 if (log_store)
1877 fprintf(outfile, "Memory allocation (code space): %d\n",
1878 (int)(gotten_store -
1879 sizeof(real_pcre) -
1880 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1881
1882 /* Extract the size for possible writing before possibly flipping it,
1883 and remember the store that was got. */
1884
1885 true_size = ((real_pcre *)re)->size;
1886 regex_gotten_store = gotten_store;
1887
1888 /* If -s or /S was present, study the regex to generate additional info to
1889 help with the matching, unless the pattern has the SS option, which
1890 suppresses the effect of /S (used for a few test patterns where studying is
1891 never sensible). */
1892
1893 if (do_study || (force_study >= 0 && !no_force_study))
1894 {
1895 if (timeit > 0)
1896 {
1897 register int i;
1898 clock_t time_taken;
1899 clock_t start_time = clock();
1900 for (i = 0; i < timeit; i++)
1901 extra = pcre_study(re, study_options | force_study_options, &error);
1902 time_taken = clock() - start_time;
1903 if (extra != NULL) pcre_free_study(extra);
1904 fprintf(outfile, " Study time %.4f milliseconds\n",
1905 (((double)time_taken * 1000.0) / (double)timeit) /
1906 (double)CLOCKS_PER_SEC);
1907 }
1908 extra = pcre_study(re, study_options | force_study_options, &error);
1909 if (error != NULL)
1910 fprintf(outfile, "Failed to study: %s\n", error);
1911 else if (extra != NULL)
1912 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1913 }
1914
1915 /* If /K was present, we set up for handling MARK data. */
1916
1917 if (do_mark)
1918 {
1919 if (extra == NULL)
1920 {
1921 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1922 extra->flags = 0;
1923 }
1924 extra->mark = &markptr;
1925 extra->flags |= PCRE_EXTRA_MARK;
1926 }
1927
1928 /* If the 'F' option was present, we flip the bytes of all the integer
1929 fields in the regex data block and the study block. This is to make it
1930 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1931 compiled on a different architecture. */
1932
1933 if (do_flip)
1934 {
1935 real_pcre *rre = (real_pcre *)re;
1936 rre->magic_number =
1937 byteflip(rre->magic_number, sizeof(rre->magic_number));
1938 rre->size = byteflip(rre->size, sizeof(rre->size));
1939 rre->options = byteflip(rre->options, sizeof(rre->options));
1940 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1941 rre->top_bracket =
1942 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1943 rre->top_backref =
1944 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1945 rre->first_byte =
1946 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1947 rre->req_byte =
1948 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1949 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1950 sizeof(rre->name_table_offset));
1951 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1952 sizeof(rre->name_entry_size));
1953 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1954 sizeof(rre->name_count));
1955
1956 if (extra != NULL)
1957 {
1958 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1959 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1960 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1961 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1962 }
1963 }
1964
1965 /* Extract information from the compiled data if required */
1966
1967 SHOW_INFO:
1968
1969 if (do_debug)
1970 {
1971 fprintf(outfile, "------------------------------------------------------------------\n");
1972 pcre_printint(re, outfile, debug_lengths);
1973 }
1974
1975 /* We already have the options in get_options (see above) */
1976
1977 if (do_showinfo)
1978 {
1979 unsigned long int all_options;
1980 #if !defined NOINFOCHECK
1981 int old_first_char, old_options, old_count;
1982 #endif
1983 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1984 hascrorlf;
1985 int nameentrysize, namecount;
1986 const uschar *nametable;
1987
1988 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1989 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1990 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1991 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1992 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1993 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1994 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1995 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1996 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1997 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1998 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1999
2000 #if !defined NOINFOCHECK
2001 old_count = pcre_info(re, &old_options, &old_first_char);
2002 if (count < 0) fprintf(outfile,
2003 "Error %d from pcre_info()\n", count);
2004 else
2005 {
2006 if (old_count != count) fprintf(outfile,
2007 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2008 old_count);
2009
2010 if (old_first_char != first_char) fprintf(outfile,
2011 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2012 first_char, old_first_char);
2013
2014 if (old_options != (int)get_options) fprintf(outfile,
2015 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2016 get_options, old_options);
2017 }
2018 #endif
2019
2020 if (size != regex_gotten_store) fprintf(outfile,
2021 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2022 (int)size, (int)regex_gotten_store);
2023
2024 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2025 if (backrefmax > 0)
2026 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2027
2028 if (namecount > 0)
2029 {
2030 fprintf(outfile, "Named capturing subpatterns:\n");
2031 while (namecount-- > 0)
2032 {
2033 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2034 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2035 GET2(nametable, 0));
2036 nametable += nameentrysize;
2037 }
2038 }
2039
2040 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2041 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2042
2043 all_options = ((real_pcre *)re)->options;
2044 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2045
2046 if (get_options == 0) fprintf(outfile, "No options\n");
2047 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2048 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2049 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2050 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2051 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2052 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2053 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2054 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2055 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2056 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2057 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2058 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2059 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2060 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2061 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2062 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2063 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2064 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2065
2066 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2067
2068 switch (get_options & PCRE_NEWLINE_BITS)
2069 {
2070 case PCRE_NEWLINE_CR:
2071 fprintf(outfile, "Forced newline sequence: CR\n");
2072 break;
2073
2074 case PCRE_NEWLINE_LF:
2075 fprintf(outfile, "Forced newline sequence: LF\n");
2076 break;
2077
2078 case PCRE_NEWLINE_CRLF:
2079 fprintf(outfile, "Forced newline sequence: CRLF\n");
2080 break;
2081
2082 case PCRE_NEWLINE_ANYCRLF:
2083 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2084 break;
2085
2086 case PCRE_NEWLINE_ANY:
2087 fprintf(outfile, "Forced newline sequence: ANY\n");
2088 break;
2089
2090 default:
2091 break;
2092 }
2093
2094 if (first_char == -1)
2095 {
2096 fprintf(outfile, "First char at start or follows newline\n");
2097 }
2098 else if (first_char < 0)
2099 {
2100 fprintf(outfile, "No first char\n");
2101 }
2102 else
2103 {
2104 int ch = first_char & 255;
2105 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2106 "" : " (caseless)";
2107 if (PRINTHEX(ch))
2108 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2109 else
2110 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2111 }
2112
2113 if (need_char < 0)
2114 {
2115 fprintf(outfile, "No need char\n");
2116 }
2117 else
2118 {
2119 int ch = need_char & 255;
2120 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2121 "" : " (caseless)";
2122 if (PRINTHEX(ch))
2123 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2124 else
2125 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2126 }
2127
2128 /* Don't output study size; at present it is in any case a fixed
2129 value, but it varies, depending on the computer architecture, and
2130 so messes up the test suite. (And with the /F option, it might be
2131 flipped.) If study was forced by an external -s, don't show this
2132 information unless -i or -d was also present. This means that, except
2133 when auto-callouts are involved, the output from runs with and without
2134 -s should be identical. */
2135
2136 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2137 {
2138 if (extra == NULL)
2139 fprintf(outfile, "Study returned NULL\n");
2140 else
2141 {
2142 uschar *start_bits = NULL;
2143 int minlength;
2144
2145 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2146 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2147
2148 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2149 if (start_bits == NULL)
2150 fprintf(outfile, "No set of starting bytes\n");
2151 else
2152 {
2153 int i;
2154 int c = 24;
2155 fprintf(outfile, "Starting byte set: ");
2156 for (i = 0; i < 256; i++)
2157 {
2158 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2159 {
2160 if (c > 75)
2161 {
2162 fprintf(outfile, "\n ");
2163 c = 2;
2164 }
2165 if (PRINTHEX(i) && i != ' ')
2166 {
2167 fprintf(outfile, "%c ", i);
2168 c += 2;
2169 }
2170 else
2171 {
2172 fprintf(outfile, "\\x%02x ", i);
2173 c += 5;
2174 }
2175 }
2176 }
2177 fprintf(outfile, "\n");
2178 }
2179 }
2180
2181 /* Show this only if the JIT was set by /S, not by -s. */
2182
2183 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2184 {
2185 int jit;
2186 new_info(re, extra, PCRE_INFO_JIT, &jit);
2187 if (jit)
2188 fprintf(outfile, "JIT study was successful\n");
2189 else
2190 #ifdef SUPPORT_JIT
2191 fprintf(outfile, "JIT study was not successful\n");
2192 #else
2193 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2194 #endif
2195 }
2196 }
2197 }
2198
2199 /* If the '>' option was present, we write out the regex to a file, and
2200 that is all. The first 8 bytes of the file are the regex length and then
2201 the study length, in big-endian order. */
2202
2203 if (to_file != NULL)
2204 {
2205 FILE *f = fopen((char *)to_file, "wb");
2206 if (f == NULL)
2207 {
2208 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2209 }
2210 else
2211 {
2212 uschar sbuf[8];
2213 sbuf[0] = (uschar)((true_size >> 24) & 255);
2214 sbuf[1] = (uschar)((true_size >> 16) & 255);
2215 sbuf[2] = (uschar)((true_size >> 8) & 255);
2216 sbuf[3] = (uschar)((true_size) & 255);
2217
2218 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2219 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2220 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2221 sbuf[7] = (uschar)((true_study_size) & 255);
2222
2223 if (fwrite(sbuf, 1, 8, f) < 8 ||
2224 fwrite(re, 1, true_size, f) < true_size)
2225 {
2226 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2227 }
2228 else
2229 {
2230 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2231
2232 /* If there is study data, write it. */
2233
2234 if (extra != NULL)
2235 {
2236 if (fwrite(extra->study_data, 1, true_study_size, f) <
2237 true_study_size)
2238 {
2239 fprintf(outfile, "Write error on %s: %s\n", to_file,
2240 strerror(errno));
2241 }
2242 else fprintf(outfile, "Study data written to %s\n", to_file);
2243 }
2244 }
2245 fclose(f);
2246 }
2247
2248 new_free(re);
2249 if (extra != NULL) pcre_free_study(extra);
2250 if (locale_set)
2251 {
2252 new_free((void *)tables);
2253 setlocale(LC_CTYPE, "C");
2254 locale_set = 0;
2255 }
2256 continue; /* With next regex */
2257 }
2258 } /* End of non-POSIX compile */
2259
2260 /* Read data lines and test them */
2261
2262 for (;;)
2263 {
2264 uschar *q;
2265 uschar *bptr;
2266 int *use_offsets = offsets;
2267 int use_size_offsets = size_offsets;
2268 int callout_data = 0;
2269 int callout_data_set = 0;
2270 int count, c;
2271 int copystrings = 0;
2272 int find_match_limit = default_find_match_limit;
2273 int getstrings = 0;
2274 int getlist = 0;
2275 int gmatched = 0;
2276 int start_offset = 0;
2277 int start_offset_sign = 1;
2278 int g_notempty = 0;
2279 int use_dfa = 0;
2280
2281 options = 0;
2282
2283 *copynames = 0;
2284 *getnames = 0;
2285
2286 copynamesptr = copynames;
2287 getnamesptr = getnames;
2288
2289 pcre_callout = callout;
2290 first_callout = 1;
2291 last_callout_mark = NULL;
2292 callout_extra = 0;
2293 callout_count = 0;
2294 callout_fail_count = 999999;
2295 callout_fail_id = -1;
2296 show_malloc = 0;
2297
2298 if (extra != NULL) extra->flags &=
2299 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2300
2301 len = 0;
2302 for (;;)
2303 {
2304 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2305 {
2306 if (len > 0) /* Reached EOF without hitting a newline */
2307 {
2308 fprintf(outfile, "\n");
2309 break;
2310 }
2311 done = 1;
2312 goto CONTINUE;
2313 }
2314 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2315 len = (int)strlen((char *)buffer);
2316 if (buffer[len-1] == '\n') break;
2317 }
2318
2319 while (len > 0 && isspace(buffer[len-1])) len--;
2320 buffer[len] = 0;
2321 if (len == 0) break;
2322
2323 p = buffer;
2324 while (isspace(*p)) p++;
2325
2326 bptr = q = dbuffer;
2327 while ((c = *p++) != 0)
2328 {
2329 int i = 0;
2330 int n = 0;
2331
2332 if (c == '\\') switch ((c = *p++))
2333 {
2334 case 'a': c = 7; break;
2335 case 'b': c = '\b'; break;
2336 case 'e': c = 27; break;
2337 case 'f': c = '\f'; break;
2338 case 'n': c = '\n'; break;
2339 case 'r': c = '\r'; break;
2340 case 't': c = '\t'; break;
2341 case 'v': c = '\v'; break;
2342
2343 case '0': case '1': case '2': case '3':
2344 case '4': case '5': case '6': case '7':
2345 c -= '0';
2346 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2347 c = c * 8 + *p++ - '0';
2348
2349 #if !defined NOUTF8
2350 if (use_utf8 && c > 255)
2351 {
2352 unsigned char buff8[8];
2353 int ii, utn;
2354 utn = ord2utf8(c, buff8);
2355 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2356 c = buff8[ii]; /* Last byte */
2357 }
2358 #endif
2359 break;
2360
2361 case 'x':
2362
2363 /* Handle \x{..} specially - new Perl thing for utf8 */
2364
2365 #if !defined NOUTF8
2366 if (*p == '{')
2367 {
2368 unsigned char *pt = p;
2369 c = 0;
2370 while (isxdigit(*(++pt)))
2371 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2372 if (*pt == '}')
2373 {
2374 unsigned char buff8[8];
2375 int ii, utn;
2376 if (use_utf8)
2377 {
2378 utn = ord2utf8(c, buff8);
2379 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2380 c = buff8[ii]; /* Last byte */
2381 }
2382 else
2383 {
2384 if (c > 255)
2385 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2386 "UTF-8 mode is not enabled.\n"
2387 "** Truncation will probably give the wrong result.\n", c);
2388 }
2389 p = pt + 1;
2390 break;
2391 }
2392 /* Not correct form; fall through */
2393 }
2394 #endif
2395
2396 /* Ordinary \x */
2397
2398 c = 0;
2399 while (i++ < 2 && isxdigit(*p))
2400 {
2401 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2402 p++;
2403 }
2404 break;
2405
2406 case 0: /* \ followed by EOF allows for an empty line */
2407 p--;
2408 continue;
2409
2410 case '>':
2411 if (*p == '-')
2412 {
2413 start_offset_sign = -1;
2414 p++;
2415 }
2416 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2417 start_offset *= start_offset_sign;
2418 continue;
2419
2420 case 'A': /* Option setting */
2421 options |= PCRE_ANCHORED;
2422 continue;
2423
2424 case 'B':
2425 options |= PCRE_NOTBOL;
2426 continue;
2427
2428 case 'C':
2429 if (isdigit(*p)) /* Set copy string */
2430 {
2431 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2432 copystrings |= 1 << n;
2433 }
2434 else if (isalnum(*p))
2435 {
2436 uschar *npp = copynamesptr;
2437 while (isalnum(*p)) *npp++ = *p++;
2438 *npp++ = 0;
2439 *npp = 0;
2440 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2441 if (n < 0)
2442 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2443 copynamesptr = npp;
2444 }
2445 else if (*p == '+')
2446 {
2447 callout_extra = 1;
2448 p++;
2449 }
2450 else if (*p == '-')
2451 {
2452 pcre_callout = NULL;
2453 p++;
2454 }
2455 else if (*p == '!')
2456 {
2457 callout_fail_id = 0;
2458 p++;
2459 while(isdigit(*p))
2460 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2461 callout_fail_count = 0;
2462 if (*p == '!')
2463 {
2464 p++;
2465 while(isdigit(*p))
2466 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2467 }
2468 }
2469 else if (*p == '*')
2470 {
2471 int sign = 1;
2472 callout_data = 0;
2473 if (*(++p) == '-') { sign = -1; p++; }
2474 while(isdigit(*p))
2475 callout_data = callout_data * 10 + *p++ - '0';
2476 callout_data *= sign;
2477 callout_data_set = 1;
2478 }
2479 continue;
2480
2481 #if !defined NODFA
2482 case 'D':
2483 #if !defined NOPOSIX
2484 if (posix || do_posix)
2485 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2486 else
2487 #endif
2488 use_dfa = 1;
2489 continue;
2490 #endif
2491
2492 #if !defined NODFA
2493 case 'F':
2494 options |= PCRE_DFA_SHORTEST;
2495 continue;
2496 #endif
2497
2498 case 'G':
2499 if (isdigit(*p))
2500 {
2501 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2502 getstrings |= 1 << n;
2503 }
2504 else if (isalnum(*p))
2505 {
2506 uschar *npp = getnamesptr;
2507 while (isalnum(*p)) *npp++ = *p++;
2508 *npp++ = 0;
2509 *npp = 0;
2510 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2511 if (n < 0)
2512 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2513 getnamesptr = npp;
2514 }
2515 continue;
2516
2517 case 'J':
2518 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2519 if (extra != NULL
2520 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2521 && extra->executable_jit != NULL)
2522 {
2523 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2524 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2525 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2526 }
2527 continue;
2528
2529 case 'L':
2530 getlist = 1;
2531 continue;
2532
2533 case 'M':
2534 find_match_limit = 1;
2535 continue;
2536
2537 case 'N':
2538 if ((options & PCRE_NOTEMPTY) != 0)
2539 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2540 else
2541 options |= PCRE_NOTEMPTY;
2542 continue;
2543
2544 case 'O':
2545 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2546 if (n > size_offsets_max)
2547 {
2548 size_offsets_max = n;
2549 free(offsets);
2550 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2551 if (offsets == NULL)
2552 {
2553 printf("** Failed to get %d bytes of memory for offsets vector\n",
2554 (int)(size_offsets_max * sizeof(int)));
2555 yield = 1;
2556 goto EXIT;
2557 }
2558 }
2559 use_size_offsets = n;
2560 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2561 continue;
2562
2563 case 'P':
2564 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2565 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2566 continue;
2567
2568 case 'Q':
2569 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2570 if (extra == NULL)
2571 {
2572 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2573 extra->flags = 0;
2574 }
2575 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2576 extra->match_limit_recursion = n;
2577 continue;
2578
2579 case 'q':
2580 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2581 if (extra == NULL)
2582 {
2583 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2584 extra->flags = 0;
2585 }
2586 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2587 extra->match_limit = n;
2588 continue;
2589
2590 #if !defined NODFA
2591 case 'R':
2592 options |= PCRE_DFA_RESTART;
2593 continue;
2594 #endif
2595
2596 case 'S':
2597 show_malloc = 1;
2598 continue;
2599
2600 case 'Y':
2601 options |= PCRE_NO_START_OPTIMIZE;
2602 continue;
2603
2604 case 'Z':
2605 options |= PCRE_NOTEOL;
2606 continue;
2607
2608 case '?':
2609 options |= PCRE_NO_UTF8_CHECK;
2610 continue;
2611
2612 case '<':
2613 {
2614 int x = check_newline(p, outfile);
2615 if (x == 0) goto NEXT_DATA;
2616 options |= x;
2617 while (*p++ != '>');
2618 }
2619 continue;
2620 }
2621 *q++ = c;
2622 }
2623 *q = 0;
2624 len = (int)(q - dbuffer);
2625
2626 /* Move the data to the end of the buffer so that a read over the end of
2627 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2628 we are using the POSIX interface, we must include the terminating zero. */
2629
2630 #if !defined NOPOSIX
2631 if (posix || do_posix)
2632 {
2633 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2634 bptr += buffer_size - len - 1;
2635 }
2636 else
2637 #endif
2638 {
2639 memmove(bptr + buffer_size - len, bptr, len);
2640 bptr += buffer_size - len;
2641 }
2642
2643 if ((all_use_dfa || use_dfa) && find_match_limit)
2644 {
2645 printf("**Match limit not relevant for DFA matching: ignored\n");
2646 find_match_limit = 0;
2647 }
2648
2649 /* Handle matching via the POSIX interface, which does not
2650 support timing or playing with the match limit or callout data. */
2651
2652 #if !defined NOPOSIX
2653 if (posix || do_posix)
2654 {
2655 int rc;
2656 int eflags = 0;
2657 regmatch_t *pmatch = NULL;
2658 if (use_size_offsets > 0)
2659 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2660 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2661 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2662 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2663
2664 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2665
2666 if (rc != 0)
2667 {
2668 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2669 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2670 }
2671 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2672 != 0)
2673 {
2674 fprintf(outfile, "Matched with REG_NOSUB\n");
2675 }
2676 else
2677 {
2678 size_t i;
2679 for (i = 0; i < (size_t)use_size_offsets; i++)
2680 {
2681 if (pmatch[i].rm_so >= 0)
2682 {
2683 fprintf(outfile, "%2d: ", (int)i);
2684 (void)pchars(dbuffer + pmatch[i].rm_so,
2685 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2686 fprintf(outfile, "\n");
2687 if (do_showcaprest || (i == 0 && do_showrest))
2688 {
2689 fprintf(outfile, "%2d+ ", (int)i);
2690 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2691 outfile);
2692 fprintf(outfile, "\n");
2693 }
2694 }
2695 }
2696 }
2697 free(pmatch);
2698 }
2699
2700 /* Handle matching via the native interface - repeats for /g and /G */
2701
2702 else
2703 #endif /* !defined NOPOSIX */
2704
2705 for (;; gmatched++) /* Loop for /g or /G */
2706 {
2707 markptr = NULL;
2708
2709 if (timeitm > 0)
2710 {
2711 register int i;
2712 clock_t time_taken;
2713 clock_t start_time = clock();
2714
2715 #if !defined NODFA
2716 if (all_use_dfa || use_dfa)
2717 {
2718 int workspace[1000];
2719 for (i = 0; i < timeitm; i++)
2720 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2721 options | g_notempty, use_offsets, use_size_offsets, workspace,
2722 sizeof(workspace)/sizeof(int));
2723 }
2724 else
2725 #endif
2726
2727 for (i = 0; i < timeitm; i++)
2728 count = pcre_exec(re, extra, (char *)bptr, len,
2729 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2730
2731 time_taken = clock() - start_time;
2732 fprintf(outfile, "Execute time %.4f milliseconds\n",
2733 (((double)time_taken * 1000.0) / (double)timeitm) /
2734 (double)CLOCKS_PER_SEC);
2735 }
2736
2737 /* If find_match_limit is set, we want to do repeated matches with
2738 varying limits in order to find the minimum value for the match limit and
2739 for the recursion limit. The match limits are relevant only to the normal
2740 running of pcre_exec(), so disable the JIT optimization. This makes it
2741 possible to run the same set of tests with and without JIT externally
2742 requested. */
2743
2744 if (find_match_limit)
2745 {
2746 if (extra == NULL)
2747 {
2748 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2749 extra->flags = 0;
2750 }
2751 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2752
2753 (void)check_match_limit(re, extra, bptr, len, start_offset,
2754 options|g_notempty, use_offsets, use_size_offsets,
2755 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2756 PCRE_ERROR_MATCHLIMIT, "match()");
2757
2758 count = check_match_limit(re, extra, bptr, len, start_offset,
2759 options|g_notempty, use_offsets, use_size_offsets,
2760 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2761 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2762 }
2763
2764 /* If callout_data is set, use the interface with additional data */
2765
2766 else if (callout_data_set)
2767 {
2768 if (extra == NULL)
2769 {
2770 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2771 extra->flags = 0;
2772 }
2773 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2774 extra->callout_data = &callout_data;
2775 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2776 options | g_notempty, use_offsets, use_size_offsets);
2777 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2778 }
2779
2780 /* The normal case is just to do the match once, with the default
2781 value of match_limit. */
2782
2783 #if !defined NODFA
2784 else if (all_use_dfa || use_dfa)
2785 {
2786 int workspace[1000];
2787 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2788 options | g_notempty, use_offsets, use_size_offsets, workspace,
2789 sizeof(workspace)/sizeof(int));
2790 if (count == 0)
2791 {
2792 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2793 count = use_size_offsets/2;
2794 }
2795 }
2796 #endif
2797
2798 else
2799 {
2800 count = pcre_exec(re, extra, (char *)bptr, len,
2801 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2802 if (count == 0)
2803 {
2804 fprintf(outfile, "Matched, but too many substrings\n");
2805 count = use_size_offsets/3;
2806 }
2807 }
2808
2809 /* Matched */
2810
2811 if (count >= 0)
2812 {
2813 int i, maxcount;
2814
2815 #if !defined NODFA
2816 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2817 #endif
2818 maxcount = use_size_offsets/3;
2819
2820 /* This is a check against a lunatic return value. */
2821
2822 if (count > maxcount)
2823 {
2824 fprintf(outfile,
2825 "** PCRE error: returned count %d is too big for offset size %d\n",
2826 count, use_size_offsets);
2827 count = use_size_offsets/3;
2828 if (do_g || do_G)
2829 {
2830 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2831 do_g = do_G = FALSE; /* Break g/G loop */
2832 }
2833 }
2834
2835 /* do_allcaps requests showing of all captures in the pattern, to check
2836 unset ones at the end. */
2837
2838 if (do_allcaps)
2839 {
2840 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2841 count++; /* Allow for full match */
2842 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2843 }
2844
2845 /* Output the captured substrings */
2846
2847 for (i = 0; i < count * 2; i += 2)
2848 {
2849 if (use_offsets[i] < 0)
2850 {
2851 if (use_offsets[i] != -1)
2852 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2853 use_offsets[i], i);
2854 if (use_offsets[i+1] != -1)
2855 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2856 use_offsets[i+1], i+1);
2857 fprintf(outfile, "%2d: <unset>\n", i/2);
2858 }
2859 else
2860 {
2861 fprintf(outfile, "%2d: ", i/2);
2862 (void)pchars(bptr + use_offsets[i],
2863 use_offsets[i+1] - use_offsets[i], outfile);
2864 fprintf(outfile, "\n");
2865 if (do_showcaprest || (i == 0 && do_showrest))
2866 {
2867 fprintf(outfile, "%2d+ ", i/2);
2868 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2869 outfile);
2870 fprintf(outfile, "\n");
2871 }
2872 }
2873 }
2874
2875 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2876
2877 for (i = 0; i < 32; i++)
2878 {
2879 if ((copystrings & (1 << i)) != 0)
2880 {
2881 char copybuffer[256];
2882 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2883 i, copybuffer, sizeof(copybuffer));
2884 if (rc < 0)
2885 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2886 else
2887 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2888 }
2889 }
2890
2891 for (copynamesptr = copynames;
2892 *copynamesptr != 0;
2893 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2894 {
2895 char copybuffer[256];
2896 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2897 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2898 if (rc < 0)
2899 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2900 else
2901 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2902 }
2903
2904 for (i = 0; i < 32; i++)
2905 {
2906 if ((getstrings & (1 << i)) != 0)
2907 {
2908 const char *substring;
2909 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2910 i, &substring);
2911 if (rc < 0)
2912 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2913 else
2914 {
2915 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2916 pcre_free_substring(substring);
2917 }
2918 }
2919 }
2920
2921 for (getnamesptr = getnames;
2922 *getnamesptr != 0;
2923 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2924 {
2925 const char *substring;
2926 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2927 count, (char *)getnamesptr, &substring);
2928 if (rc < 0)
2929 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2930 else
2931 {
2932 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2933 pcre_free_substring(substring);
2934 }
2935 }
2936
2937 if (getlist)
2938 {
2939 const char **stringlist;
2940 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2941 &stringlist);
2942 if (rc < 0)
2943 fprintf(outfile, "get substring list failed %d\n", rc);
2944 else
2945 {
2946 for (i = 0; i < count; i++)
2947 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2948 if (stringlist[i] != NULL)
2949 fprintf(outfile, "string list not terminated by NULL\n");
2950 pcre_free_substring_list(stringlist);
2951 }
2952 }
2953 }
2954
2955 /* There was a partial match */
2956
2957 else if (count == PCRE_ERROR_PARTIAL)
2958 {
2959 if (markptr == NULL) fprintf(outfile, "Partial match");
2960 else fprintf(outfile, "Partial match, mark=%s", markptr);
2961 if (use_size_offsets > 1)
2962 {
2963 fprintf(outfile, ": ");
2964 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2965 outfile);
2966 }
2967 fprintf(outfile, "\n");
2968 break; /* Out of the /g loop */
2969 }
2970
2971 /* Failed to match. If this is a /g or /G loop and we previously set
2972 g_notempty after a null match, this is not necessarily the end. We want
2973 to advance the start offset, and continue. We won't be at the end of the
2974 string - that was checked before setting g_notempty.
2975
2976 Complication arises in the case when the newline convention is "any",
2977 "crlf", or "anycrlf". If the previous match was at the end of a line
2978 terminated by CRLF, an advance of one character just passes the \r,
2979 whereas we should prefer the longer newline sequence, as does the code in
2980 pcre_exec(). Fudge the offset value to achieve this. We check for a
2981 newline setting in the pattern; if none was set, use pcre_config() to
2982 find the default.
2983
2984 Otherwise, in the case of UTF-8 matching, the advance must be one
2985 character, not one byte. */
2986
2987 else
2988 {
2989 if (g_notempty != 0)
2990 {
2991 int onechar = 1;
2992 unsigned int obits = ((real_pcre *)re)->options;
2993 use_offsets[0] = start_offset;
2994 if ((obits & PCRE_NEWLINE_BITS) == 0)
2995 {
2996 int d;
2997 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2998 /* Note that these values are always the ASCII ones, even in
2999 EBCDIC environments. CR = 13, NL = 10. */
3000 obits = (d == 13)? PCRE_NEWLINE_CR :
3001 (d == 10)? PCRE_NEWLINE_LF :
3002 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3003 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3004 (d == -1)? PCRE_NEWLINE_ANY : 0;
3005 }
3006 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3007 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3008 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3009 &&
3010 start_offset < len - 1 &&
3011 bptr[start_offset] == '\r' &&
3012 bptr[start_offset+1] == '\n')
3013 onechar++;
3014 else if (use_utf8)
3015 {
3016 while (start_offset + onechar < len)
3017 {
3018 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3019 onechar++;
3020 }
3021 }
3022 use_offsets[1] = start_offset + onechar;
3023 }
3024 else
3025 {
3026 switch(count)
3027 {
3028 case PCRE_ERROR_NOMATCH:
3029 if (gmatched == 0)
3030 {
3031 if (markptr == NULL) fprintf(outfile, "No match\n");
3032 else fprintf(outfile, "No match, mark = %s\n", markptr);
3033 }
3034 break;
3035
3036 case PCRE_ERROR_BADUTF8:
3037 case PCRE_ERROR_SHORTUTF8:
3038 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3039 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3040 if (use_size_offsets >= 2)
3041 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3042 use_offsets[1]);
3043 fprintf(outfile, "\n");
3044 break;
3045
3046 default:
3047 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3048 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3049 else
3050 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3051 break;
3052 }
3053
3054 break; /* Out of the /g loop */
3055 }
3056 }
3057
3058 /* If not /g or /G we are done */
3059
3060 if (!do_g && !do_G) break;
3061
3062 /* If we have matched an empty string, first check to see if we are at
3063 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3064 Perl's /g option does. This turns out to be rather cunning. First we set
3065 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3066 same point. If this fails (picked up above) we advance to the next
3067 character. */
3068
3069 g_notempty = 0;
3070
3071 if (use_offsets[0] == use_offsets[1])
3072 {
3073 if (use_offsets[0] == len) break;
3074 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3075 }
3076
3077 /* For /g, update the start offset, leaving the rest alone */
3078
3079 if (do_g) start_offset = use_offsets[1];
3080
3081 /* For /G, update the pointer and length */
3082
3083 else
3084 {
3085 bptr += use_offsets[1];
3086 len -= use_offsets[1];
3087 }
3088 } /* End of loop for /g and /G */
3089
3090 NEXT_DATA: continue;
3091 } /* End of loop for data lines */
3092
3093 CONTINUE:
3094
3095 #if !defined NOPOSIX
3096 if (posix || do_posix) regfree(&preg);
3097 #endif
3098
3099 if (re != NULL) new_free(re);
3100 if (extra != NULL) pcre_free_study(extra);
3101 if (locale_set)
3102 {
3103 new_free((void *)tables);
3104 setlocale(LC_CTYPE, "C");
3105 locale_set = 0;
3106 }
3107 if (jit_stack != NULL)
3108 {
3109 pcre_jit_stack_free(jit_stack);
3110 jit_stack = NULL;
3111 }
3112 }
3113
3114 if (infile == stdin) fprintf(outfile, "\n");
3115
3116 EXIT:
3117
3118 if (infile != NULL && infile != stdin) fclose(infile);
3119 if (outfile != NULL && outfile != stdout) fclose(outfile);
3120
3121 free(buffer);
3122 free(dbuffer);
3123 free(pbuffer);
3124 free(offsets);
3125
3126 return yield;
3127 }
3128
3129 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5