/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 667 - (show annotations)
Mon Aug 22 14:57:32 2011 UTC (4 years ago) by ph10
File MIME type: text/plain
File size: 93548 byte(s)
Error occurred while calculating annotation data.
Commit all the changes for JIT support, but without any documentation yet.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utf8_char_sizes utf8_char_sizes
122 #define _pcre_utt utt
123 #define _pcre_utt_size utt_size
124 #define _pcre_utt_names utt_names
125 #define _pcre_OP_lengths OP_lengths
126
127 #include "pcre_tables.c"
128
129 /* We also need the pcre_printint() function for printing out compiled
130 patterns. This function is in a separate file so that it can be included in
131 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 know which case is being compiled. */
133
134 #define COMPILING_PCRETEST
135 #include "pcre_printint.src"
136
137 /* The definition of the macro PRINTABLE, which determines whether to print an
138 output character as-is or as a hex value when showing compiled patterns, is
139 contained in the printint.src file. We use it here also, in cases when the
140 locale has not been explicitly changed, so as to get consistent output from
141 systems that differ in their output from isprint() even in the "C" locale. */
142
143 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144
145 /* It is possible to compile this test program without including support for
146 testing the POSIX interface, though this is not available via the standard
147 Makefile. */
148
149 #if !defined NOPOSIX
150 #include "pcreposix.h"
151 #endif
152
153 /* It is also possible, for the benefit of the version currently imported into
154 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155 interface to the DFA matcher (NODFA), and without the doublecheck of the old
156 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157 UTF8 support if PCRE is built without it. */
158
159 #ifndef SUPPORT_UTF8
160 #ifndef NOUTF8
161 #define NOUTF8
162 #endif
163 #endif
164
165
166 /* Other parameters */
167
168 #ifndef CLOCKS_PER_SEC
169 #ifdef CLK_TCK
170 #define CLOCKS_PER_SEC CLK_TCK
171 #else
172 #define CLOCKS_PER_SEC 100
173 #endif
174 #endif
175
176 /* This is the default loop count for timing. */
177
178 #define LOOPREPEAT 500000
179
180 /* Static variables */
181
182 static FILE *outfile;
183 static int log_store = 0;
184 static int callout_count;
185 static int callout_extra;
186 static int callout_fail_count;
187 static int callout_fail_id;
188 static int debug_lengths;
189 static int first_callout;
190 static int locale_set = 0;
191 static int show_malloc;
192 static int use_utf8;
193 static size_t gotten_store;
194 static const unsigned char *last_callout_mark = NULL;
195
196 /* The buffers grow automatically if very long input lines are encountered. */
197
198 static int buffer_size = 50000;
199 static uschar *buffer = NULL;
200 static uschar *dbuffer = NULL;
201 static uschar *pbuffer = NULL;
202
203 /* Textual explanations for runtime error codes */
204
205 static const char *errtexts[] = {
206 NULL, /* 0 is no error */
207 NULL, /* NOMATCH is handled specially */
208 "NULL argument passed",
209 "bad option value",
210 "magic number missing",
211 "unknown opcode - pattern overwritten?",
212 "no more memory",
213 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
214 "match limit exceeded",
215 "callout error code",
216 NULL, /* BADUTF8 is handled specially */
217 "bad UTF-8 offset",
218 NULL, /* PARTIAL is handled specially */
219 "not used - internal error",
220 "internal error - pattern overwritten?",
221 "bad count value",
222 "item unsupported for DFA matching",
223 "backreference condition or recursion test not supported for DFA matching",
224 "match limit not supported for DFA matching",
225 "workspace size exceeded in DFA matching",
226 "too much recursion for DFA matching",
227 "recursion limit exceeded",
228 "not used - internal error",
229 "invalid combination of newline options",
230 "bad offset value",
231 NULL, /* SHORTUTF8 is handled specially */
232 "nested recursion at the same subject position"
233 };
234
235
236 /*************************************************
237 * Alternate character tables *
238 *************************************************/
239
240 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
241 using the default tables of the library. However, the T option can be used to
242 select alternate sets of tables, for different kinds of testing. Note also that
243 the L (locale) option also adjusts the tables. */
244
245 /* This is the set of tables distributed as default with PCRE. It recognizes
246 only ASCII characters. */
247
248 static const unsigned char tables0[] = {
249
250 /* This table is a lower casing table. */
251
252 0, 1, 2, 3, 4, 5, 6, 7,
253 8, 9, 10, 11, 12, 13, 14, 15,
254 16, 17, 18, 19, 20, 21, 22, 23,
255 24, 25, 26, 27, 28, 29, 30, 31,
256 32, 33, 34, 35, 36, 37, 38, 39,
257 40, 41, 42, 43, 44, 45, 46, 47,
258 48, 49, 50, 51, 52, 53, 54, 55,
259 56, 57, 58, 59, 60, 61, 62, 63,
260 64, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122, 91, 92, 93, 94, 95,
264 96, 97, 98, 99,100,101,102,103,
265 104,105,106,107,108,109,110,111,
266 112,113,114,115,116,117,118,119,
267 120,121,122,123,124,125,126,127,
268 128,129,130,131,132,133,134,135,
269 136,137,138,139,140,141,142,143,
270 144,145,146,147,148,149,150,151,
271 152,153,154,155,156,157,158,159,
272 160,161,162,163,164,165,166,167,
273 168,169,170,171,172,173,174,175,
274 176,177,178,179,180,181,182,183,
275 184,185,186,187,188,189,190,191,
276 192,193,194,195,196,197,198,199,
277 200,201,202,203,204,205,206,207,
278 208,209,210,211,212,213,214,215,
279 216,217,218,219,220,221,222,223,
280 224,225,226,227,228,229,230,231,
281 232,233,234,235,236,237,238,239,
282 240,241,242,243,244,245,246,247,
283 248,249,250,251,252,253,254,255,
284
285 /* This table is a case flipping table. */
286
287 0, 1, 2, 3, 4, 5, 6, 7,
288 8, 9, 10, 11, 12, 13, 14, 15,
289 16, 17, 18, 19, 20, 21, 22, 23,
290 24, 25, 26, 27, 28, 29, 30, 31,
291 32, 33, 34, 35, 36, 37, 38, 39,
292 40, 41, 42, 43, 44, 45, 46, 47,
293 48, 49, 50, 51, 52, 53, 54, 55,
294 56, 57, 58, 59, 60, 61, 62, 63,
295 64, 97, 98, 99,100,101,102,103,
296 104,105,106,107,108,109,110,111,
297 112,113,114,115,116,117,118,119,
298 120,121,122, 91, 92, 93, 94, 95,
299 96, 65, 66, 67, 68, 69, 70, 71,
300 72, 73, 74, 75, 76, 77, 78, 79,
301 80, 81, 82, 83, 84, 85, 86, 87,
302 88, 89, 90,123,124,125,126,127,
303 128,129,130,131,132,133,134,135,
304 136,137,138,139,140,141,142,143,
305 144,145,146,147,148,149,150,151,
306 152,153,154,155,156,157,158,159,
307 160,161,162,163,164,165,166,167,
308 168,169,170,171,172,173,174,175,
309 176,177,178,179,180,181,182,183,
310 184,185,186,187,188,189,190,191,
311 192,193,194,195,196,197,198,199,
312 200,201,202,203,204,205,206,207,
313 208,209,210,211,212,213,214,215,
314 216,217,218,219,220,221,222,223,
315 224,225,226,227,228,229,230,231,
316 232,233,234,235,236,237,238,239,
317 240,241,242,243,244,245,246,247,
318 248,249,250,251,252,253,254,255,
319
320 /* This table contains bit maps for various character classes. Each map is 32
321 bytes long and the bits run from the least significant end of each byte. The
322 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
323 graph, print, punct, and cntrl. Other classes are built from combinations. */
324
325 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
326 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
327 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329
330 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
331 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334
335 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339
340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344
345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349
350 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
351 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
352 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354
355 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
356 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
357 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359
360 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
361 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
362 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364
365 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
366 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369
370 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374
375 /* This table identifies various classes of character by individual bits:
376 0x01 white space character
377 0x02 letter
378 0x04 decimal digit
379 0x08 hexadecimal digit
380 0x10 alphanumeric or '_'
381 0x80 regular expression metacharacter or binary zero
382 */
383
384 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
385 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
386 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
387 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
388 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
389 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
390 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
391 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
392 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
393 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
395 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
396 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
397 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
398 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
399 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
414 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
415 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
416
417 /* This is a set of tables that came originally from a Windows user. It seems to
418 be at least an approximation of ISO 8859. In particular, there are characters
419 greater than 128 that are marked as spaces, letters, etc. */
420
421 static const unsigned char tables1[] = {
422 0,1,2,3,4,5,6,7,
423 8,9,10,11,12,13,14,15,
424 16,17,18,19,20,21,22,23,
425 24,25,26,27,28,29,30,31,
426 32,33,34,35,36,37,38,39,
427 40,41,42,43,44,45,46,47,
428 48,49,50,51,52,53,54,55,
429 56,57,58,59,60,61,62,63,
430 64,97,98,99,100,101,102,103,
431 104,105,106,107,108,109,110,111,
432 112,113,114,115,116,117,118,119,
433 120,121,122,91,92,93,94,95,
434 96,97,98,99,100,101,102,103,
435 104,105,106,107,108,109,110,111,
436 112,113,114,115,116,117,118,119,
437 120,121,122,123,124,125,126,127,
438 128,129,130,131,132,133,134,135,
439 136,137,138,139,140,141,142,143,
440 144,145,146,147,148,149,150,151,
441 152,153,154,155,156,157,158,159,
442 160,161,162,163,164,165,166,167,
443 168,169,170,171,172,173,174,175,
444 176,177,178,179,180,181,182,183,
445 184,185,186,187,188,189,190,191,
446 224,225,226,227,228,229,230,231,
447 232,233,234,235,236,237,238,239,
448 240,241,242,243,244,245,246,215,
449 248,249,250,251,252,253,254,223,
450 224,225,226,227,228,229,230,231,
451 232,233,234,235,236,237,238,239,
452 240,241,242,243,244,245,246,247,
453 248,249,250,251,252,253,254,255,
454 0,1,2,3,4,5,6,7,
455 8,9,10,11,12,13,14,15,
456 16,17,18,19,20,21,22,23,
457 24,25,26,27,28,29,30,31,
458 32,33,34,35,36,37,38,39,
459 40,41,42,43,44,45,46,47,
460 48,49,50,51,52,53,54,55,
461 56,57,58,59,60,61,62,63,
462 64,97,98,99,100,101,102,103,
463 104,105,106,107,108,109,110,111,
464 112,113,114,115,116,117,118,119,
465 120,121,122,91,92,93,94,95,
466 96,65,66,67,68,69,70,71,
467 72,73,74,75,76,77,78,79,
468 80,81,82,83,84,85,86,87,
469 88,89,90,123,124,125,126,127,
470 128,129,130,131,132,133,134,135,
471 136,137,138,139,140,141,142,143,
472 144,145,146,147,148,149,150,151,
473 152,153,154,155,156,157,158,159,
474 160,161,162,163,164,165,166,167,
475 168,169,170,171,172,173,174,175,
476 176,177,178,179,180,181,182,183,
477 184,185,186,187,188,189,190,191,
478 224,225,226,227,228,229,230,231,
479 232,233,234,235,236,237,238,239,
480 240,241,242,243,244,245,246,215,
481 248,249,250,251,252,253,254,223,
482 192,193,194,195,196,197,198,199,
483 200,201,202,203,204,205,206,207,
484 208,209,210,211,212,213,214,247,
485 216,217,218,219,220,221,222,255,
486 0,62,0,0,1,0,0,0,
487 0,0,0,0,0,0,0,0,
488 32,0,0,0,1,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,255,3,
491 126,0,0,0,126,0,0,0,
492 0,0,0,0,0,0,0,0,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,255,3,
495 0,0,0,0,0,0,0,0,
496 0,0,0,0,0,0,12,2,
497 0,0,0,0,0,0,0,0,
498 0,0,0,0,0,0,0,0,
499 254,255,255,7,0,0,0,0,
500 0,0,0,0,0,0,0,0,
501 255,255,127,127,0,0,0,0,
502 0,0,0,0,0,0,0,0,
503 0,0,0,0,254,255,255,7,
504 0,0,0,0,0,4,32,4,
505 0,0,0,128,255,255,127,255,
506 0,0,0,0,0,0,255,3,
507 254,255,255,135,254,255,255,7,
508 0,0,0,0,0,4,44,6,
509 255,255,127,255,255,255,127,255,
510 0,0,0,0,254,255,255,255,
511 255,255,255,255,255,255,255,127,
512 0,0,0,0,254,255,255,255,
513 255,255,255,255,255,255,255,255,
514 0,2,0,0,255,255,255,255,
515 255,255,255,255,255,255,255,127,
516 0,0,0,0,255,255,255,255,
517 255,255,255,255,255,255,255,255,
518 0,0,0,0,254,255,0,252,
519 1,0,0,248,1,0,0,120,
520 0,0,0,0,254,255,255,255,
521 0,0,128,0,0,0,128,0,
522 255,255,255,255,0,0,0,0,
523 0,0,0,0,0,0,0,128,
524 255,255,255,255,0,0,0,0,
525 0,0,0,0,0,0,0,0,
526 128,0,0,0,0,0,0,0,
527 0,1,1,0,1,1,0,0,
528 0,0,0,0,0,0,0,0,
529 0,0,0,0,0,0,0,0,
530 1,0,0,0,128,0,0,0,
531 128,128,128,128,0,0,128,0,
532 28,28,28,28,28,28,28,28,
533 28,28,0,0,0,0,0,128,
534 0,26,26,26,26,26,26,18,
535 18,18,18,18,18,18,18,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,128,128,0,128,16,
538 0,26,26,26,26,26,26,18,
539 18,18,18,18,18,18,18,18,
540 18,18,18,18,18,18,18,18,
541 18,18,18,128,128,0,0,0,
542 0,0,0,0,0,1,0,0,
543 0,0,0,0,0,0,0,0,
544 0,0,0,0,0,0,0,0,
545 0,0,0,0,0,0,0,0,
546 1,0,0,0,0,0,0,0,
547 0,0,18,0,0,0,0,0,
548 0,0,20,20,0,18,0,0,
549 0,20,18,0,0,0,0,0,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,0,
553 18,18,18,18,18,18,18,18,
554 18,18,18,18,18,18,18,18,
555 18,18,18,18,18,18,18,18,
556 18,18,18,18,18,18,18,0,
557 18,18,18,18,18,18,18,18
558 };
559
560
561
562
563 #ifndef HAVE_STRERROR
564 /*************************************************
565 * Provide strerror() for non-ANSI libraries *
566 *************************************************/
567
568 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
569 in their libraries, but can provide the same facility by this simple
570 alternative function. */
571
/* Fallback strerror() for C libraries that do not supply one. The message is
looked up in the system-provided sys_errlist[] table, which has sys_nerr
entries; numbers outside that table yield a fixed "unknown" message. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];

  return "unknown error number";
}
581 #endif /* HAVE_STRERROR */
582
583
584 /*************************************************
585 * JIT memory callback *
586 *************************************************/
587
588 static pcre_jit_stack* jit_callback(void *arg)
589 {
590 return (pcre_jit_stack *)arg;
591 }
592
593
594 /*************************************************
595 * Read or extend an input line *
596 *************************************************/
597
598 /* Input lines are read into buffer, but both patterns and data lines can be
599 continued over multiple input lines. In addition, if the buffer fills up, we
600 want to automatically expand it so as to be able to handle extremely large
601 lines that are needed for certain stress tests. When the input buffer is
602 expanded, the other two buffers must also be expanded likewise, and the
603 contents of pbuffer, which are a copy of the input for callouts, must be
604 preserved (for when expansion happens for a data line). This is not the most
605 optimal way of handling this, but hey, this is just a test program!
606
607 Arguments:
608 f the file to read
609 start where in buffer to start (this *must* be within buffer)
610 prompt for stdin or readline()
611
612 Returns: pointer to the start of new data
613 could be a copy of start, or could be moved
614 NULL if no data read and EOF reached
615 */
616
617 static uschar *
618 extend_inputline(FILE *f, uschar *start, const char *prompt)
619 {
620 uschar *here = start;
621
622 for (;;)
623 {
624 int rlen = (int)(buffer_size - (here - buffer));
625
626 if (rlen > 1000)
627 {
628 int dlen;
629
630 /* If libreadline support is required, use readline() to read a line if the
631 input is a terminal. Note that readline() removes the trailing newline, so
632 we must put it back again, to be compatible with fgets(). */
633
634 #ifdef SUPPORT_LIBREADLINE
635 if (isatty(fileno(f)))
636 {
637 size_t len;
638 char *s = readline(prompt);
639 if (s == NULL) return (here == start)? NULL : start;
640 len = strlen(s);
641 if (len > 0) add_history(s);
642 if (len > rlen - 1) len = rlen - 1;
643 memcpy(here, s, len);
644 here[len] = '\n';
645 here[len+1] = 0;
646 free(s);
647 }
648 else
649 #endif
650
651 /* Read the next line by normal means, prompting if the file is stdin. */
652
653 {
654 if (f == stdin) printf("%s", prompt);
655 if (fgets((char *)here, rlen, f) == NULL)
656 return (here == start)? NULL : start;
657 }
658
659 dlen = (int)strlen((char *)here);
660 if (dlen > 0 && here[dlen - 1] == '\n') return start;
661 here += dlen;
662 }
663
664 else
665 {
666 int new_buffer_size = 2*buffer_size;
667 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
668 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
669 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
670
671 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
672 {
673 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
674 exit(1);
675 }
676
677 memcpy(new_buffer, buffer, buffer_size);
678 memcpy(new_pbuffer, pbuffer, buffer_size);
679
680 buffer_size = new_buffer_size;
681
682 start = new_buffer + (start - buffer);
683 here = new_buffer + (here - buffer);
684
685 free(buffer);
686 free(dbuffer);
687 free(pbuffer);
688
689 buffer = new_buffer;
690 dbuffer = new_dbuffer;
691 pbuffer = new_pbuffer;
692 }
693 }
694
695 return NULL; /* Control never gets here */
696 }
697
698
699
700
701
702
703
704 /*************************************************
705 * Read number from string *
706 *************************************************/
707
708 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
709 around with conditional compilation, just do the job by hand. It is only used
710 for unpicking arguments, so just keep it simple.
711
712 Arguments:
713 str string to be converted
714 endptr where to put the end pointer
715
716 Returns: the converted value, as an int
717 */
718
/* Convert a run of decimal digits, optionally preceded by white space, to an
int. All digits are always consumed, but the result saturates instead of
overflowing: a value too large for the limit below comes back as that limit.

Arguments:
  str      string to be converted
  endptr   receives a pointer to the first character after the digits
           (or after the white space, if there were no digits)

Returns:   the converted value; 0 if no digits were found
*/
static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Half the largest unsigned int: this can never exceed the largest int, and
  equals it on ordinary implementations, so no extra header is needed. */
  const int limit = (int)(~0u >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit fits exactly when result <= (limit - digit) / 10 */
    if (result > (limit - digit) / 10)
      result = limit;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
728
729
730
731
732 /*************************************************
733 * Convert UTF-8 string to value *
734 *************************************************/
735
736 /* This function takes one or more bytes that represents a UTF-8 character,
737 and returns the value of the character.
738
739 Argument:
740 utf8bytes a pointer to the byte vector
741 vptr a pointer to an int to receive the value
742
743 Returns: > 0 => the number of bytes consumed
744 -6 to 0 => malformed UTF-8 character at offset = (-return)
745 */
746
747 #if !defined NOUTF8
748
749 static int
750 utf82ord(unsigned char *utf8bytes, int *vptr)
751 {
752 int c = *utf8bytes++;
753 int d = c;
754 int i, j, s;
755
756 for (i = -1; i < 6; i++) /* i is number of additional bytes */
757 {
758 if ((d & 0x80) == 0) break;
759 d <<= 1;
760 }
761
762 if (i == -1) { *vptr = c; return 1; } /* ascii character */
763 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
764
765 /* i now has a value in the range 1-5 */
766
767 s = 6*i;
768 d = (c & utf8_table3[i]) << s;
769
770 for (j = 0; j < i; j++)
771 {
772 c = *utf8bytes++;
773 if ((c & 0xc0) != 0x80) return -(j+1);
774 s -= 6;
775 d |= (c & 0x3f) << s;
776 }
777
778 /* Check that encoding was the correct unique one */
779
780 for (j = 0; j < utf8_table1_size; j++)
781 if (d <= utf8_table1[j]) break;
782 if (j != i) return -(i+1);
783
784 /* Valid value */
785
786 *vptr = d;
787 return i+1;
788 }
789
790 #endif
791
792
793
794 /*************************************************
795 * Convert character value to UTF-8 *
796 *************************************************/
797
798 /* This function takes an integer value in the range 0 - 0x7fffffff
799 and encodes it as a UTF-8 character in 1 to 6 bytes.
800
801 Arguments:
802 cvalue the character value
803 utf8bytes pointer to buffer for result - at least 6 bytes long
804
805 Returns: number of bytes placed in the buffer
806 */
807
808 #if !defined NOUTF8
809
810 static int
811 ord2utf8(int cvalue, uschar *utf8bytes)
812 {
813 register int i, j;
814 for (i = 0; i < utf8_table1_size; i++)
815 if (cvalue <= utf8_table1[i]) break;
816 utf8bytes += i;
817 for (j = i; j > 0; j--)
818 {
819 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
820 cvalue >>= 6;
821 }
822 *utf8bytes = utf8_table2[i] | cvalue;
823 return i + 1;
824 }
825
826 #endif
827
828
829
830 /*************************************************
831 * Print character string *
832 *************************************************/
833
834 /* Character string printing function. Must handle UTF-8 strings in utf8
835 mode. Yields number of characters printed. If handed a NULL file, just counts
836 chars without printing. */
837
838 static int pchars(unsigned char *p, int length, FILE *f)
839 {
840 int c = 0;
841 int yield = 0;
842
843 while (length-- > 0)
844 {
845 #if !defined NOUTF8
846 if (use_utf8)
847 {
848 int rc = utf82ord(p, &c);
849
850 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
851 {
852 length -= rc - 1;
853 p += rc;
854 if (PRINTHEX(c))
855 {
856 if (f != NULL) fprintf(f, "%c", c);
857 yield++;
858 }
859 else
860 {
861 int n = 4;
862 if (f != NULL) fprintf(f, "\\x{%02x}", c);
863 yield += (n <= 0x000000ff)? 2 :
864 (n <= 0x00000fff)? 3 :
865 (n <= 0x0000ffff)? 4 :
866 (n <= 0x000fffff)? 5 : 6;
867 }
868 continue;
869 }
870 }
871 #endif
872
873 /* Not UTF-8, or malformed UTF-8 */
874
875 c = *p++;
876 if (PRINTHEX(c))
877 {
878 if (f != NULL) fprintf(f, "%c", c);
879 yield++;
880 }
881 else
882 {
883 if (f != NULL) fprintf(f, "\\x%02x", c);
884 yield += 4;
885 }
886 }
887
888 return yield;
889 }
890
891
892
893 /*************************************************
894 * Callout function *
895 *************************************************/
896
897 /* Called from PCRE as a result of the (?C) item. We print out where we are in
898 the match. Yield zero unless more callouts than the fail count, or the callout
899 data is not zero. */
900
901 static int callout(pcre_callout_block *cb)
902 {
903 FILE *f = (first_callout | callout_extra)? outfile : NULL;
904 int i, pre_start, post_start, subject_length;
905
906 if (callout_extra)
907 {
908 fprintf(f, "Callout %d: last capture = %d\n",
909 cb->callout_number, cb->capture_last);
910
911 for (i = 0; i < cb->capture_top * 2; i += 2)
912 {
913 if (cb->offset_vector[i] < 0)
914 fprintf(f, "%2d: <unset>\n", i/2);
915 else
916 {
917 fprintf(f, "%2d: ", i/2);
918 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
919 cb->offset_vector[i+1] - cb->offset_vector[i], f);
920 fprintf(f, "\n");
921 }
922 }
923 }
924
925 /* Re-print the subject in canonical form, the first time or if giving full
926 datails. On subsequent calls in the same match, we use pchars just to find the
927 printed lengths of the substrings. */
928
929 if (f != NULL) fprintf(f, "--->");
930
931 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
932 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
933 cb->current_position - cb->start_match, f);
934
935 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
936
937 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
938 cb->subject_length - cb->current_position, f);
939
940 if (f != NULL) fprintf(f, "\n");
941
942 /* Always print appropriate indicators, with callout number if not already
943 shown. For automatic callouts, show the pattern offset. */
944
945 if (cb->callout_number == 255)
946 {
947 fprintf(outfile, "%+3d ", cb->pattern_position);
948 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
949 }
950 else
951 {
952 if (callout_extra) fprintf(outfile, " ");
953 else fprintf(outfile, "%3d ", cb->callout_number);
954 }
955
956 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
957 fprintf(outfile, "^");
958
959 if (post_start > 0)
960 {
961 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
962 fprintf(outfile, "^");
963 }
964
965 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
966 fprintf(outfile, " ");
967
968 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
969 pbuffer + cb->pattern_position);
970
971 fprintf(outfile, "\n");
972 first_callout = 0;
973
974 if (cb->mark != last_callout_mark)
975 {
976 fprintf(outfile, "Latest Mark: %s\n",
977 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
978 last_callout_mark = cb->mark;
979 }
980
981 if (cb->callout_data != NULL)
982 {
983 int callout_data = *((int *)(cb->callout_data));
984 if (callout_data != 0)
985 {
986 fprintf(outfile, "Callout data = %d\n", callout_data);
987 return callout_data;
988 }
989 }
990
991 return (cb->callout_number != callout_fail_id)? 0 :
992 (++callout_count >= callout_fail_count)? 1 : 0;
993 }
994
995
996 /*************************************************
997 * Local malloc functions *
998 *************************************************/
999
1000 /* Alternative malloc function, to test functionality and save the size of a
1001 compiled re. The show_malloc variable is set only during matching. */
1002
1003 static void *new_malloc(size_t size)
1004 {
1005 void *block = malloc(size);
1006 gotten_store = size;
1007 if (show_malloc)
1008 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1009 return block;
1010 }
1011
1012 static void new_free(void *block)
1013 {
1014 if (show_malloc)
1015 fprintf(outfile, "free %p\n", block);
1016 free(block);
1017 }
1018
1019 /* For recursion malloc/free, to test stacking calls */
1020
1021 static void *stack_malloc(size_t size)
1022 {
1023 void *block = malloc(size);
1024 if (show_malloc)
1025 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1026 return block;
1027 }
1028
1029 static void stack_free(void *block)
1030 {
1031 if (show_malloc)
1032 fprintf(outfile, "stack_free %p\n", block);
1033 free(block);
1034 }
1035
1036
1037 /*************************************************
1038 * Call pcre_fullinfo() *
1039 *************************************************/
1040
1041 /* Get one piece of information from the pcre_fullinfo() function */
1042
1043 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1044 {
1045 int rc;
1046 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1047 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1048 }
1049
1050
1051
1052 /*************************************************
1053 * Byte flipping function *
1054 *************************************************/
1055
/* Reverse the byte order of an integer value.

Arguments:
  value      the value to flip
  n          width of the field in bytes; 2 selects a 16-bit flip, and any
               other value selects a 32-bit flip

Returns:     the low 2 or 4 bytes of value in reversed order; any higher
             bits of value are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1065
1066
1067
1068
1069 /*************************************************
1070 * Check match or recursion limit *
1071 *************************************************/
1072
1073 static int
1074 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1075 int start_offset, int options, int *use_offsets, int use_size_offsets,
1076 int flag, unsigned long int *limit, int errnumber, const char *msg)
1077 {
1078 int count;
1079 int min = 0;
1080 int mid = 64;
1081 int max = -1;
1082
1083 extra->flags |= flag;
1084
1085 for (;;)
1086 {
1087 *limit = mid;
1088
1089 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1090 use_offsets, use_size_offsets);
1091
1092 if (count == errnumber)
1093 {
1094 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1095 min = mid;
1096 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1097 }
1098
1099 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1100 count == PCRE_ERROR_PARTIAL)
1101 {
1102 if (mid == min + 1)
1103 {
1104 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1105 break;
1106 }
1107 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1108 max = mid;
1109 mid = (min + mid)/2;
1110 }
1111 else break; /* Some other error */
1112 }
1113
1114 extra->flags &= ~flag;
1115 return count;
1116 }
1117
1118
1119
1120 /*************************************************
1121 * Case-independent strncmp() function *
1122 *************************************************/
1123
1124 /*
1125 Arguments:
1126 s first string
1127 t second string
1128 n number of characters to compare
1129
1130 Returns: < 0, = 0, or > 0, according to the comparison
1131 */
1132
1133 static int
1134 strncmpic(uschar *s, uschar *t, int n)
1135 {
1136 while (n--)
1137 {
1138 int c = tolower(*s++) - tolower(*t++);
1139 if (c) return c;
1140 }
1141 return 0;
1142 }
1143
1144
1145
1146 /*************************************************
1147 * Check newline indicator *
1148 *************************************************/
1149
1150 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1151 a message and return 0 if there is no match.
1152
1153 Arguments:
1154 p points after the leading '<'
1155 f file for error message
1156
1157 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1158 */
1159
1160 static int
1161 check_newline(uschar *p, FILE *f)
1162 {
1163 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1164 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1165 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1166 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1167 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1168 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1169 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1170 fprintf(f, "Unknown newline type at: <%s\n", p);
1171 return 0;
1172 }
1173
1174
1175
1176 /*************************************************
1177 * Usage function *
1178 *************************************************/
1179
/* Write the command line summary to stdout. The text is held in a table, one
entry per output chunk; entries that describe optional features are included
or left out by the same preprocessor conditions that control those features. */

static void
usage(void)
{
static const char *const text[] = {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
  " -b show compiled code (bytecode)\n",
  " -C show PCRE compile-time options and exit\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
};
int i;

/* None of the strings contains a conversion specification, so writing them
with fputs() produces the same output as printf(). */

for (i = 0; i < (int)(sizeof(text)/sizeof(text[0])); i++)
  fputs(text[i], stdout);
}
1214
1215
1216
1217 /*************************************************
1218 * Main Program *
1219 *************************************************/
1220
1221 /* Read lines from named file or stdin and write to named file or stdout; lines
1222 consist of a regular expression, in delimiters and optionally followed by
1223 options, followed by a set of test data, terminated by an empty line. */
1224
1225 int main(int argc, char **argv)
1226 {
1227 FILE *infile = stdin;
1228 int options = 0;
1229 int study_options = 0;
1230 int default_find_match_limit = FALSE;
1231 int op = 1;
1232 int timeit = 0;
1233 int timeitm = 0;
1234 int showinfo = 0;
1235 int showstore = 0;
1236 int force_study = -1;
1237 int force_study_options = 0;
1238 int quiet = 0;
1239 int size_offsets = 45;
1240 int size_offsets_max;
1241 int *offsets = NULL;
1242 #if !defined NOPOSIX
1243 int posix = 0;
1244 #endif
1245 int debug = 0;
1246 int done = 0;
1247 int all_use_dfa = 0;
1248 int yield = 0;
1249 int stack_size;
1250
1251 pcre_jit_stack *jit_stack = NULL;
1252
1253
1254 /* These vectors store, end-to-end, a list of captured substring names. Assume
1255 that 1024 is plenty long enough for the few names we'll be testing. */
1256
1257 uschar copynames[1024];
1258 uschar getnames[1024];
1259
1260 uschar *copynamesptr;
1261 uschar *getnamesptr;
1262
1263 /* Get buffers from malloc() so that Electric Fence will check their misuse
1264 when I am debugging. They grow automatically when very long lines are read. */
1265
1266 buffer = (unsigned char *)malloc(buffer_size);
1267 dbuffer = (unsigned char *)malloc(buffer_size);
1268 pbuffer = (unsigned char *)malloc(buffer_size);
1269
1270 /* The outfile variable is static so that new_malloc can use it. */
1271
1272 outfile = stdout;
1273
1274 /* The following _setmode() stuff is some Windows magic that tells its runtime
1275 library to translate CRLF into a single LF character. At least, that's what
1276 I've been told: never having used Windows I take this all on trust. Originally
1277 it set 0x8000, but then I was advised that _O_BINARY was better. */
1278
1279 #if defined(_WIN32) || defined(WIN32)
1280 _setmode( _fileno( stdout ), _O_BINARY );
1281 #endif
1282
1283 /* Scan options */
1284
1285 while (argc > 1 && argv[op][0] == '-')
1286 {
1287 unsigned char *endptr;
1288
1289 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1290 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1291 else if (strcmp(argv[op], "-s+") == 0)
1292 {
1293 force_study = 1;
1294 force_study_options = PCRE_STUDY_JIT_COMPILE;
1295 }
1296 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1297 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1298 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1299 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1300 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1301 #if !defined NODFA
1302 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1303 #endif
1304 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1305 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1306 *endptr == 0))
1307 {
1308 op++;
1309 argc--;
1310 }
1311 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1312 {
1313 int both = argv[op][2] == 0;
1314 int temp;
1315 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1316 *endptr == 0))
1317 {
1318 timeitm = temp;
1319 op++;
1320 argc--;
1321 }
1322 else timeitm = LOOPREPEAT;
1323 if (both) timeit = timeitm;
1324 }
1325 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1326 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1327 *endptr == 0))
1328 {
1329 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1330 printf("PCRE: -S not supported on this OS\n");
1331 exit(1);
1332 #else
1333 int rc;
1334 struct rlimit rlim;
1335 getrlimit(RLIMIT_STACK, &rlim);
1336 rlim.rlim_cur = stack_size * 1024 * 1024;
1337 rc = setrlimit(RLIMIT_STACK, &rlim);
1338 if (rc != 0)
1339 {
1340 printf("PCRE: setrlimit() failed with error %d\n", rc);
1341 exit(1);
1342 }
1343 op++;
1344 argc--;
1345 #endif
1346 }
1347 #if !defined NOPOSIX
1348 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1349 #endif
1350 else if (strcmp(argv[op], "-C") == 0)
1351 {
1352 int rc;
1353 unsigned long int lrc;
1354 printf("PCRE version %s\n", pcre_version());
1355 printf("Compiled with\n");
1356 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1357 printf(" %sUTF-8 support\n", rc? "" : "No ");
1358 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1359 printf(" %sUnicode properties support\n", rc? "" : "No ");
1360 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1361 printf(" %sJust-in-time compiler support\n", rc? "" : "No ");
1362 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1363 /* Note that these values are always the ASCII values, even
1364 in EBCDIC environments. CR is 13 and NL is 10. */
1365 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1366 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1367 (rc == -2)? "ANYCRLF" :
1368 (rc == -1)? "ANY" : "???");
1369 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1370 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1371 "all Unicode newlines");
1372 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1373 printf(" Internal link size = %d\n", rc);
1374 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1375 printf(" POSIX malloc threshold = %d\n", rc);
1376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1377 printf(" Default match limit = %ld\n", lrc);
1378 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1379 printf(" Default recursion depth limit = %ld\n", lrc);
1380 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1381 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1382 goto EXIT;
1383 }
1384 else if (strcmp(argv[op], "-help") == 0 ||
1385 strcmp(argv[op], "--help") == 0)
1386 {
1387 usage();
1388 goto EXIT;
1389 }
1390 else
1391 {
1392 printf("** Unknown or malformed option %s\n", argv[op]);
1393 usage();
1394 yield = 1;
1395 goto EXIT;
1396 }
1397 op++;
1398 argc--;
1399 }
1400
1401 /* Get the store for the offsets vector, and remember what it was */
1402
1403 size_offsets_max = size_offsets;
1404 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1405 if (offsets == NULL)
1406 {
1407 printf("** Failed to get %d bytes of memory for offsets vector\n",
1408 (int)(size_offsets_max * sizeof(int)));
1409 yield = 1;
1410 goto EXIT;
1411 }
1412
1413 /* Sort out the input and output files */
1414
1415 if (argc > 1)
1416 {
1417 infile = fopen(argv[op], INPUT_MODE);
1418 if (infile == NULL)
1419 {
1420 printf("** Failed to open %s\n", argv[op]);
1421 yield = 1;
1422 goto EXIT;
1423 }
1424 }
1425
1426 if (argc > 2)
1427 {
1428 outfile = fopen(argv[op+1], OUTPUT_MODE);
1429 if (outfile == NULL)
1430 {
1431 printf("** Failed to open %s\n", argv[op+1]);
1432 yield = 1;
1433 goto EXIT;
1434 }
1435 }
1436
1437 /* Set alternative malloc function */
1438
1439 pcre_malloc = new_malloc;
1440 pcre_free = new_free;
1441 pcre_stack_malloc = stack_malloc;
1442 pcre_stack_free = stack_free;
1443
1444 /* Heading line unless quiet, then prompt for first regex if stdin */
1445
1446 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1447
1448 /* Main loop */
1449
1450 while (!done)
1451 {
1452 pcre *re = NULL;
1453 pcre_extra *extra = NULL;
1454
1455 #if !defined NOPOSIX /* There are still compilers that require no indent */
1456 regex_t preg;
1457 int do_posix = 0;
1458 #endif
1459
1460 const char *error;
1461 unsigned char *markptr;
1462 unsigned char *p, *pp, *ppp;
1463 unsigned char *to_file = NULL;
1464 const unsigned char *tables = NULL;
1465 unsigned long int true_size, true_study_size = 0;
1466 size_t size, regex_gotten_store;
1467 int do_allcaps = 0;
1468 int do_mark = 0;
1469 int do_study = 0;
1470 int no_force_study = 0;
1471 int do_debug = debug;
1472 int do_G = 0;
1473 int do_g = 0;
1474 int do_showinfo = showinfo;
1475 int do_showrest = 0;
1476 int do_showcaprest = 0;
1477 int do_flip = 0;
1478 int erroroffset, len, delimiter, poffset;
1479
1480 use_utf8 = 0;
1481 debug_lengths = 1;
1482
1483 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1484 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1485 fflush(outfile);
1486
1487 p = buffer;
1488 while (isspace(*p)) p++;
1489 if (*p == 0) continue;
1490
1491 /* See if the pattern is to be loaded pre-compiled from a file. */
1492
1493 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1494 {
1495 unsigned long int magic, get_options;
1496 uschar sbuf[8];
1497 FILE *f;
1498
1499 p++;
1500 pp = p + (int)strlen((char *)p);
1501 while (isspace(pp[-1])) pp--;
1502 *pp = 0;
1503
1504 f = fopen((char *)p, "rb");
1505 if (f == NULL)
1506 {
1507 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1508 continue;
1509 }
1510
1511 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1512
1513 true_size =
1514 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1515 true_study_size =
1516 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1517
1518 re = (real_pcre *)new_malloc(true_size);
1519 regex_gotten_store = gotten_store;
1520
1521 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1522
1523 magic = ((real_pcre *)re)->magic_number;
1524 if (magic != MAGIC_NUMBER)
1525 {
1526 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1527 {
1528 do_flip = 1;
1529 }
1530 else
1531 {
1532 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1533 fclose(f);
1534 continue;
1535 }
1536 }
1537
1538 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1539 do_flip? " (byte-inverted)" : "", p);
1540
1541 /* Need to know if UTF-8 for printing data strings */
1542
1543 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1544 use_utf8 = (get_options & PCRE_UTF8) != 0;
1545
1546 /* Now see if there is any following study data. */
1547
1548 if (true_study_size != 0)
1549 {
1550 pcre_study_data *psd;
1551
1552 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1553 extra->flags = PCRE_EXTRA_STUDY_DATA;
1554
1555 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1556 extra->study_data = psd;
1557
1558 if (fread(psd, 1, true_study_size, f) != true_study_size)
1559 {
1560 FAIL_READ:
1561 fprintf(outfile, "Failed to read data from %s\n", p);
1562 if (extra != NULL) pcre_free_study(extra);
1563 if (re != NULL) new_free(re);
1564 fclose(f);
1565 continue;
1566 }
1567 fprintf(outfile, "Study data loaded from %s\n", p);
1568 do_study = 1; /* To get the data output if requested */
1569 }
1570 else fprintf(outfile, "No study data\n");
1571
1572 fclose(f);
1573 goto SHOW_INFO;
1574 }
1575
1576 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1577 the pattern; if it isn't complete, read more. */
1578
1579 delimiter = *p++;
1580
1581 if (isalnum(delimiter) || delimiter == '\\')
1582 {
1583 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1584 goto SKIP_DATA;
1585 }
1586
1587 pp = p;
1588 poffset = (int)(p - buffer);
1589
1590 for(;;)
1591 {
1592 while (*pp != 0)
1593 {
1594 if (*pp == '\\' && pp[1] != 0) pp++;
1595 else if (*pp == delimiter) break;
1596 pp++;
1597 }
1598 if (*pp != 0) break;
1599 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1600 {
1601 fprintf(outfile, "** Unexpected EOF\n");
1602 done = 1;
1603 goto CONTINUE;
1604 }
1605 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1606 }
1607
1608 /* The buffer may have moved while being extended; reset the start of data
1609 pointer to the correct relative point in the buffer. */
1610
1611 p = buffer + poffset;
1612
1613 /* If the first character after the delimiter is backslash, make
1614 the pattern end with backslash. This is purely to provide a way
1615 of testing for the error message when a pattern ends with backslash. */
1616
1617 if (pp[1] == '\\') *pp++ = '\\';
1618
1619 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1620 for callouts. */
1621
1622 *pp++ = 0;
1623 strcpy((char *)pbuffer, (char *)p);
1624
1625 /* Look for options after final delimiter */
1626
1627 options = 0;
1628 log_store = showstore; /* default from command line */
1629
1630 while (*pp != 0)
1631 {
1632 switch (*pp++)
1633 {
1634 case 'f': options |= PCRE_FIRSTLINE; break;
1635 case 'g': do_g = 1; break;
1636 case 'i': options |= PCRE_CASELESS; break;
1637 case 'm': options |= PCRE_MULTILINE; break;
1638 case 's': options |= PCRE_DOTALL; break;
1639 case 'x': options |= PCRE_EXTENDED; break;
1640
1641 case '+':
1642 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1643 break;
1644
1645 case '=': do_allcaps = 1; break;
1646 case 'A': options |= PCRE_ANCHORED; break;
1647 case 'B': do_debug = 1; break;
1648 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1649 case 'D': do_debug = do_showinfo = 1; break;
1650 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1651 case 'F': do_flip = 1; break;
1652 case 'G': do_G = 1; break;
1653 case 'I': do_showinfo = 1; break;
1654 case 'J': options |= PCRE_DUPNAMES; break;
1655 case 'K': do_mark = 1; break;
1656 case 'M': log_store = 1; break;
1657 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1658
1659 #if !defined NOPOSIX
1660 case 'P': do_posix = 1; break;
1661 #endif
1662
1663 case 'S':
1664 if (do_study == 0)
1665 {
1666 do_study = 1;
1667 if (*pp == '+')
1668 {
1669 study_options |= PCRE_STUDY_JIT_COMPILE;
1670 pp++;
1671 }
1672 }
1673 else
1674 {
1675 do_study = 0;
1676 no_force_study = 1;
1677 }
1678 break;
1679
1680 case 'U': options |= PCRE_UNGREEDY; break;
1681 case 'W': options |= PCRE_UCP; break;
1682 case 'X': options |= PCRE_EXTRA; break;
1683 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1684 case 'Z': debug_lengths = 0; break;
1685 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1686 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1687
1688 case 'T':
1689 switch (*pp++)
1690 {
1691 case '0': tables = tables0; break;
1692 case '1': tables = tables1; break;
1693
1694 case '\r':
1695 case '\n':
1696 case ' ':
1697 case 0:
1698 fprintf(outfile, "** Missing table number after /T\n");
1699 goto SKIP_DATA;
1700
1701 default:
1702 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1703 goto SKIP_DATA;
1704 }
1705 break;
1706
1707 case 'L':
1708 ppp = pp;
1709 /* The '\r' test here is so that it works on Windows. */
1710 /* The '0' test is just in case this is an unterminated line. */
1711 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1712 *ppp = 0;
1713 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1714 {
1715 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1716 goto SKIP_DATA;
1717 }
1718 locale_set = 1;
1719 tables = pcre_maketables();
1720 pp = ppp;
1721 break;
1722
1723 case '>':
1724 to_file = pp;
1725 while (*pp != 0) pp++;
1726 while (isspace(pp[-1])) pp--;
1727 *pp = 0;
1728 break;
1729
1730 case '<':
1731 {
1732 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1733 {
1734 options |= PCRE_JAVASCRIPT_COMPAT;
1735 pp += 3;
1736 }
1737 else
1738 {
1739 int x = check_newline(pp, outfile);
1740 if (x == 0) goto SKIP_DATA;
1741 options |= x;
1742 while (*pp++ != '>');
1743 }
1744 }
1745 break;
1746
1747 case '\r': /* So that it works in Windows */
1748 case '\n':
1749 case ' ':
1750 break;
1751
1752 default:
1753 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1754 goto SKIP_DATA;
1755 }
1756 }
1757
1758 /* Handle compiling via the POSIX interface, which doesn't support the
1759 timing, showing, or debugging options, nor the ability to pass over
1760 local character tables. */
1761
1762 #if !defined NOPOSIX
1763 if (posix || do_posix)
1764 {
1765 int rc;
1766 int cflags = 0;
1767
1768 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1769 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1770 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1771 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1772 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1773 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1774 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1775
1776 rc = regcomp(&preg, (char *)p, cflags);
1777
1778 /* Compilation failed; go back for another re, skipping to blank line
1779 if non-interactive. */
1780
1781 if (rc != 0)
1782 {
1783 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1784 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1785 goto SKIP_DATA;
1786 }
1787 }
1788
1789 /* Handle compiling via the native interface */
1790
1791 else
1792 #endif /* !defined NOPOSIX */
1793
1794 {
1795 unsigned long int get_options;
1796
1797 if (timeit > 0)
1798 {
1799 register int i;
1800 clock_t time_taken;
1801 clock_t start_time = clock();
1802 for (i = 0; i < timeit; i++)
1803 {
1804 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1805 if (re != NULL) free(re);
1806 }
1807 time_taken = clock() - start_time;
1808 fprintf(outfile, "Compile time %.4f milliseconds\n",
1809 (((double)time_taken * 1000.0) / (double)timeit) /
1810 (double)CLOCKS_PER_SEC);
1811 }
1812
1813 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1814
1815 /* Compilation failed; go back for another re, skipping to blank line
1816 if non-interactive. */
1817
1818 if (re == NULL)
1819 {
1820 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1821 SKIP_DATA:
1822 if (infile != stdin)
1823 {
1824 for (;;)
1825 {
1826 if (extend_inputline(infile, buffer, NULL) == NULL)
1827 {
1828 done = 1;
1829 goto CONTINUE;
1830 }
1831 len = (int)strlen((char *)buffer);
1832 while (len > 0 && isspace(buffer[len-1])) len--;
1833 if (len == 0) break;
1834 }
1835 fprintf(outfile, "\n");
1836 }
1837 goto CONTINUE;
1838 }
1839
1840 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1841 within the regex; check for this so that we know how to process the data
1842 lines. */
1843
1844 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1845 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1846
1847 /* Print information if required. There are now two info-returning
1848 functions. The old one has a limited interface and returns only limited
1849 data. Check that it agrees with the newer one. */
1850
1851 if (log_store)
1852 fprintf(outfile, "Memory allocation (code space): %d\n",
1853 (int)(gotten_store -
1854 sizeof(real_pcre) -
1855 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1856
1857 /* Extract the size for possible writing before possibly flipping it,
1858 and remember the store that was got. */
1859
1860 true_size = ((real_pcre *)re)->size;
1861 regex_gotten_store = gotten_store;
1862
1863 /* If -s or /S was present, study the regex to generate additional info to
1864 help with the matching, unless the pattern has the SS option, which
1865 suppresses the effect of /S (used for a few test patterns where studying is
1866 never sensible). */
1867
1868 if (do_study || (force_study >= 0 && !no_force_study))
1869 {
1870 if (timeit > 0)
1871 {
1872 register int i;
1873 clock_t time_taken;
1874 clock_t start_time = clock();
1875 for (i = 0; i < timeit; i++)
1876 extra = pcre_study(re, study_options | force_study_options, &error);
1877 time_taken = clock() - start_time;
1878 if (extra != NULL) pcre_free_study(extra);
1879 fprintf(outfile, " Study time %.4f milliseconds\n",
1880 (((double)time_taken * 1000.0) / (double)timeit) /
1881 (double)CLOCKS_PER_SEC);
1882 }
1883 extra = pcre_study(re, study_options | force_study_options, &error);
1884 if (error != NULL)
1885 fprintf(outfile, "Failed to study: %s\n", error);
1886 else if (extra != NULL)
1887 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1888 }
1889
1890 /* If /K was present, we set up for handling MARK data. */
1891
1892 if (do_mark)
1893 {
1894 if (extra == NULL)
1895 {
1896 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1897 extra->flags = 0;
1898 }
1899 extra->mark = &markptr;
1900 extra->flags |= PCRE_EXTRA_MARK;
1901 }
1902
1903 /* If the 'F' option was present, we flip the bytes of all the integer
1904 fields in the regex data block and the study block. This is to make it
1905 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1906 compiled on a different architecture. */
1907
1908 if (do_flip)
1909 {
1910 real_pcre *rre = (real_pcre *)re;
1911 rre->magic_number =
1912 byteflip(rre->magic_number, sizeof(rre->magic_number));
1913 rre->size = byteflip(rre->size, sizeof(rre->size));
1914 rre->options = byteflip(rre->options, sizeof(rre->options));
1915 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1916 rre->top_bracket =
1917 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1918 rre->top_backref =
1919 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1920 rre->first_byte =
1921 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1922 rre->req_byte =
1923 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1924 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1925 sizeof(rre->name_table_offset));
1926 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1927 sizeof(rre->name_entry_size));
1928 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1929 sizeof(rre->name_count));
1930
1931 if (extra != NULL)
1932 {
1933 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1934 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1935 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1936 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1937 }
1938 }
1939
1940 /* Extract information from the compiled data if required */
1941
1942 SHOW_INFO:
1943
1944 if (do_debug)
1945 {
1946 fprintf(outfile, "------------------------------------------------------------------\n");
1947 pcre_printint(re, outfile, debug_lengths);
1948 }
1949
1950 /* We already have the options in get_options (see above) */
1951
1952 if (do_showinfo)
1953 {
1954 unsigned long int all_options;
1955 #if !defined NOINFOCHECK
1956 int old_first_char, old_options, old_count;
1957 #endif
1958 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1959 hascrorlf;
1960 int nameentrysize, namecount;
1961 const uschar *nametable;
1962
1963 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1964 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1965 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1966 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1967 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1968 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1969 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1970 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1971 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1972 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1973 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1974
1975 #if !defined NOINFOCHECK
1976 old_count = pcre_info(re, &old_options, &old_first_char);
1977 if (count < 0) fprintf(outfile,
1978 "Error %d from pcre_info()\n", count);
1979 else
1980 {
1981 if (old_count != count) fprintf(outfile,
1982 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1983 old_count);
1984
1985 if (old_first_char != first_char) fprintf(outfile,
1986 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1987 first_char, old_first_char);
1988
1989 if (old_options != (int)get_options) fprintf(outfile,
1990 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1991 get_options, old_options);
1992 }
1993 #endif
1994
1995 if (size != regex_gotten_store) fprintf(outfile,
1996 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1997 (int)size, (int)regex_gotten_store);
1998
1999 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2000 if (backrefmax > 0)
2001 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2002
2003 if (namecount > 0)
2004 {
2005 fprintf(outfile, "Named capturing subpatterns:\n");
2006 while (namecount-- > 0)
2007 {
2008 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2009 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2010 GET2(nametable, 0));
2011 nametable += nameentrysize;
2012 }
2013 }
2014
2015 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2016 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2017
2018 all_options = ((real_pcre *)re)->options;
2019 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2020
2021 if (get_options == 0) fprintf(outfile, "No options\n");
2022 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2023 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2024 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2025 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2026 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2027 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2028 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2029 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2030 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2031 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2032 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2033 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2034 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2035 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2036 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2037 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2038 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2039 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2040
2041 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2042
2043 switch (get_options & PCRE_NEWLINE_BITS)
2044 {
2045 case PCRE_NEWLINE_CR:
2046 fprintf(outfile, "Forced newline sequence: CR\n");
2047 break;
2048
2049 case PCRE_NEWLINE_LF:
2050 fprintf(outfile, "Forced newline sequence: LF\n");
2051 break;
2052
2053 case PCRE_NEWLINE_CRLF:
2054 fprintf(outfile, "Forced newline sequence: CRLF\n");
2055 break;
2056
2057 case PCRE_NEWLINE_ANYCRLF:
2058 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2059 break;
2060
2061 case PCRE_NEWLINE_ANY:
2062 fprintf(outfile, "Forced newline sequence: ANY\n");
2063 break;
2064
2065 default:
2066 break;
2067 }
2068
2069 if (first_char == -1)
2070 {
2071 fprintf(outfile, "First char at start or follows newline\n");
2072 }
2073 else if (first_char < 0)
2074 {
2075 fprintf(outfile, "No first char\n");
2076 }
2077 else
2078 {
2079 int ch = first_char & 255;
2080 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2081 "" : " (caseless)";
2082 if (PRINTHEX(ch))
2083 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2084 else
2085 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2086 }
2087
2088 if (need_char < 0)
2089 {
2090 fprintf(outfile, "No need char\n");
2091 }
2092 else
2093 {
2094 int ch = need_char & 255;
2095 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2096 "" : " (caseless)";
2097 if (PRINTHEX(ch))
2098 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2099 else
2100 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2101 }
2102
2103 /* Don't output study size; at present it is in any case a fixed
2104 value, but it varies, depending on the computer architecture, and
2105 so messes up the test suite. (And with the /F option, it might be
2106 flipped.) If study was forced by an external -s, don't show this
2107 information unless -i or -d was also present. This means that, except
2108 when auto-callouts are involved, the output from runs with and without
2109 -s should be identical. */
2110
2111 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2112 {
2113 if (extra == NULL)
2114 fprintf(outfile, "Study returned NULL\n");
2115 else
2116 {
2117 uschar *start_bits = NULL;
2118 int minlength;
2119
2120 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2121 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2122
2123 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2124 if (start_bits == NULL)
2125 fprintf(outfile, "No set of starting bytes\n");
2126 else
2127 {
2128 int i;
2129 int c = 24;
2130 fprintf(outfile, "Starting byte set: ");
2131 for (i = 0; i < 256; i++)
2132 {
2133 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2134 {
2135 if (c > 75)
2136 {
2137 fprintf(outfile, "\n ");
2138 c = 2;
2139 }
2140 if (PRINTHEX(i) && i != ' ')
2141 {
2142 fprintf(outfile, "%c ", i);
2143 c += 2;
2144 }
2145 else
2146 {
2147 fprintf(outfile, "\\x%02x ", i);
2148 c += 5;
2149 }
2150 }
2151 }
2152 fprintf(outfile, "\n");
2153 }
2154 }
2155
2156 /* Show this only if the JIT was set by /S, not by -s. */
2157
2158 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2159 {
2160 int jit;
2161 new_info(re, extra, PCRE_INFO_JIT, &jit);
2162 if (jit)
2163 fprintf(outfile, "JIT study was successful\n");
2164 else
2165 #ifdef SUPPORT_JIT
2166 fprintf(outfile, "JIT study was not successful\n");
2167 #else
2168 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2169 #endif
2170 }
2171 }
2172 }
2173
2174 /* If the '>' option was present, we write out the regex to a file, and
2175 that is all. The first 8 bytes of the file are the regex length and then
2176 the study length, in big-endian order. */
2177
2178 if (to_file != NULL)
2179 {
2180 FILE *f = fopen((char *)to_file, "wb");
2181 if (f == NULL)
2182 {
2183 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2184 }
2185 else
2186 {
2187 uschar sbuf[8];
2188 sbuf[0] = (uschar)((true_size >> 24) & 255);
2189 sbuf[1] = (uschar)((true_size >> 16) & 255);
2190 sbuf[2] = (uschar)((true_size >> 8) & 255);
2191 sbuf[3] = (uschar)((true_size) & 255);
2192
2193 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2194 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2195 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2196 sbuf[7] = (uschar)((true_study_size) & 255);
2197
2198 if (fwrite(sbuf, 1, 8, f) < 8 ||
2199 fwrite(re, 1, true_size, f) < true_size)
2200 {
2201 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2202 }
2203 else
2204 {
2205 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2206
2207 /* If there is study data, write it. */
2208
2209 if (extra != NULL)
2210 {
2211 if (fwrite(extra->study_data, 1, true_study_size, f) <
2212 true_study_size)
2213 {
2214 fprintf(outfile, "Write error on %s: %s\n", to_file,
2215 strerror(errno));
2216 }
2217 else fprintf(outfile, "Study data written to %s\n", to_file);
2218 }
2219 }
2220 fclose(f);
2221 }
2222
2223 new_free(re);
2224 if (extra != NULL) pcre_free_study(extra);
2225 if (locale_set)
2226 {
2227 new_free((void *)tables);
2228 setlocale(LC_CTYPE, "C");
2229 locale_set = 0;
2230 }
2231 continue; /* With next regex */
2232 }
2233 } /* End of non-POSIX compile */
2234
2235 /* Read data lines and test them */
2236
2237 for (;;)
2238 {
2239 uschar *q;
2240 uschar *bptr;
2241 int *use_offsets = offsets;
2242 int use_size_offsets = size_offsets;
2243 int callout_data = 0;
2244 int callout_data_set = 0;
2245 int count, c;
2246 int copystrings = 0;
2247 int find_match_limit = default_find_match_limit;
2248 int getstrings = 0;
2249 int getlist = 0;
2250 int gmatched = 0;
2251 int start_offset = 0;
2252 int start_offset_sign = 1;
2253 int g_notempty = 0;
2254 int use_dfa = 0;
2255
2256 options = 0;
2257
2258 *copynames = 0;
2259 *getnames = 0;
2260
2261 copynamesptr = copynames;
2262 getnamesptr = getnames;
2263
2264 pcre_callout = callout;
2265 first_callout = 1;
2266 last_callout_mark = NULL;
2267 callout_extra = 0;
2268 callout_count = 0;
2269 callout_fail_count = 999999;
2270 callout_fail_id = -1;
2271 show_malloc = 0;
2272
2273 if (extra != NULL) extra->flags &=
2274 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2275
2276 len = 0;
2277 for (;;)
2278 {
2279 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2280 {
2281 if (len > 0) /* Reached EOF without hitting a newline */
2282 {
2283 fprintf(outfile, "\n");
2284 break;
2285 }
2286 done = 1;
2287 goto CONTINUE;
2288 }
2289 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2290 len = (int)strlen((char *)buffer);
2291 if (buffer[len-1] == '\n') break;
2292 }
2293
2294 while (len > 0 && isspace(buffer[len-1])) len--;
2295 buffer[len] = 0;
2296 if (len == 0) break;
2297
2298 p = buffer;
2299 while (isspace(*p)) p++;
2300
2301 bptr = q = dbuffer;
2302 while ((c = *p++) != 0)
2303 {
2304 int i = 0;
2305 int n = 0;
2306
2307 if (c == '\\') switch ((c = *p++))
2308 {
2309 case 'a': c = 7; break;
2310 case 'b': c = '\b'; break;
2311 case 'e': c = 27; break;
2312 case 'f': c = '\f'; break;
2313 case 'n': c = '\n'; break;
2314 case 'r': c = '\r'; break;
2315 case 't': c = '\t'; break;
2316 case 'v': c = '\v'; break;
2317
2318 case '0': case '1': case '2': case '3':
2319 case '4': case '5': case '6': case '7':
2320 c -= '0';
2321 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2322 c = c * 8 + *p++ - '0';
2323
2324 #if !defined NOUTF8
2325 if (use_utf8 && c > 255)
2326 {
2327 unsigned char buff8[8];
2328 int ii, utn;
2329 utn = ord2utf8(c, buff8);
2330 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2331 c = buff8[ii]; /* Last byte */
2332 }
2333 #endif
2334 break;
2335
2336 case 'x':
2337
2338 /* Handle \x{..} specially - new Perl thing for utf8 */
2339
2340 #if !defined NOUTF8
2341 if (*p == '{')
2342 {
2343 unsigned char *pt = p;
2344 c = 0;
2345 while (isxdigit(*(++pt)))
2346 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2347 if (*pt == '}')
2348 {
2349 unsigned char buff8[8];
2350 int ii, utn;
2351 if (use_utf8)
2352 {
2353 utn = ord2utf8(c, buff8);
2354 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2355 c = buff8[ii]; /* Last byte */
2356 }
2357 else
2358 {
2359 if (c > 255)
2360 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2361 "UTF-8 mode is not enabled.\n"
2362 "** Truncation will probably give the wrong result.\n", c);
2363 }
2364 p = pt + 1;
2365 break;
2366 }
2367 /* Not correct form; fall through */
2368 }
2369 #endif
2370
2371 /* Ordinary \x */
2372
2373 c = 0;
2374 while (i++ < 2 && isxdigit(*p))
2375 {
2376 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2377 p++;
2378 }
2379 break;
2380
2381 case 0: /* \ followed by EOF allows for an empty line */
2382 p--;
2383 continue;
2384
2385 case '>':
2386 if (*p == '-')
2387 {
2388 start_offset_sign = -1;
2389 p++;
2390 }
2391 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2392 start_offset *= start_offset_sign;
2393 continue;
2394
2395 case 'A': /* Option setting */
2396 options |= PCRE_ANCHORED;
2397 continue;
2398
2399 case 'B':
2400 options |= PCRE_NOTBOL;
2401 continue;
2402
2403 case 'C':
2404 if (isdigit(*p)) /* Set copy string */
2405 {
2406 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2407 copystrings |= 1 << n;
2408 }
2409 else if (isalnum(*p))
2410 {
2411 uschar *npp = copynamesptr;
2412 while (isalnum(*p)) *npp++ = *p++;
2413 *npp++ = 0;
2414 *npp = 0;
2415 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2416 if (n < 0)
2417 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2418 copynamesptr = npp;
2419 }
2420 else if (*p == '+')
2421 {
2422 callout_extra = 1;
2423 p++;
2424 }
2425 else if (*p == '-')
2426 {
2427 pcre_callout = NULL;
2428 p++;
2429 }
2430 else if (*p == '!')
2431 {
2432 callout_fail_id = 0;
2433 p++;
2434 while(isdigit(*p))
2435 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2436 callout_fail_count = 0;
2437 if (*p == '!')
2438 {
2439 p++;
2440 while(isdigit(*p))
2441 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2442 }
2443 }
2444 else if (*p == '*')
2445 {
2446 int sign = 1;
2447 callout_data = 0;
2448 if (*(++p) == '-') { sign = -1; p++; }
2449 while(isdigit(*p))
2450 callout_data = callout_data * 10 + *p++ - '0';
2451 callout_data *= sign;
2452 callout_data_set = 1;
2453 }
2454 continue;
2455
2456 #if !defined NODFA
2457 case 'D':
2458 #if !defined NOPOSIX
2459 if (posix || do_posix)
2460 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2461 else
2462 #endif
2463 use_dfa = 1;
2464 continue;
2465 #endif
2466
2467 #if !defined NODFA
2468 case 'F':
2469 options |= PCRE_DFA_SHORTEST;
2470 continue;
2471 #endif
2472
2473 case 'G':
2474 if (isdigit(*p))
2475 {
2476 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2477 getstrings |= 1 << n;
2478 }
2479 else if (isalnum(*p))
2480 {
2481 uschar *npp = getnamesptr;
2482 while (isalnum(*p)) *npp++ = *p++;
2483 *npp++ = 0;
2484 *npp = 0;
2485 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2486 if (n < 0)
2487 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2488 getnamesptr = npp;
2489 }
2490 continue;
2491
2492 case 'J':
2493 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2494 if (extra != NULL
2495 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2496 && extra->executable_jit != NULL)
2497 {
2498 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2499 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2500 pcre_assign_jit_callback(extra, jit_callback, jit_stack);
2501 }
2502 continue;
2503
2504 case 'L':
2505 getlist = 1;
2506 continue;
2507
2508 case 'M':
2509 find_match_limit = 1;
2510 continue;
2511
2512 case 'N':
2513 if ((options & PCRE_NOTEMPTY) != 0)
2514 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2515 else
2516 options |= PCRE_NOTEMPTY;
2517 continue;
2518
2519 case 'O':
2520 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2521 if (n > size_offsets_max)
2522 {
2523 size_offsets_max = n;
2524 free(offsets);
2525 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2526 if (offsets == NULL)
2527 {
2528 printf("** Failed to get %d bytes of memory for offsets vector\n",
2529 (int)(size_offsets_max * sizeof(int)));
2530 yield = 1;
2531 goto EXIT;
2532 }
2533 }
2534 use_size_offsets = n;
2535 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2536 continue;
2537
2538 case 'P':
2539 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2540 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2541 continue;
2542
2543 case 'Q':
2544 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2545 if (extra == NULL)
2546 {
2547 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2548 extra->flags = 0;
2549 }
2550 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2551 extra->match_limit_recursion = n;
2552 continue;
2553
2554 case 'q':
2555 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2556 if (extra == NULL)
2557 {
2558 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2559 extra->flags = 0;
2560 }
2561 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2562 extra->match_limit = n;
2563 continue;
2564
2565 #if !defined NODFA
2566 case 'R':
2567 options |= PCRE_DFA_RESTART;
2568 continue;
2569 #endif
2570
2571 case 'S':
2572 show_malloc = 1;
2573 continue;
2574
2575 case 'Y':
2576 options |= PCRE_NO_START_OPTIMIZE;
2577 continue;
2578
2579 case 'Z':
2580 options |= PCRE_NOTEOL;
2581 continue;
2582
2583 case '?':
2584 options |= PCRE_NO_UTF8_CHECK;
2585 continue;
2586
2587 case '<':
2588 {
2589 int x = check_newline(p, outfile);
2590 if (x == 0) goto NEXT_DATA;
2591 options |= x;
2592 while (*p++ != '>');
2593 }
2594 continue;
2595 }
2596 *q++ = c;
2597 }
2598 *q = 0;
2599 len = (int)(q - dbuffer);
2600
2601 /* Move the data to the end of the buffer so that a read over the end of
2602 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2603 we are using the POSIX interface, we must include the terminating zero. */
2604
2605 #if !defined NOPOSIX
2606 if (posix || do_posix)
2607 {
2608 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2609 bptr += buffer_size - len - 1;
2610 }
2611 else
2612 #endif
2613 {
2614 memmove(bptr + buffer_size - len, bptr, len);
2615 bptr += buffer_size - len;
2616 }
2617
2618 if ((all_use_dfa || use_dfa) && find_match_limit)
2619 {
2620 printf("**Match limit not relevant for DFA matching: ignored\n");
2621 find_match_limit = 0;
2622 }
2623
2624 /* Handle matching via the POSIX interface, which does not
2625 support timing or playing with the match limit or callout data. */
2626
2627 #if !defined NOPOSIX
2628 if (posix || do_posix)
2629 {
2630 int rc;
2631 int eflags = 0;
2632 regmatch_t *pmatch = NULL;
2633 if (use_size_offsets > 0)
2634 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2635 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2636 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2637 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2638
2639 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2640
2641 if (rc != 0)
2642 {
2643 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2644 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2645 }
2646 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2647 != 0)
2648 {
2649 fprintf(outfile, "Matched with REG_NOSUB\n");
2650 }
2651 else
2652 {
2653 size_t i;
2654 for (i = 0; i < (size_t)use_size_offsets; i++)
2655 {
2656 if (pmatch[i].rm_so >= 0)
2657 {
2658 fprintf(outfile, "%2d: ", (int)i);
2659 (void)pchars(dbuffer + pmatch[i].rm_so,
2660 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2661 fprintf(outfile, "\n");
2662 if (do_showcaprest || (i == 0 && do_showrest))
2663 {
2664 fprintf(outfile, "%2d+ ", (int)i);
2665 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2666 outfile);
2667 fprintf(outfile, "\n");
2668 }
2669 }
2670 }
2671 }
2672 free(pmatch);
2673 }
2674
2675 /* Handle matching via the native interface - repeats for /g and /G */
2676
2677 else
2678 #endif /* !defined NOPOSIX */
2679
2680 for (;; gmatched++) /* Loop for /g or /G */
2681 {
2682 markptr = NULL;
2683
2684 if (timeitm > 0)
2685 {
2686 register int i;
2687 clock_t time_taken;
2688 clock_t start_time = clock();
2689
2690 #if !defined NODFA
2691 if (all_use_dfa || use_dfa)
2692 {
2693 int workspace[1000];
2694 for (i = 0; i < timeitm; i++)
2695 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2696 options | g_notempty, use_offsets, use_size_offsets, workspace,
2697 sizeof(workspace)/sizeof(int));
2698 }
2699 else
2700 #endif
2701
2702 for (i = 0; i < timeitm; i++)
2703 count = pcre_exec(re, extra, (char *)bptr, len,
2704 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2705
2706 time_taken = clock() - start_time;
2707 fprintf(outfile, "Execute time %.4f milliseconds\n",
2708 (((double)time_taken * 1000.0) / (double)timeitm) /
2709 (double)CLOCKS_PER_SEC);
2710 }
2711
2712 /* If find_match_limit is set, we want to do repeated matches with
2713 varying limits in order to find the minimum value for the match limit and
2714 for the recursion limit. The match limits are relevant only to the normal
2715 running of pcre_exec(), so disable the JIT optimization. This makes it
2716 possible to run the same set of tests with and without JIT externally
2717 requested. */
2718
2719 if (find_match_limit)
2720 {
2721 if (extra == NULL)
2722 {
2723 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2724 extra->flags = 0;
2725 }
2726 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2727
2728 (void)check_match_limit(re, extra, bptr, len, start_offset,
2729 options|g_notempty, use_offsets, use_size_offsets,
2730 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2731 PCRE_ERROR_MATCHLIMIT, "match()");
2732
2733 count = check_match_limit(re, extra, bptr, len, start_offset,
2734 options|g_notempty, use_offsets, use_size_offsets,
2735 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2736 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2737 }
2738
2739 /* If callout_data is set, use the interface with additional data */
2740
2741 else if (callout_data_set)
2742 {
2743 if (extra == NULL)
2744 {
2745 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2746 extra->flags = 0;
2747 }
2748 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2749 extra->callout_data = &callout_data;
2750 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2751 options | g_notempty, use_offsets, use_size_offsets);
2752 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2753 }
2754
2755 /* The normal case is just to do the match once, with the default
2756 value of match_limit. */
2757
2758 #if !defined NODFA
2759 else if (all_use_dfa || use_dfa)
2760 {
2761 int workspace[1000];
2762 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2763 options | g_notempty, use_offsets, use_size_offsets, workspace,
2764 sizeof(workspace)/sizeof(int));
2765 if (count == 0)
2766 {
2767 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2768 count = use_size_offsets/2;
2769 }
2770 }
2771 #endif
2772
2773 else
2774 {
2775 count = pcre_exec(re, extra, (char *)bptr, len,
2776 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2777 if (count == 0)
2778 {
2779 fprintf(outfile, "Matched, but too many substrings\n");
2780 count = use_size_offsets/3;
2781 }
2782 }
2783
2784 /* Matched */
2785
2786 if (count >= 0)
2787 {
2788 int i, maxcount;
2789
2790 #if !defined NODFA
2791 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2792 #endif
2793 maxcount = use_size_offsets/3;
2794
2795 /* This is a check against a lunatic return value. */
2796
2797 if (count > maxcount)
2798 {
2799 fprintf(outfile,
2800 "** PCRE error: returned count %d is too big for offset size %d\n",
2801 count, use_size_offsets);
2802 count = use_size_offsets/3;
2803 if (do_g || do_G)
2804 {
2805 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2806 do_g = do_G = FALSE; /* Break g/G loop */
2807 }
2808 }
2809
2810 /* do_allcaps requests showing of all captures in the pattern, to check
2811 unset ones at the end. */
2812
2813 if (do_allcaps)
2814 {
2815 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2816 count++; /* Allow for full match */
2817 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2818 }
2819
2820 /* Output the captured substrings */
2821
2822 for (i = 0; i < count * 2; i += 2)
2823 {
2824 if (use_offsets[i] < 0)
2825 {
2826 if (use_offsets[i] != -1)
2827 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2828 use_offsets[i], i);
2829 if (use_offsets[i+1] != -1)
2830 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2831 use_offsets[i+1], i+1);
2832 fprintf(outfile, "%2d: <unset>\n", i/2);
2833 }
2834 else
2835 {
2836 fprintf(outfile, "%2d: ", i/2);
2837 (void)pchars(bptr + use_offsets[i],
2838 use_offsets[i+1] - use_offsets[i], outfile);
2839 fprintf(outfile, "\n");
2840 if (do_showcaprest || (i == 0 && do_showrest))
2841 {
2842 fprintf(outfile, "%2d+ ", i/2);
2843 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2844 outfile);
2845 fprintf(outfile, "\n");
2846 }
2847 }
2848 }
2849
2850 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2851
2852 for (i = 0; i < 32; i++)
2853 {
2854 if ((copystrings & (1 << i)) != 0)
2855 {
2856 char copybuffer[256];
2857 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2858 i, copybuffer, sizeof(copybuffer));
2859 if (rc < 0)
2860 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2861 else
2862 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2863 }
2864 }
2865
2866 for (copynamesptr = copynames;
2867 *copynamesptr != 0;
2868 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2869 {
2870 char copybuffer[256];
2871 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2872 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2873 if (rc < 0)
2874 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2875 else
2876 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2877 }
2878
2879 for (i = 0; i < 32; i++)
2880 {
2881 if ((getstrings & (1 << i)) != 0)
2882 {
2883 const char *substring;
2884 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2885 i, &substring);
2886 if (rc < 0)
2887 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2888 else
2889 {
2890 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2891 pcre_free_substring(substring);
2892 }
2893 }
2894 }
2895
2896 for (getnamesptr = getnames;
2897 *getnamesptr != 0;
2898 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2899 {
2900 const char *substring;
2901 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2902 count, (char *)getnamesptr, &substring);
2903 if (rc < 0)
2904 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2905 else
2906 {
2907 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2908 pcre_free_substring(substring);
2909 }
2910 }
2911
2912 if (getlist)
2913 {
2914 const char **stringlist;
2915 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2916 &stringlist);
2917 if (rc < 0)
2918 fprintf(outfile, "get substring list failed %d\n", rc);
2919 else
2920 {
2921 for (i = 0; i < count; i++)
2922 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2923 if (stringlist[i] != NULL)
2924 fprintf(outfile, "string list not terminated by NULL\n");
2925 pcre_free_substring_list(stringlist);
2926 }
2927 }
2928 }
2929
2930 /* There was a partial match */
2931
2932 else if (count == PCRE_ERROR_PARTIAL)
2933 {
2934 if (markptr == NULL) fprintf(outfile, "Partial match");
2935 else fprintf(outfile, "Partial match, mark=%s", markptr);
2936 if (use_size_offsets > 1)
2937 {
2938 fprintf(outfile, ": ");
2939 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2940 outfile);
2941 }
2942 fprintf(outfile, "\n");
2943 break; /* Out of the /g loop */
2944 }
2945
2946 /* Failed to match. If this is a /g or /G loop and we previously set
2947 g_notempty after a null match, this is not necessarily the end. We want
2948 to advance the start offset, and continue. We won't be at the end of the
2949 string - that was checked before setting g_notempty.
2950
2951 Complication arises in the case when the newline convention is "any",
2952 "crlf", or "anycrlf". If the previous match was at the end of a line
2953 terminated by CRLF, an advance of one character just passes the \r,
2954 whereas we should prefer the longer newline sequence, as does the code in
2955 pcre_exec(). Fudge the offset value to achieve this. We check for a
2956 newline setting in the pattern; if none was set, use pcre_config() to
2957 find the default.
2958
2959 Otherwise, in the case of UTF-8 matching, the advance must be one
2960 character, not one byte. */
2961
2962 else
2963 {
2964 if (g_notempty != 0)
2965 {
2966 int onechar = 1;
2967 unsigned int obits = ((real_pcre *)re)->options;
2968 use_offsets[0] = start_offset;
2969 if ((obits & PCRE_NEWLINE_BITS) == 0)
2970 {
2971 int d;
2972 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2973 /* Note that these values are always the ASCII ones, even in
2974 EBCDIC environments. CR = 13, NL = 10. */
2975 obits = (d == 13)? PCRE_NEWLINE_CR :
2976 (d == 10)? PCRE_NEWLINE_LF :
2977 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2978 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2979 (d == -1)? PCRE_NEWLINE_ANY : 0;
2980 }
2981 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2982 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2983 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2984 &&
2985 start_offset < len - 1 &&
2986 bptr[start_offset] == '\r' &&
2987 bptr[start_offset+1] == '\n')
2988 onechar++;
2989 else if (use_utf8)
2990 {
2991 while (start_offset + onechar < len)
2992 {
2993 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2994 onechar++;
2995 }
2996 }
2997 use_offsets[1] = start_offset + onechar;
2998 }
2999 else
3000 {
3001 switch(count)
3002 {
3003 case PCRE_ERROR_NOMATCH:
3004 if (gmatched == 0)
3005 {
3006 if (markptr == NULL) fprintf(outfile, "No match\n");
3007 else fprintf(outfile, "No match, mark = %s\n", markptr);
3008 }
3009 break;
3010
3011 case PCRE_ERROR_BADUTF8:
3012 case PCRE_ERROR_SHORTUTF8:
3013 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3014 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3015 if (use_size_offsets >= 2)
3016 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3017 use_offsets[1]);
3018 fprintf(outfile, "\n");
3019 break;
3020
3021 default:
3022 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3023 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3024 else
3025 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3026 break;
3027 }
3028
3029 break; /* Out of the /g loop */
3030 }
3031 }
3032
3033 /* If not /g or /G we are done */
3034
3035 if (!do_g && !do_G) break;
3036
3037 /* If we have matched an empty string, first check to see if we are at
3038 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3039 Perl's /g option does. This turns out to be rather cunning. First we set
3040 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3041 same point. If this fails (picked up above) we advance to the next
3042 character. */
3043
3044 g_notempty = 0;
3045
3046 if (use_offsets[0] == use_offsets[1])
3047 {
3048 if (use_offsets[0] == len) break;
3049 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3050 }
3051
3052 /* For /g, update the start offset, leaving the rest alone */
3053
3054 if (do_g) start_offset = use_offsets[1];
3055
3056 /* For /G, update the pointer and length */
3057
3058 else
3059 {
3060 bptr += use_offsets[1];
3061 len -= use_offsets[1];
3062 }
3063 } /* End of loop for /g and /G */
3064
3065 NEXT_DATA: continue;
3066 } /* End of loop for data lines */
3067
3068 CONTINUE:
3069
3070 #if !defined NOPOSIX
3071 if (posix || do_posix) regfree(&preg);
3072 #endif
3073
3074 if (re != NULL) new_free(re);
3075 if (extra != NULL) pcre_free_study(extra);
3076 if (locale_set)
3077 {
3078 new_free((void *)tables);
3079 setlocale(LC_CTYPE, "C");
3080 locale_set = 0;
3081 }
3082 if (jit_stack != NULL)
3083 {
3084 pcre_jit_stack_free(jit_stack);
3085 jit_stack = NULL;
3086 }
3087 }
3088
3089 if (infile == stdin) fprintf(outfile, "\n");
3090
3091 EXIT:
3092
3093 if (infile != NULL && infile != stdin) fclose(infile);
3094 if (outfile != NULL && outfile != stdout) fclose(outfile);
3095
3096 free(buffer);
3097 free(dbuffer);
3098 free(pbuffer);
3099 free(offsets);
3100
3101 return yield;
3102 }
3103
3104 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5