/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 606 - (show annotations)
Mon Jun 6 17:46:22 2011 UTC (4 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 89513 byte(s)
Error occurred while calculating annotation data.
Tidy the API for _pcre_valid_utf8() to a more suitable form for a future public 
release. Also make -s in pcretest force a study for every regex.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile; /* stream that callout(), new_info() and the malloc wrappers print to */
181 static int log_store = 0;
182 static int callout_count; /* incremented by callout() each time it sees callout_fail_id */
183 static int callout_extra; /* non-zero makes callout() also list the captured substrings */
184 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
185 static int callout_fail_id; /* the callout number that callout() counts */
186 static int debug_lengths;
187 static int first_callout; /* while non-zero callout() re-prints the subject; cleared by callout() */
188 static int locale_set = 0; /* selects isprint() instead of PRINTABLE() inside PRINTHEX() */
189 static int show_malloc; /* non-zero makes the malloc/free wrappers trace each call */
190 static int use_utf8; /* non-zero makes pchars() decode its input as UTF-8 */
191 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
192
193 /* The buffers grow automatically if very long input lines are encountered. */
/* All three are reallocated together, at the same size, by extend_inputline();
only the contents of buffer and pbuffer are carried over when that happens. */
194
195 static int buffer_size = 50000;
196 static uschar *buffer = NULL;
197 static uschar *dbuffer = NULL;
198 static uschar *pbuffer = NULL;
199
200 /* Textual explanations for runtime error codes */
/* A NULL entry marks a code that has no generic text here, either because it
is reported in a special way or because the matching functions never return
it (see the note on each NULL line).
NOTE(review): the entries appear to be ordered so that entry n belongs to
error code -n - confirm against pcre.h and the code that indexes this table. */
201
202 static const char *errtexts[] = {
203 NULL, /* 0 is no error */
204 NULL, /* NOMATCH is handled specially */
205 "NULL argument passed",
206 "bad option value",
207 "magic number missing",
208 "unknown opcode - pattern overwritten?",
209 "no more memory",
210 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
211 "match limit exceeded",
212 "callout error code",
213 NULL, /* BADUTF8 is handled specially */
214 "bad UTF-8 offset",
215 NULL, /* PARTIAL is handled specially */
216 "not used - internal error",
217 "internal error - pattern overwritten?",
218 "bad count value",
219 "item unsupported for DFA matching",
220 "backreference condition or recursion test not supported for DFA matching",
221 "match limit not supported for DFA matching",
222 "workspace size exceeded in DFA matching",
223 "too much recursion for DFA matching",
224 "recursion limit exceeded",
225 "not used - internal error",
226 "invalid combination of newline options",
227 "bad offset value",
228 NULL /* SHORTUTF8 is handled specially */
229 };
230
231
232 /*************************************************
233 * Alternate character tables *
234 *************************************************/
235
236 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237 using the default tables of the library. However, the T option can be used to
238 select alternate sets of tables, for different kinds of testing. Note also that
239 the L (locale) option also adjusts the tables. */
240
241 /* This is the set of tables distributed as default with PCRE. It recognizes
242 only ASCII characters. The array holds four tables laid end to end: a
256-byte lower casing table, a 256-byte case flipping table, ten 32-byte
character class bit maps (320 bytes), and a 256-byte character type table. */
243
244 static const unsigned char tables0[] = {
245
246 /* This table is a lower casing table. */
247
248 0, 1, 2, 3, 4, 5, 6, 7,
249 8, 9, 10, 11, 12, 13, 14, 15,
250 16, 17, 18, 19, 20, 21, 22, 23,
251 24, 25, 26, 27, 28, 29, 30, 31,
252 32, 33, 34, 35, 36, 37, 38, 39,
253 40, 41, 42, 43, 44, 45, 46, 47,
254 48, 49, 50, 51, 52, 53, 54, 55,
255 56, 57, 58, 59, 60, 61, 62, 63,
256 64, 97, 98, 99,100,101,102,103,
257 104,105,106,107,108,109,110,111,
258 112,113,114,115,116,117,118,119,
259 120,121,122, 91, 92, 93, 94, 95,
260 96, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122,123,124,125,126,127,
264 128,129,130,131,132,133,134,135,
265 136,137,138,139,140,141,142,143,
266 144,145,146,147,148,149,150,151,
267 152,153,154,155,156,157,158,159,
268 160,161,162,163,164,165,166,167,
269 168,169,170,171,172,173,174,175,
270 176,177,178,179,180,181,182,183,
271 184,185,186,187,188,189,190,191,
272 192,193,194,195,196,197,198,199,
273 200,201,202,203,204,205,206,207,
274 208,209,210,211,212,213,214,215,
275 216,217,218,219,220,221,222,223,
276 224,225,226,227,228,229,230,231,
277 232,233,234,235,236,237,238,239,
278 240,241,242,243,244,245,246,247,
279 248,249,250,251,252,253,254,255,
280
281 /* This table is a case flipping table. */
282
283 0, 1, 2, 3, 4, 5, 6, 7,
284 8, 9, 10, 11, 12, 13, 14, 15,
285 16, 17, 18, 19, 20, 21, 22, 23,
286 24, 25, 26, 27, 28, 29, 30, 31,
287 32, 33, 34, 35, 36, 37, 38, 39,
288 40, 41, 42, 43, 44, 45, 46, 47,
289 48, 49, 50, 51, 52, 53, 54, 55,
290 56, 57, 58, 59, 60, 61, 62, 63,
291 64, 97, 98, 99,100,101,102,103,
292 104,105,106,107,108,109,110,111,
293 112,113,114,115,116,117,118,119,
294 120,121,122, 91, 92, 93, 94, 95,
295 96, 65, 66, 67, 68, 69, 70, 71,
296 72, 73, 74, 75, 76, 77, 78, 79,
297 80, 81, 82, 83, 84, 85, 86, 87,
298 88, 89, 90,123,124,125,126,127,
299 128,129,130,131,132,133,134,135,
300 136,137,138,139,140,141,142,143,
301 144,145,146,147,148,149,150,151,
302 152,153,154,155,156,157,158,159,
303 160,161,162,163,164,165,166,167,
304 168,169,170,171,172,173,174,175,
305 176,177,178,179,180,181,182,183,
306 184,185,186,187,188,189,190,191,
307 192,193,194,195,196,197,198,199,
308 200,201,202,203,204,205,206,207,
309 208,209,210,211,212,213,214,215,
310 216,217,218,219,220,221,222,223,
311 224,225,226,227,228,229,230,231,
312 232,233,234,235,236,237,238,239,
313 240,241,242,243,244,245,246,247,
314 248,249,250,251,252,253,254,255,
315
316 /* This table contains bit maps for various character classes. Each map is 32
317 bytes long and the bits run from the least significant end of each byte. The
318 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319 graph, print, punct, and cntrl. Other classes are built from combinations. */
320
/* space: characters 9-13 and 32 */
321 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325
/* xdigit: 0-9, A-F, a-f */
326 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
327 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330
/* digit: 0-9 */
331 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335
/* upper: A-Z */
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340
/* lower: a-z */
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345
/* word: 0-9, A-Z, '_' and a-z */
346 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
347 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350
/* graph: characters 33-126 */
351 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
352 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355
/* print: characters 32-126 */
356 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
357 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360
/* punct */
361 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
362 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365
/* cntrl: characters 0-31 and 127 */
366 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370
371 /* This table identifies various classes of character by individual bits:
372 0x01 white space character
373 0x02 letter
374 0x04 decimal digit
375 0x08 hexadecimal digit
376 0x10 alphanumeric or '_'
377 0x80 regular expression metacharacter or binary zero
378 */
379
380 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
385 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412
413 /* This is a set of tables that came originally from a Windows user. It seems to
414 be at least an approximation of ISO 8859. In particular, there are characters
415 greater than 128 that are marked as spaces, letters, etc. The array has the
same four-part layout as tables0: 256 bytes of lower casing, 256 bytes of case
flipping, ten 32-byte class bit maps, and 256 bytes of character types. */
416
417 static const unsigned char tables1[] = {
/* Lower casing table: 192-222 (except 215) also map to 224-254. */
418 0,1,2,3,4,5,6,7,
419 8,9,10,11,12,13,14,15,
420 16,17,18,19,20,21,22,23,
421 24,25,26,27,28,29,30,31,
422 32,33,34,35,36,37,38,39,
423 40,41,42,43,44,45,46,47,
424 48,49,50,51,52,53,54,55,
425 56,57,58,59,60,61,62,63,
426 64,97,98,99,100,101,102,103,
427 104,105,106,107,108,109,110,111,
428 112,113,114,115,116,117,118,119,
429 120,121,122,91,92,93,94,95,
430 96,97,98,99,100,101,102,103,
431 104,105,106,107,108,109,110,111,
432 112,113,114,115,116,117,118,119,
433 120,121,122,123,124,125,126,127,
434 128,129,130,131,132,133,134,135,
435 136,137,138,139,140,141,142,143,
436 144,145,146,147,148,149,150,151,
437 152,153,154,155,156,157,158,159,
438 160,161,162,163,164,165,166,167,
439 168,169,170,171,172,173,174,175,
440 176,177,178,179,180,181,182,183,
441 184,185,186,187,188,189,190,191,
442 224,225,226,227,228,229,230,231,
443 232,233,234,235,236,237,238,239,
444 240,241,242,243,244,245,246,215,
445 248,249,250,251,252,253,254,223,
446 224,225,226,227,228,229,230,231,
447 232,233,234,235,236,237,238,239,
448 240,241,242,243,244,245,246,247,
449 248,249,250,251,252,253,254,255,
/* Case flipping table. */
450 0,1,2,3,4,5,6,7,
451 8,9,10,11,12,13,14,15,
452 16,17,18,19,20,21,22,23,
453 24,25,26,27,28,29,30,31,
454 32,33,34,35,36,37,38,39,
455 40,41,42,43,44,45,46,47,
456 48,49,50,51,52,53,54,55,
457 56,57,58,59,60,61,62,63,
458 64,97,98,99,100,101,102,103,
459 104,105,106,107,108,109,110,111,
460 112,113,114,115,116,117,118,119,
461 120,121,122,91,92,93,94,95,
462 96,65,66,67,68,69,70,71,
463 72,73,74,75,76,77,78,79,
464 80,81,82,83,84,85,86,87,
465 88,89,90,123,124,125,126,127,
466 128,129,130,131,132,133,134,135,
467 136,137,138,139,140,141,142,143,
468 144,145,146,147,148,149,150,151,
469 152,153,154,155,156,157,158,159,
470 160,161,162,163,164,165,166,167,
471 168,169,170,171,172,173,174,175,
472 176,177,178,179,180,181,182,183,
473 184,185,186,187,188,189,190,191,
474 224,225,226,227,228,229,230,231,
475 232,233,234,235,236,237,238,239,
476 240,241,242,243,244,245,246,215,
477 248,249,250,251,252,253,254,223,
478 192,193,194,195,196,197,198,199,
479 200,201,202,203,204,205,206,207,
480 208,209,210,211,212,213,214,247,
481 216,217,218,219,220,221,222,255,
/* Character class bit maps: ten maps of 32 bytes (four rows) each.
NOTE(review): assumed to be in the same order as in tables0 (space, xdigit,
digit, upper, lower, word, graph, print, punct, cntrl); the rows are consistent
with that order but nothing in this table states it. */
482 0,62,0,0,1,0,0,0,
483 0,0,0,0,0,0,0,0,
484 32,0,0,0,1,0,0,0,
485 0,0,0,0,0,0,0,0,
486 0,0,0,0,0,0,255,3,
487 126,0,0,0,126,0,0,0,
488 0,0,0,0,0,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,255,3,
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,12,2,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0,
495 254,255,255,7,0,0,0,0,
496 0,0,0,0,0,0,0,0,
497 255,255,127,127,0,0,0,0,
498 0,0,0,0,0,0,0,0,
499 0,0,0,0,254,255,255,7,
500 0,0,0,0,0,4,32,4,
501 0,0,0,128,255,255,127,255,
502 0,0,0,0,0,0,255,3,
503 254,255,255,135,254,255,255,7,
504 0,0,0,0,0,4,44,6,
505 255,255,127,255,255,255,127,255,
506 0,0,0,0,254,255,255,255,
507 255,255,255,255,255,255,255,127,
508 0,0,0,0,254,255,255,255,
509 255,255,255,255,255,255,255,255,
510 0,2,0,0,255,255,255,255,
511 255,255,255,255,255,255,255,127,
512 0,0,0,0,255,255,255,255,
513 255,255,255,255,255,255,255,255,
514 0,0,0,0,254,255,0,252,
515 1,0,0,248,1,0,0,120,
516 0,0,0,0,254,255,255,255,
517 0,0,128,0,0,0,128,0,
518 255,255,255,255,0,0,0,0,
519 0,0,0,0,0,0,0,128,
520 255,255,255,255,0,0,0,0,
521 0,0,0,0,0,0,0,0,
/* Character type table: one flag byte per character, eight characters per
row. NOTE(review): the flag bits are presumably those listed for the
corresponding table in tables0 - confirm. */
522 128,0,0,0,0,0,0,0,
523 0,1,1,0,1,1,0,0,
524 0,0,0,0,0,0,0,0,
525 0,0,0,0,0,0,0,0,
526 1,0,0,0,128,0,0,0,
527 128,128,128,128,0,0,128,0,
528 28,28,28,28,28,28,28,28,
529 28,28,0,0,0,0,0,128,
530 0,26,26,26,26,26,26,18,
531 18,18,18,18,18,18,18,18,
532 18,18,18,18,18,18,18,18,
533 18,18,18,128,128,0,128,16,
534 0,26,26,26,26,26,26,18,
535 18,18,18,18,18,18,18,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,128,128,0,0,0,
538 0,0,0,0,0,1,0,0,
539 0,0,0,0,0,0,0,0,
540 0,0,0,0,0,0,0,0,
541 0,0,0,0,0,0,0,0,
542 1,0,0,0,0,0,0,0,
543 0,0,18,0,0,0,0,0,
544 0,0,20,20,0,18,0,0,
545 0,20,18,0,0,0,0,0,
546 18,18,18,18,18,18,18,18,
547 18,18,18,18,18,18,18,18,
548 18,18,18,18,18,18,18,0,
549 18,18,18,18,18,18,18,18,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,0,
553 18,18,18,18,18,18,18,18
554 };
555
556
557
558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library lacks
strerror(). The text is looked up in the traditional sys_errlist[] table,
which holds sys_nerr entries.

Argument:   n    an errno value
Returns:    the matching message from sys_errlist[], or a fixed
            "unknown error number" string when n is outside the table
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr)
  {
  return sys_errlist[n];
  }
return "unknown error number";
}
#endif  /* HAVE_STRERROR */
578
579
580
581
582 /*************************************************
583 * Read or extend an input line *
584 *************************************************/
585
586 /* Input lines are read into buffer, but both patterns and data lines can be
587 continued over multiple input lines. In addition, if the buffer fills up, we
588 want to automatically expand it so as to be able to handle extremely large
589 lines that are needed for certain stress tests. When the input buffer is
590 expanded, the other two buffers must also be expanded likewise, and the
591 contents of pbuffer, which are a copy of the input for callouts, must be
592 preserved (for when expansion happens for a data line). This is not the most
593 optimal way of handling this, but hey, this is just a test program!
594
595 Arguments:
596 f the file to read
597 start where in buffer to start (this *must* be within buffer)
598 prompt for stdin or readline()
599
600 Returns: pointer to the start of new data
601 could be a copy of start, or could be moved
602 NULL if no data read and EOF reached
603 */
604
605 static uschar *
606 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 {
608 uschar *here = start;
609
610 for (;;)
611 {
612 int rlen = (int)(buffer_size - (here - buffer));
613
614 if (rlen > 1000)
615 {
616 int dlen;
617
618 /* If libreadline support is required, use readline() to read a line if the
619 input is a terminal. Note that readline() removes the trailing newline, so
620 we must put it back again, to be compatible with fgets(). */
621
622 #ifdef SUPPORT_LIBREADLINE
623 if (isatty(fileno(f)))
624 {
625 size_t len;
626 char *s = readline(prompt);
627 if (s == NULL) return (here == start)? NULL : start;
628 len = strlen(s);
629 if (len > 0) add_history(s);
630 if (len > rlen - 1) len = rlen - 1;
631 memcpy(here, s, len);
632 here[len] = '\n';
633 here[len+1] = 0;
634 free(s);
635 }
636 else
637 #endif
638
639 /* Read the next line by normal means, prompting if the file is stdin. */
640
641 {
642 if (f == stdin) printf("%s", prompt);
643 if (fgets((char *)here, rlen, f) == NULL)
644 return (here == start)? NULL : start;
645 }
646
647 dlen = (int)strlen((char *)here);
648 if (dlen > 0 && here[dlen - 1] == '\n') return start;
649 here += dlen;
650 }
651
652 else
653 {
654 int new_buffer_size = 2*buffer_size;
655 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658
659 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660 {
661 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662 exit(1);
663 }
664
665 memcpy(new_buffer, buffer, buffer_size);
666 memcpy(new_pbuffer, pbuffer, buffer_size);
667
668 buffer_size = new_buffer_size;
669
670 start = new_buffer + (start - buffer);
671 here = new_buffer + (here - buffer);
672
673 free(buffer);
674 free(dbuffer);
675 free(pbuffer);
676
677 buffer = new_buffer;
678 dbuffer = new_dbuffer;
679 pbuffer = new_pbuffer;
680 }
681 }
682
683 return NULL; /* Control never gets here */
684 }
685
686
687
688
689
690
691
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is converted.
If there are no digits the result is zero and the end pointer is left at the
first non-space character. A number that is too large for an int yields
INT_MAX instead of overflowing; all of its digits are still consumed.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = *str++ - '0';

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10;
  checking first avoids ever evaluating an overflowing signed expression. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
716
717
718
719
720 /*************************************************
721 * Convert UTF-8 string to value *
722 *************************************************/
723
724 /* This function takes one or more bytes that represents a UTF-8 character,
725 and returns the value of the character.
726
727 Argument:
728 utf8bytes a pointer to the byte vector
729 vptr a pointer to an int to receive the value
730
731 Returns: > 0 => the number of bytes consumed
732 -6 to 0 => malformed UTF-8 character at offset = (-return)
733 */
734
735 #if !defined NOUTF8
736
737 static int
738 utf82ord(unsigned char *utf8bytes, int *vptr)
739 {
740 int c = *utf8bytes++;
741 int d = c;
742 int i, j, s;
743
744 for (i = -1; i < 6; i++) /* i is number of additional bytes */
745 {
746 if ((d & 0x80) == 0) break;
747 d <<= 1;
748 }
749
750 if (i == -1) { *vptr = c; return 1; } /* ascii character */
751 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752
753 /* i now has a value in the range 1-5 */
754
755 s = 6*i;
756 d = (c & utf8_table3[i]) << s;
757
758 for (j = 0; j < i; j++)
759 {
760 c = *utf8bytes++;
761 if ((c & 0xc0) != 0x80) return -(j+1);
762 s -= 6;
763 d |= (c & 0x3f) << s;
764 }
765
766 /* Check that encoding was the correct unique one */
767
768 for (j = 0; j < utf8_table1_size; j++)
769 if (d <= utf8_table1[j]) break;
770 if (j != i) return -(i+1);
771
772 /* Valid value */
773
774 *vptr = d;
775 return i+1;
776 }
777
778 #endif
779
780
781
782 /*************************************************
783 * Convert character value to UTF-8 *
784 *************************************************/
785
786 /* This function takes an integer value in the range 0 - 0x7fffffff
787 and encodes it as a UTF-8 character in 0 to 6 bytes.
788
789 Arguments:
790 cvalue the character value
791 utf8bytes pointer to buffer for result - at least 6 bytes long
792
793 Returns: number of characters placed in the buffer
794 */
795
796 #if !defined NOUTF8
797
798 static int
799 ord2utf8(int cvalue, uschar *utf8bytes)
800 {
801 register int i, j;
802 for (i = 0; i < utf8_table1_size; i++)
803 if (cvalue <= utf8_table1[i]) break;
804 utf8bytes += i;
805 for (j = i; j > 0; j--)
806 {
807 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 cvalue >>= 6;
809 }
810 *utf8bytes = utf8_table2[i] | cvalue;
811 return i + 1;
812 }
813
814 #endif
815
816
817
818 /*************************************************
819 * Print character string *
820 *************************************************/
821
822 /* Character string printing function. Must handle UTF-8 strings in utf8
823 mode. Yields number of characters printed. If handed a NULL file, just counts
824 chars without printing. */
825
826 static int pchars(unsigned char *p, int length, FILE *f)
827 {
828 int c = 0;
829 int yield = 0;
830
831 while (length-- > 0)
832 {
833 #if !defined NOUTF8
834 if (use_utf8)
835 {
836 int rc = utf82ord(p, &c);
837
838 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839 {
840 length -= rc - 1;
841 p += rc;
842 if (PRINTHEX(c))
843 {
844 if (f != NULL) fprintf(f, "%c", c);
845 yield++;
846 }
847 else
848 {
849 int n = 4;
850 if (f != NULL) fprintf(f, "\\x{%02x}", c);
851 yield += (n <= 0x000000ff)? 2 :
852 (n <= 0x00000fff)? 3 :
853 (n <= 0x0000ffff)? 4 :
854 (n <= 0x000fffff)? 5 : 6;
855 }
856 continue;
857 }
858 }
859 #endif
860
861 /* Not UTF-8, or malformed UTF-8 */
862
863 c = *p++;
864 if (PRINTHEX(c))
865 {
866 if (f != NULL) fprintf(f, "%c", c);
867 yield++;
868 }
869 else
870 {
871 if (f != NULL) fprintf(f, "\\x%02x", c);
872 yield += 4;
873 }
874 }
875
876 return yield;
877 }
878
879
880
881 /*************************************************
882 * Callout function *
883 *************************************************/
884
885 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886 the match. Yield zero unless more callouts than the fail count, or the callout
887 data is not zero. */
888
889 static int callout(pcre_callout_block *cb)
890 {
891 FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 int i, pre_start, post_start, subject_length;
893
894 if (callout_extra)
895 {
896 fprintf(f, "Callout %d: last capture = %d\n",
897 cb->callout_number, cb->capture_last);
898
899 for (i = 0; i < cb->capture_top * 2; i += 2)
900 {
901 if (cb->offset_vector[i] < 0)
902 fprintf(f, "%2d: <unset>\n", i/2);
903 else
904 {
905 fprintf(f, "%2d: ", i/2);
906 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907 cb->offset_vector[i+1] - cb->offset_vector[i], f);
908 fprintf(f, "\n");
909 }
910 }
911 }
912
913 /* Re-print the subject in canonical form, the first time or if giving full
914 datails. On subsequent calls in the same match, we use pchars just to find the
915 printed lengths of the substrings. */
916
917 if (f != NULL) fprintf(f, "--->");
918
919 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921 cb->current_position - cb->start_match, f);
922
923 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924
925 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926 cb->subject_length - cb->current_position, f);
927
928 if (f != NULL) fprintf(f, "\n");
929
930 /* Always print appropriate indicators, with callout number if not already
931 shown. For automatic callouts, show the pattern offset. */
932
933 if (cb->callout_number == 255)
934 {
935 fprintf(outfile, "%+3d ", cb->pattern_position);
936 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937 }
938 else
939 {
940 if (callout_extra) fprintf(outfile, " ");
941 else fprintf(outfile, "%3d ", cb->callout_number);
942 }
943
944 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945 fprintf(outfile, "^");
946
947 if (post_start > 0)
948 {
949 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950 fprintf(outfile, "^");
951 }
952
953 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954 fprintf(outfile, " ");
955
956 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957 pbuffer + cb->pattern_position);
958
959 fprintf(outfile, "\n");
960 first_callout = 0;
961
962 if (cb->callout_data != NULL)
963 {
964 int callout_data = *((int *)(cb->callout_data));
965 if (callout_data != 0)
966 {
967 fprintf(outfile, "Callout data = %d\n", callout_data);
968 return callout_data;
969 }
970 }
971
972 return (cb->callout_number != callout_fail_id)? 0 :
973 (++callout_count >= callout_fail_count)? 1 : 0;
974 }
975
976
977 /*************************************************
978 * Local malloc functions *
979 *************************************************/
980
981 /* Alternative malloc function, to test functionality and show the size of the
982 compiled re. */
983
984 static void *new_malloc(size_t size)
985 {
986 void *block = malloc(size);
987 gotten_store = size;
988 if (show_malloc)
989 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 return block;
991 }
992
993 static void new_free(void *block)
994 {
995 if (show_malloc)
996 fprintf(outfile, "free %p\n", block);
997 free(block);
998 }
999
1000
1001 /* For recursion malloc/free, to test stacking calls */
1002
1003 static void *stack_malloc(size_t size)
1004 {
1005 void *block = malloc(size);
1006 if (show_malloc)
1007 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 return block;
1009 }
1010
1011 static void stack_free(void *block)
1012 {
1013 if (show_malloc)
1014 fprintf(outfile, "stack_free %p\n", block);
1015 free(block);
1016 }
1017
1018
1019 /*************************************************
1020 * Call pcre_fullinfo() *
1021 *************************************************/
1022
1023 /* Get one piece of information from the pcre_fullinfo() function */
1024
1025 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026 {
1027 int rc;
1028 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030 }
1031
1032
1033
1034 /*************************************************
1035 * Byte flipping function *
1036 *************************************************/
1037
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value     the quantity to flip
  n         2 to swap the two low-order bytes; any other value reverses the
            four low-order bytes

Returns:    the flipped value; bits above those taking part are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xffUL;
unsigned long int b1 = (value >> 8) & 0xffUL;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xffUL;
b3 = (value >> 24) & 0xffUL;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1047
1048
1049
1050
1051 /*************************************************
1052 * Check match or recursion limit *
1053 *************************************************/
1054
1055 static int
1056 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057 int start_offset, int options, int *use_offsets, int use_size_offsets,
1058 int flag, unsigned long int *limit, int errnumber, const char *msg)
1059 {
1060 int count;
1061 int min = 0;
1062 int mid = 64;
1063 int max = -1;
1064
1065 extra->flags |= flag;
1066
1067 for (;;)
1068 {
1069 *limit = mid;
1070
1071 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072 use_offsets, use_size_offsets);
1073
1074 if (count == errnumber)
1075 {
1076 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077 min = mid;
1078 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079 }
1080
1081 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082 count == PCRE_ERROR_PARTIAL)
1083 {
1084 if (mid == min + 1)
1085 {
1086 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087 break;
1088 }
1089 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090 max = mid;
1091 mid = (min + mid)/2;
1092 }
1093 else break; /* Some other error */
1094 }
1095
1096 extra->flags &= ~flag;
1097 return count;
1098 }
1099
1100
1101
1102 /*************************************************
1103 * Case-independent strncmp() function *
1104 *************************************************/
1105
1106 /*
1107 Arguments:
1108 s first string
1109 t second string
1110 n number of characters to compare
1111
1112 Returns: < 0, = 0, or > 0, according to the comparison
1113 */
1114
1115 static int
1116 strncmpic(uschar *s, uschar *t, int n)
1117 {
1118 while (n--)
1119 {
1120 int c = tolower(*s++) - tolower(*t++);
1121 if (c) return c;
1122 }
1123 return 0;
1124 }
1125
1126
1127
1128 /*************************************************
1129 * Check newline indicator *
1130 *************************************************/
1131
1132 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133 a message and return 0 if there is no match.
1134
1135 Arguments:
1136 p points after the leading '<'
1137 f file for error message
1138
1139 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140 */
1141
1142 static int
1143 check_newline(uschar *p, FILE *f)
1144 {
1145 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 fprintf(f, "Unknown newline type at: <%s\n", p);
1153 return 0;
1154 }
1155
1156
1157
1158 /*************************************************
1159 * Usage function *
1160 *************************************************/
1161
/* Write the command line summary to the standard output. Three parts of the
text depend on how pcretest was built: the remark about readline() (controlled
by SUPPORT_LIBREADLINE), the -dfa option (omitted when NODFA is defined), and
the -p option (omitted when NOPOSIX is defined). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195
1196
1197
1198 /*************************************************
1199 * Main Program *
1200 *************************************************/
1201
1202 /* Read lines from named file or stdin and write to named file or stdout; lines
1203 consist of a regular expression, in delimiters and optionally followed by
1204 options, followed by a set of test data, terminated by an empty line. */
1205
1206 int main(int argc, char **argv)
1207 {
1208 FILE *infile = stdin;
1209 int options = 0;
1210 int study_options = 0;
1211 int default_find_match_limit = FALSE;
1212 int op = 1;
1213 int timeit = 0;
1214 int timeitm = 0;
1215 int showinfo = 0;
1216 int showstore = 0;
1217 int force_study = 0;
1218 int quiet = 0;
1219 int size_offsets = 45;
1220 int size_offsets_max;
1221 int *offsets = NULL;
1222 #if !defined NOPOSIX
1223 int posix = 0;
1224 #endif
1225 int debug = 0;
1226 int done = 0;
1227 int all_use_dfa = 0;
1228 int yield = 0;
1229 int stack_size;
1230
1231 /* These vectors store, end-to-end, a list of captured substring names. Assume
1232 that 1024 is plenty long enough for the few names we'll be testing. */
1233
1234 uschar copynames[1024];
1235 uschar getnames[1024];
1236
1237 uschar *copynamesptr;
1238 uschar *getnamesptr;
1239
1240 /* Get buffers from malloc() so that Electric Fence will check their misuse
1241 when I am debugging. They grow automatically when very long lines are read. */
1242
1243 buffer = (unsigned char *)malloc(buffer_size);
1244 dbuffer = (unsigned char *)malloc(buffer_size);
1245 pbuffer = (unsigned char *)malloc(buffer_size);
1246
1247 /* The outfile variable is static so that new_malloc can use it. */
1248
1249 outfile = stdout;
1250
1251 /* The following _setmode() stuff is some Windows magic that tells its runtime
1252 library to translate CRLF into a single LF character. At least, that's what
1253 I've been told: never having used Windows I take this all on trust. Originally
1254 it set 0x8000, but then I was advised that _O_BINARY was better. */
1255
1256 #if defined(_WIN32) || defined(WIN32)
1257 _setmode( _fileno( stdout ), _O_BINARY );
1258 #endif
1259
1260 /* Scan options */
1261
1262 while (argc > 1 && argv[op][0] == '-')
1263 {
1264 unsigned char *endptr;
1265
1266 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267 else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1271 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1272 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1273 #if !defined NODFA
1274 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1275 #endif
1276 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1277 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1278 *endptr == 0))
1279 {
1280 op++;
1281 argc--;
1282 }
1283 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1284 {
1285 int both = argv[op][2] == 0;
1286 int temp;
1287 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1288 *endptr == 0))
1289 {
1290 timeitm = temp;
1291 op++;
1292 argc--;
1293 }
1294 else timeitm = LOOPREPEAT;
1295 if (both) timeit = timeitm;
1296 }
1297 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1298 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1299 *endptr == 0))
1300 {
1301 #if defined(_WIN32) || defined(WIN32)
1302 printf("PCRE: -S not supported on this OS\n");
1303 exit(1);
1304 #else
1305 int rc;
1306 struct rlimit rlim;
1307 getrlimit(RLIMIT_STACK, &rlim);
1308 rlim.rlim_cur = stack_size * 1024 * 1024;
1309 rc = setrlimit(RLIMIT_STACK, &rlim);
1310 if (rc != 0)
1311 {
1312 printf("PCRE: setrlimit() failed with error %d\n", rc);
1313 exit(1);
1314 }
1315 op++;
1316 argc--;
1317 #endif
1318 }
1319 #if !defined NOPOSIX
1320 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1321 #endif
1322 else if (strcmp(argv[op], "-C") == 0)
1323 {
1324 int rc;
1325 unsigned long int lrc;
1326 printf("PCRE version %s\n", pcre_version());
1327 printf("Compiled with\n");
1328 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1329 printf(" %sUTF-8 support\n", rc? "" : "No ");
1330 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1331 printf(" %sUnicode properties support\n", rc? "" : "No ");
1332 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1333 /* Note that these values are always the ASCII values, even
1334 in EBCDIC environments. CR is 13 and NL is 10. */
1335 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1336 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1337 (rc == -2)? "ANYCRLF" :
1338 (rc == -1)? "ANY" : "???");
1339 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1340 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1341 "all Unicode newlines");
1342 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1343 printf(" Internal link size = %d\n", rc);
1344 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1345 printf(" POSIX malloc threshold = %d\n", rc);
1346 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1347 printf(" Default match limit = %ld\n", lrc);
1348 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1349 printf(" Default recursion depth limit = %ld\n", lrc);
1350 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1351 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1352 goto EXIT;
1353 }
1354 else if (strcmp(argv[op], "-help") == 0 ||
1355 strcmp(argv[op], "--help") == 0)
1356 {
1357 usage();
1358 goto EXIT;
1359 }
1360 else
1361 {
1362 printf("** Unknown or malformed option %s\n", argv[op]);
1363 usage();
1364 yield = 1;
1365 goto EXIT;
1366 }
1367 op++;
1368 argc--;
1369 }
1370
1371 /* Get the store for the offsets vector, and remember what it was */
1372
1373 size_offsets_max = size_offsets;
1374 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1375 if (offsets == NULL)
1376 {
1377 printf("** Failed to get %d bytes of memory for offsets vector\n",
1378 (int)(size_offsets_max * sizeof(int)));
1379 yield = 1;
1380 goto EXIT;
1381 }
1382
1383 /* Sort out the input and output files */
1384
1385 if (argc > 1)
1386 {
1387 infile = fopen(argv[op], INPUT_MODE);
1388 if (infile == NULL)
1389 {
1390 printf("** Failed to open %s\n", argv[op]);
1391 yield = 1;
1392 goto EXIT;
1393 }
1394 }
1395
1396 if (argc > 2)
1397 {
1398 outfile = fopen(argv[op+1], OUTPUT_MODE);
1399 if (outfile == NULL)
1400 {
1401 printf("** Failed to open %s\n", argv[op+1]);
1402 yield = 1;
1403 goto EXIT;
1404 }
1405 }
1406
1407 /* Set alternative malloc function */
1408
1409 pcre_malloc = new_malloc;
1410 pcre_free = new_free;
1411 pcre_stack_malloc = stack_malloc;
1412 pcre_stack_free = stack_free;
1413
1414 /* Heading line unless quiet, then prompt for first regex if stdin */
1415
1416 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1417
1418 /* Main loop */
1419
1420 while (!done)
1421 {
1422 pcre *re = NULL;
1423 pcre_extra *extra = NULL;
1424
1425 #if !defined NOPOSIX /* There are still compilers that require no indent */
1426 regex_t preg;
1427 int do_posix = 0;
1428 #endif
1429
1430 const char *error;
1431 unsigned char *markptr;
1432 unsigned char *p, *pp, *ppp;
1433 unsigned char *to_file = NULL;
1434 const unsigned char *tables = NULL;
1435 unsigned long int true_size, true_study_size = 0;
1436 size_t size, regex_gotten_store;
1437 int do_mark = 0;
1438 int do_study = 0;
1439 int do_debug = debug;
1440 int do_G = 0;
1441 int do_g = 0;
1442 int do_showinfo = showinfo;
1443 int do_showrest = 0;
1444 int do_flip = 0;
1445 int erroroffset, len, delimiter, poffset;
1446
1447 use_utf8 = 0;
1448 debug_lengths = 1;
1449
1450 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1451 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1452 fflush(outfile);
1453
1454 p = buffer;
1455 while (isspace(*p)) p++;
1456 if (*p == 0) continue;
1457
1458 /* See if the pattern is to be loaded pre-compiled from a file. */
1459
1460 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1461 {
1462 unsigned long int magic, get_options;
1463 uschar sbuf[8];
1464 FILE *f;
1465
1466 p++;
1467 pp = p + (int)strlen((char *)p);
1468 while (isspace(pp[-1])) pp--;
1469 *pp = 0;
1470
1471 f = fopen((char *)p, "rb");
1472 if (f == NULL)
1473 {
1474 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1475 continue;
1476 }
1477
1478 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1479
1480 true_size =
1481 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1482 true_study_size =
1483 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1484
1485 re = (real_pcre *)new_malloc(true_size);
1486 regex_gotten_store = gotten_store;
1487
1488 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1489
1490 magic = ((real_pcre *)re)->magic_number;
1491 if (magic != MAGIC_NUMBER)
1492 {
1493 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1494 {
1495 do_flip = 1;
1496 }
1497 else
1498 {
1499 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1500 fclose(f);
1501 continue;
1502 }
1503 }
1504
1505 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1506 do_flip? " (byte-inverted)" : "", p);
1507
1508 /* Need to know if UTF-8 for printing data strings */
1509
1510 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1511 use_utf8 = (get_options & PCRE_UTF8) != 0;
1512
1513 /* Now see if there is any following study data */
1514
1515 if (true_study_size != 0)
1516 {
1517 pcre_study_data *psd;
1518
1519 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1520 extra->flags = PCRE_EXTRA_STUDY_DATA;
1521
1522 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1523 extra->study_data = psd;
1524
1525 if (fread(psd, 1, true_study_size, f) != true_study_size)
1526 {
1527 FAIL_READ:
1528 fprintf(outfile, "Failed to read data from %s\n", p);
1529 if (extra != NULL) new_free(extra);
1530 if (re != NULL) new_free(re);
1531 fclose(f);
1532 continue;
1533 }
1534 fprintf(outfile, "Study data loaded from %s\n", p);
1535 do_study = 1; /* To get the data output if requested */
1536 }
1537 else fprintf(outfile, "No study data\n");
1538
1539 fclose(f);
1540 goto SHOW_INFO;
1541 }
1542
1543 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1544 the pattern; if it isn't complete, read more. */
1545
1546 delimiter = *p++;
1547
1548 if (isalnum(delimiter) || delimiter == '\\')
1549 {
1550 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1551 goto SKIP_DATA;
1552 }
1553
1554 pp = p;
1555 poffset = (int)(p - buffer);
1556
1557 for(;;)
1558 {
1559 while (*pp != 0)
1560 {
1561 if (*pp == '\\' && pp[1] != 0) pp++;
1562 else if (*pp == delimiter) break;
1563 pp++;
1564 }
1565 if (*pp != 0) break;
1566 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1567 {
1568 fprintf(outfile, "** Unexpected EOF\n");
1569 done = 1;
1570 goto CONTINUE;
1571 }
1572 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1573 }
1574
1575 /* The buffer may have moved while being extended; reset the start of data
1576 pointer to the correct relative point in the buffer. */
1577
1578 p = buffer + poffset;
1579
1580 /* If the first character after the delimiter is backslash, make
1581 the pattern end with backslash. This is purely to provide a way
1582 of testing for the error message when a pattern ends with backslash. */
1583
1584 if (pp[1] == '\\') *pp++ = '\\';
1585
1586 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1587 for callouts. */
1588
1589 *pp++ = 0;
1590 strcpy((char *)pbuffer, (char *)p);
1591
1592 /* Look for options after final delimiter */
1593
1594 options = 0;
1595 study_options = 0;
1596 log_store = showstore; /* default from command line */
1597
1598 while (*pp != 0)
1599 {
1600 switch (*pp++)
1601 {
1602 case 'f': options |= PCRE_FIRSTLINE; break;
1603 case 'g': do_g = 1; break;
1604 case 'i': options |= PCRE_CASELESS; break;
1605 case 'm': options |= PCRE_MULTILINE; break;
1606 case 's': options |= PCRE_DOTALL; break;
1607 case 'x': options |= PCRE_EXTENDED; break;
1608
1609 case '+': do_showrest = 1; break;
1610 case 'A': options |= PCRE_ANCHORED; break;
1611 case 'B': do_debug = 1; break;
1612 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1613 case 'D': do_debug = do_showinfo = 1; break;
1614 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1615 case 'F': do_flip = 1; break;
1616 case 'G': do_G = 1; break;
1617 case 'I': do_showinfo = 1; break;
1618 case 'J': options |= PCRE_DUPNAMES; break;
1619 case 'K': do_mark = 1; break;
1620 case 'M': log_store = 1; break;
1621 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1622
1623 #if !defined NOPOSIX
1624 case 'P': do_posix = 1; break;
1625 #endif
1626
1627 case 'S': do_study = 1; break;
1628 case 'U': options |= PCRE_UNGREEDY; break;
1629 case 'W': options |= PCRE_UCP; break;
1630 case 'X': options |= PCRE_EXTRA; break;
1631 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1632 case 'Z': debug_lengths = 0; break;
1633 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1634 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1635
1636 case 'T':
1637 switch (*pp++)
1638 {
1639 case '0': tables = tables0; break;
1640 case '1': tables = tables1; break;
1641
1642 case '\r':
1643 case '\n':
1644 case ' ':
1645 case 0:
1646 fprintf(outfile, "** Missing table number after /T\n");
1647 goto SKIP_DATA;
1648
1649 default:
1650 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1651 goto SKIP_DATA;
1652 }
1653 break;
1654
1655 case 'L':
1656 ppp = pp;
1657 /* The '\r' test here is so that it works on Windows. */
1658 /* The '0' test is just in case this is an unterminated line. */
1659 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1660 *ppp = 0;
1661 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1662 {
1663 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1664 goto SKIP_DATA;
1665 }
1666 locale_set = 1;
1667 tables = pcre_maketables();
1668 pp = ppp;
1669 break;
1670
1671 case '>':
1672 to_file = pp;
1673 while (*pp != 0) pp++;
1674 while (isspace(pp[-1])) pp--;
1675 *pp = 0;
1676 break;
1677
1678 case '<':
1679 {
1680 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1681 {
1682 options |= PCRE_JAVASCRIPT_COMPAT;
1683 pp += 3;
1684 }
1685 else
1686 {
1687 int x = check_newline(pp, outfile);
1688 if (x == 0) goto SKIP_DATA;
1689 options |= x;
1690 while (*pp++ != '>');
1691 }
1692 }
1693 break;
1694
1695 case '\r': /* So that it works in Windows */
1696 case '\n':
1697 case ' ':
1698 break;
1699
1700 default:
1701 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1702 goto SKIP_DATA;
1703 }
1704 }
1705
1706 /* Handle compiling via the POSIX interface, which doesn't support the
1707 timing, showing, or debugging options, nor the ability to pass over
1708 local character tables. */
1709
1710 #if !defined NOPOSIX
1711 if (posix || do_posix)
1712 {
1713 int rc;
1714 int cflags = 0;
1715
1716 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1717 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1718 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1719 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1720 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1721 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1722 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1723
1724 rc = regcomp(&preg, (char *)p, cflags);
1725
1726 /* Compilation failed; go back for another re, skipping to blank line
1727 if non-interactive. */
1728
1729 if (rc != 0)
1730 {
1731 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1732 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1733 goto SKIP_DATA;
1734 }
1735 }
1736
1737 /* Handle compiling via the native interface */
1738
1739 else
1740 #endif /* !defined NOPOSIX */
1741
1742 {
1743 unsigned long int get_options;
1744
1745 if (timeit > 0)
1746 {
1747 register int i;
1748 clock_t time_taken;
1749 clock_t start_time = clock();
1750 for (i = 0; i < timeit; i++)
1751 {
1752 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1753 if (re != NULL) free(re);
1754 }
1755 time_taken = clock() - start_time;
1756 fprintf(outfile, "Compile time %.4f milliseconds\n",
1757 (((double)time_taken * 1000.0) / (double)timeit) /
1758 (double)CLOCKS_PER_SEC);
1759 }
1760
1761 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1762
1763 /* Compilation failed; go back for another re, skipping to blank line
1764 if non-interactive. */
1765
1766 if (re == NULL)
1767 {
1768 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1769 SKIP_DATA:
1770 if (infile != stdin)
1771 {
1772 for (;;)
1773 {
1774 if (extend_inputline(infile, buffer, NULL) == NULL)
1775 {
1776 done = 1;
1777 goto CONTINUE;
1778 }
1779 len = (int)strlen((char *)buffer);
1780 while (len > 0 && isspace(buffer[len-1])) len--;
1781 if (len == 0) break;
1782 }
1783 fprintf(outfile, "\n");
1784 }
1785 goto CONTINUE;
1786 }
1787
1788 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1789 within the regex; check for this so that we know how to process the data
1790 lines. */
1791
1792 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1793 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1794
1795 /* Print information if required. There are now two info-returning
1796 functions. The old one has a limited interface and returns only limited
1797 data. Check that it agrees with the newer one. */
1798
1799 if (log_store)
1800 fprintf(outfile, "Memory allocation (code space): %d\n",
1801 (int)(gotten_store -
1802 sizeof(real_pcre) -
1803 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1804
1805 /* Extract the size for possible writing before possibly flipping it,
1806 and remember the store that was got. */
1807
1808 true_size = ((real_pcre *)re)->size;
1809 regex_gotten_store = gotten_store;
1810
1811 /* If -s or /S was present, study the regexp to generate additional info to
1812 help with the matching. */
1813
1814 if (do_study || force_study)
1815 {
1816 if (timeit > 0)
1817 {
1818 register int i;
1819 clock_t time_taken;
1820 clock_t start_time = clock();
1821 for (i = 0; i < timeit; i++)
1822 extra = pcre_study(re, study_options, &error);
1823 time_taken = clock() - start_time;
1824 if (extra != NULL) free(extra);
1825 fprintf(outfile, " Study time %.4f milliseconds\n",
1826 (((double)time_taken * 1000.0) / (double)timeit) /
1827 (double)CLOCKS_PER_SEC);
1828 }
1829 extra = pcre_study(re, study_options, &error);
1830 if (error != NULL)
1831 fprintf(outfile, "Failed to study: %s\n", error);
1832 else if (extra != NULL)
1833 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1834 }
1835
1836 /* If /K was present, we set up for handling MARK data. */
1837
1838 if (do_mark)
1839 {
1840 if (extra == NULL)
1841 {
1842 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1843 extra->flags = 0;
1844 }
1845 extra->mark = &markptr;
1846 extra->flags |= PCRE_EXTRA_MARK;
1847 }
1848
1849 /* If the 'F' option was present, we flip the bytes of all the integer
1850 fields in the regex data block and the study block. This is to make it
1851 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1852 compiled on a different architecture. */
1853
1854 if (do_flip)
1855 {
1856 real_pcre *rre = (real_pcre *)re;
1857 rre->magic_number =
1858 byteflip(rre->magic_number, sizeof(rre->magic_number));
1859 rre->size = byteflip(rre->size, sizeof(rre->size));
1860 rre->options = byteflip(rre->options, sizeof(rre->options));
1861 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1862 rre->top_bracket =
1863 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1864 rre->top_backref =
1865 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1866 rre->first_byte =
1867 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1868 rre->req_byte =
1869 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1870 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1871 sizeof(rre->name_table_offset));
1872 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1873 sizeof(rre->name_entry_size));
1874 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1875 sizeof(rre->name_count));
1876
1877 if (extra != NULL)
1878 {
1879 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1880 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1881 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1882 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1883 }
1884 }
1885
1886 /* Extract information from the compiled data if required */
1887
1888 SHOW_INFO:
1889
1890 if (do_debug)
1891 {
1892 fprintf(outfile, "------------------------------------------------------------------\n");
1893 pcre_printint(re, outfile, debug_lengths);
1894 }
1895
1896 /* We already have the options in get_options (see above) */
1897
1898 if (do_showinfo)
1899 {
1900 unsigned long int all_options;
1901 #if !defined NOINFOCHECK
1902 int old_first_char, old_options, old_count;
1903 #endif
1904 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1905 hascrorlf;
1906 int nameentrysize, namecount;
1907 const uschar *nametable;
1908
1909 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1910 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1911 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1912 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1913 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1914 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1915 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1916 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1917 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1918 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1919 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1920
1921 #if !defined NOINFOCHECK
1922 old_count = pcre_info(re, &old_options, &old_first_char);
1923 if (count < 0) fprintf(outfile,
1924 "Error %d from pcre_info()\n", count);
1925 else
1926 {
1927 if (old_count != count) fprintf(outfile,
1928 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1929 old_count);
1930
1931 if (old_first_char != first_char) fprintf(outfile,
1932 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1933 first_char, old_first_char);
1934
1935 if (old_options != (int)get_options) fprintf(outfile,
1936 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1937 get_options, old_options);
1938 }
1939 #endif
1940
1941 if (size != regex_gotten_store) fprintf(outfile,
1942 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1943 (int)size, (int)regex_gotten_store);
1944
1945 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1946 if (backrefmax > 0)
1947 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1948
1949 if (namecount > 0)
1950 {
1951 fprintf(outfile, "Named capturing subpatterns:\n");
1952 while (namecount-- > 0)
1953 {
1954 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1955 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1956 GET2(nametable, 0));
1957 nametable += nameentrysize;
1958 }
1959 }
1960
1961 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1962 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1963
1964 all_options = ((real_pcre *)re)->options;
1965 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1966
1967 if (get_options == 0) fprintf(outfile, "No options\n");
1968 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1969 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1970 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1971 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1972 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1973 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1974 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1975 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1976 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1977 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1978 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1979 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1980 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1981 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1982 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1983 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1984 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1985 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1986
1987 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1988
1989 switch (get_options & PCRE_NEWLINE_BITS)
1990 {
1991 case PCRE_NEWLINE_CR:
1992 fprintf(outfile, "Forced newline sequence: CR\n");
1993 break;
1994
1995 case PCRE_NEWLINE_LF:
1996 fprintf(outfile, "Forced newline sequence: LF\n");
1997 break;
1998
1999 case PCRE_NEWLINE_CRLF:
2000 fprintf(outfile, "Forced newline sequence: CRLF\n");
2001 break;
2002
2003 case PCRE_NEWLINE_ANYCRLF:
2004 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2005 break;
2006
2007 case PCRE_NEWLINE_ANY:
2008 fprintf(outfile, "Forced newline sequence: ANY\n");
2009 break;
2010
2011 default:
2012 break;
2013 }
2014
2015 if (first_char == -1)
2016 {
2017 fprintf(outfile, "First char at start or follows newline\n");
2018 }
2019 else if (first_char < 0)
2020 {
2021 fprintf(outfile, "No first char\n");
2022 }
2023 else
2024 {
2025 int ch = first_char & 255;
2026 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2027 "" : " (caseless)";
2028 if (PRINTHEX(ch))
2029 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2030 else
2031 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2032 }
2033
2034 if (need_char < 0)
2035 {
2036 fprintf(outfile, "No need char\n");
2037 }
2038 else
2039 {
2040 int ch = need_char & 255;
2041 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2042 "" : " (caseless)";
2043 if (PRINTHEX(ch))
2044 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2045 else
2046 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2047 }
2048
2049 /* Don't output study size; at present it is in any case a fixed
2050 value, but it varies, depending on the computer architecture, and
2051 so messes up the test suite. (And with the /F option, it might be
2052 flipped.) */
2053
2054 if (do_study || force_study)
2055 {
2056 if (extra == NULL)
2057 fprintf(outfile, "Study returned NULL\n");
2058 else
2059 {
2060 uschar *start_bits = NULL;
2061 int minlength;
2062
2063 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2064 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2065
2066 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2067 if (start_bits == NULL)
2068 fprintf(outfile, "No set of starting bytes\n");
2069 else
2070 {
2071 int i;
2072 int c = 24;
2073 fprintf(outfile, "Starting byte set: ");
2074 for (i = 0; i < 256; i++)
2075 {
2076 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2077 {
2078 if (c > 75)
2079 {
2080 fprintf(outfile, "\n ");
2081 c = 2;
2082 }
2083 if (PRINTHEX(i) && i != ' ')
2084 {
2085 fprintf(outfile, "%c ", i);
2086 c += 2;
2087 }
2088 else
2089 {
2090 fprintf(outfile, "\\x%02x ", i);
2091 c += 5;
2092 }
2093 }
2094 }
2095 fprintf(outfile, "\n");
2096 }
2097 }
2098 }
2099 }
2100
2101 /* If the '>' option was present, we write out the regex to a file, and
2102 that is all. The first 8 bytes of the file are the regex length and then
2103 the study length, in big-endian order. */
2104
2105 if (to_file != NULL)
2106 {
2107 FILE *f = fopen((char *)to_file, "wb");
2108 if (f == NULL)
2109 {
2110 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2111 }
2112 else
2113 {
2114 uschar sbuf[8];
2115 sbuf[0] = (uschar)((true_size >> 24) & 255);
2116 sbuf[1] = (uschar)((true_size >> 16) & 255);
2117 sbuf[2] = (uschar)((true_size >> 8) & 255);
2118 sbuf[3] = (uschar)((true_size) & 255);
2119
2120 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2121 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2122 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2123 sbuf[7] = (uschar)((true_study_size) & 255);
2124
2125 if (fwrite(sbuf, 1, 8, f) < 8 ||
2126 fwrite(re, 1, true_size, f) < true_size)
2127 {
2128 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2129 }
2130 else
2131 {
2132 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2133 if (extra != NULL)
2134 {
2135 if (fwrite(extra->study_data, 1, true_study_size, f) <
2136 true_study_size)
2137 {
2138 fprintf(outfile, "Write error on %s: %s\n", to_file,
2139 strerror(errno));
2140 }
2141 else fprintf(outfile, "Study data written to %s\n", to_file);
2142
2143 }
2144 }
2145 fclose(f);
2146 }
2147
2148 new_free(re);
2149 if (extra != NULL) new_free(extra);
2150 if (locale_set)
2151 {
2152 new_free((void *)tables);
2153 setlocale(LC_CTYPE, "C");
2154 locale_set = 0;
2155 }
2156 continue; /* With next regex */
2157 }
2158 } /* End of non-POSIX compile */
2159
2160 /* Read data lines and test them */
2161
2162 for (;;)
2163 {
2164 uschar *q;
2165 uschar *bptr;
2166 int *use_offsets = offsets;
2167 int use_size_offsets = size_offsets;
2168 int callout_data = 0;
2169 int callout_data_set = 0;
2170 int count, c;
2171 int copystrings = 0;
2172 int find_match_limit = default_find_match_limit;
2173 int getstrings = 0;
2174 int getlist = 0;
2175 int gmatched = 0;
2176 int start_offset = 0;
2177 int start_offset_sign = 1;
2178 int g_notempty = 0;
2179 int use_dfa = 0;
2180
2181 options = 0;
2182
2183 *copynames = 0;
2184 *getnames = 0;
2185
2186 copynamesptr = copynames;
2187 getnamesptr = getnames;
2188
2189 pcre_callout = callout;
2190 first_callout = 1;
2191 callout_extra = 0;
2192 callout_count = 0;
2193 callout_fail_count = 999999;
2194 callout_fail_id = -1;
2195 show_malloc = 0;
2196
2197 if (extra != NULL) extra->flags &=
2198 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2199
2200 len = 0;
2201 for (;;)
2202 {
2203 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2204 {
2205 if (len > 0) /* Reached EOF without hitting a newline */
2206 {
2207 fprintf(outfile, "\n");
2208 break;
2209 }
2210 done = 1;
2211 goto CONTINUE;
2212 }
2213 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2214 len = (int)strlen((char *)buffer);
2215 if (buffer[len-1] == '\n') break;
2216 }
2217
2218 while (len > 0 && isspace(buffer[len-1])) len--;
2219 buffer[len] = 0;
2220 if (len == 0) break;
2221
2222 p = buffer;
2223 while (isspace(*p)) p++;
2224
2225 bptr = q = dbuffer;
2226 while ((c = *p++) != 0)
2227 {
2228 int i = 0;
2229 int n = 0;
2230
2231 if (c == '\\') switch ((c = *p++))
2232 {
2233 case 'a': c = 7; break;
2234 case 'b': c = '\b'; break;
2235 case 'e': c = 27; break;
2236 case 'f': c = '\f'; break;
2237 case 'n': c = '\n'; break;
2238 case 'r': c = '\r'; break;
2239 case 't': c = '\t'; break;
2240 case 'v': c = '\v'; break;
2241
2242 case '0': case '1': case '2': case '3':
2243 case '4': case '5': case '6': case '7':
2244 c -= '0';
2245 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2246 c = c * 8 + *p++ - '0';
2247
2248 #if !defined NOUTF8
2249 if (use_utf8 && c > 255)
2250 {
2251 unsigned char buff8[8];
2252 int ii, utn;
2253 utn = ord2utf8(c, buff8);
2254 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2255 c = buff8[ii]; /* Last byte */
2256 }
2257 #endif
2258 break;
2259
2260 case 'x':
2261
2262 /* Handle \x{..} specially - new Perl thing for utf8 */
2263
2264 #if !defined NOUTF8
2265 if (*p == '{')
2266 {
2267 unsigned char *pt = p;
2268 c = 0;
2269 while (isxdigit(*(++pt)))
2270 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2271 if (*pt == '}')
2272 {
2273 unsigned char buff8[8];
2274 int ii, utn;
2275 if (use_utf8)
2276 {
2277 utn = ord2utf8(c, buff8);
2278 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2279 c = buff8[ii]; /* Last byte */
2280 }
2281 else
2282 {
2283 if (c > 255)
2284 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2285 "UTF-8 mode is not enabled.\n"
2286 "** Truncation will probably give the wrong result.\n", c);
2287 }
2288 p = pt + 1;
2289 break;
2290 }
2291 /* Not correct form; fall through */
2292 }
2293 #endif
2294
2295 /* Ordinary \x */
2296
2297 c = 0;
2298 while (i++ < 2 && isxdigit(*p))
2299 {
2300 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2301 p++;
2302 }
2303 break;
2304
2305 case 0: /* \ followed by EOF allows for an empty line */
2306 p--;
2307 continue;
2308
2309 case '>':
2310 if (*p == '-')
2311 {
2312 start_offset_sign = -1;
2313 p++;
2314 }
2315 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2316 start_offset *= start_offset_sign;
2317 continue;
2318
2319 case 'A': /* Option setting */
2320 options |= PCRE_ANCHORED;
2321 continue;
2322
2323 case 'B':
2324 options |= PCRE_NOTBOL;
2325 continue;
2326
2327 case 'C':
2328 if (isdigit(*p)) /* Set copy string */
2329 {
2330 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2331 copystrings |= 1 << n;
2332 }
2333 else if (isalnum(*p))
2334 {
2335 uschar *npp = copynamesptr;
2336 while (isalnum(*p)) *npp++ = *p++;
2337 *npp++ = 0;
2338 *npp = 0;
2339 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2340 if (n < 0)
2341 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2342 copynamesptr = npp;
2343 }
2344 else if (*p == '+')
2345 {
2346 callout_extra = 1;
2347 p++;
2348 }
2349 else if (*p == '-')
2350 {
2351 pcre_callout = NULL;
2352 p++;
2353 }
2354 else if (*p == '!')
2355 {
2356 callout_fail_id = 0;
2357 p++;
2358 while(isdigit(*p))
2359 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2360 callout_fail_count = 0;
2361 if (*p == '!')
2362 {
2363 p++;
2364 while(isdigit(*p))
2365 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2366 }
2367 }
2368 else if (*p == '*')
2369 {
2370 int sign = 1;
2371 callout_data = 0;
2372 if (*(++p) == '-') { sign = -1; p++; }
2373 while(isdigit(*p))
2374 callout_data = callout_data * 10 + *p++ - '0';
2375 callout_data *= sign;
2376 callout_data_set = 1;
2377 }
2378 continue;
2379
2380 #if !defined NODFA
2381 case 'D':
2382 #if !defined NOPOSIX
2383 if (posix || do_posix)
2384 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2385 else
2386 #endif
2387 use_dfa = 1;
2388 continue;
2389 #endif
2390
2391 #if !defined NODFA
2392 case 'F':
2393 options |= PCRE_DFA_SHORTEST;
2394 continue;
2395 #endif
2396
2397 case 'G':
2398 if (isdigit(*p))
2399 {
2400 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2401 getstrings |= 1 << n;
2402 }
2403 else if (isalnum(*p))
2404 {
2405 uschar *npp = getnamesptr;
2406 while (isalnum(*p)) *npp++ = *p++;
2407 *npp++ = 0;
2408 *npp = 0;
2409 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2410 if (n < 0)
2411 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2412 getnamesptr = npp;
2413 }
2414 continue;
2415
2416 case 'L':
2417 getlist = 1;
2418 continue;
2419
2420 case 'M':
2421 find_match_limit = 1;
2422 continue;
2423
2424 case 'N':
2425 if ((options & PCRE_NOTEMPTY) != 0)
2426 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2427 else
2428 options |= PCRE_NOTEMPTY;
2429 continue;
2430
2431 case 'O':
2432 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2433 if (n > size_offsets_max)
2434 {
2435 size_offsets_max = n;
2436 free(offsets);
2437 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2438 if (offsets == NULL)
2439 {
2440 printf("** Failed to get %d bytes of memory for offsets vector\n",
2441 (int)(size_offsets_max * sizeof(int)));
2442 yield = 1;
2443 goto EXIT;
2444 }
2445 }
2446 use_size_offsets = n;
2447 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2448 continue;
2449
2450 case 'P':
2451 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2452 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2453 continue;
2454
2455 case 'Q':
2456 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2457 if (extra == NULL)
2458 {
2459 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2460 extra->flags = 0;
2461 }
2462 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2463 extra->match_limit_recursion = n;
2464 continue;
2465
2466 case 'q':
2467 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2468 if (extra == NULL)
2469 {
2470 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2471 extra->flags = 0;
2472 }
2473 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2474 extra->match_limit = n;
2475 continue;
2476
2477 #if !defined NODFA
2478 case 'R':
2479 options |= PCRE_DFA_RESTART;
2480 continue;
2481 #endif
2482
2483 case 'S':
2484 show_malloc = 1;
2485 continue;
2486
2487 case 'Y':
2488 options |= PCRE_NO_START_OPTIMIZE;
2489 continue;
2490
2491 case 'Z':
2492 options |= PCRE_NOTEOL;
2493 continue;
2494
2495 case '?':
2496 options |= PCRE_NO_UTF8_CHECK;
2497 continue;
2498
2499 case '<':
2500 {
2501 int x = check_newline(p, outfile);
2502 if (x == 0) goto NEXT_DATA;
2503 options |= x;
2504 while (*p++ != '>');
2505 }
2506 continue;
2507 }
2508 *q++ = c;
2509 }
2510 *q = 0;
2511 len = (int)(q - dbuffer);
2512
2513 /* Move the data to the end of the buffer so that a read over the end of
2514 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2515 we are using the POSIX interface, we must include the terminating zero. */
2516
2517 #if !defined NOPOSIX
2518 if (posix || do_posix)
2519 {
2520 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2521 bptr += buffer_size - len - 1;
2522 }
2523 else
2524 #endif
2525 {
2526 memmove(bptr + buffer_size - len, bptr, len);
2527 bptr += buffer_size - len;
2528 }
2529
2530 if ((all_use_dfa || use_dfa) && find_match_limit)
2531 {
2532 printf("**Match limit not relevant for DFA matching: ignored\n");
2533 find_match_limit = 0;
2534 }
2535
2536 /* Handle matching via the POSIX interface, which does not
2537 support timing or playing with the match limit or callout data. */
2538
2539 #if !defined NOPOSIX
2540 if (posix || do_posix)
2541 {
2542 int rc;
2543 int eflags = 0;
2544 regmatch_t *pmatch = NULL;
2545 if (use_size_offsets > 0)
2546 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2547 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2548 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2549 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2550
2551 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2552
2553 if (rc != 0)
2554 {
2555 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2556 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2557 }
2558 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2559 != 0)
2560 {
2561 fprintf(outfile, "Matched with REG_NOSUB\n");
2562 }
2563 else
2564 {
2565 size_t i;
2566 for (i = 0; i < (size_t)use_size_offsets; i++)
2567 {
2568 if (pmatch[i].rm_so >= 0)
2569 {
2570 fprintf(outfile, "%2d: ", (int)i);
2571 (void)pchars(dbuffer + pmatch[i].rm_so,
2572 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2573 fprintf(outfile, "\n");
2574 if (i == 0 && do_showrest)
2575 {
2576 fprintf(outfile, " 0+ ");
2577 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2578 outfile);
2579 fprintf(outfile, "\n");
2580 }
2581 }
2582 }
2583 }
2584 free(pmatch);
2585 }
2586
2587 /* Handle matching via the native interface - repeats for /g and /G */
2588
2589 else
2590 #endif /* !defined NOPOSIX */
2591
2592 for (;; gmatched++) /* Loop for /g or /G */
2593 {
2594 markptr = NULL;
2595
2596 if (timeitm > 0)
2597 {
2598 register int i;
2599 clock_t time_taken;
2600 clock_t start_time = clock();
2601
2602 #if !defined NODFA
2603 if (all_use_dfa || use_dfa)
2604 {
2605 int workspace[1000];
2606 for (i = 0; i < timeitm; i++)
2607 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2608 options | g_notempty, use_offsets, use_size_offsets, workspace,
2609 sizeof(workspace)/sizeof(int));
2610 }
2611 else
2612 #endif
2613
2614 for (i = 0; i < timeitm; i++)
2615 count = pcre_exec(re, extra, (char *)bptr, len,
2616 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2617
2618 time_taken = clock() - start_time;
2619 fprintf(outfile, "Execute time %.4f milliseconds\n",
2620 (((double)time_taken * 1000.0) / (double)timeitm) /
2621 (double)CLOCKS_PER_SEC);
2622 }
2623
2624 /* If find_match_limit is set, we want to do repeated matches with
2625 varying limits in order to find the minimum value for the match limit and
2626 for the recursion limit. */
2627
2628 if (find_match_limit)
2629 {
2630 if (extra == NULL)
2631 {
2632 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2633 extra->flags = 0;
2634 }
2635
2636 (void)check_match_limit(re, extra, bptr, len, start_offset,
2637 options|g_notempty, use_offsets, use_size_offsets,
2638 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2639 PCRE_ERROR_MATCHLIMIT, "match()");
2640
2641 count = check_match_limit(re, extra, bptr, len, start_offset,
2642 options|g_notempty, use_offsets, use_size_offsets,
2643 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2644 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2645 }
2646
2647 /* If callout_data is set, use the interface with additional data */
2648
2649 else if (callout_data_set)
2650 {
2651 if (extra == NULL)
2652 {
2653 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2654 extra->flags = 0;
2655 }
2656 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2657 extra->callout_data = &callout_data;
2658 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2659 options | g_notempty, use_offsets, use_size_offsets);
2660 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2661 }
2662
2663 /* The normal case is just to do the match once, with the default
2664 value of match_limit. */
2665
2666 #if !defined NODFA
2667 else if (all_use_dfa || use_dfa)
2668 {
2669 int workspace[1000];
2670 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2671 options | g_notempty, use_offsets, use_size_offsets, workspace,
2672 sizeof(workspace)/sizeof(int));
2673 if (count == 0)
2674 {
2675 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2676 count = use_size_offsets/2;
2677 }
2678 }
2679 #endif
2680
2681 else
2682 {
2683 count = pcre_exec(re, extra, (char *)bptr, len,
2684 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2685 if (count == 0)
2686 {
2687 fprintf(outfile, "Matched, but too many substrings\n");
2688 count = use_size_offsets/3;
2689 }
2690 }
2691
2692 /* Matched */
2693
2694 if (count >= 0)
2695 {
2696 int i, maxcount;
2697
2698 #if !defined NODFA
2699 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2700 #endif
2701 maxcount = use_size_offsets/3;
2702
2703 /* This is a check against a lunatic return value. */
2704
2705 if (count > maxcount)
2706 {
2707 fprintf(outfile,
2708 "** PCRE error: returned count %d is too big for offset size %d\n",
2709 count, use_size_offsets);
2710 count = use_size_offsets/3;
2711 if (do_g || do_G)
2712 {
2713 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2714 do_g = do_G = FALSE; /* Break g/G loop */
2715 }
2716 }
2717
2718 for (i = 0; i < count * 2; i += 2)
2719 {
2720 if (use_offsets[i] < 0)
2721 fprintf(outfile, "%2d: <unset>\n", i/2);
2722 else
2723 {
2724 fprintf(outfile, "%2d: ", i/2);
2725 (void)pchars(bptr + use_offsets[i],
2726 use_offsets[i+1] - use_offsets[i], outfile);
2727 fprintf(outfile, "\n");
2728 if (i == 0)
2729 {
2730 if (do_showrest)
2731 {
2732 fprintf(outfile, " 0+ ");
2733 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2734 outfile);
2735 fprintf(outfile, "\n");
2736 }
2737 }
2738 }
2739 }
2740
2741 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2742
2743 for (i = 0; i < 32; i++)
2744 {
2745 if ((copystrings & (1 << i)) != 0)
2746 {
2747 char copybuffer[256];
2748 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2749 i, copybuffer, sizeof(copybuffer));
2750 if (rc < 0)
2751 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2752 else
2753 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2754 }
2755 }
2756
2757 for (copynamesptr = copynames;
2758 *copynamesptr != 0;
2759 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2760 {
2761 char copybuffer[256];
2762 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2763 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2764 if (rc < 0)
2765 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2766 else
2767 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2768 }
2769
2770 for (i = 0; i < 32; i++)
2771 {
2772 if ((getstrings & (1 << i)) != 0)
2773 {
2774 const char *substring;
2775 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2776 i, &substring);
2777 if (rc < 0)
2778 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2779 else
2780 {
2781 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2782 pcre_free_substring(substring);
2783 }
2784 }
2785 }
2786
2787 for (getnamesptr = getnames;
2788 *getnamesptr != 0;
2789 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2790 {
2791 const char *substring;
2792 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2793 count, (char *)getnamesptr, &substring);
2794 if (rc < 0)
2795 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2796 else
2797 {
2798 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2799 pcre_free_substring(substring);
2800 }
2801 }
2802
2803 if (getlist)
2804 {
2805 const char **stringlist;
2806 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2807 &stringlist);
2808 if (rc < 0)
2809 fprintf(outfile, "get substring list failed %d\n", rc);
2810 else
2811 {
2812 for (i = 0; i < count; i++)
2813 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2814 if (stringlist[i] != NULL)
2815 fprintf(outfile, "string list not terminated by NULL\n");
2816 /* free((void *)stringlist); */
2817 pcre_free_substring_list(stringlist);
2818 }
2819 }
2820 }
2821
2822 /* There was a partial match */
2823
2824 else if (count == PCRE_ERROR_PARTIAL)
2825 {
2826 if (markptr == NULL) fprintf(outfile, "Partial match");
2827 else fprintf(outfile, "Partial match, mark=%s", markptr);
2828 if (use_size_offsets > 1)
2829 {
2830 fprintf(outfile, ": ");
2831 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2832 outfile);
2833 }
2834 fprintf(outfile, "\n");
2835 break; /* Out of the /g loop */
2836 }
2837
2838 /* Failed to match. If this is a /g or /G loop and we previously set
2839 g_notempty after a null match, this is not necessarily the end. We want
2840 to advance the start offset, and continue. We won't be at the end of the
2841 string - that was checked before setting g_notempty.
2842
2843 Complication arises in the case when the newline convention is "any",
2844 "crlf", or "anycrlf". If the previous match was at the end of a line
2845 terminated by CRLF, an advance of one character just passes the \r,
2846 whereas we should prefer the longer newline sequence, as does the code in
2847 pcre_exec(). Fudge the offset value to achieve this. We check for a
2848 newline setting in the pattern; if none was set, use pcre_config() to
2849 find the default.
2850
2851 Otherwise, in the case of UTF-8 matching, the advance must be one
2852 character, not one byte. */
2853
2854 else
2855 {
2856 if (g_notempty != 0)
2857 {
2858 int onechar = 1;
2859 unsigned int obits = ((real_pcre *)re)->options;
2860 use_offsets[0] = start_offset;
2861 if ((obits & PCRE_NEWLINE_BITS) == 0)
2862 {
2863 int d;
2864 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2865 /* Note that these values are always the ASCII ones, even in
2866 EBCDIC environments. CR = 13, NL = 10. */
2867 obits = (d == 13)? PCRE_NEWLINE_CR :
2868 (d == 10)? PCRE_NEWLINE_LF :
2869 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2870 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2871 (d == -1)? PCRE_NEWLINE_ANY : 0;
2872 }
2873 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2874 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2875 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2876 &&
2877 start_offset < len - 1 &&
2878 bptr[start_offset] == '\r' &&
2879 bptr[start_offset+1] == '\n')
2880 onechar++;
2881 else if (use_utf8)
2882 {
2883 while (start_offset + onechar < len)
2884 {
2885 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2886 onechar++;
2887 }
2888 }
2889 use_offsets[1] = start_offset + onechar;
2890 }
2891 else
2892 {
2893 switch(count)
2894 {
2895 case PCRE_ERROR_NOMATCH:
2896 if (gmatched == 0)
2897 {
2898 if (markptr == NULL) fprintf(outfile, "No match\n");
2899 else fprintf(outfile, "No match, mark = %s\n", markptr);
2900 }
2901 break;
2902
2903 case PCRE_ERROR_BADUTF8:
2904 case PCRE_ERROR_SHORTUTF8:
2905 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2906 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2907 if (use_size_offsets >= 2)
2908 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2909 use_offsets[1]);
2910 fprintf(outfile, "\n");
2911 break;
2912
2913 default:
2914 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2915 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2916 else
2917 fprintf(outfile, "Error %d (Unexpected value)\n", count);
2918 break;
2919 }
2920
2921 break; /* Out of the /g loop */
2922 }
2923 }
2924
2925 /* If not /g or /G we are done */
2926
2927 if (!do_g && !do_G) break;
2928
2929 /* If we have matched an empty string, first check to see if we are at
2930 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2931 Perl's /g option does. This turns out to be rather cunning. First we set
2932 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2933 same point. If this fails (picked up above) we advance to the next
2934 character. */
2935
2936 g_notempty = 0;
2937
2938 if (use_offsets[0] == use_offsets[1])
2939 {
2940 if (use_offsets[0] == len) break;
2941 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2942 }
2943
2944 /* For /g, update the start offset, leaving the rest alone */
2945
2946 if (do_g) start_offset = use_offsets[1];
2947
2948 /* For /G, update the pointer and length */
2949
2950 else
2951 {
2952 bptr += use_offsets[1];
2953 len -= use_offsets[1];
2954 }
2955 } /* End of loop for /g and /G */
2956
2957 NEXT_DATA: continue;
2958 } /* End of loop for data lines */
2959
2960 CONTINUE:
2961
2962 #if !defined NOPOSIX
2963 if (posix || do_posix) regfree(&preg);
2964 #endif
2965
2966 if (re != NULL) new_free(re);
2967 if (extra != NULL) new_free(extra);
2968 if (locale_set)
2969 {
2970 new_free((void *)tables);
2971 setlocale(LC_CTYPE, "C");
2972 locale_set = 0;
2973 }
2974 }
2975
2976 if (infile == stdin) fprintf(outfile, "\n");
2977
2978 EXIT:
2979
2980 if (infile != NULL && infile != stdin) fclose(infile);
2981 if (outfile != NULL && outfile != stdout) fclose(outfile);
2982
2983 free(buffer);
2984 free(dbuffer);
2985 free(pbuffer);
2986 free(offsets);
2987
2988 return yield;
2989 }
2990
2991 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5