/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 612 - (show annotations)
Sat Jul 2 15:20:59 2011 UTC (8 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 90248 byte(s)
Fix two study bugs concerned with minimum subject lengths; add features to 
pcretest so that all tests can be run with or without study; adjust tests so 
that this happens.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile;
181 static int log_store = 0;
182 static int callout_count;
183 static int callout_extra;
184 static int callout_fail_count;
185 static int callout_fail_id;
186 static int debug_lengths;
187 static int first_callout;
188 static int locale_set = 0;
189 static int show_malloc;
190 static int use_utf8;
191 static size_t gotten_store;
192
193 /* The buffers grow automatically if very long input lines are encountered. */
194
195 static int buffer_size = 50000;
196 static uschar *buffer = NULL;
197 static uschar *dbuffer = NULL;
198 static uschar *pbuffer = NULL;
199
/* Textual explanations for runtime error codes. The table is positional: the
index comment on each entry gives its slot. A NULL slot is a code that, per the
note beside it, is either reported by dedicated code or never returned. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8 is handled specially */
  /* 11 */ "bad UTF-8 offset",
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL   /* SHORTUTF8 is handled specially */
};
230
231
232 /*************************************************
233 * Alternate character tables *
234 *************************************************/
235
236 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237 using the default tables of the library. However, the T option can be used to
238 select alternate sets of tables, for different kinds of testing. Note also that
239 the L (locale) option also adjusts the tables. */
240
241 /* This is the set of tables distributed as default with PCRE. It recognizes
242 only ASCII characters. */
243
244 static const unsigned char tables0[] = {
245
246 /* This table is a lower casing table. */
247
248 0, 1, 2, 3, 4, 5, 6, 7,
249 8, 9, 10, 11, 12, 13, 14, 15,
250 16, 17, 18, 19, 20, 21, 22, 23,
251 24, 25, 26, 27, 28, 29, 30, 31,
252 32, 33, 34, 35, 36, 37, 38, 39,
253 40, 41, 42, 43, 44, 45, 46, 47,
254 48, 49, 50, 51, 52, 53, 54, 55,
255 56, 57, 58, 59, 60, 61, 62, 63,
256 64, 97, 98, 99,100,101,102,103,
257 104,105,106,107,108,109,110,111,
258 112,113,114,115,116,117,118,119,
259 120,121,122, 91, 92, 93, 94, 95,
260 96, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122,123,124,125,126,127,
264 128,129,130,131,132,133,134,135,
265 136,137,138,139,140,141,142,143,
266 144,145,146,147,148,149,150,151,
267 152,153,154,155,156,157,158,159,
268 160,161,162,163,164,165,166,167,
269 168,169,170,171,172,173,174,175,
270 176,177,178,179,180,181,182,183,
271 184,185,186,187,188,189,190,191,
272 192,193,194,195,196,197,198,199,
273 200,201,202,203,204,205,206,207,
274 208,209,210,211,212,213,214,215,
275 216,217,218,219,220,221,222,223,
276 224,225,226,227,228,229,230,231,
277 232,233,234,235,236,237,238,239,
278 240,241,242,243,244,245,246,247,
279 248,249,250,251,252,253,254,255,
280
281 /* This table is a case flipping table. */
282
283 0, 1, 2, 3, 4, 5, 6, 7,
284 8, 9, 10, 11, 12, 13, 14, 15,
285 16, 17, 18, 19, 20, 21, 22, 23,
286 24, 25, 26, 27, 28, 29, 30, 31,
287 32, 33, 34, 35, 36, 37, 38, 39,
288 40, 41, 42, 43, 44, 45, 46, 47,
289 48, 49, 50, 51, 52, 53, 54, 55,
290 56, 57, 58, 59, 60, 61, 62, 63,
291 64, 97, 98, 99,100,101,102,103,
292 104,105,106,107,108,109,110,111,
293 112,113,114,115,116,117,118,119,
294 120,121,122, 91, 92, 93, 94, 95,
295 96, 65, 66, 67, 68, 69, 70, 71,
296 72, 73, 74, 75, 76, 77, 78, 79,
297 80, 81, 82, 83, 84, 85, 86, 87,
298 88, 89, 90,123,124,125,126,127,
299 128,129,130,131,132,133,134,135,
300 136,137,138,139,140,141,142,143,
301 144,145,146,147,148,149,150,151,
302 152,153,154,155,156,157,158,159,
303 160,161,162,163,164,165,166,167,
304 168,169,170,171,172,173,174,175,
305 176,177,178,179,180,181,182,183,
306 184,185,186,187,188,189,190,191,
307 192,193,194,195,196,197,198,199,
308 200,201,202,203,204,205,206,207,
309 208,209,210,211,212,213,214,215,
310 216,217,218,219,220,221,222,223,
311 224,225,226,227,228,229,230,231,
312 232,233,234,235,236,237,238,239,
313 240,241,242,243,244,245,246,247,
314 248,249,250,251,252,253,254,255,
315
316 /* This table contains bit maps for various character classes. Each map is 32
317 bytes long and the bits run from the least significant end of each byte. The
318 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319 graph, print, punct, and cntrl. Other classes are built from combinations. */
320
321 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325
326 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
327 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330
331 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345
346 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
347 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350
351 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
352 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355
356 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
357 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360
361 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
362 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365
366 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370
371 /* This table identifies various classes of character by individual bits:
372 0x01 white space character
373 0x02 letter
374 0x04 decimal digit
375 0x08 hexadecimal digit
376 0x10 alphanumeric or '_'
377 0x80 regular expression metacharacter or binary zero
378 */
379
380 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
385 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412
413 /* This is a set of tables that came originally from a Windows user. It seems to
414 be at least an approximation of ISO 8859. In particular, there are characters
415 greater than 128 that are marked as spaces, letters, etc. */
416
417 static const unsigned char tables1[] = {
418 0,1,2,3,4,5,6,7,
419 8,9,10,11,12,13,14,15,
420 16,17,18,19,20,21,22,23,
421 24,25,26,27,28,29,30,31,
422 32,33,34,35,36,37,38,39,
423 40,41,42,43,44,45,46,47,
424 48,49,50,51,52,53,54,55,
425 56,57,58,59,60,61,62,63,
426 64,97,98,99,100,101,102,103,
427 104,105,106,107,108,109,110,111,
428 112,113,114,115,116,117,118,119,
429 120,121,122,91,92,93,94,95,
430 96,97,98,99,100,101,102,103,
431 104,105,106,107,108,109,110,111,
432 112,113,114,115,116,117,118,119,
433 120,121,122,123,124,125,126,127,
434 128,129,130,131,132,133,134,135,
435 136,137,138,139,140,141,142,143,
436 144,145,146,147,148,149,150,151,
437 152,153,154,155,156,157,158,159,
438 160,161,162,163,164,165,166,167,
439 168,169,170,171,172,173,174,175,
440 176,177,178,179,180,181,182,183,
441 184,185,186,187,188,189,190,191,
442 224,225,226,227,228,229,230,231,
443 232,233,234,235,236,237,238,239,
444 240,241,242,243,244,245,246,215,
445 248,249,250,251,252,253,254,223,
446 224,225,226,227,228,229,230,231,
447 232,233,234,235,236,237,238,239,
448 240,241,242,243,244,245,246,247,
449 248,249,250,251,252,253,254,255,
450 0,1,2,3,4,5,6,7,
451 8,9,10,11,12,13,14,15,
452 16,17,18,19,20,21,22,23,
453 24,25,26,27,28,29,30,31,
454 32,33,34,35,36,37,38,39,
455 40,41,42,43,44,45,46,47,
456 48,49,50,51,52,53,54,55,
457 56,57,58,59,60,61,62,63,
458 64,97,98,99,100,101,102,103,
459 104,105,106,107,108,109,110,111,
460 112,113,114,115,116,117,118,119,
461 120,121,122,91,92,93,94,95,
462 96,65,66,67,68,69,70,71,
463 72,73,74,75,76,77,78,79,
464 80,81,82,83,84,85,86,87,
465 88,89,90,123,124,125,126,127,
466 128,129,130,131,132,133,134,135,
467 136,137,138,139,140,141,142,143,
468 144,145,146,147,148,149,150,151,
469 152,153,154,155,156,157,158,159,
470 160,161,162,163,164,165,166,167,
471 168,169,170,171,172,173,174,175,
472 176,177,178,179,180,181,182,183,
473 184,185,186,187,188,189,190,191,
474 224,225,226,227,228,229,230,231,
475 232,233,234,235,236,237,238,239,
476 240,241,242,243,244,245,246,215,
477 248,249,250,251,252,253,254,223,
478 192,193,194,195,196,197,198,199,
479 200,201,202,203,204,205,206,207,
480 208,209,210,211,212,213,214,247,
481 216,217,218,219,220,221,222,255,
482 0,62,0,0,1,0,0,0,
483 0,0,0,0,0,0,0,0,
484 32,0,0,0,1,0,0,0,
485 0,0,0,0,0,0,0,0,
486 0,0,0,0,0,0,255,3,
487 126,0,0,0,126,0,0,0,
488 0,0,0,0,0,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,255,3,
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,12,2,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0,
495 254,255,255,7,0,0,0,0,
496 0,0,0,0,0,0,0,0,
497 255,255,127,127,0,0,0,0,
498 0,0,0,0,0,0,0,0,
499 0,0,0,0,254,255,255,7,
500 0,0,0,0,0,4,32,4,
501 0,0,0,128,255,255,127,255,
502 0,0,0,0,0,0,255,3,
503 254,255,255,135,254,255,255,7,
504 0,0,0,0,0,4,44,6,
505 255,255,127,255,255,255,127,255,
506 0,0,0,0,254,255,255,255,
507 255,255,255,255,255,255,255,127,
508 0,0,0,0,254,255,255,255,
509 255,255,255,255,255,255,255,255,
510 0,2,0,0,255,255,255,255,
511 255,255,255,255,255,255,255,127,
512 0,0,0,0,255,255,255,255,
513 255,255,255,255,255,255,255,255,
514 0,0,0,0,254,255,0,252,
515 1,0,0,248,1,0,0,120,
516 0,0,0,0,254,255,255,255,
517 0,0,128,0,0,0,128,0,
518 255,255,255,255,0,0,0,0,
519 0,0,0,0,0,0,0,128,
520 255,255,255,255,0,0,0,0,
521 0,0,0,0,0,0,0,0,
522 128,0,0,0,0,0,0,0,
523 0,1,1,0,1,1,0,0,
524 0,0,0,0,0,0,0,0,
525 0,0,0,0,0,0,0,0,
526 1,0,0,0,128,0,0,0,
527 128,128,128,128,0,0,128,0,
528 28,28,28,28,28,28,28,28,
529 28,28,0,0,0,0,0,128,
530 0,26,26,26,26,26,26,18,
531 18,18,18,18,18,18,18,18,
532 18,18,18,18,18,18,18,18,
533 18,18,18,128,128,0,128,16,
534 0,26,26,26,26,26,26,18,
535 18,18,18,18,18,18,18,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,128,128,0,0,0,
538 0,0,0,0,0,1,0,0,
539 0,0,0,0,0,0,0,0,
540 0,0,0,0,0,0,0,0,
541 0,0,0,0,0,0,0,0,
542 1,0,0,0,0,0,0,0,
543 0,0,18,0,0,0,0,0,
544 0,0,20,20,0,18,0,0,
545 0,20,18,0,0,0,0,0,
546 18,18,18,18,18,18,18,18,
547 18,18,18,18,18,18,18,18,
548 18,18,18,18,18,18,18,0,
549 18,18,18,18,18,18,18,18,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,0,
553 18,18,18,18,18,18,18,18
554 };
555
556
557
558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is one example). The
message is looked up in the library's sys_errlist[] table; any number outside
the range 0 .. sys_nerr-1 gets a fixed "unknown" text instead.

Argument:   n    the error number
Returns:    pointer to the message text (not to be modified or freed)
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
#endif  /* HAVE_STRERROR */
578
579
580
581
582 /*************************************************
583 * Read or extend an input line *
584 *************************************************/
585
586 /* Input lines are read into buffer, but both patterns and data lines can be
587 continued over multiple input lines. In addition, if the buffer fills up, we
588 want to automatically expand it so as to be able to handle extremely large
589 lines that are needed for certain stress tests. When the input buffer is
590 expanded, the other two buffers must also be expanded likewise, and the
591 contents of pbuffer, which are a copy of the input for callouts, must be
592 preserved (for when expansion happens for a data line). This is not the most
593 optimal way of handling this, but hey, this is just a test program!
594
595 Arguments:
596 f the file to read
597 start where in buffer to start (this *must* be within buffer)
598 prompt for stdin or readline()
599
600 Returns: pointer to the start of new data
601 could be a copy of start, or could be moved
602 NULL if no data read and EOF reached
603 */
604
605 static uschar *
606 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 {
608 uschar *here = start;
609
610 for (;;)
611 {
612 int rlen = (int)(buffer_size - (here - buffer));
613
614 if (rlen > 1000)
615 {
616 int dlen;
617
618 /* If libreadline support is required, use readline() to read a line if the
619 input is a terminal. Note that readline() removes the trailing newline, so
620 we must put it back again, to be compatible with fgets(). */
621
622 #ifdef SUPPORT_LIBREADLINE
623 if (isatty(fileno(f)))
624 {
625 size_t len;
626 char *s = readline(prompt);
627 if (s == NULL) return (here == start)? NULL : start;
628 len = strlen(s);
629 if (len > 0) add_history(s);
630 if (len > rlen - 1) len = rlen - 1;
631 memcpy(here, s, len);
632 here[len] = '\n';
633 here[len+1] = 0;
634 free(s);
635 }
636 else
637 #endif
638
639 /* Read the next line by normal means, prompting if the file is stdin. */
640
641 {
642 if (f == stdin) printf("%s", prompt);
643 if (fgets((char *)here, rlen, f) == NULL)
644 return (here == start)? NULL : start;
645 }
646
647 dlen = (int)strlen((char *)here);
648 if (dlen > 0 && here[dlen - 1] == '\n') return start;
649 here += dlen;
650 }
651
652 else
653 {
654 int new_buffer_size = 2*buffer_size;
655 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658
659 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660 {
661 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662 exit(1);
663 }
664
665 memcpy(new_buffer, buffer, buffer_size);
666 memcpy(new_pbuffer, pbuffer, buffer_size);
667
668 buffer_size = new_buffer_size;
669
670 start = new_buffer + (start - buffer);
671 here = new_buffer + (here - buffer);
672
673 free(buffer);
674 free(dbuffer);
675 free(pbuffer);
676
677 buffer = new_buffer;
678 dbuffer = new_dbuffer;
679 pbuffer = new_pbuffer;
680 }
681 }
682
683 return NULL; /* Control never gets here */
684 }
685
686
687
688
689
690
691
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is zero and the end pointer is left at the first
non-space character. A digit string too large for an int yields INT_MAX rather
than overflowing; all of its digits are still consumed.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10.
    Once saturated, the test keeps failing, so the value stays at INT_MAX. */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
716
717
718
719
720 /*************************************************
721 * Convert UTF-8 string to value *
722 *************************************************/
723
724 /* This function takes one or more bytes that represents a UTF-8 character,
725 and returns the value of the character.
726
727 Argument:
728 utf8bytes a pointer to the byte vector
729 vptr a pointer to an int to receive the value
730
731 Returns: > 0 => the number of bytes consumed
732 -6 to 0 => malformed UTF-8 character at offset = (-return)
733 */
734
735 #if !defined NOUTF8
736
737 static int
738 utf82ord(unsigned char *utf8bytes, int *vptr)
739 {
740 int c = *utf8bytes++;
741 int d = c;
742 int i, j, s;
743
744 for (i = -1; i < 6; i++) /* i is number of additional bytes */
745 {
746 if ((d & 0x80) == 0) break;
747 d <<= 1;
748 }
749
750 if (i == -1) { *vptr = c; return 1; } /* ascii character */
751 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752
753 /* i now has a value in the range 1-5 */
754
755 s = 6*i;
756 d = (c & utf8_table3[i]) << s;
757
758 for (j = 0; j < i; j++)
759 {
760 c = *utf8bytes++;
761 if ((c & 0xc0) != 0x80) return -(j+1);
762 s -= 6;
763 d |= (c & 0x3f) << s;
764 }
765
766 /* Check that encoding was the correct unique one */
767
768 for (j = 0; j < utf8_table1_size; j++)
769 if (d <= utf8_table1[j]) break;
770 if (j != i) return -(i+1);
771
772 /* Valid value */
773
774 *vptr = d;
775 return i+1;
776 }
777
778 #endif
779
780
781
782 /*************************************************
783 * Convert character value to UTF-8 *
784 *************************************************/
785
786 /* This function takes an integer value in the range 0 - 0x7fffffff
787 and encodes it as a UTF-8 character in 0 to 6 bytes.
788
789 Arguments:
790 cvalue the character value
791 utf8bytes pointer to buffer for result - at least 6 bytes long
792
793 Returns: number of characters placed in the buffer
794 */
795
796 #if !defined NOUTF8
797
798 static int
799 ord2utf8(int cvalue, uschar *utf8bytes)
800 {
801 register int i, j;
802 for (i = 0; i < utf8_table1_size; i++)
803 if (cvalue <= utf8_table1[i]) break;
804 utf8bytes += i;
805 for (j = i; j > 0; j--)
806 {
807 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 cvalue >>= 6;
809 }
810 *utf8bytes = utf8_table2[i] | cvalue;
811 return i + 1;
812 }
813
814 #endif
815
816
817
818 /*************************************************
819 * Print character string *
820 *************************************************/
821
822 /* Character string printing function. Must handle UTF-8 strings in utf8
823 mode. Yields number of characters printed. If handed a NULL file, just counts
824 chars without printing. */
825
826 static int pchars(unsigned char *p, int length, FILE *f)
827 {
828 int c = 0;
829 int yield = 0;
830
831 while (length-- > 0)
832 {
833 #if !defined NOUTF8
834 if (use_utf8)
835 {
836 int rc = utf82ord(p, &c);
837
838 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839 {
840 length -= rc - 1;
841 p += rc;
842 if (PRINTHEX(c))
843 {
844 if (f != NULL) fprintf(f, "%c", c);
845 yield++;
846 }
847 else
848 {
849 int n = 4;
850 if (f != NULL) fprintf(f, "\\x{%02x}", c);
851 yield += (n <= 0x000000ff)? 2 :
852 (n <= 0x00000fff)? 3 :
853 (n <= 0x0000ffff)? 4 :
854 (n <= 0x000fffff)? 5 : 6;
855 }
856 continue;
857 }
858 }
859 #endif
860
861 /* Not UTF-8, or malformed UTF-8 */
862
863 c = *p++;
864 if (PRINTHEX(c))
865 {
866 if (f != NULL) fprintf(f, "%c", c);
867 yield++;
868 }
869 else
870 {
871 if (f != NULL) fprintf(f, "\\x%02x", c);
872 yield += 4;
873 }
874 }
875
876 return yield;
877 }
878
879
880
881 /*************************************************
882 * Callout function *
883 *************************************************/
884
885 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886 the match. Yield zero unless more callouts than the fail count, or the callout
887 data is not zero. */
888
889 static int callout(pcre_callout_block *cb)
890 {
891 FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 int i, pre_start, post_start, subject_length;
893
894 if (callout_extra)
895 {
896 fprintf(f, "Callout %d: last capture = %d\n",
897 cb->callout_number, cb->capture_last);
898
899 for (i = 0; i < cb->capture_top * 2; i += 2)
900 {
901 if (cb->offset_vector[i] < 0)
902 fprintf(f, "%2d: <unset>\n", i/2);
903 else
904 {
905 fprintf(f, "%2d: ", i/2);
906 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907 cb->offset_vector[i+1] - cb->offset_vector[i], f);
908 fprintf(f, "\n");
909 }
910 }
911 }
912
913 /* Re-print the subject in canonical form, the first time or if giving full
914 datails. On subsequent calls in the same match, we use pchars just to find the
915 printed lengths of the substrings. */
916
917 if (f != NULL) fprintf(f, "--->");
918
919 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921 cb->current_position - cb->start_match, f);
922
923 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924
925 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926 cb->subject_length - cb->current_position, f);
927
928 if (f != NULL) fprintf(f, "\n");
929
930 /* Always print appropriate indicators, with callout number if not already
931 shown. For automatic callouts, show the pattern offset. */
932
933 if (cb->callout_number == 255)
934 {
935 fprintf(outfile, "%+3d ", cb->pattern_position);
936 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937 }
938 else
939 {
940 if (callout_extra) fprintf(outfile, " ");
941 else fprintf(outfile, "%3d ", cb->callout_number);
942 }
943
944 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945 fprintf(outfile, "^");
946
947 if (post_start > 0)
948 {
949 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950 fprintf(outfile, "^");
951 }
952
953 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954 fprintf(outfile, " ");
955
956 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957 pbuffer + cb->pattern_position);
958
959 fprintf(outfile, "\n");
960 first_callout = 0;
961
962 if (cb->callout_data != NULL)
963 {
964 int callout_data = *((int *)(cb->callout_data));
965 if (callout_data != 0)
966 {
967 fprintf(outfile, "Callout data = %d\n", callout_data);
968 return callout_data;
969 }
970 }
971
972 return (cb->callout_number != callout_fail_id)? 0 :
973 (++callout_count >= callout_fail_count)? 1 : 0;
974 }
975
976
977 /*************************************************
978 * Local malloc functions *
979 *************************************************/
980
981 /* Alternative malloc function, to test functionality and show the size of the
982 compiled re. */
983
984 static void *new_malloc(size_t size)
985 {
986 void *block = malloc(size);
987 gotten_store = size;
988 if (show_malloc)
989 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 return block;
991 }
992
993 static void new_free(void *block)
994 {
995 if (show_malloc)
996 fprintf(outfile, "free %p\n", block);
997 free(block);
998 }
999
1000
1001 /* For recursion malloc/free, to test stacking calls */
1002
1003 static void *stack_malloc(size_t size)
1004 {
1005 void *block = malloc(size);
1006 if (show_malloc)
1007 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 return block;
1009 }
1010
1011 static void stack_free(void *block)
1012 {
1013 if (show_malloc)
1014 fprintf(outfile, "stack_free %p\n", block);
1015 free(block);
1016 }
1017
1018
1019 /*************************************************
1020 * Call pcre_fullinfo() *
1021 *************************************************/
1022
1023 /* Get one piece of information from the pcre_fullinfo() function */
1024
1025 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026 {
1027 int rc;
1028 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030 }
1031
1032
1033
1034 /*************************************************
1035 * Byte flipping function *
1036 *************************************************/
1037
/* Reverse the byte order of a small integer.

Arguments:
  value     the quantity to flip
  n         2 selects a 2-byte swap; any other value selects a 4-byte swap

Returns:    the low 2 (or 4) bytes of value in reversed order; any higher
            bytes of value do not contribute to the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2;
  unsigned long int b3;

  if (n == 2)
    {
    return (b0 << 8) | b1;
    }

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1047
1048
1049
1050
1051 /*************************************************
1052 * Check match or recursion limit *
1053 *************************************************/
1054
1055 static int
1056 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057 int start_offset, int options, int *use_offsets, int use_size_offsets,
1058 int flag, unsigned long int *limit, int errnumber, const char *msg)
1059 {
1060 int count;
1061 int min = 0;
1062 int mid = 64;
1063 int max = -1;
1064
1065 extra->flags |= flag;
1066
1067 for (;;)
1068 {
1069 *limit = mid;
1070
1071 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072 use_offsets, use_size_offsets);
1073
1074 if (count == errnumber)
1075 {
1076 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077 min = mid;
1078 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079 }
1080
1081 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082 count == PCRE_ERROR_PARTIAL)
1083 {
1084 if (mid == min + 1)
1085 {
1086 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087 break;
1088 }
1089 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090 max = mid;
1091 mid = (min + mid)/2;
1092 }
1093 else break; /* Some other error */
1094 }
1095
1096 extra->flags &= ~flag;
1097 return count;
1098 }
1099
1100
1101
1102 /*************************************************
1103 * Case-independent strncmp() function *
1104 *************************************************/
1105
1106 /*
1107 Arguments:
1108 s first string
1109 t second string
1110 n number of characters to compare
1111
1112 Returns: < 0, = 0, or > 0, according to the comparison
1113 */
1114
1115 static int
1116 strncmpic(uschar *s, uschar *t, int n)
1117 {
1118 while (n--)
1119 {
1120 int c = tolower(*s++) - tolower(*t++);
1121 if (c) return c;
1122 }
1123 return 0;
1124 }
1125
1126
1127
1128 /*************************************************
1129 * Check newline indicator *
1130 *************************************************/
1131
1132 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133 a message and return 0 if there is no match.
1134
1135 Arguments:
1136 p points after the leading '<'
1137 f file for error message
1138
1139 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140 */
1141
1142 static int
1143 check_newline(uschar *p, FILE *f)
1144 {
1145 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 fprintf(f, "Unknown newline type at: <%s\n", p);
1153 return 0;
1154 }
1155
1156
1157
1158 /*************************************************
1159 * Usage function *
1160 *************************************************/
1161
/* Write the command-line help text to stdout. Which lines appear depends on
how the program was built: the readline remark depends on SUPPORT_LIBREADLINE,
the -dfa line is omitted when NODFA is defined, and the -p line is omitted when
NOPOSIX is defined. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195
1196
1197
1198 /*************************************************
1199 * Main Program *
1200 *************************************************/
1201
1202 /* Read lines from named file or stdin and write to named file or stdout; lines
1203 consist of a regular expression, in delimiters and optionally followed by
1204 options, followed by a set of test data, terminated by an empty line. */
1205
1206 int main(int argc, char **argv)
1207 {
1208 FILE *infile = stdin;
1209 int options = 0;
1210 int study_options = 0;
1211 int default_find_match_limit = FALSE;
1212 int op = 1;
1213 int timeit = 0;
1214 int timeitm = 0;
1215 int showinfo = 0;
1216 int showstore = 0;
1217 int force_study = 0;
1218 int quiet = 0;
1219 int size_offsets = 45;
1220 int size_offsets_max;
1221 int *offsets = NULL;
1222 #if !defined NOPOSIX
1223 int posix = 0;
1224 #endif
1225 int debug = 0;
1226 int done = 0;
1227 int all_use_dfa = 0;
1228 int yield = 0;
1229 int stack_size;
1230
1231 /* These vectors store, end-to-end, a list of captured substring names. Assume
1232 that 1024 is plenty long enough for the few names we'll be testing. */
1233
1234 uschar copynames[1024];
1235 uschar getnames[1024];
1236
1237 uschar *copynamesptr;
1238 uschar *getnamesptr;
1239
1240 /* Get buffers from malloc() so that Electric Fence will check their misuse
1241 when I am debugging. They grow automatically when very long lines are read. */
1242
1243 buffer = (unsigned char *)malloc(buffer_size);
1244 dbuffer = (unsigned char *)malloc(buffer_size);
1245 pbuffer = (unsigned char *)malloc(buffer_size);
1246
1247 /* The outfile variable is static so that new_malloc can use it. */
1248
1249 outfile = stdout;
1250
1251 /* The following _setmode() stuff is some Windows magic that tells its runtime
1252 library to translate CRLF into a single LF character. At least, that's what
1253 I've been told: never having used Windows I take this all on trust. Originally
1254 it set 0x8000, but then I was advised that _O_BINARY was better. */
1255
1256 #if defined(_WIN32) || defined(WIN32)
1257 _setmode( _fileno( stdout ), _O_BINARY );
1258 #endif
1259
1260 /* Scan options */
1261
1262 while (argc > 1 && argv[op][0] == '-')
1263 {
1264 unsigned char *endptr;
1265
1266 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267 else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1271 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1272 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1273 #if !defined NODFA
1274 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1275 #endif
1276 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1277 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1278 *endptr == 0))
1279 {
1280 op++;
1281 argc--;
1282 }
1283 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1284 {
1285 int both = argv[op][2] == 0;
1286 int temp;
1287 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1288 *endptr == 0))
1289 {
1290 timeitm = temp;
1291 op++;
1292 argc--;
1293 }
1294 else timeitm = LOOPREPEAT;
1295 if (both) timeit = timeitm;
1296 }
1297 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1298 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1299 *endptr == 0))
1300 {
1301 #if defined(_WIN32) || defined(WIN32)
1302 printf("PCRE: -S not supported on this OS\n");
1303 exit(1);
1304 #else
1305 int rc;
1306 struct rlimit rlim;
1307 getrlimit(RLIMIT_STACK, &rlim);
1308 rlim.rlim_cur = stack_size * 1024 * 1024;
1309 rc = setrlimit(RLIMIT_STACK, &rlim);
1310 if (rc != 0)
1311 {
1312 printf("PCRE: setrlimit() failed with error %d\n", rc);
1313 exit(1);
1314 }
1315 op++;
1316 argc--;
1317 #endif
1318 }
1319 #if !defined NOPOSIX
1320 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1321 #endif
1322 else if (strcmp(argv[op], "-C") == 0)
1323 {
1324 int rc;
1325 unsigned long int lrc;
1326 printf("PCRE version %s\n", pcre_version());
1327 printf("Compiled with\n");
1328 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1329 printf(" %sUTF-8 support\n", rc? "" : "No ");
1330 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1331 printf(" %sUnicode properties support\n", rc? "" : "No ");
1332 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1333 /* Note that these values are always the ASCII values, even
1334 in EBCDIC environments. CR is 13 and NL is 10. */
1335 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1336 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1337 (rc == -2)? "ANYCRLF" :
1338 (rc == -1)? "ANY" : "???");
1339 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1340 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1341 "all Unicode newlines");
1342 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1343 printf(" Internal link size = %d\n", rc);
1344 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1345 printf(" POSIX malloc threshold = %d\n", rc);
1346 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1347 printf(" Default match limit = %ld\n", lrc);
1348 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1349 printf(" Default recursion depth limit = %ld\n", lrc);
1350 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1351 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1352 goto EXIT;
1353 }
1354 else if (strcmp(argv[op], "-help") == 0 ||
1355 strcmp(argv[op], "--help") == 0)
1356 {
1357 usage();
1358 goto EXIT;
1359 }
1360 else
1361 {
1362 printf("** Unknown or malformed option %s\n", argv[op]);
1363 usage();
1364 yield = 1;
1365 goto EXIT;
1366 }
1367 op++;
1368 argc--;
1369 }
1370
1371 /* Get the store for the offsets vector, and remember what it was */
1372
1373 size_offsets_max = size_offsets;
1374 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1375 if (offsets == NULL)
1376 {
1377 printf("** Failed to get %d bytes of memory for offsets vector\n",
1378 (int)(size_offsets_max * sizeof(int)));
1379 yield = 1;
1380 goto EXIT;
1381 }
1382
1383 /* Sort out the input and output files */
1384
1385 if (argc > 1)
1386 {
1387 infile = fopen(argv[op], INPUT_MODE);
1388 if (infile == NULL)
1389 {
1390 printf("** Failed to open %s\n", argv[op]);
1391 yield = 1;
1392 goto EXIT;
1393 }
1394 }
1395
1396 if (argc > 2)
1397 {
1398 outfile = fopen(argv[op+1], OUTPUT_MODE);
1399 if (outfile == NULL)
1400 {
1401 printf("** Failed to open %s\n", argv[op+1]);
1402 yield = 1;
1403 goto EXIT;
1404 }
1405 }
1406
1407 /* Set alternative malloc function */
1408
1409 pcre_malloc = new_malloc;
1410 pcre_free = new_free;
1411 pcre_stack_malloc = stack_malloc;
1412 pcre_stack_free = stack_free;
1413
1414 /* Heading line unless quiet, then prompt for first regex if stdin */
1415
1416 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1417
1418 /* Main loop */
1419
1420 while (!done)
1421 {
1422 pcre *re = NULL;
1423 pcre_extra *extra = NULL;
1424
1425 #if !defined NOPOSIX /* There are still compilers that require no indent */
1426 regex_t preg;
1427 int do_posix = 0;
1428 #endif
1429
1430 const char *error;
1431 unsigned char *markptr;
1432 unsigned char *p, *pp, *ppp;
1433 unsigned char *to_file = NULL;
1434 const unsigned char *tables = NULL;
1435 unsigned long int true_size, true_study_size = 0;
1436 size_t size, regex_gotten_store;
1437 int do_mark = 0;
1438 int do_study = 0;
1439 int no_force_study = 0;
1440 int do_debug = debug;
1441 int do_G = 0;
1442 int do_g = 0;
1443 int do_showinfo = showinfo;
1444 int do_showrest = 0;
1445 int do_flip = 0;
1446 int erroroffset, len, delimiter, poffset;
1447
1448 use_utf8 = 0;
1449 debug_lengths = 1;
1450
1451 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1452 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1453 fflush(outfile);
1454
1455 p = buffer;
1456 while (isspace(*p)) p++;
1457 if (*p == 0) continue;
1458
1459 /* See if the pattern is to be loaded pre-compiled from a file. */
1460
1461 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1462 {
1463 unsigned long int magic, get_options;
1464 uschar sbuf[8];
1465 FILE *f;
1466
1467 p++;
1468 pp = p + (int)strlen((char *)p);
1469 while (isspace(pp[-1])) pp--;
1470 *pp = 0;
1471
1472 f = fopen((char *)p, "rb");
1473 if (f == NULL)
1474 {
1475 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1476 continue;
1477 }
1478
1479 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1480
1481 true_size =
1482 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1483 true_study_size =
1484 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1485
1486 re = (real_pcre *)new_malloc(true_size);
1487 regex_gotten_store = gotten_store;
1488
1489 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1490
1491 magic = ((real_pcre *)re)->magic_number;
1492 if (magic != MAGIC_NUMBER)
1493 {
1494 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1495 {
1496 do_flip = 1;
1497 }
1498 else
1499 {
1500 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1501 fclose(f);
1502 continue;
1503 }
1504 }
1505
1506 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1507 do_flip? " (byte-inverted)" : "", p);
1508
1509 /* Need to know if UTF-8 for printing data strings */
1510
1511 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1512 use_utf8 = (get_options & PCRE_UTF8) != 0;
1513
1514 /* Now see if there is any following study data. */
1515
1516 if (true_study_size != 0)
1517 {
1518 pcre_study_data *psd;
1519
1520 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1521 extra->flags = PCRE_EXTRA_STUDY_DATA;
1522
1523 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1524 extra->study_data = psd;
1525
1526 if (fread(psd, 1, true_study_size, f) != true_study_size)
1527 {
1528 FAIL_READ:
1529 fprintf(outfile, "Failed to read data from %s\n", p);
1530 if (extra != NULL) new_free(extra);
1531 if (re != NULL) new_free(re);
1532 fclose(f);
1533 continue;
1534 }
1535 fprintf(outfile, "Study data loaded from %s\n", p);
1536 do_study = 1; /* To get the data output if requested */
1537 }
1538 else fprintf(outfile, "No study data\n");
1539
1540 fclose(f);
1541 goto SHOW_INFO;
1542 }
1543
1544 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1545 the pattern; if it isn't complete, read more. */
1546
1547 delimiter = *p++;
1548
1549 if (isalnum(delimiter) || delimiter == '\\')
1550 {
1551 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1552 goto SKIP_DATA;
1553 }
1554
1555 pp = p;
1556 poffset = (int)(p - buffer);
1557
1558 for(;;)
1559 {
1560 while (*pp != 0)
1561 {
1562 if (*pp == '\\' && pp[1] != 0) pp++;
1563 else if (*pp == delimiter) break;
1564 pp++;
1565 }
1566 if (*pp != 0) break;
1567 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1568 {
1569 fprintf(outfile, "** Unexpected EOF\n");
1570 done = 1;
1571 goto CONTINUE;
1572 }
1573 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1574 }
1575
1576 /* The buffer may have moved while being extended; reset the start of data
1577 pointer to the correct relative point in the buffer. */
1578
1579 p = buffer + poffset;
1580
1581 /* If the first character after the delimiter is backslash, make
1582 the pattern end with backslash. This is purely to provide a way
1583 of testing for the error message when a pattern ends with backslash. */
1584
1585 if (pp[1] == '\\') *pp++ = '\\';
1586
1587 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1588 for callouts. */
1589
1590 *pp++ = 0;
1591 strcpy((char *)pbuffer, (char *)p);
1592
1593 /* Look for options after final delimiter */
1594
1595 options = 0;
1596 study_options = 0;
1597 log_store = showstore; /* default from command line */
1598
1599 while (*pp != 0)
1600 {
1601 switch (*pp++)
1602 {
1603 case 'f': options |= PCRE_FIRSTLINE; break;
1604 case 'g': do_g = 1; break;
1605 case 'i': options |= PCRE_CASELESS; break;
1606 case 'm': options |= PCRE_MULTILINE; break;
1607 case 's': options |= PCRE_DOTALL; break;
1608 case 'x': options |= PCRE_EXTENDED; break;
1609
1610 case '+': do_showrest = 1; break;
1611 case 'A': options |= PCRE_ANCHORED; break;
1612 case 'B': do_debug = 1; break;
1613 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1614 case 'D': do_debug = do_showinfo = 1; break;
1615 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1616 case 'F': do_flip = 1; break;
1617 case 'G': do_G = 1; break;
1618 case 'I': do_showinfo = 1; break;
1619 case 'J': options |= PCRE_DUPNAMES; break;
1620 case 'K': do_mark = 1; break;
1621 case 'M': log_store = 1; break;
1622 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1623
1624 #if !defined NOPOSIX
1625 case 'P': do_posix = 1; break;
1626 #endif
1627
1628 case 'S':
1629 if (do_study == 0) do_study = 1; else
1630 {
1631 do_study = 0;
1632 no_force_study = 1;
1633 }
1634 break;
1635
1636 case 'U': options |= PCRE_UNGREEDY; break;
1637 case 'W': options |= PCRE_UCP; break;
1638 case 'X': options |= PCRE_EXTRA; break;
1639 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1640 case 'Z': debug_lengths = 0; break;
1641 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1642 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1643
1644 case 'T':
1645 switch (*pp++)
1646 {
1647 case '0': tables = tables0; break;
1648 case '1': tables = tables1; break;
1649
1650 case '\r':
1651 case '\n':
1652 case ' ':
1653 case 0:
1654 fprintf(outfile, "** Missing table number after /T\n");
1655 goto SKIP_DATA;
1656
1657 default:
1658 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1659 goto SKIP_DATA;
1660 }
1661 break;
1662
1663 case 'L':
1664 ppp = pp;
1665 /* The '\r' test here is so that it works on Windows. */
1666 /* The '0' test is just in case this is an unterminated line. */
1667 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1668 *ppp = 0;
1669 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1670 {
1671 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1672 goto SKIP_DATA;
1673 }
1674 locale_set = 1;
1675 tables = pcre_maketables();
1676 pp = ppp;
1677 break;
1678
1679 case '>':
1680 to_file = pp;
1681 while (*pp != 0) pp++;
1682 while (isspace(pp[-1])) pp--;
1683 *pp = 0;
1684 break;
1685
1686 case '<':
1687 {
1688 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1689 {
1690 options |= PCRE_JAVASCRIPT_COMPAT;
1691 pp += 3;
1692 }
1693 else
1694 {
1695 int x = check_newline(pp, outfile);
1696 if (x == 0) goto SKIP_DATA;
1697 options |= x;
1698 while (*pp++ != '>');
1699 }
1700 }
1701 break;
1702
1703 case '\r': /* So that it works in Windows */
1704 case '\n':
1705 case ' ':
1706 break;
1707
1708 default:
1709 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1710 goto SKIP_DATA;
1711 }
1712 }
1713
1714 /* Handle compiling via the POSIX interface, which doesn't support the
1715 timing, showing, or debugging options, nor the ability to pass over
1716 local character tables. */
1717
1718 #if !defined NOPOSIX
1719 if (posix || do_posix)
1720 {
1721 int rc;
1722 int cflags = 0;
1723
1724 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1725 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1726 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1727 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1728 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1729 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1730 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1731
1732 rc = regcomp(&preg, (char *)p, cflags);
1733
1734 /* Compilation failed; go back for another re, skipping to blank line
1735 if non-interactive. */
1736
1737 if (rc != 0)
1738 {
1739 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1740 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1741 goto SKIP_DATA;
1742 }
1743 }
1744
1745 /* Handle compiling via the native interface */
1746
1747 else
1748 #endif /* !defined NOPOSIX */
1749
1750 {
1751 unsigned long int get_options;
1752
1753 if (timeit > 0)
1754 {
1755 register int i;
1756 clock_t time_taken;
1757 clock_t start_time = clock();
1758 for (i = 0; i < timeit; i++)
1759 {
1760 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1761 if (re != NULL) free(re);
1762 }
1763 time_taken = clock() - start_time;
1764 fprintf(outfile, "Compile time %.4f milliseconds\n",
1765 (((double)time_taken * 1000.0) / (double)timeit) /
1766 (double)CLOCKS_PER_SEC);
1767 }
1768
1769 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1770
1771 /* Compilation failed; go back for another re, skipping to blank line
1772 if non-interactive. */
1773
1774 if (re == NULL)
1775 {
1776 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1777 SKIP_DATA:
1778 if (infile != stdin)
1779 {
1780 for (;;)
1781 {
1782 if (extend_inputline(infile, buffer, NULL) == NULL)
1783 {
1784 done = 1;
1785 goto CONTINUE;
1786 }
1787 len = (int)strlen((char *)buffer);
1788 while (len > 0 && isspace(buffer[len-1])) len--;
1789 if (len == 0) break;
1790 }
1791 fprintf(outfile, "\n");
1792 }
1793 goto CONTINUE;
1794 }
1795
1796 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1797 within the regex; check for this so that we know how to process the data
1798 lines. */
1799
1800 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1801 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1802
1803 /* Print information if required. There are now two info-returning
1804 functions. The old one has a limited interface and returns only limited
1805 data. Check that it agrees with the newer one. */
1806
1807 if (log_store)
1808 fprintf(outfile, "Memory allocation (code space): %d\n",
1809 (int)(gotten_store -
1810 sizeof(real_pcre) -
1811 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1812
1813 /* Extract the size for possible writing before possibly flipping it,
1814 and remember the store that was got. */
1815
1816 true_size = ((real_pcre *)re)->size;
1817 regex_gotten_store = gotten_store;
1818
1819 /* If -s or /S was present, study the regex to generate additional info to
1820 help with the matching, unless the pattern has the SS option, which
1821 suppresses the effect of /S (used for a few test patterns where studying is
1822 never sensible). */
1823
1824 if (do_study || (force_study && !no_force_study))
1825 {
1826 if (timeit > 0)
1827 {
1828 register int i;
1829 clock_t time_taken;
1830 clock_t start_time = clock();
1831 for (i = 0; i < timeit; i++)
1832 extra = pcre_study(re, study_options, &error);
1833 time_taken = clock() - start_time;
1834 if (extra != NULL) free(extra);
1835 fprintf(outfile, " Study time %.4f milliseconds\n",
1836 (((double)time_taken * 1000.0) / (double)timeit) /
1837 (double)CLOCKS_PER_SEC);
1838 }
1839 extra = pcre_study(re, study_options, &error);
1840 if (error != NULL)
1841 fprintf(outfile, "Failed to study: %s\n", error);
1842 else if (extra != NULL)
1843 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1844 }
1845
1846 /* If /K was present, we set up for handling MARK data. */
1847
1848 if (do_mark)
1849 {
1850 if (extra == NULL)
1851 {
1852 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1853 extra->flags = 0;
1854 }
1855 extra->mark = &markptr;
1856 extra->flags |= PCRE_EXTRA_MARK;
1857 }
1858
1859 /* If the 'F' option was present, we flip the bytes of all the integer
1860 fields in the regex data block and the study block. This is to make it
1861 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1862 compiled on a different architecture. */
1863
1864 if (do_flip)
1865 {
1866 real_pcre *rre = (real_pcre *)re;
1867 rre->magic_number =
1868 byteflip(rre->magic_number, sizeof(rre->magic_number));
1869 rre->size = byteflip(rre->size, sizeof(rre->size));
1870 rre->options = byteflip(rre->options, sizeof(rre->options));
1871 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1872 rre->top_bracket =
1873 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1874 rre->top_backref =
1875 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1876 rre->first_byte =
1877 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1878 rre->req_byte =
1879 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1880 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1881 sizeof(rre->name_table_offset));
1882 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1883 sizeof(rre->name_entry_size));
1884 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1885 sizeof(rre->name_count));
1886
1887 if (extra != NULL)
1888 {
1889 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1890 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1891 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1892 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1893 }
1894 }
1895
1896 /* Extract information from the compiled data if required */
1897
1898 SHOW_INFO:
1899
1900 if (do_debug)
1901 {
1902 fprintf(outfile, "------------------------------------------------------------------\n");
1903 pcre_printint(re, outfile, debug_lengths);
1904 }
1905
1906 /* We already have the options in get_options (see above) */
1907
1908 if (do_showinfo)
1909 {
1910 unsigned long int all_options;
1911 #if !defined NOINFOCHECK
1912 int old_first_char, old_options, old_count;
1913 #endif
1914 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1915 hascrorlf;
1916 int nameentrysize, namecount;
1917 const uschar *nametable;
1918
1919 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1920 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1921 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1922 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1923 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1924 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1925 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1926 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1927 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1928 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1929 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1930
1931 #if !defined NOINFOCHECK
1932 old_count = pcre_info(re, &old_options, &old_first_char);
1933 if (count < 0) fprintf(outfile,
1934 "Error %d from pcre_info()\n", count);
1935 else
1936 {
1937 if (old_count != count) fprintf(outfile,
1938 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1939 old_count);
1940
1941 if (old_first_char != first_char) fprintf(outfile,
1942 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1943 first_char, old_first_char);
1944
1945 if (old_options != (int)get_options) fprintf(outfile,
1946 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1947 get_options, old_options);
1948 }
1949 #endif
1950
1951 if (size != regex_gotten_store) fprintf(outfile,
1952 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1953 (int)size, (int)regex_gotten_store);
1954
1955 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1956 if (backrefmax > 0)
1957 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1958
1959 if (namecount > 0)
1960 {
1961 fprintf(outfile, "Named capturing subpatterns:\n");
1962 while (namecount-- > 0)
1963 {
1964 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1965 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1966 GET2(nametable, 0));
1967 nametable += nameentrysize;
1968 }
1969 }
1970
1971 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1972 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1973
1974 all_options = ((real_pcre *)re)->options;
1975 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1976
1977 if (get_options == 0) fprintf(outfile, "No options\n");
1978 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1979 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1980 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1981 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1982 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1983 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1984 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1985 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1986 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1987 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1988 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1989 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1990 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1991 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1992 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1993 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1994 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1995 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1996
1997 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1998
1999 switch (get_options & PCRE_NEWLINE_BITS)
2000 {
2001 case PCRE_NEWLINE_CR:
2002 fprintf(outfile, "Forced newline sequence: CR\n");
2003 break;
2004
2005 case PCRE_NEWLINE_LF:
2006 fprintf(outfile, "Forced newline sequence: LF\n");
2007 break;
2008
2009 case PCRE_NEWLINE_CRLF:
2010 fprintf(outfile, "Forced newline sequence: CRLF\n");
2011 break;
2012
2013 case PCRE_NEWLINE_ANYCRLF:
2014 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2015 break;
2016
2017 case PCRE_NEWLINE_ANY:
2018 fprintf(outfile, "Forced newline sequence: ANY\n");
2019 break;
2020
2021 default:
2022 break;
2023 }
2024
2025 if (first_char == -1)
2026 {
2027 fprintf(outfile, "First char at start or follows newline\n");
2028 }
2029 else if (first_char < 0)
2030 {
2031 fprintf(outfile, "No first char\n");
2032 }
2033 else
2034 {
2035 int ch = first_char & 255;
2036 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2037 "" : " (caseless)";
2038 if (PRINTHEX(ch))
2039 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2040 else
2041 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2042 }
2043
2044 if (need_char < 0)
2045 {
2046 fprintf(outfile, "No need char\n");
2047 }
2048 else
2049 {
2050 int ch = need_char & 255;
2051 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2052 "" : " (caseless)";
2053 if (PRINTHEX(ch))
2054 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2055 else
2056 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2057 }
2058
2059 /* Don't output study size; at present it is in any case a fixed
2060 value, but it varies, depending on the computer architecture, and
2061 so messes up the test suite. (And with the /F option, it might be
2062 flipped.) If study was forced by an external -s, don't show this
2063 information unless -i or -d was also present. This means that, except
2064 when auto-callouts are involved, the output from runs with and without
2065 -s should be identical. */
2066
2067 if (do_study || (force_study && showinfo && !no_force_study))
2068 {
2069 if (extra == NULL)
2070 fprintf(outfile, "Study returned NULL\n");
2071 else
2072 {
2073 uschar *start_bits = NULL;
2074 int minlength;
2075
2076 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2077 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2078
2079 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2080 if (start_bits == NULL)
2081 fprintf(outfile, "No set of starting bytes\n");
2082 else
2083 {
2084 int i;
2085 int c = 24;
2086 fprintf(outfile, "Starting byte set: ");
2087 for (i = 0; i < 256; i++)
2088 {
2089 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2090 {
2091 if (c > 75)
2092 {
2093 fprintf(outfile, "\n ");
2094 c = 2;
2095 }
2096 if (PRINTHEX(i) && i != ' ')
2097 {
2098 fprintf(outfile, "%c ", i);
2099 c += 2;
2100 }
2101 else
2102 {
2103 fprintf(outfile, "\\x%02x ", i);
2104 c += 5;
2105 }
2106 }
2107 }
2108 fprintf(outfile, "\n");
2109 }
2110 }
2111 }
2112 }
2113
2114 /* If the '>' option was present, we write out the regex to a file, and
2115 that is all. The first 8 bytes of the file are the regex length and then
2116 the study length, in big-endian order. */
2117
2118 if (to_file != NULL)
2119 {
2120 FILE *f = fopen((char *)to_file, "wb");
2121 if (f == NULL)
2122 {
2123 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2124 }
2125 else
2126 {
2127 uschar sbuf[8];
2128 sbuf[0] = (uschar)((true_size >> 24) & 255);
2129 sbuf[1] = (uschar)((true_size >> 16) & 255);
2130 sbuf[2] = (uschar)((true_size >> 8) & 255);
2131 sbuf[3] = (uschar)((true_size) & 255);
2132
2133 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2134 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2135 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2136 sbuf[7] = (uschar)((true_study_size) & 255);
2137
2138 if (fwrite(sbuf, 1, 8, f) < 8 ||
2139 fwrite(re, 1, true_size, f) < true_size)
2140 {
2141 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2142 }
2143 else
2144 {
2145 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2146
2147 /* If there is study data, write it, but verify the writing only
2148 if the studying was requested by /S, not just by -s. */
2149
2150 if (extra != NULL)
2151 {
2152 if (fwrite(extra->study_data, 1, true_study_size, f) <
2153 true_study_size)
2154 {
2155 fprintf(outfile, "Write error on %s: %s\n", to_file,
2156 strerror(errno));
2157 }
2158 else fprintf(outfile, "Study data written to %s\n", to_file);
2159 }
2160 }
2161 fclose(f);
2162 }
2163
2164 new_free(re);
2165 if (extra != NULL) new_free(extra);
2166 if (locale_set)
2167 {
2168 new_free((void *)tables);
2169 setlocale(LC_CTYPE, "C");
2170 locale_set = 0;
2171 }
2172 continue; /* With next regex */
2173 }
2174 } /* End of non-POSIX compile */
2175
2176 /* Read data lines and test them */
2177
2178 for (;;)
2179 {
2180 uschar *q;
2181 uschar *bptr;
2182 int *use_offsets = offsets;
2183 int use_size_offsets = size_offsets;
2184 int callout_data = 0;
2185 int callout_data_set = 0;
2186 int count, c;
2187 int copystrings = 0;
2188 int find_match_limit = default_find_match_limit;
2189 int getstrings = 0;
2190 int getlist = 0;
2191 int gmatched = 0;
2192 int start_offset = 0;
2193 int start_offset_sign = 1;
2194 int g_notempty = 0;
2195 int use_dfa = 0;
2196
2197 options = 0;
2198
2199 *copynames = 0;
2200 *getnames = 0;
2201
2202 copynamesptr = copynames;
2203 getnamesptr = getnames;
2204
2205 pcre_callout = callout;
2206 first_callout = 1;
2207 callout_extra = 0;
2208 callout_count = 0;
2209 callout_fail_count = 999999;
2210 callout_fail_id = -1;
2211 show_malloc = 0;
2212
2213 if (extra != NULL) extra->flags &=
2214 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2215
2216 len = 0;
2217 for (;;)
2218 {
2219 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2220 {
2221 if (len > 0) /* Reached EOF without hitting a newline */
2222 {
2223 fprintf(outfile, "\n");
2224 break;
2225 }
2226 done = 1;
2227 goto CONTINUE;
2228 }
2229 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2230 len = (int)strlen((char *)buffer);
2231 if (buffer[len-1] == '\n') break;
2232 }
2233
2234 while (len > 0 && isspace(buffer[len-1])) len--;
2235 buffer[len] = 0;
2236 if (len == 0) break;
2237
2238 p = buffer;
2239 while (isspace(*p)) p++;
2240
2241 bptr = q = dbuffer;
2242 while ((c = *p++) != 0)
2243 {
2244 int i = 0;
2245 int n = 0;
2246
2247 if (c == '\\') switch ((c = *p++))
2248 {
2249 case 'a': c = 7; break;
2250 case 'b': c = '\b'; break;
2251 case 'e': c = 27; break;
2252 case 'f': c = '\f'; break;
2253 case 'n': c = '\n'; break;
2254 case 'r': c = '\r'; break;
2255 case 't': c = '\t'; break;
2256 case 'v': c = '\v'; break;
2257
2258 case '0': case '1': case '2': case '3':
2259 case '4': case '5': case '6': case '7':
2260 c -= '0';
2261 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2262 c = c * 8 + *p++ - '0';
2263
2264 #if !defined NOUTF8
2265 if (use_utf8 && c > 255)
2266 {
2267 unsigned char buff8[8];
2268 int ii, utn;
2269 utn = ord2utf8(c, buff8);
2270 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2271 c = buff8[ii]; /* Last byte */
2272 }
2273 #endif
2274 break;
2275
2276 case 'x':
2277
2278 /* Handle \x{..} specially - new Perl thing for utf8 */
2279
2280 #if !defined NOUTF8
2281 if (*p == '{')
2282 {
2283 unsigned char *pt = p;
2284 c = 0;
2285 while (isxdigit(*(++pt)))
2286 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2287 if (*pt == '}')
2288 {
2289 unsigned char buff8[8];
2290 int ii, utn;
2291 if (use_utf8)
2292 {
2293 utn = ord2utf8(c, buff8);
2294 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2295 c = buff8[ii]; /* Last byte */
2296 }
2297 else
2298 {
2299 if (c > 255)
2300 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2301 "UTF-8 mode is not enabled.\n"
2302 "** Truncation will probably give the wrong result.\n", c);
2303 }
2304 p = pt + 1;
2305 break;
2306 }
2307 /* Not correct form; fall through */
2308 }
2309 #endif
2310
2311 /* Ordinary \x */
2312
2313 c = 0;
2314 while (i++ < 2 && isxdigit(*p))
2315 {
2316 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2317 p++;
2318 }
2319 break;
2320
2321 case 0: /* \ followed by EOF allows for an empty line */
2322 p--;
2323 continue;
2324
2325 case '>':
2326 if (*p == '-')
2327 {
2328 start_offset_sign = -1;
2329 p++;
2330 }
2331 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2332 start_offset *= start_offset_sign;
2333 continue;
2334
2335 case 'A': /* Option setting */
2336 options |= PCRE_ANCHORED;
2337 continue;
2338
2339 case 'B':
2340 options |= PCRE_NOTBOL;
2341 continue;
2342
2343 case 'C':
2344 if (isdigit(*p)) /* Set copy string */
2345 {
2346 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2347 copystrings |= 1 << n;
2348 }
2349 else if (isalnum(*p))
2350 {
2351 uschar *npp = copynamesptr;
2352 while (isalnum(*p)) *npp++ = *p++;
2353 *npp++ = 0;
2354 *npp = 0;
2355 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2356 if (n < 0)
2357 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2358 copynamesptr = npp;
2359 }
2360 else if (*p == '+')
2361 {
2362 callout_extra = 1;
2363 p++;
2364 }
2365 else if (*p == '-')
2366 {
2367 pcre_callout = NULL;
2368 p++;
2369 }
2370 else if (*p == '!')
2371 {
2372 callout_fail_id = 0;
2373 p++;
2374 while(isdigit(*p))
2375 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2376 callout_fail_count = 0;
2377 if (*p == '!')
2378 {
2379 p++;
2380 while(isdigit(*p))
2381 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2382 }
2383 }
2384 else if (*p == '*')
2385 {
2386 int sign = 1;
2387 callout_data = 0;
2388 if (*(++p) == '-') { sign = -1; p++; }
2389 while(isdigit(*p))
2390 callout_data = callout_data * 10 + *p++ - '0';
2391 callout_data *= sign;
2392 callout_data_set = 1;
2393 }
2394 continue;
2395
2396 #if !defined NODFA
2397 case 'D':
2398 #if !defined NOPOSIX
2399 if (posix || do_posix)
2400 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2401 else
2402 #endif
2403 use_dfa = 1;
2404 continue;
2405 #endif
2406
2407 #if !defined NODFA
2408 case 'F':
2409 options |= PCRE_DFA_SHORTEST;
2410 continue;
2411 #endif
2412
2413 case 'G':
2414 if (isdigit(*p))
2415 {
2416 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2417 getstrings |= 1 << n;
2418 }
2419 else if (isalnum(*p))
2420 {
2421 uschar *npp = getnamesptr;
2422 while (isalnum(*p)) *npp++ = *p++;
2423 *npp++ = 0;
2424 *npp = 0;
2425 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2426 if (n < 0)
2427 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2428 getnamesptr = npp;
2429 }
2430 continue;
2431
2432 case 'L':
2433 getlist = 1;
2434 continue;
2435
2436 case 'M':
2437 find_match_limit = 1;
2438 continue;
2439
2440 case 'N':
2441 if ((options & PCRE_NOTEMPTY) != 0)
2442 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2443 else
2444 options |= PCRE_NOTEMPTY;
2445 continue;
2446
2447 case 'O':
2448 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2449 if (n > size_offsets_max)
2450 {
2451 size_offsets_max = n;
2452 free(offsets);
2453 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2454 if (offsets == NULL)
2455 {
2456 printf("** Failed to get %d bytes of memory for offsets vector\n",
2457 (int)(size_offsets_max * sizeof(int)));
2458 yield = 1;
2459 goto EXIT;
2460 }
2461 }
2462 use_size_offsets = n;
2463 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2464 continue;
2465
2466 case 'P':
2467 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2468 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2469 continue;
2470
2471 case 'Q':
2472 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2473 if (extra == NULL)
2474 {
2475 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2476 extra->flags = 0;
2477 }
2478 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2479 extra->match_limit_recursion = n;
2480 continue;
2481
2482 case 'q':
2483 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2484 if (extra == NULL)
2485 {
2486 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2487 extra->flags = 0;
2488 }
2489 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2490 extra->match_limit = n;
2491 continue;
2492
2493 #if !defined NODFA
2494 case 'R':
2495 options |= PCRE_DFA_RESTART;
2496 continue;
2497 #endif
2498
2499 case 'S':
2500 show_malloc = 1;
2501 continue;
2502
2503 case 'Y':
2504 options |= PCRE_NO_START_OPTIMIZE;
2505 continue;
2506
2507 case 'Z':
2508 options |= PCRE_NOTEOL;
2509 continue;
2510
2511 case '?':
2512 options |= PCRE_NO_UTF8_CHECK;
2513 continue;
2514
2515 case '<':
2516 {
2517 int x = check_newline(p, outfile);
2518 if (x == 0) goto NEXT_DATA;
2519 options |= x;
2520 while (*p++ != '>');
2521 }
2522 continue;
2523 }
2524 *q++ = c;
2525 }
2526 *q = 0;
2527 len = (int)(q - dbuffer);
2528
2529 /* Move the data to the end of the buffer so that a read over the end of
2530 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2531 we are using the POSIX interface, we must include the terminating zero. */
2532
2533 #if !defined NOPOSIX
2534 if (posix || do_posix)
2535 {
2536 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2537 bptr += buffer_size - len - 1;
2538 }
2539 else
2540 #endif
2541 {
2542 memmove(bptr + buffer_size - len, bptr, len);
2543 bptr += buffer_size - len;
2544 }
2545
2546 if ((all_use_dfa || use_dfa) && find_match_limit)
2547 {
2548 printf("**Match limit not relevant for DFA matching: ignored\n");
2549 find_match_limit = 0;
2550 }
2551
2552 /* Handle matching via the POSIX interface, which does not
2553 support timing or playing with the match limit or callout data. */
2554
2555 #if !defined NOPOSIX
2556 if (posix || do_posix)
2557 {
2558 int rc;
2559 int eflags = 0;
2560 regmatch_t *pmatch = NULL;
2561 if (use_size_offsets > 0)
2562 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2563 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2564 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2565 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2566
2567 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2568
2569 if (rc != 0)
2570 {
2571 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2572 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2573 }
2574 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2575 != 0)
2576 {
2577 fprintf(outfile, "Matched with REG_NOSUB\n");
2578 }
2579 else
2580 {
2581 size_t i;
2582 for (i = 0; i < (size_t)use_size_offsets; i++)
2583 {
2584 if (pmatch[i].rm_so >= 0)
2585 {
2586 fprintf(outfile, "%2d: ", (int)i);
2587 (void)pchars(dbuffer + pmatch[i].rm_so,
2588 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2589 fprintf(outfile, "\n");
2590 if (i == 0 && do_showrest)
2591 {
2592 fprintf(outfile, " 0+ ");
2593 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2594 outfile);
2595 fprintf(outfile, "\n");
2596 }
2597 }
2598 }
2599 }
2600 free(pmatch);
2601 }
2602
2603 /* Handle matching via the native interface - repeats for /g and /G */
2604
2605 else
2606 #endif /* !defined NOPOSIX */
2607
2608 for (;; gmatched++) /* Loop for /g or /G */
2609 {
2610 markptr = NULL;
2611
2612 if (timeitm > 0)
2613 {
2614 register int i;
2615 clock_t time_taken;
2616 clock_t start_time = clock();
2617
2618 #if !defined NODFA
2619 if (all_use_dfa || use_dfa)
2620 {
2621 int workspace[1000];
2622 for (i = 0; i < timeitm; i++)
2623 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2624 options | g_notempty, use_offsets, use_size_offsets, workspace,
2625 sizeof(workspace)/sizeof(int));
2626 }
2627 else
2628 #endif
2629
2630 for (i = 0; i < timeitm; i++)
2631 count = pcre_exec(re, extra, (char *)bptr, len,
2632 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2633
2634 time_taken = clock() - start_time;
2635 fprintf(outfile, "Execute time %.4f milliseconds\n",
2636 (((double)time_taken * 1000.0) / (double)timeitm) /
2637 (double)CLOCKS_PER_SEC);
2638 }
2639
2640 /* If find_match_limit is set, we want to do repeated matches with
2641 varying limits in order to find the minimum value for the match limit and
2642 for the recursion limit. */
2643
2644 if (find_match_limit)
2645 {
2646 if (extra == NULL)
2647 {
2648 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2649 extra->flags = 0;
2650 }
2651
2652 (void)check_match_limit(re, extra, bptr, len, start_offset,
2653 options|g_notempty, use_offsets, use_size_offsets,
2654 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2655 PCRE_ERROR_MATCHLIMIT, "match()");
2656
2657 count = check_match_limit(re, extra, bptr, len, start_offset,
2658 options|g_notempty, use_offsets, use_size_offsets,
2659 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2660 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2661 }
2662
2663 /* If callout_data is set, use the interface with additional data */
2664
2665 else if (callout_data_set)
2666 {
2667 if (extra == NULL)
2668 {
2669 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2670 extra->flags = 0;
2671 }
2672 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2673 extra->callout_data = &callout_data;
2674 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2675 options | g_notempty, use_offsets, use_size_offsets);
2676 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2677 }
2678
2679 /* The normal case is just to do the match once, with the default
2680 value of match_limit. */
2681
2682 #if !defined NODFA
2683 else if (all_use_dfa || use_dfa)
2684 {
2685 int workspace[1000];
2686 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2687 options | g_notempty, use_offsets, use_size_offsets, workspace,
2688 sizeof(workspace)/sizeof(int));
2689 if (count == 0)
2690 {
2691 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2692 count = use_size_offsets/2;
2693 }
2694 }
2695 #endif
2696
2697 else
2698 {
2699 count = pcre_exec(re, extra, (char *)bptr, len,
2700 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2701 if (count == 0)
2702 {
2703 fprintf(outfile, "Matched, but too many substrings\n");
2704 count = use_size_offsets/3;
2705 }
2706 }
2707
2708 /* Matched */
2709
2710 if (count >= 0)
2711 {
2712 int i, maxcount;
2713
2714 #if !defined NODFA
2715 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2716 #endif
2717 maxcount = use_size_offsets/3;
2718
2719 /* This is a check against a lunatic return value. */
2720
2721 if (count > maxcount)
2722 {
2723 fprintf(outfile,
2724 "** PCRE error: returned count %d is too big for offset size %d\n",
2725 count, use_size_offsets);
2726 count = use_size_offsets/3;
2727 if (do_g || do_G)
2728 {
2729 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2730 do_g = do_G = FALSE; /* Break g/G loop */
2731 }
2732 }
2733
2734 for (i = 0; i < count * 2; i += 2)
2735 {
2736 if (use_offsets[i] < 0)
2737 fprintf(outfile, "%2d: <unset>\n", i/2);
2738 else
2739 {
2740 fprintf(outfile, "%2d: ", i/2);
2741 (void)pchars(bptr + use_offsets[i],
2742 use_offsets[i+1] - use_offsets[i], outfile);
2743 fprintf(outfile, "\n");
2744 if (i == 0)
2745 {
2746 if (do_showrest)
2747 {
2748 fprintf(outfile, " 0+ ");
2749 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2750 outfile);
2751 fprintf(outfile, "\n");
2752 }
2753 }
2754 }
2755 }
2756
2757 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2758
2759 for (i = 0; i < 32; i++)
2760 {
2761 if ((copystrings & (1 << i)) != 0)
2762 {
2763 char copybuffer[256];
2764 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2765 i, copybuffer, sizeof(copybuffer));
2766 if (rc < 0)
2767 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2768 else
2769 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2770 }
2771 }
2772
2773 for (copynamesptr = copynames;
2774 *copynamesptr != 0;
2775 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2776 {
2777 char copybuffer[256];
2778 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2779 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2780 if (rc < 0)
2781 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2782 else
2783 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2784 }
2785
2786 for (i = 0; i < 32; i++)
2787 {
2788 if ((getstrings & (1 << i)) != 0)
2789 {
2790 const char *substring;
2791 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2792 i, &substring);
2793 if (rc < 0)
2794 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2795 else
2796 {
2797 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2798 pcre_free_substring(substring);
2799 }
2800 }
2801 }
2802
2803 for (getnamesptr = getnames;
2804 *getnamesptr != 0;
2805 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2806 {
2807 const char *substring;
2808 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2809 count, (char *)getnamesptr, &substring);
2810 if (rc < 0)
2811 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2812 else
2813 {
2814 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2815 pcre_free_substring(substring);
2816 }
2817 }
2818
2819 if (getlist)
2820 {
2821 const char **stringlist;
2822 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2823 &stringlist);
2824 if (rc < 0)
2825 fprintf(outfile, "get substring list failed %d\n", rc);
2826 else
2827 {
2828 for (i = 0; i < count; i++)
2829 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2830 if (stringlist[i] != NULL)
2831 fprintf(outfile, "string list not terminated by NULL\n");
2832 /* free((void *)stringlist); */
2833 pcre_free_substring_list(stringlist);
2834 }
2835 }
2836 }
2837
2838 /* There was a partial match */
2839
2840 else if (count == PCRE_ERROR_PARTIAL)
2841 {
2842 if (markptr == NULL) fprintf(outfile, "Partial match");
2843 else fprintf(outfile, "Partial match, mark=%s", markptr);
2844 if (use_size_offsets > 1)
2845 {
2846 fprintf(outfile, ": ");
2847 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2848 outfile);
2849 }
2850 fprintf(outfile, "\n");
2851 break; /* Out of the /g loop */
2852 }
2853
2854 /* Failed to match. If this is a /g or /G loop and we previously set
2855 g_notempty after a null match, this is not necessarily the end. We want
2856 to advance the start offset, and continue. We won't be at the end of the
2857 string - that was checked before setting g_notempty.
2858
2859 Complication arises in the case when the newline convention is "any",
2860 "crlf", or "anycrlf". If the previous match was at the end of a line
2861 terminated by CRLF, an advance of one character just passes the \r,
2862 whereas we should prefer the longer newline sequence, as does the code in
2863 pcre_exec(). Fudge the offset value to achieve this. We check for a
2864 newline setting in the pattern; if none was set, use pcre_config() to
2865 find the default.
2866
2867 Otherwise, in the case of UTF-8 matching, the advance must be one
2868 character, not one byte. */
2869
2870 else
2871 {
2872 if (g_notempty != 0)
2873 {
2874 int onechar = 1;
2875 unsigned int obits = ((real_pcre *)re)->options;
2876 use_offsets[0] = start_offset;
2877 if ((obits & PCRE_NEWLINE_BITS) == 0)
2878 {
2879 int d;
2880 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2881 /* Note that these values are always the ASCII ones, even in
2882 EBCDIC environments. CR = 13, NL = 10. */
2883 obits = (d == 13)? PCRE_NEWLINE_CR :
2884 (d == 10)? PCRE_NEWLINE_LF :
2885 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2886 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2887 (d == -1)? PCRE_NEWLINE_ANY : 0;
2888 }
2889 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2890 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2891 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2892 &&
2893 start_offset < len - 1 &&
2894 bptr[start_offset] == '\r' &&
2895 bptr[start_offset+1] == '\n')
2896 onechar++;
2897 else if (use_utf8)
2898 {
2899 while (start_offset + onechar < len)
2900 {
2901 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2902 onechar++;
2903 }
2904 }
2905 use_offsets[1] = start_offset + onechar;
2906 }
2907 else
2908 {
2909 switch(count)
2910 {
2911 case PCRE_ERROR_NOMATCH:
2912 if (gmatched == 0)
2913 {
2914 if (markptr == NULL) fprintf(outfile, "No match\n");
2915 else fprintf(outfile, "No match, mark = %s\n", markptr);
2916 }
2917 break;
2918
2919 case PCRE_ERROR_BADUTF8:
2920 case PCRE_ERROR_SHORTUTF8:
2921 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2922 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2923 if (use_size_offsets >= 2)
2924 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2925 use_offsets[1]);
2926 fprintf(outfile, "\n");
2927 break;
2928
2929 default:
2930 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2931 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2932 else
2933 fprintf(outfile, "Error %d (Unexpected value)\n", count);
2934 break;
2935 }
2936
2937 break; /* Out of the /g loop */
2938 }
2939 }
2940
2941 /* If not /g or /G we are done */
2942
2943 if (!do_g && !do_G) break;
2944
2945 /* If we have matched an empty string, first check to see if we are at
2946 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2947 Perl's /g option does. This turns out to be rather cunning. First we set
2948 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2949 same point. If this fails (picked up above) we advance to the next
2950 character. */
2951
2952 g_notempty = 0;
2953
2954 if (use_offsets[0] == use_offsets[1])
2955 {
2956 if (use_offsets[0] == len) break;
2957 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2958 }
2959
2960 /* For /g, update the start offset, leaving the rest alone */
2961
2962 if (do_g) start_offset = use_offsets[1];
2963
2964 /* For /G, update the pointer and length */
2965
2966 else
2967 {
2968 bptr += use_offsets[1];
2969 len -= use_offsets[1];
2970 }
2971 } /* End of loop for /g and /G */
2972
2973 NEXT_DATA: continue;
2974 } /* End of loop for data lines */
2975
2976 CONTINUE:
2977
2978 #if !defined NOPOSIX
2979 if (posix || do_posix) regfree(&preg);
2980 #endif
2981
2982 if (re != NULL) new_free(re);
2983 if (extra != NULL) new_free(extra);
2984 if (locale_set)
2985 {
2986 new_free((void *)tables);
2987 setlocale(LC_CTYPE, "C");
2988 locale_set = 0;
2989 }
2990 }
2991
2992 if (infile == stdin) fprintf(outfile, "\n");
2993
2994 EXIT:
2995
2996 if (infile != NULL && infile != stdin) fclose(infile);
2997 if (outfile != NULL && outfile != stdout) fclose(outfile);
2998
2999 free(buffer);
3000 free(dbuffer);
3001 free(pbuffer);
3002 free(offsets);
3003
3004 return yield;
3005 }
3006
3007 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5