/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 616 - (show annotations)
Mon Jul 11 15:55:25 2011 UTC (8 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 90332 byte(s)
Error occurred while calculating annotation data.
Add the ++ feature to pcretest.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile; /* stream that callout(), new_info() and the malloc/free wrappers print to */
181 static int log_store = 0; /* NOTE(review): not referenced in this part of the file */
182 static int callout_count; /* incremented by callout() each time the callout numbered callout_fail_id runs */
183 static int callout_extra; /* non-zero: callout() also prints the last-capture number and each captured substring */
184 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
185 static int callout_fail_id; /* callout number that counts towards callout_fail_count */
186 static int debug_lengths; /* NOTE(review): not referenced in this part of the file */
187 static int first_callout; /* non-zero: callout() re-prints the subject; callout() clears it on return */
188 static int locale_set = 0; /* non-zero: PRINTHEX uses isprint() instead of PRINTABLE */
189 static int show_malloc; /* non-zero: the malloc/free wrappers log every call to outfile */
190 static int use_utf8; /* non-zero: pchars() decodes its input as UTF-8 */
191 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
192
193 /* The buffers grow automatically if very long input lines are encountered. */
194
195 static int buffer_size = 50000; /* current size of each of the three buffers; doubled by extend_inputline() */
196 static uschar *buffer = NULL; /* input line buffer that extend_inputline() reads into */
197 static uschar *dbuffer = NULL; /* resized together with buffer; its contents are NOT copied on growth */
198 static uschar *pbuffer = NULL; /* copy of the input used by callout(); its contents are copied on growth */
199
200 /* Textual explanations for runtime error codes. The numbers in the trailing comments are array indices. */
201
202 static const char *errtexts[] = { /* NOTE(review): index looks like the negated PCRE error code; the lookup is outside this chunk - confirm */
203 NULL, /* 0 is no error */
204 NULL, /* 1: NOMATCH is handled specially */
205 "NULL argument passed", /* 2 */
206 "bad option value", /* 3 */
207 "magic number missing", /* 4 */
208 "unknown opcode - pattern overwritten?", /* 5 */
209 "no more memory", /* 6 */
210 NULL, /* 7: never returned by pcre_exec() or pcre_dfa_exec() */
211 "match limit exceeded", /* 8 */
212 "callout error code", /* 9 */
213 NULL, /* 10: BADUTF8 is handled specially */
214 "bad UTF-8 offset", /* 11 */
215 NULL, /* 12: PARTIAL is handled specially */
216 "not used - internal error", /* 13 */
217 "internal error - pattern overwritten?", /* 14 */
218 "bad count value", /* 15 */
219 "item unsupported for DFA matching", /* 16 */
220 "backreference condition or recursion test not supported for DFA matching", /* 17 */
221 "match limit not supported for DFA matching", /* 18 */
222 "workspace size exceeded in DFA matching", /* 19 */
223 "too much recursion for DFA matching", /* 20 */
224 "recursion limit exceeded", /* 21 */
225 "not used - internal error", /* 22 */
226 "invalid combination of newline options", /* 23 */
227 "bad offset value", /* 24 */
228 NULL /* 25: SHORTUTF8 is handled specially */
229 };
230
231
232 /*************************************************
233 * Alternate character tables *
234 *************************************************/
235
236 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237 using the default tables of the library. However, the T option can be used to
238 select alternate sets of tables, for different kinds of testing. Note also that
239 the L (locale) option also adjusts the tables. */
240
241 /* This is the set of tables distributed as default with PCRE. It recognizes
242 only ASCII characters. */
243
244 static const unsigned char tables0[] = {
245
246 /* This table is a lower casing table. */
247
248 0, 1, 2, 3, 4, 5, 6, 7,
249 8, 9, 10, 11, 12, 13, 14, 15,
250 16, 17, 18, 19, 20, 21, 22, 23,
251 24, 25, 26, 27, 28, 29, 30, 31,
252 32, 33, 34, 35, 36, 37, 38, 39,
253 40, 41, 42, 43, 44, 45, 46, 47,
254 48, 49, 50, 51, 52, 53, 54, 55,
255 56, 57, 58, 59, 60, 61, 62, 63,
256 64, 97, 98, 99,100,101,102,103,
257 104,105,106,107,108,109,110,111,
258 112,113,114,115,116,117,118,119,
259 120,121,122, 91, 92, 93, 94, 95,
260 96, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122,123,124,125,126,127,
264 128,129,130,131,132,133,134,135,
265 136,137,138,139,140,141,142,143,
266 144,145,146,147,148,149,150,151,
267 152,153,154,155,156,157,158,159,
268 160,161,162,163,164,165,166,167,
269 168,169,170,171,172,173,174,175,
270 176,177,178,179,180,181,182,183,
271 184,185,186,187,188,189,190,191,
272 192,193,194,195,196,197,198,199,
273 200,201,202,203,204,205,206,207,
274 208,209,210,211,212,213,214,215,
275 216,217,218,219,220,221,222,223,
276 224,225,226,227,228,229,230,231,
277 232,233,234,235,236,237,238,239,
278 240,241,242,243,244,245,246,247,
279 248,249,250,251,252,253,254,255,
280
281 /* This table is a case flipping table. */
282
283 0, 1, 2, 3, 4, 5, 6, 7,
284 8, 9, 10, 11, 12, 13, 14, 15,
285 16, 17, 18, 19, 20, 21, 22, 23,
286 24, 25, 26, 27, 28, 29, 30, 31,
287 32, 33, 34, 35, 36, 37, 38, 39,
288 40, 41, 42, 43, 44, 45, 46, 47,
289 48, 49, 50, 51, 52, 53, 54, 55,
290 56, 57, 58, 59, 60, 61, 62, 63,
291 64, 97, 98, 99,100,101,102,103,
292 104,105,106,107,108,109,110,111,
293 112,113,114,115,116,117,118,119,
294 120,121,122, 91, 92, 93, 94, 95,
295 96, 65, 66, 67, 68, 69, 70, 71,
296 72, 73, 74, 75, 76, 77, 78, 79,
297 80, 81, 82, 83, 84, 85, 86, 87,
298 88, 89, 90,123,124,125,126,127,
299 128,129,130,131,132,133,134,135,
300 136,137,138,139,140,141,142,143,
301 144,145,146,147,148,149,150,151,
302 152,153,154,155,156,157,158,159,
303 160,161,162,163,164,165,166,167,
304 168,169,170,171,172,173,174,175,
305 176,177,178,179,180,181,182,183,
306 184,185,186,187,188,189,190,191,
307 192,193,194,195,196,197,198,199,
308 200,201,202,203,204,205,206,207,
309 208,209,210,211,212,213,214,215,
310 216,217,218,219,220,221,222,223,
311 224,225,226,227,228,229,230,231,
312 232,233,234,235,236,237,238,239,
313 240,241,242,243,244,245,246,247,
314 248,249,250,251,252,253,254,255,
315
316 /* This table contains bit maps for various character classes. Each map is 32
317 bytes long and the bits run from the least significant end of each byte. The
318 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319 graph, print, punct, and cntrl. Other classes are built from combinations. */
320
321 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325
326 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
327 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330
331 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345
346 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
347 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350
351 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
352 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355
356 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
357 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360
361 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
362 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365
366 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370
371 /* This table identifies various classes of character by individual bits:
372 0x01 white space character
373 0x02 letter
374 0x04 decimal digit
375 0x08 hexadecimal digit
376 0x10 alphanumeric or '_'
377 0x80 regular expression metacharacter or binary zero
378 */
379
380 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
385 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412
413 /* This is a set of tables that came originally from a Windows user. It seems to
414 be at least an approximation of ISO 8859. In particular, there are characters
415 greater than 128 that are marked as spaces, letters, etc. */
416
417 static const unsigned char tables1[] = {
418 0,1,2,3,4,5,6,7,
419 8,9,10,11,12,13,14,15,
420 16,17,18,19,20,21,22,23,
421 24,25,26,27,28,29,30,31,
422 32,33,34,35,36,37,38,39,
423 40,41,42,43,44,45,46,47,
424 48,49,50,51,52,53,54,55,
425 56,57,58,59,60,61,62,63,
426 64,97,98,99,100,101,102,103,
427 104,105,106,107,108,109,110,111,
428 112,113,114,115,116,117,118,119,
429 120,121,122,91,92,93,94,95,
430 96,97,98,99,100,101,102,103,
431 104,105,106,107,108,109,110,111,
432 112,113,114,115,116,117,118,119,
433 120,121,122,123,124,125,126,127,
434 128,129,130,131,132,133,134,135,
435 136,137,138,139,140,141,142,143,
436 144,145,146,147,148,149,150,151,
437 152,153,154,155,156,157,158,159,
438 160,161,162,163,164,165,166,167,
439 168,169,170,171,172,173,174,175,
440 176,177,178,179,180,181,182,183,
441 184,185,186,187,188,189,190,191,
442 224,225,226,227,228,229,230,231,
443 232,233,234,235,236,237,238,239,
444 240,241,242,243,244,245,246,215,
445 248,249,250,251,252,253,254,223,
446 224,225,226,227,228,229,230,231,
447 232,233,234,235,236,237,238,239,
448 240,241,242,243,244,245,246,247,
449 248,249,250,251,252,253,254,255,
450 0,1,2,3,4,5,6,7,
451 8,9,10,11,12,13,14,15,
452 16,17,18,19,20,21,22,23,
453 24,25,26,27,28,29,30,31,
454 32,33,34,35,36,37,38,39,
455 40,41,42,43,44,45,46,47,
456 48,49,50,51,52,53,54,55,
457 56,57,58,59,60,61,62,63,
458 64,97,98,99,100,101,102,103,
459 104,105,106,107,108,109,110,111,
460 112,113,114,115,116,117,118,119,
461 120,121,122,91,92,93,94,95,
462 96,65,66,67,68,69,70,71,
463 72,73,74,75,76,77,78,79,
464 80,81,82,83,84,85,86,87,
465 88,89,90,123,124,125,126,127,
466 128,129,130,131,132,133,134,135,
467 136,137,138,139,140,141,142,143,
468 144,145,146,147,148,149,150,151,
469 152,153,154,155,156,157,158,159,
470 160,161,162,163,164,165,166,167,
471 168,169,170,171,172,173,174,175,
472 176,177,178,179,180,181,182,183,
473 184,185,186,187,188,189,190,191,
474 224,225,226,227,228,229,230,231,
475 232,233,234,235,236,237,238,239,
476 240,241,242,243,244,245,246,215,
477 248,249,250,251,252,253,254,223,
478 192,193,194,195,196,197,198,199,
479 200,201,202,203,204,205,206,207,
480 208,209,210,211,212,213,214,247,
481 216,217,218,219,220,221,222,255,
482 0,62,0,0,1,0,0,0,
483 0,0,0,0,0,0,0,0,
484 32,0,0,0,1,0,0,0,
485 0,0,0,0,0,0,0,0,
486 0,0,0,0,0,0,255,3,
487 126,0,0,0,126,0,0,0,
488 0,0,0,0,0,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,255,3,
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,12,2,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0,
495 254,255,255,7,0,0,0,0,
496 0,0,0,0,0,0,0,0,
497 255,255,127,127,0,0,0,0,
498 0,0,0,0,0,0,0,0,
499 0,0,0,0,254,255,255,7,
500 0,0,0,0,0,4,32,4,
501 0,0,0,128,255,255,127,255,
502 0,0,0,0,0,0,255,3,
503 254,255,255,135,254,255,255,7,
504 0,0,0,0,0,4,44,6,
505 255,255,127,255,255,255,127,255,
506 0,0,0,0,254,255,255,255,
507 255,255,255,255,255,255,255,127,
508 0,0,0,0,254,255,255,255,
509 255,255,255,255,255,255,255,255,
510 0,2,0,0,255,255,255,255,
511 255,255,255,255,255,255,255,127,
512 0,0,0,0,255,255,255,255,
513 255,255,255,255,255,255,255,255,
514 0,0,0,0,254,255,0,252,
515 1,0,0,248,1,0,0,120,
516 0,0,0,0,254,255,255,255,
517 0,0,128,0,0,0,128,0,
518 255,255,255,255,0,0,0,0,
519 0,0,0,0,0,0,0,128,
520 255,255,255,255,0,0,0,0,
521 0,0,0,0,0,0,0,0,
522 128,0,0,0,0,0,0,0,
523 0,1,1,0,1,1,0,0,
524 0,0,0,0,0,0,0,0,
525 0,0,0,0,0,0,0,0,
526 1,0,0,0,128,0,0,0,
527 128,128,128,128,0,0,128,0,
528 28,28,28,28,28,28,28,28,
529 28,28,0,0,0,0,0,128,
530 0,26,26,26,26,26,26,18,
531 18,18,18,18,18,18,18,18,
532 18,18,18,18,18,18,18,18,
533 18,18,18,128,128,0,128,16,
534 0,26,26,26,26,26,26,18,
535 18,18,18,18,18,18,18,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,128,128,0,0,0,
538 0,0,0,0,0,1,0,0,
539 0,0,0,0,0,0,0,0,
540 0,0,0,0,0,0,0,0,
541 0,0,0,0,0,0,0,0,
542 1,0,0,0,0,0,0,0,
543 0,0,18,0,0,0,0,0,
544 0,0,20,20,0,18,0,0,
545 0,20,18,0,0,0,0,0,
546 18,18,18,18,18,18,18,18,
547 18,18,18,18,18,18,18,18,
548 18,18,18,18,18,18,18,0,
549 18,18,18,18,18,18,18,18,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,0,
553 18,18,18,18,18,18,18,18
554 };
555
556
557
558
559 #ifndef HAVE_STRERROR
560 /*************************************************
561 * Provide strerror() for non-ANSI libraries *
562 *************************************************/
563
/* Fallback strerror() for C libraries that do not supply one (SunOS4 is an
example). The text is taken from the traditional sys_errlist[] table; any
number outside the range 0 .. sys_nerr-1 yields a fixed "unknown" string. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr)
  return sys_errlist[n];
return "unknown error number";
}
577 #endif /* HAVE_STRERROR */
578
579
580
581
582 /*************************************************
583 * Read or extend an input line *
584 *************************************************/
585
586 /* Input lines are read into buffer, but both patterns and data lines can be
587 continued over multiple input lines. In addition, if the buffer fills up, we
588 want to automatically expand it so as to be able to handle extremely large
589 lines that are needed for certain stress tests. When the input buffer is
590 expanded, the other two buffers must also be expanded likewise, and the
591 contents of pbuffer, which are a copy of the input for callouts, must be
592 preserved (for when expansion happens for a data line). This is not the most
593 optimal way of handling this, but hey, this is just a test program!
594
595 Arguments:
596 f the file to read
597 start where in buffer to start (this *must* be within buffer)
598 prompt for stdin or readline()
599
600 Returns: pointer to the start of new data
601 could be a copy of start, or could be moved
602 NULL if no data read and EOF reached
603 */
604
605 static uschar *
606 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 {
608 uschar *here = start;
609
610 for (;;)
611 {
612 int rlen = (int)(buffer_size - (here - buffer));
613
614 if (rlen > 1000)
615 {
616 int dlen;
617
618 /* If libreadline support is required, use readline() to read a line if the
619 input is a terminal. Note that readline() removes the trailing newline, so
620 we must put it back again, to be compatible with fgets(). */
621
622 #ifdef SUPPORT_LIBREADLINE
623 if (isatty(fileno(f)))
624 {
625 size_t len;
626 char *s = readline(prompt);
627 if (s == NULL) return (here == start)? NULL : start;
628 len = strlen(s);
629 if (len > 0) add_history(s);
630 if (len > rlen - 1) len = rlen - 1;
631 memcpy(here, s, len);
632 here[len] = '\n';
633 here[len+1] = 0;
634 free(s);
635 }
636 else
637 #endif
638
639 /* Read the next line by normal means, prompting if the file is stdin. */
640
641 {
642 if (f == stdin) printf("%s", prompt);
643 if (fgets((char *)here, rlen, f) == NULL)
644 return (here == start)? NULL : start;
645 }
646
647 dlen = (int)strlen((char *)here);
648 if (dlen > 0 && here[dlen - 1] == '\n') return start;
649 here += dlen;
650 }
651
652 else
653 {
654 int new_buffer_size = 2*buffer_size;
655 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658
659 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660 {
661 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662 exit(1);
663 }
664
665 memcpy(new_buffer, buffer, buffer_size);
666 memcpy(new_pbuffer, pbuffer, buffer_size);
667
668 buffer_size = new_buffer_size;
669
670 start = new_buffer + (start - buffer);
671 here = new_buffer + (here - buffer);
672
673 free(buffer);
674 free(dbuffer);
675 free(pbuffer);
676
677 buffer = new_buffer;
678 dbuffer = new_dbuffer;
679 pbuffer = new_pbuffer;
680 }
681 }
682
683 return NULL; /* Control never gets here */
684 }
685
686
687
688
689
690
691
692 /*************************************************
693 * Read number from string *
694 *************************************************/
695
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted.
A value too large for an int saturates at the largest int instead of
overflowing; the remaining digits are still consumed.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (the first character after the
                  digits; after the white space if there are no digits)

Returns:        the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest int, computed here so that this function does not depend on
<limits.h> being included. */

const int max_value = (int)(~0u >> 1);
int result = 0;

while (isspace(*str)) str++;      /* isspace(0) is false, so this stops at the end */

while (isdigit(*str))
  {
  int digit = *str++ - '0';

  /* result * 10 + digit would exceed max_value: clamp rather than overflow. */

  if (result > (max_value - digit) / 10) result = max_value;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
716
717
718
719
720 /*************************************************
721 * Convert UTF-8 string to value *
722 *************************************************/
723
724 /* This function takes one or more bytes that represents a UTF-8 character,
725 and returns the value of the character.
726
727 Argument:
728 utf8bytes a pointer to the byte vector
729 vptr a pointer to an int to receive the value
730
731 Returns: > 0 => the number of bytes consumed
732 -6 to 0 => malformed UTF-8 character at offset = (-return)
733 */
734
735 #if !defined NOUTF8
736
737 static int
738 utf82ord(unsigned char *utf8bytes, int *vptr)
739 {
740 int c = *utf8bytes++;
741 int d = c;
742 int i, j, s;
743
744 for (i = -1; i < 6; i++) /* i is number of additional bytes */
745 {
746 if ((d & 0x80) == 0) break;
747 d <<= 1;
748 }
749
750 if (i == -1) { *vptr = c; return 1; } /* ascii character */
751 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752
753 /* i now has a value in the range 1-5 */
754
755 s = 6*i;
756 d = (c & utf8_table3[i]) << s;
757
758 for (j = 0; j < i; j++)
759 {
760 c = *utf8bytes++;
761 if ((c & 0xc0) != 0x80) return -(j+1);
762 s -= 6;
763 d |= (c & 0x3f) << s;
764 }
765
766 /* Check that encoding was the correct unique one */
767
768 for (j = 0; j < utf8_table1_size; j++)
769 if (d <= utf8_table1[j]) break;
770 if (j != i) return -(i+1);
771
772 /* Valid value */
773
774 *vptr = d;
775 return i+1;
776 }
777
778 #endif
779
780
781
782 /*************************************************
783 * Convert character value to UTF-8 *
784 *************************************************/
785
786 /* This function takes an integer value in the range 0 - 0x7fffffff
787 and encodes it as a UTF-8 character in 1 to 6 bytes.
788
789 Arguments:
790 cvalue the character value
791 utf8bytes pointer to buffer for result - at least 6 bytes long
792
793 Returns: number of bytes placed in the buffer
794 */
795
796 #if !defined NOUTF8
797
798 static int
799 ord2utf8(int cvalue, uschar *utf8bytes)
800 {
801 register int i, j;
802 for (i = 0; i < utf8_table1_size; i++)
803 if (cvalue <= utf8_table1[i]) break;
804 utf8bytes += i;
805 for (j = i; j > 0; j--)
806 {
807 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 cvalue >>= 6;
809 }
810 *utf8bytes = utf8_table2[i] | cvalue;
811 return i + 1;
812 }
813
814 #endif
815
816
817
818 /*************************************************
819 * Print character string *
820 *************************************************/
821
822 /* Character string printing function. Must handle UTF-8 strings in utf8
823 mode. Yields number of characters printed. If handed a NULL file, just counts
824 chars without printing. */
825
826 static int pchars(unsigned char *p, int length, FILE *f)
827 {
828 int c = 0;
829 int yield = 0;
830
831 while (length-- > 0)
832 {
833 #if !defined NOUTF8
834 if (use_utf8)
835 {
836 int rc = utf82ord(p, &c);
837
838 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839 {
840 length -= rc - 1;
841 p += rc;
842 if (PRINTHEX(c))
843 {
844 if (f != NULL) fprintf(f, "%c", c);
845 yield++;
846 }
847 else
848 {
849 int n = 4;
850 if (f != NULL) fprintf(f, "\\x{%02x}", c);
851 yield += (n <= 0x000000ff)? 2 :
852 (n <= 0x00000fff)? 3 :
853 (n <= 0x0000ffff)? 4 :
854 (n <= 0x000fffff)? 5 : 6;
855 }
856 continue;
857 }
858 }
859 #endif
860
861 /* Not UTF-8, or malformed UTF-8 */
862
863 c = *p++;
864 if (PRINTHEX(c))
865 {
866 if (f != NULL) fprintf(f, "%c", c);
867 yield++;
868 }
869 else
870 {
871 if (f != NULL) fprintf(f, "\\x%02x", c);
872 yield += 4;
873 }
874 }
875
876 return yield;
877 }
878
879
880
881 /*************************************************
882 * Callout function *
883 *************************************************/
884
885 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886 the match. Yield zero unless more callouts than the fail count, or the callout
887 data is not zero. */
888
889 static int callout(pcre_callout_block *cb)
890 {
891 FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 int i, pre_start, post_start, subject_length;
893
894 if (callout_extra)
895 {
896 fprintf(f, "Callout %d: last capture = %d\n",
897 cb->callout_number, cb->capture_last);
898
899 for (i = 0; i < cb->capture_top * 2; i += 2)
900 {
901 if (cb->offset_vector[i] < 0)
902 fprintf(f, "%2d: <unset>\n", i/2);
903 else
904 {
905 fprintf(f, "%2d: ", i/2);
906 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907 cb->offset_vector[i+1] - cb->offset_vector[i], f);
908 fprintf(f, "\n");
909 }
910 }
911 }
912
913 /* Re-print the subject in canonical form, the first time or if giving full
914 datails. On subsequent calls in the same match, we use pchars just to find the
915 printed lengths of the substrings. */
916
917 if (f != NULL) fprintf(f, "--->");
918
919 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921 cb->current_position - cb->start_match, f);
922
923 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924
925 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926 cb->subject_length - cb->current_position, f);
927
928 if (f != NULL) fprintf(f, "\n");
929
930 /* Always print appropriate indicators, with callout number if not already
931 shown. For automatic callouts, show the pattern offset. */
932
933 if (cb->callout_number == 255)
934 {
935 fprintf(outfile, "%+3d ", cb->pattern_position);
936 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937 }
938 else
939 {
940 if (callout_extra) fprintf(outfile, " ");
941 else fprintf(outfile, "%3d ", cb->callout_number);
942 }
943
944 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945 fprintf(outfile, "^");
946
947 if (post_start > 0)
948 {
949 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950 fprintf(outfile, "^");
951 }
952
953 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954 fprintf(outfile, " ");
955
956 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957 pbuffer + cb->pattern_position);
958
959 fprintf(outfile, "\n");
960 first_callout = 0;
961
962 if (cb->callout_data != NULL)
963 {
964 int callout_data = *((int *)(cb->callout_data));
965 if (callout_data != 0)
966 {
967 fprintf(outfile, "Callout data = %d\n", callout_data);
968 return callout_data;
969 }
970 }
971
972 return (cb->callout_number != callout_fail_id)? 0 :
973 (++callout_count >= callout_fail_count)? 1 : 0;
974 }
975
976
977 /*************************************************
978 * Local malloc functions *
979 *************************************************/
980
981 /* Alternative malloc function, to test functionality and show the size of the
982 compiled re. */
983
984 static void *new_malloc(size_t size)
985 {
986 void *block = malloc(size);
987 gotten_store = size;
988 if (show_malloc)
989 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 return block;
991 }
992
993 static void new_free(void *block)
994 {
995 if (show_malloc)
996 fprintf(outfile, "free %p\n", block);
997 free(block);
998 }
999
1000
1001 /* For recursion malloc/free, to test stacking calls */
1002
1003 static void *stack_malloc(size_t size)
1004 {
1005 void *block = malloc(size);
1006 if (show_malloc)
1007 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 return block;
1009 }
1010
1011 static void stack_free(void *block)
1012 {
1013 if (show_malloc)
1014 fprintf(outfile, "stack_free %p\n", block);
1015 free(block);
1016 }
1017
1018
1019 /*************************************************
1020 * Call pcre_fullinfo() *
1021 *************************************************/
1022
1023 /* Get one piece of information from the pcre_fullinfo() function */
1024
1025 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026 {
1027 int rc;
1028 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030 }
1031
1032
1033
1034 /*************************************************
1035 * Byte flipping function *
1036 *************************************************/
1037
/* Reverse the byte order of an integer field.

Arguments:
  value    the value to flip
  n        width of the field in bytes; 2 swaps the two low-order bytes, and
           every other value is handled as a 4-byte field

Returns:   the flipped value; bits above the field width are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0, b1, b2, b3;

if (n == 2)
  {
  unsigned long int low = value & 0x00ff;
  unsigned long int high = value & 0xff00;
  return (low << 8) | (high >> 8);
  }

/* Pick out the four bytes, least significant first, then reassemble them in
the opposite order. */

b0 = value & 0x000000ff;
b1 = value & 0x0000ff00;
b2 = value & 0x00ff0000;
b3 = value & 0xff000000;

return (b0 << 24) | (b1 << 8) | (b2 >> 8) | (b3 >> 24);
}
1047
1048
1049
1050
1051 /*************************************************
1052 * Check match or recursion limit *
1053 *************************************************/
1054
1055 static int
1056 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057 int start_offset, int options, int *use_offsets, int use_size_offsets,
1058 int flag, unsigned long int *limit, int errnumber, const char *msg)
1059 {
1060 int count;
1061 int min = 0;
1062 int mid = 64;
1063 int max = -1;
1064
1065 extra->flags |= flag;
1066
1067 for (;;)
1068 {
1069 *limit = mid;
1070
1071 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072 use_offsets, use_size_offsets);
1073
1074 if (count == errnumber)
1075 {
1076 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077 min = mid;
1078 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079 }
1080
1081 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082 count == PCRE_ERROR_PARTIAL)
1083 {
1084 if (mid == min + 1)
1085 {
1086 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087 break;
1088 }
1089 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090 max = mid;
1091 mid = (min + mid)/2;
1092 }
1093 else break; /* Some other error */
1094 }
1095
1096 extra->flags &= ~flag;
1097 return count;
1098 }
1099
1100
1101
1102 /*************************************************
1103 * Case-independent strncmp() function *
1104 *************************************************/
1105
1106 /*
1107 Arguments:
1108 s first string
1109 t second string
1110 n number of characters to compare
1111
1112 Returns: < 0, = 0, or > 0, according to the comparison
1113 */
1114
1115 static int
1116 strncmpic(uschar *s, uschar *t, int n)
1117 {
1118 while (n--)
1119 {
1120 int c = tolower(*s++) - tolower(*t++);
1121 if (c) return c;
1122 }
1123 return 0;
1124 }
1125
1126
1127
1128 /*************************************************
1129 * Check newline indicator *
1130 *************************************************/
1131
1132 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133 a message and return 0 if there is no match.
1134
1135 Arguments:
1136 p points after the leading '<'
1137 f file for error message
1138
1139 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140 */
1141
1142 static int
1143 check_newline(uschar *p, FILE *f)
1144 {
1145 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 fprintf(f, "Unknown newline type at: <%s\n", p);
1153 return 0;
1154 }
1155
1156
1157
1158 /*************************************************
1159 * Usage function *
1160 *************************************************/
1161
/* Write the command-line help text to stdout. Three parts of the text depend
on how the program was built: the readline() remark (SUPPORT_LIBREADLINE), the
-dfa line (omitted when NODFA is defined), and the -p line (omitted when
NOPOSIX is defined). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195
1196
1197
1198 /*************************************************
1199 * Main Program *
1200 *************************************************/
1201
1202 /* Read lines from named file or stdin and write to named file or stdout; lines
1203 consist of a regular expression, in delimiters and optionally followed by
1204 options, followed by a set of test data, terminated by an empty line. */
1205
1206 int main(int argc, char **argv)
1207 {
1208 FILE *infile = stdin;
1209 int options = 0;
1210 int study_options = 0;
1211 int default_find_match_limit = FALSE;
1212 int op = 1;
1213 int timeit = 0;
1214 int timeitm = 0;
1215 int showinfo = 0;
1216 int showstore = 0;
1217 int force_study = 0;
1218 int quiet = 0;
1219 int size_offsets = 45;
1220 int size_offsets_max;
1221 int *offsets = NULL;
1222 #if !defined NOPOSIX
1223 int posix = 0;
1224 #endif
1225 int debug = 0;
1226 int done = 0;
1227 int all_use_dfa = 0;
1228 int yield = 0;
1229 int stack_size;
1230
1231 /* These vectors store, end-to-end, a list of captured substring names. Assume
1232 that 1024 is plenty long enough for the few names we'll be testing. */
1233
1234 uschar copynames[1024];
1235 uschar getnames[1024];
1236
1237 uschar *copynamesptr;
1238 uschar *getnamesptr;
1239
1240 /* Get buffers from malloc() so that Electric Fence will check their misuse
1241 when I am debugging. They grow automatically when very long lines are read. */
1242
1243 buffer = (unsigned char *)malloc(buffer_size);
1244 dbuffer = (unsigned char *)malloc(buffer_size);
1245 pbuffer = (unsigned char *)malloc(buffer_size);
1246
1247 /* The outfile variable is static so that new_malloc can use it. */
1248
1249 outfile = stdout;
1250
1251 /* The following _setmode() stuff is some Windows magic that tells its runtime
1252 library to translate CRLF into a single LF character. At least, that's what
1253 I've been told: never having used Windows I take this all on trust. Originally
1254 it set 0x8000, but then I was advised that _O_BINARY was better. */
1255
1256 #if defined(_WIN32) || defined(WIN32)
1257 _setmode( _fileno( stdout ), _O_BINARY );
1258 #endif
1259
1260 /* Scan options */
1261
1262 while (argc > 1 && argv[op][0] == '-')
1263 {
1264 unsigned char *endptr;
1265
1266 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267 else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1271 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1272 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1273 #if !defined NODFA
1274 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1275 #endif
1276 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1277 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1278 *endptr == 0))
1279 {
1280 op++;
1281 argc--;
1282 }
1283 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1284 {
1285 int both = argv[op][2] == 0;
1286 int temp;
1287 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1288 *endptr == 0))
1289 {
1290 timeitm = temp;
1291 op++;
1292 argc--;
1293 }
1294 else timeitm = LOOPREPEAT;
1295 if (both) timeit = timeitm;
1296 }
1297 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1298 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1299 *endptr == 0))
1300 {
1301 #if defined(_WIN32) || defined(WIN32)
1302 printf("PCRE: -S not supported on this OS\n");
1303 exit(1);
1304 #else
1305 int rc;
1306 struct rlimit rlim;
1307 getrlimit(RLIMIT_STACK, &rlim);
1308 rlim.rlim_cur = stack_size * 1024 * 1024;
1309 rc = setrlimit(RLIMIT_STACK, &rlim);
1310 if (rc != 0)
1311 {
1312 printf("PCRE: setrlimit() failed with error %d\n", rc);
1313 exit(1);
1314 }
1315 op++;
1316 argc--;
1317 #endif
1318 }
1319 #if !defined NOPOSIX
1320 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1321 #endif
1322 else if (strcmp(argv[op], "-C") == 0)
1323 {
1324 int rc;
1325 unsigned long int lrc;
1326 printf("PCRE version %s\n", pcre_version());
1327 printf("Compiled with\n");
1328 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1329 printf(" %sUTF-8 support\n", rc? "" : "No ");
1330 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1331 printf(" %sUnicode properties support\n", rc? "" : "No ");
1332 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1333 /* Note that these values are always the ASCII values, even
1334 in EBCDIC environments. CR is 13 and NL is 10. */
1335 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1336 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1337 (rc == -2)? "ANYCRLF" :
1338 (rc == -1)? "ANY" : "???");
1339 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1340 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1341 "all Unicode newlines");
1342 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1343 printf(" Internal link size = %d\n", rc);
1344 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1345 printf(" POSIX malloc threshold = %d\n", rc);
1346 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1347 printf(" Default match limit = %ld\n", lrc);
1348 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1349 printf(" Default recursion depth limit = %ld\n", lrc);
1350 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1351 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1352 goto EXIT;
1353 }
1354 else if (strcmp(argv[op], "-help") == 0 ||
1355 strcmp(argv[op], "--help") == 0)
1356 {
1357 usage();
1358 goto EXIT;
1359 }
1360 else
1361 {
1362 printf("** Unknown or malformed option %s\n", argv[op]);
1363 usage();
1364 yield = 1;
1365 goto EXIT;
1366 }
1367 op++;
1368 argc--;
1369 }
1370
1371 /* Get the store for the offsets vector, and remember what it was */
1372
1373 size_offsets_max = size_offsets;
1374 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1375 if (offsets == NULL)
1376 {
1377 printf("** Failed to get %d bytes of memory for offsets vector\n",
1378 (int)(size_offsets_max * sizeof(int)));
1379 yield = 1;
1380 goto EXIT;
1381 }
1382
1383 /* Sort out the input and output files */
1384
1385 if (argc > 1)
1386 {
1387 infile = fopen(argv[op], INPUT_MODE);
1388 if (infile == NULL)
1389 {
1390 printf("** Failed to open %s\n", argv[op]);
1391 yield = 1;
1392 goto EXIT;
1393 }
1394 }
1395
1396 if (argc > 2)
1397 {
1398 outfile = fopen(argv[op+1], OUTPUT_MODE);
1399 if (outfile == NULL)
1400 {
1401 printf("** Failed to open %s\n", argv[op+1]);
1402 yield = 1;
1403 goto EXIT;
1404 }
1405 }
1406
1407 /* Set alternative malloc function */
1408
1409 pcre_malloc = new_malloc;
1410 pcre_free = new_free;
1411 pcre_stack_malloc = stack_malloc;
1412 pcre_stack_free = stack_free;
1413
1414 /* Heading line unless quiet, then prompt for first regex if stdin */
1415
1416 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1417
1418 /* Main loop */
1419
1420 while (!done)
1421 {
1422 pcre *re = NULL;
1423 pcre_extra *extra = NULL;
1424
1425 #if !defined NOPOSIX /* There are still compilers that require no indent */
1426 regex_t preg;
1427 int do_posix = 0;
1428 #endif
1429
1430 const char *error;
1431 unsigned char *markptr;
1432 unsigned char *p, *pp, *ppp;
1433 unsigned char *to_file = NULL;
1434 const unsigned char *tables = NULL;
1435 unsigned long int true_size, true_study_size = 0;
1436 size_t size, regex_gotten_store;
1437 int do_mark = 0;
1438 int do_study = 0;
1439 int no_force_study = 0;
1440 int do_debug = debug;
1441 int do_G = 0;
1442 int do_g = 0;
1443 int do_showinfo = showinfo;
1444 int do_showrest = 0;
1445 int do_showcaprest = 0;
1446 int do_flip = 0;
1447 int erroroffset, len, delimiter, poffset;
1448
1449 use_utf8 = 0;
1450 debug_lengths = 1;
1451
1452 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1453 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1454 fflush(outfile);
1455
1456 p = buffer;
1457 while (isspace(*p)) p++;
1458 if (*p == 0) continue;
1459
1460 /* See if the pattern is to be loaded pre-compiled from a file. */
1461
1462 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1463 {
1464 unsigned long int magic, get_options;
1465 uschar sbuf[8];
1466 FILE *f;
1467
1468 p++;
1469 pp = p + (int)strlen((char *)p);
1470 while (isspace(pp[-1])) pp--;
1471 *pp = 0;
1472
1473 f = fopen((char *)p, "rb");
1474 if (f == NULL)
1475 {
1476 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1477 continue;
1478 }
1479
1480 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1481
1482 true_size =
1483 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1484 true_study_size =
1485 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1486
1487 re = (real_pcre *)new_malloc(true_size);
1488 regex_gotten_store = gotten_store;
1489
1490 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1491
1492 magic = ((real_pcre *)re)->magic_number;
1493 if (magic != MAGIC_NUMBER)
1494 {
1495 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1496 {
1497 do_flip = 1;
1498 }
1499 else
1500 {
1501 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1502 fclose(f);
1503 continue;
1504 }
1505 }
1506
1507 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1508 do_flip? " (byte-inverted)" : "", p);
1509
1510 /* Need to know if UTF-8 for printing data strings */
1511
1512 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1513 use_utf8 = (get_options & PCRE_UTF8) != 0;
1514
1515 /* Now see if there is any following study data. */
1516
1517 if (true_study_size != 0)
1518 {
1519 pcre_study_data *psd;
1520
1521 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1522 extra->flags = PCRE_EXTRA_STUDY_DATA;
1523
1524 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1525 extra->study_data = psd;
1526
1527 if (fread(psd, 1, true_study_size, f) != true_study_size)
1528 {
1529 FAIL_READ:
1530 fprintf(outfile, "Failed to read data from %s\n", p);
1531 if (extra != NULL) new_free(extra);
1532 if (re != NULL) new_free(re);
1533 fclose(f);
1534 continue;
1535 }
1536 fprintf(outfile, "Study data loaded from %s\n", p);
1537 do_study = 1; /* To get the data output if requested */
1538 }
1539 else fprintf(outfile, "No study data\n");
1540
1541 fclose(f);
1542 goto SHOW_INFO;
1543 }
1544
1545 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1546 the pattern; if it isn't complete, read more. */
1547
1548 delimiter = *p++;
1549
1550 if (isalnum(delimiter) || delimiter == '\\')
1551 {
1552 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1553 goto SKIP_DATA;
1554 }
1555
1556 pp = p;
1557 poffset = (int)(p - buffer);
1558
1559 for(;;)
1560 {
1561 while (*pp != 0)
1562 {
1563 if (*pp == '\\' && pp[1] != 0) pp++;
1564 else if (*pp == delimiter) break;
1565 pp++;
1566 }
1567 if (*pp != 0) break;
1568 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1569 {
1570 fprintf(outfile, "** Unexpected EOF\n");
1571 done = 1;
1572 goto CONTINUE;
1573 }
1574 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1575 }
1576
1577 /* The buffer may have moved while being extended; reset the start of data
1578 pointer to the correct relative point in the buffer. */
1579
1580 p = buffer + poffset;
1581
1582 /* If the first character after the delimiter is backslash, make
1583 the pattern end with backslash. This is purely to provide a way
1584 of testing for the error message when a pattern ends with backslash. */
1585
1586 if (pp[1] == '\\') *pp++ = '\\';
1587
1588 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1589 for callouts. */
1590
1591 *pp++ = 0;
1592 strcpy((char *)pbuffer, (char *)p);
1593
1594 /* Look for options after final delimiter */
1595
1596 options = 0;
1597 study_options = 0;
1598 log_store = showstore; /* default from command line */
1599
1600 while (*pp != 0)
1601 {
1602 switch (*pp++)
1603 {
1604 case 'f': options |= PCRE_FIRSTLINE; break;
1605 case 'g': do_g = 1; break;
1606 case 'i': options |= PCRE_CASELESS; break;
1607 case 'm': options |= PCRE_MULTILINE; break;
1608 case 's': options |= PCRE_DOTALL; break;
1609 case 'x': options |= PCRE_EXTENDED; break;
1610
1611 case '+':
1612 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1613 break;
1614
1615 case 'A': options |= PCRE_ANCHORED; break;
1616 case 'B': do_debug = 1; break;
1617 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1618 case 'D': do_debug = do_showinfo = 1; break;
1619 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1620 case 'F': do_flip = 1; break;
1621 case 'G': do_G = 1; break;
1622 case 'I': do_showinfo = 1; break;
1623 case 'J': options |= PCRE_DUPNAMES; break;
1624 case 'K': do_mark = 1; break;
1625 case 'M': log_store = 1; break;
1626 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1627
1628 #if !defined NOPOSIX
1629 case 'P': do_posix = 1; break;
1630 #endif
1631
1632 case 'S':
1633 if (do_study == 0) do_study = 1; else
1634 {
1635 do_study = 0;
1636 no_force_study = 1;
1637 }
1638 break;
1639
1640 case 'U': options |= PCRE_UNGREEDY; break;
1641 case 'W': options |= PCRE_UCP; break;
1642 case 'X': options |= PCRE_EXTRA; break;
1643 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1644 case 'Z': debug_lengths = 0; break;
1645 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1646 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1647
1648 case 'T':
1649 switch (*pp++)
1650 {
1651 case '0': tables = tables0; break;
1652 case '1': tables = tables1; break;
1653
1654 case '\r':
1655 case '\n':
1656 case ' ':
1657 case 0:
1658 fprintf(outfile, "** Missing table number after /T\n");
1659 goto SKIP_DATA;
1660
1661 default:
1662 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1663 goto SKIP_DATA;
1664 }
1665 break;
1666
1667 case 'L':
1668 ppp = pp;
1669 /* The '\r' test here is so that it works on Windows. */
1670 /* The '0' test is just in case this is an unterminated line. */
1671 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1672 *ppp = 0;
1673 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1674 {
1675 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1676 goto SKIP_DATA;
1677 }
1678 locale_set = 1;
1679 tables = pcre_maketables();
1680 pp = ppp;
1681 break;
1682
1683 case '>':
1684 to_file = pp;
1685 while (*pp != 0) pp++;
1686 while (isspace(pp[-1])) pp--;
1687 *pp = 0;
1688 break;
1689
1690 case '<':
1691 {
1692 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1693 {
1694 options |= PCRE_JAVASCRIPT_COMPAT;
1695 pp += 3;
1696 }
1697 else
1698 {
1699 int x = check_newline(pp, outfile);
1700 if (x == 0) goto SKIP_DATA;
1701 options |= x;
1702 while (*pp++ != '>');
1703 }
1704 }
1705 break;
1706
1707 case '\r': /* So that it works in Windows */
1708 case '\n':
1709 case ' ':
1710 break;
1711
1712 default:
1713 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1714 goto SKIP_DATA;
1715 }
1716 }
1717
1718 /* Handle compiling via the POSIX interface, which doesn't support the
1719 timing, showing, or debugging options, nor the ability to pass over
1720 local character tables. */
1721
1722 #if !defined NOPOSIX
1723 if (posix || do_posix)
1724 {
1725 int rc;
1726 int cflags = 0;
1727
1728 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1729 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1730 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1731 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1732 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1733 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1734 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1735
1736 rc = regcomp(&preg, (char *)p, cflags);
1737
1738 /* Compilation failed; go back for another re, skipping to blank line
1739 if non-interactive. */
1740
1741 if (rc != 0)
1742 {
1743 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1744 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1745 goto SKIP_DATA;
1746 }
1747 }
1748
1749 /* Handle compiling via the native interface */
1750
1751 else
1752 #endif /* !defined NOPOSIX */
1753
1754 {
1755 unsigned long int get_options;
1756
1757 if (timeit > 0)
1758 {
1759 register int i;
1760 clock_t time_taken;
1761 clock_t start_time = clock();
1762 for (i = 0; i < timeit; i++)
1763 {
1764 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1765 if (re != NULL) free(re);
1766 }
1767 time_taken = clock() - start_time;
1768 fprintf(outfile, "Compile time %.4f milliseconds\n",
1769 (((double)time_taken * 1000.0) / (double)timeit) /
1770 (double)CLOCKS_PER_SEC);
1771 }
1772
1773 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1774
1775 /* Compilation failed; go back for another re, skipping to blank line
1776 if non-interactive. */
1777
1778 if (re == NULL)
1779 {
1780 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1781 SKIP_DATA:
1782 if (infile != stdin)
1783 {
1784 for (;;)
1785 {
1786 if (extend_inputline(infile, buffer, NULL) == NULL)
1787 {
1788 done = 1;
1789 goto CONTINUE;
1790 }
1791 len = (int)strlen((char *)buffer);
1792 while (len > 0 && isspace(buffer[len-1])) len--;
1793 if (len == 0) break;
1794 }
1795 fprintf(outfile, "\n");
1796 }
1797 goto CONTINUE;
1798 }
1799
1800 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1801 within the regex; check for this so that we know how to process the data
1802 lines. */
1803
1804 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1805 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1806
1807 /* Print information if required. There are now two info-returning
1808 functions. The old one has a limited interface and returns only limited
1809 data. Check that it agrees with the newer one. */
1810
1811 if (log_store)
1812 fprintf(outfile, "Memory allocation (code space): %d\n",
1813 (int)(gotten_store -
1814 sizeof(real_pcre) -
1815 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1816
1817 /* Extract the size for possible writing before possibly flipping it,
1818 and remember the store that was got. */
1819
1820 true_size = ((real_pcre *)re)->size;
1821 regex_gotten_store = gotten_store;
1822
1823 /* If -s or /S was present, study the regex to generate additional info to
1824 help with the matching, unless the pattern has the SS option, which
1825 suppresses the effect of /S (used for a few test patterns where studying is
1826 never sensible). */
1827
1828 if (do_study || (force_study && !no_force_study))
1829 {
1830 if (timeit > 0)
1831 {
1832 register int i;
1833 clock_t time_taken;
1834 clock_t start_time = clock();
1835 for (i = 0; i < timeit; i++)
1836 extra = pcre_study(re, study_options, &error);
1837 time_taken = clock() - start_time;
1838 if (extra != NULL) free(extra);
1839 fprintf(outfile, " Study time %.4f milliseconds\n",
1840 (((double)time_taken * 1000.0) / (double)timeit) /
1841 (double)CLOCKS_PER_SEC);
1842 }
1843 extra = pcre_study(re, study_options, &error);
1844 if (error != NULL)
1845 fprintf(outfile, "Failed to study: %s\n", error);
1846 else if (extra != NULL)
1847 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1848 }
1849
1850 /* If /K was present, we set up for handling MARK data. */
1851
1852 if (do_mark)
1853 {
1854 if (extra == NULL)
1855 {
1856 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1857 extra->flags = 0;
1858 }
1859 extra->mark = &markptr;
1860 extra->flags |= PCRE_EXTRA_MARK;
1861 }
1862
1863 /* If the 'F' option was present, we flip the bytes of all the integer
1864 fields in the regex data block and the study block. This is to make it
1865 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1866 compiled on a different architecture. */
1867
1868 if (do_flip)
1869 {
1870 real_pcre *rre = (real_pcre *)re;
1871 rre->magic_number =
1872 byteflip(rre->magic_number, sizeof(rre->magic_number));
1873 rre->size = byteflip(rre->size, sizeof(rre->size));
1874 rre->options = byteflip(rre->options, sizeof(rre->options));
1875 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1876 rre->top_bracket =
1877 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1878 rre->top_backref =
1879 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1880 rre->first_byte =
1881 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1882 rre->req_byte =
1883 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1884 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1885 sizeof(rre->name_table_offset));
1886 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1887 sizeof(rre->name_entry_size));
1888 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1889 sizeof(rre->name_count));
1890
1891 if (extra != NULL)
1892 {
1893 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1894 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1895 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1896 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1897 }
1898 }
1899
1900 /* Extract information from the compiled data if required */
1901
1902 SHOW_INFO:
1903
1904 if (do_debug)
1905 {
1906 fprintf(outfile, "------------------------------------------------------------------\n");
1907 pcre_printint(re, outfile, debug_lengths);
1908 }
1909
1910 /* We already have the options in get_options (see above) */
1911
1912 if (do_showinfo)
1913 {
1914 unsigned long int all_options;
1915 #if !defined NOINFOCHECK
1916 int old_first_char, old_options, old_count;
1917 #endif
1918 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1919 hascrorlf;
1920 int nameentrysize, namecount;
1921 const uschar *nametable;
1922
1923 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1924 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1925 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1926 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1927 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1928 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1929 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1930 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1931 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1932 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1933 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1934
1935 #if !defined NOINFOCHECK
1936 old_count = pcre_info(re, &old_options, &old_first_char);
1937 if (count < 0) fprintf(outfile,
1938 "Error %d from pcre_info()\n", count);
1939 else
1940 {
1941 if (old_count != count) fprintf(outfile,
1942 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1943 old_count);
1944
1945 if (old_first_char != first_char) fprintf(outfile,
1946 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1947 first_char, old_first_char);
1948
1949 if (old_options != (int)get_options) fprintf(outfile,
1950 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1951 get_options, old_options);
1952 }
1953 #endif
1954
1955 if (size != regex_gotten_store) fprintf(outfile,
1956 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1957 (int)size, (int)regex_gotten_store);
1958
1959 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1960 if (backrefmax > 0)
1961 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1962
1963 if (namecount > 0)
1964 {
1965 fprintf(outfile, "Named capturing subpatterns:\n");
1966 while (namecount-- > 0)
1967 {
1968 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1969 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1970 GET2(nametable, 0));
1971 nametable += nameentrysize;
1972 }
1973 }
1974
1975 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1976 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1977
1978 all_options = ((real_pcre *)re)->options;
1979 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1980
1981 if (get_options == 0) fprintf(outfile, "No options\n");
1982 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1983 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1984 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1985 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1986 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1987 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1988 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1989 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1990 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1991 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1992 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1993 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1994 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1995 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1996 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1997 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1998 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1999 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2000
2001 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2002
2003 switch (get_options & PCRE_NEWLINE_BITS)
2004 {
2005 case PCRE_NEWLINE_CR:
2006 fprintf(outfile, "Forced newline sequence: CR\n");
2007 break;
2008
2009 case PCRE_NEWLINE_LF:
2010 fprintf(outfile, "Forced newline sequence: LF\n");
2011 break;
2012
2013 case PCRE_NEWLINE_CRLF:
2014 fprintf(outfile, "Forced newline sequence: CRLF\n");
2015 break;
2016
2017 case PCRE_NEWLINE_ANYCRLF:
2018 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2019 break;
2020
2021 case PCRE_NEWLINE_ANY:
2022 fprintf(outfile, "Forced newline sequence: ANY\n");
2023 break;
2024
2025 default:
2026 break;
2027 }
2028
2029 if (first_char == -1)
2030 {
2031 fprintf(outfile, "First char at start or follows newline\n");
2032 }
2033 else if (first_char < 0)
2034 {
2035 fprintf(outfile, "No first char\n");
2036 }
2037 else
2038 {
2039 int ch = first_char & 255;
2040 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2041 "" : " (caseless)";
2042 if (PRINTHEX(ch))
2043 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2044 else
2045 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2046 }
2047
2048 if (need_char < 0)
2049 {
2050 fprintf(outfile, "No need char\n");
2051 }
2052 else
2053 {
2054 int ch = need_char & 255;
2055 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2056 "" : " (caseless)";
2057 if (PRINTHEX(ch))
2058 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2059 else
2060 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2061 }
2062
2063 /* Don't output study size; at present it is in any case a fixed
2064 value, but it varies, depending on the computer architecture, and
2065 so messes up the test suite. (And with the /F option, it might be
2066 flipped.) If study was forced by an external -s, don't show this
2067 information unless -i or -d was also present. This means that, except
2068 when auto-callouts are involved, the output from runs with and without
2069 -s should be identical. */
2070
2071 if (do_study || (force_study && showinfo && !no_force_study))
2072 {
2073 if (extra == NULL)
2074 fprintf(outfile, "Study returned NULL\n");
2075 else
2076 {
2077 uschar *start_bits = NULL;
2078 int minlength;
2079
2080 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2081 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2082
2083 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2084 if (start_bits == NULL)
2085 fprintf(outfile, "No set of starting bytes\n");
2086 else
2087 {
2088 int i;
2089 int c = 24;
2090 fprintf(outfile, "Starting byte set: ");
2091 for (i = 0; i < 256; i++)
2092 {
2093 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2094 {
2095 if (c > 75)
2096 {
2097 fprintf(outfile, "\n ");
2098 c = 2;
2099 }
2100 if (PRINTHEX(i) && i != ' ')
2101 {
2102 fprintf(outfile, "%c ", i);
2103 c += 2;
2104 }
2105 else
2106 {
2107 fprintf(outfile, "\\x%02x ", i);
2108 c += 5;
2109 }
2110 }
2111 }
2112 fprintf(outfile, "\n");
2113 }
2114 }
2115 }
2116 }
2117
2118 /* If the '>' option was present, we write out the regex to a file, and
2119 that is all. The first 8 bytes of the file are the regex length and then
2120 the study length, in big-endian order. */
2121
2122 if (to_file != NULL)
2123 {
2124 FILE *f = fopen((char *)to_file, "wb");
2125 if (f == NULL)
2126 {
2127 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2128 }
2129 else
2130 {
2131 uschar sbuf[8];
2132 sbuf[0] = (uschar)((true_size >> 24) & 255);
2133 sbuf[1] = (uschar)((true_size >> 16) & 255);
2134 sbuf[2] = (uschar)((true_size >> 8) & 255);
2135 sbuf[3] = (uschar)((true_size) & 255);
2136
2137 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2138 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2139 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2140 sbuf[7] = (uschar)((true_study_size) & 255);
2141
2142 if (fwrite(sbuf, 1, 8, f) < 8 ||
2143 fwrite(re, 1, true_size, f) < true_size)
2144 {
2145 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2146 }
2147 else
2148 {
2149 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2150
2151 /* If there is study data, write it, but verify the writing only
2152 if the studying was requested by /S, not just by -s. */
2153
2154 if (extra != NULL)
2155 {
2156 if (fwrite(extra->study_data, 1, true_study_size, f) <
2157 true_study_size)
2158 {
2159 fprintf(outfile, "Write error on %s: %s\n", to_file,
2160 strerror(errno));
2161 }
2162 else fprintf(outfile, "Study data written to %s\n", to_file);
2163 }
2164 }
2165 fclose(f);
2166 }
2167
2168 new_free(re);
2169 if (extra != NULL) new_free(extra);
2170 if (locale_set)
2171 {
2172 new_free((void *)tables);
2173 setlocale(LC_CTYPE, "C");
2174 locale_set = 0;
2175 }
2176 continue; /* With next regex */
2177 }
2178 } /* End of non-POSIX compile */
2179
2180 /* Read data lines and test them */
2181
2182 for (;;)
2183 {
2184 uschar *q;
2185 uschar *bptr;
2186 int *use_offsets = offsets;
2187 int use_size_offsets = size_offsets;
2188 int callout_data = 0;
2189 int callout_data_set = 0;
2190 int count, c;
2191 int copystrings = 0;
2192 int find_match_limit = default_find_match_limit;
2193 int getstrings = 0;
2194 int getlist = 0;
2195 int gmatched = 0;
2196 int start_offset = 0;
2197 int start_offset_sign = 1;
2198 int g_notempty = 0;
2199 int use_dfa = 0;
2200
2201 options = 0;
2202
2203 *copynames = 0;
2204 *getnames = 0;
2205
2206 copynamesptr = copynames;
2207 getnamesptr = getnames;
2208
2209 pcre_callout = callout;
2210 first_callout = 1;
2211 callout_extra = 0;
2212 callout_count = 0;
2213 callout_fail_count = 999999;
2214 callout_fail_id = -1;
2215 show_malloc = 0;
2216
2217 if (extra != NULL) extra->flags &=
2218 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2219
2220 len = 0;
2221 for (;;)
2222 {
2223 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2224 {
2225 if (len > 0) /* Reached EOF without hitting a newline */
2226 {
2227 fprintf(outfile, "\n");
2228 break;
2229 }
2230 done = 1;
2231 goto CONTINUE;
2232 }
2233 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2234 len = (int)strlen((char *)buffer);
2235 if (buffer[len-1] == '\n') break;
2236 }
2237
2238 while (len > 0 && isspace(buffer[len-1])) len--;
2239 buffer[len] = 0;
2240 if (len == 0) break;
2241
2242 p = buffer;
2243 while (isspace(*p)) p++;
2244
2245 bptr = q = dbuffer;
2246 while ((c = *p++) != 0)
2247 {
2248 int i = 0;
2249 int n = 0;
2250
2251 if (c == '\\') switch ((c = *p++))
2252 {
2253 case 'a': c = 7; break;
2254 case 'b': c = '\b'; break;
2255 case 'e': c = 27; break;
2256 case 'f': c = '\f'; break;
2257 case 'n': c = '\n'; break;
2258 case 'r': c = '\r'; break;
2259 case 't': c = '\t'; break;
2260 case 'v': c = '\v'; break;
2261
2262 case '0': case '1': case '2': case '3':
2263 case '4': case '5': case '6': case '7':
2264 c -= '0';
2265 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2266 c = c * 8 + *p++ - '0';
2267
2268 #if !defined NOUTF8
2269 if (use_utf8 && c > 255)
2270 {
2271 unsigned char buff8[8];
2272 int ii, utn;
2273 utn = ord2utf8(c, buff8);
2274 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2275 c = buff8[ii]; /* Last byte */
2276 }
2277 #endif
2278 break;
2279
2280 case 'x':
2281
2282 /* Handle \x{..} specially - new Perl thing for utf8 */
2283
2284 #if !defined NOUTF8
2285 if (*p == '{')
2286 {
2287 unsigned char *pt = p;
2288 c = 0;
2289 while (isxdigit(*(++pt)))
2290 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2291 if (*pt == '}')
2292 {
2293 unsigned char buff8[8];
2294 int ii, utn;
2295 if (use_utf8)
2296 {
2297 utn = ord2utf8(c, buff8);
2298 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2299 c = buff8[ii]; /* Last byte */
2300 }
2301 else
2302 {
2303 if (c > 255)
2304 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2305 "UTF-8 mode is not enabled.\n"
2306 "** Truncation will probably give the wrong result.\n", c);
2307 }
2308 p = pt + 1;
2309 break;
2310 }
2311 /* Not correct form; fall through */
2312 }
2313 #endif
2314
2315 /* Ordinary \x */
2316
2317 c = 0;
2318 while (i++ < 2 && isxdigit(*p))
2319 {
2320 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2321 p++;
2322 }
2323 break;
2324
2325 case 0: /* \ followed by EOF allows for an empty line */
2326 p--;
2327 continue;
2328
2329 case '>':
2330 if (*p == '-')
2331 {
2332 start_offset_sign = -1;
2333 p++;
2334 }
2335 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2336 start_offset *= start_offset_sign;
2337 continue;
2338
2339 case 'A': /* Option setting */
2340 options |= PCRE_ANCHORED;
2341 continue;
2342
2343 case 'B':
2344 options |= PCRE_NOTBOL;
2345 continue;
2346
2347 case 'C':
2348 if (isdigit(*p)) /* Set copy string */
2349 {
2350 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2351 copystrings |= 1 << n;
2352 }
2353 else if (isalnum(*p))
2354 {
2355 uschar *npp = copynamesptr;
2356 while (isalnum(*p)) *npp++ = *p++;
2357 *npp++ = 0;
2358 *npp = 0;
2359 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2360 if (n < 0)
2361 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2362 copynamesptr = npp;
2363 }
2364 else if (*p == '+')
2365 {
2366 callout_extra = 1;
2367 p++;
2368 }
2369 else if (*p == '-')
2370 {
2371 pcre_callout = NULL;
2372 p++;
2373 }
2374 else if (*p == '!')
2375 {
2376 callout_fail_id = 0;
2377 p++;
2378 while(isdigit(*p))
2379 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2380 callout_fail_count = 0;
2381 if (*p == '!')
2382 {
2383 p++;
2384 while(isdigit(*p))
2385 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2386 }
2387 }
2388 else if (*p == '*')
2389 {
2390 int sign = 1;
2391 callout_data = 0;
2392 if (*(++p) == '-') { sign = -1; p++; }
2393 while(isdigit(*p))
2394 callout_data = callout_data * 10 + *p++ - '0';
2395 callout_data *= sign;
2396 callout_data_set = 1;
2397 }
2398 continue;
2399
2400 #if !defined NODFA
2401 case 'D':
2402 #if !defined NOPOSIX
2403 if (posix || do_posix)
2404 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2405 else
2406 #endif
2407 use_dfa = 1;
2408 continue;
2409 #endif
2410
2411 #if !defined NODFA
2412 case 'F':
2413 options |= PCRE_DFA_SHORTEST;
2414 continue;
2415 #endif
2416
2417 case 'G':
2418 if (isdigit(*p))
2419 {
2420 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2421 getstrings |= 1 << n;
2422 }
2423 else if (isalnum(*p))
2424 {
2425 uschar *npp = getnamesptr;
2426 while (isalnum(*p)) *npp++ = *p++;
2427 *npp++ = 0;
2428 *npp = 0;
2429 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2430 if (n < 0)
2431 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2432 getnamesptr = npp;
2433 }
2434 continue;
2435
2436 case 'L':
2437 getlist = 1;
2438 continue;
2439
2440 case 'M':
2441 find_match_limit = 1;
2442 continue;
2443
2444 case 'N':
2445 if ((options & PCRE_NOTEMPTY) != 0)
2446 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2447 else
2448 options |= PCRE_NOTEMPTY;
2449 continue;
2450
2451 case 'O':
2452 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2453 if (n > size_offsets_max)
2454 {
2455 size_offsets_max = n;
2456 free(offsets);
2457 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2458 if (offsets == NULL)
2459 {
2460 printf("** Failed to get %d bytes of memory for offsets vector\n",
2461 (int)(size_offsets_max * sizeof(int)));
2462 yield = 1;
2463 goto EXIT;
2464 }
2465 }
2466 use_size_offsets = n;
2467 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2468 continue;
2469
2470 case 'P':
2471 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2472 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2473 continue;
2474
2475 case 'Q':
2476 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2477 if (extra == NULL)
2478 {
2479 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2480 extra->flags = 0;
2481 }
2482 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2483 extra->match_limit_recursion = n;
2484 continue;
2485
2486 case 'q':
2487 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2488 if (extra == NULL)
2489 {
2490 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2491 extra->flags = 0;
2492 }
2493 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2494 extra->match_limit = n;
2495 continue;
2496
2497 #if !defined NODFA
2498 case 'R':
2499 options |= PCRE_DFA_RESTART;
2500 continue;
2501 #endif
2502
2503 case 'S':
2504 show_malloc = 1;
2505 continue;
2506
2507 case 'Y':
2508 options |= PCRE_NO_START_OPTIMIZE;
2509 continue;
2510
2511 case 'Z':
2512 options |= PCRE_NOTEOL;
2513 continue;
2514
2515 case '?':
2516 options |= PCRE_NO_UTF8_CHECK;
2517 continue;
2518
2519 case '<':
2520 {
2521 int x = check_newline(p, outfile);
2522 if (x == 0) goto NEXT_DATA;
2523 options |= x;
2524 while (*p++ != '>');
2525 }
2526 continue;
2527 }
2528 *q++ = c;
2529 }
2530 *q = 0;
2531 len = (int)(q - dbuffer);
2532
2533 /* Move the data to the end of the buffer so that a read over the end of
2534 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2535 we are using the POSIX interface, we must include the terminating zero. */
2536
2537 #if !defined NOPOSIX
2538 if (posix || do_posix)
2539 {
2540 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2541 bptr += buffer_size - len - 1;
2542 }
2543 else
2544 #endif
2545 {
2546 memmove(bptr + buffer_size - len, bptr, len);
2547 bptr += buffer_size - len;
2548 }
2549
2550 if ((all_use_dfa || use_dfa) && find_match_limit)
2551 {
2552 printf("**Match limit not relevant for DFA matching: ignored\n");
2553 find_match_limit = 0;
2554 }
2555
2556 /* Handle matching via the POSIX interface, which does not
2557 support timing or playing with the match limit or callout data. */
2558
2559 #if !defined NOPOSIX
2560 if (posix || do_posix)
2561 {
2562 int rc;
2563 int eflags = 0;
2564 regmatch_t *pmatch = NULL;
2565 if (use_size_offsets > 0)
2566 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2567 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2568 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2569 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2570
2571 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2572
2573 if (rc != 0)
2574 {
2575 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2576 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2577 }
2578 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2579 != 0)
2580 {
2581 fprintf(outfile, "Matched with REG_NOSUB\n");
2582 }
2583 else
2584 {
2585 size_t i;
2586 for (i = 0; i < (size_t)use_size_offsets; i++)
2587 {
2588 if (pmatch[i].rm_so >= 0)
2589 {
2590 fprintf(outfile, "%2d: ", (int)i);
2591 (void)pchars(dbuffer + pmatch[i].rm_so,
2592 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2593 fprintf(outfile, "\n");
2594 if (do_showcaprest || (i == 0 && do_showrest))
2595 {
2596 fprintf(outfile, "%2d+ ", (int)i);
2597 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2598 outfile);
2599 fprintf(outfile, "\n");
2600 }
2601 }
2602 }
2603 }
2604 free(pmatch);
2605 }
2606
2607 /* Handle matching via the native interface - repeats for /g and /G */
2608
2609 else
2610 #endif /* !defined NOPOSIX */
2611
2612 for (;; gmatched++) /* Loop for /g or /G */
2613 {
2614 markptr = NULL;
2615
2616 if (timeitm > 0)
2617 {
2618 register int i;
2619 clock_t time_taken;
2620 clock_t start_time = clock();
2621
2622 #if !defined NODFA
2623 if (all_use_dfa || use_dfa)
2624 {
2625 int workspace[1000];
2626 for (i = 0; i < timeitm; i++)
2627 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2628 options | g_notempty, use_offsets, use_size_offsets, workspace,
2629 sizeof(workspace)/sizeof(int));
2630 }
2631 else
2632 #endif
2633
2634 for (i = 0; i < timeitm; i++)
2635 count = pcre_exec(re, extra, (char *)bptr, len,
2636 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2637
2638 time_taken = clock() - start_time;
2639 fprintf(outfile, "Execute time %.4f milliseconds\n",
2640 (((double)time_taken * 1000.0) / (double)timeitm) /
2641 (double)CLOCKS_PER_SEC);
2642 }
2643
2644 /* If find_match_limit is set, we want to do repeated matches with
2645 varying limits in order to find the minimum value for the match limit and
2646 for the recursion limit. */
2647
2648 if (find_match_limit)
2649 {
2650 if (extra == NULL)
2651 {
2652 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2653 extra->flags = 0;
2654 }
2655
2656 (void)check_match_limit(re, extra, bptr, len, start_offset,
2657 options|g_notempty, use_offsets, use_size_offsets,
2658 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2659 PCRE_ERROR_MATCHLIMIT, "match()");
2660
2661 count = check_match_limit(re, extra, bptr, len, start_offset,
2662 options|g_notempty, use_offsets, use_size_offsets,
2663 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2664 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2665 }
2666
2667 /* If callout_data is set, use the interface with additional data */
2668
2669 else if (callout_data_set)
2670 {
2671 if (extra == NULL)
2672 {
2673 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2674 extra->flags = 0;
2675 }
2676 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2677 extra->callout_data = &callout_data;
2678 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2679 options | g_notempty, use_offsets, use_size_offsets);
2680 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2681 }
2682
2683 /* The normal case is just to do the match once, with the default
2684 value of match_limit. */
2685
2686 #if !defined NODFA
2687 else if (all_use_dfa || use_dfa)
2688 {
2689 int workspace[1000];
2690 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2691 options | g_notempty, use_offsets, use_size_offsets, workspace,
2692 sizeof(workspace)/sizeof(int));
2693 if (count == 0)
2694 {
2695 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2696 count = use_size_offsets/2;
2697 }
2698 }
2699 #endif
2700
2701 else
2702 {
2703 count = pcre_exec(re, extra, (char *)bptr, len,
2704 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2705 if (count == 0)
2706 {
2707 fprintf(outfile, "Matched, but too many substrings\n");
2708 count = use_size_offsets/3;
2709 }
2710 }
2711
2712 /* Matched */
2713
2714 if (count >= 0)
2715 {
2716 int i, maxcount;
2717
2718 #if !defined NODFA
2719 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2720 #endif
2721 maxcount = use_size_offsets/3;
2722
2723 /* This is a check against a lunatic return value. */
2724
2725 if (count > maxcount)
2726 {
2727 fprintf(outfile,
2728 "** PCRE error: returned count %d is too big for offset size %d\n",
2729 count, use_size_offsets);
2730 count = use_size_offsets/3;
2731 if (do_g || do_G)
2732 {
2733 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2734 do_g = do_G = FALSE; /* Break g/G loop */
2735 }
2736 }
2737
2738 for (i = 0; i < count * 2; i += 2)
2739 {
2740 if (use_offsets[i] < 0)
2741 fprintf(outfile, "%2d: <unset>\n", i/2);
2742 else
2743 {
2744 fprintf(outfile, "%2d: ", i/2);
2745 (void)pchars(bptr + use_offsets[i],
2746 use_offsets[i+1] - use_offsets[i], outfile);
2747 fprintf(outfile, "\n");
2748 if (do_showcaprest || (i == 0 && do_showrest))
2749 {
2750 fprintf(outfile, "%2d+ ", i/2);
2751 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2752 outfile);
2753 fprintf(outfile, "\n");
2754 }
2755 }
2756 }
2757
2758 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2759
2760 for (i = 0; i < 32; i++)
2761 {
2762 if ((copystrings & (1 << i)) != 0)
2763 {
2764 char copybuffer[256];
2765 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2766 i, copybuffer, sizeof(copybuffer));
2767 if (rc < 0)
2768 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2769 else
2770 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2771 }
2772 }
2773
2774 for (copynamesptr = copynames;
2775 *copynamesptr != 0;
2776 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2777 {
2778 char copybuffer[256];
2779 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2780 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2781 if (rc < 0)
2782 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2783 else
2784 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2785 }
2786
2787 for (i = 0; i < 32; i++)
2788 {
2789 if ((getstrings & (1 << i)) != 0)
2790 {
2791 const char *substring;
2792 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2793 i, &substring);
2794 if (rc < 0)
2795 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2796 else
2797 {
2798 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2799 pcre_free_substring(substring);
2800 }
2801 }
2802 }
2803
2804 for (getnamesptr = getnames;
2805 *getnamesptr != 0;
2806 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2807 {
2808 const char *substring;
2809 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2810 count, (char *)getnamesptr, &substring);
2811 if (rc < 0)
2812 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2813 else
2814 {
2815 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2816 pcre_free_substring(substring);
2817 }
2818 }
2819
2820 if (getlist)
2821 {
2822 const char **stringlist;
2823 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2824 &stringlist);
2825 if (rc < 0)
2826 fprintf(outfile, "get substring list failed %d\n", rc);
2827 else
2828 {
2829 for (i = 0; i < count; i++)
2830 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2831 if (stringlist[i] != NULL)
2832 fprintf(outfile, "string list not terminated by NULL\n");
2833 /* free((void *)stringlist); */
2834 pcre_free_substring_list(stringlist);
2835 }
2836 }
2837 }
2838
2839 /* There was a partial match */
2840
2841 else if (count == PCRE_ERROR_PARTIAL)
2842 {
2843 if (markptr == NULL) fprintf(outfile, "Partial match");
2844 else fprintf(outfile, "Partial match, mark=%s", markptr);
2845 if (use_size_offsets > 1)
2846 {
2847 fprintf(outfile, ": ");
2848 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2849 outfile);
2850 }
2851 fprintf(outfile, "\n");
2852 break; /* Out of the /g loop */
2853 }
2854
2855 /* Failed to match. If this is a /g or /G loop and we previously set
2856 g_notempty after a null match, this is not necessarily the end. We want
2857 to advance the start offset, and continue. We won't be at the end of the
2858 string - that was checked before setting g_notempty.
2859
2860 Complication arises in the case when the newline convention is "any",
2861 "crlf", or "anycrlf". If the previous match was at the end of a line
2862 terminated by CRLF, an advance of one character just passes the \r,
2863 whereas we should prefer the longer newline sequence, as does the code in
2864 pcre_exec(). Fudge the offset value to achieve this. We check for a
2865 newline setting in the pattern; if none was set, use pcre_config() to
2866 find the default.
2867
2868 Otherwise, in the case of UTF-8 matching, the advance must be one
2869 character, not one byte. */
2870
2871 else
2872 {
2873 if (g_notempty != 0)
2874 {
2875 int onechar = 1;
2876 unsigned int obits = ((real_pcre *)re)->options;
2877 use_offsets[0] = start_offset;
2878 if ((obits & PCRE_NEWLINE_BITS) == 0)
2879 {
2880 int d;
2881 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2882 /* Note that these values are always the ASCII ones, even in
2883 EBCDIC environments. CR = 13, NL = 10. */
2884 obits = (d == 13)? PCRE_NEWLINE_CR :
2885 (d == 10)? PCRE_NEWLINE_LF :
2886 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2887 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2888 (d == -1)? PCRE_NEWLINE_ANY : 0;
2889 }
2890 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2891 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2892 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2893 &&
2894 start_offset < len - 1 &&
2895 bptr[start_offset] == '\r' &&
2896 bptr[start_offset+1] == '\n')
2897 onechar++;
2898 else if (use_utf8)
2899 {
2900 while (start_offset + onechar < len)
2901 {
2902 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2903 onechar++;
2904 }
2905 }
2906 use_offsets[1] = start_offset + onechar;
2907 }
2908 else
2909 {
2910 switch(count)
2911 {
2912 case PCRE_ERROR_NOMATCH:
2913 if (gmatched == 0)
2914 {
2915 if (markptr == NULL) fprintf(outfile, "No match\n");
2916 else fprintf(outfile, "No match, mark = %s\n", markptr);
2917 }
2918 break;
2919
2920 case PCRE_ERROR_BADUTF8:
2921 case PCRE_ERROR_SHORTUTF8:
2922 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2923 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2924 if (use_size_offsets >= 2)
2925 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2926 use_offsets[1]);
2927 fprintf(outfile, "\n");
2928 break;
2929
2930 default:
2931 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2932 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2933 else
2934 fprintf(outfile, "Error %d (Unexpected value)\n", count);
2935 break;
2936 }
2937
2938 break; /* Out of the /g loop */
2939 }
2940 }
2941
2942 /* If not /g or /G we are done */
2943
2944 if (!do_g && !do_G) break;
2945
2946 /* If we have matched an empty string, first check to see if we are at
2947 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2948 Perl's /g option does. This turns out to be rather cunning. First we set
2949 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2950 same point. If this fails (picked up above) we advance to the next
2951 character. */
2952
2953 g_notempty = 0;
2954
2955 if (use_offsets[0] == use_offsets[1])
2956 {
2957 if (use_offsets[0] == len) break;
2958 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2959 }
2960
2961 /* For /g, update the start offset, leaving the rest alone */
2962
2963 if (do_g) start_offset = use_offsets[1];
2964
2965 /* For /G, update the pointer and length */
2966
2967 else
2968 {
2969 bptr += use_offsets[1];
2970 len -= use_offsets[1];
2971 }
2972 } /* End of loop for /g and /G */
2973
2974 NEXT_DATA: continue;
2975 } /* End of loop for data lines */
2976
2977 CONTINUE:
2978
2979 #if !defined NOPOSIX
2980 if (posix || do_posix) regfree(&preg);
2981 #endif
2982
2983 if (re != NULL) new_free(re);
2984 if (extra != NULL) new_free(extra);
2985 if (locale_set)
2986 {
2987 new_free((void *)tables);
2988 setlocale(LC_CTYPE, "C");
2989 locale_set = 0;
2990 }
2991 }
2992
2993 if (infile == stdin) fprintf(outfile, "\n");
2994
2995 EXIT:
2996
2997 if (infile != NULL && infile != stdin) fclose(infile);
2998 if (outfile != NULL && outfile != stdout) fclose(outfile);
2999
3000 free(buffer);
3001 free(dbuffer);
3002 free(pbuffer);
3003 free(offsets);
3004
3005 return yield;
3006 }
3007
3008 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5