/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 658 - (show annotations)
Mon Aug 15 17:43:44 2011 UTC (3 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 91249 byte(s)
Error occurred while calculating annotation data.
Fix incorrect comment in pcretest.c.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile; /* stream that callout(), new_info() and the malloc wrappers write to */
181 static int log_store = 0;
182 static int callout_count; /* incremented by callout() for each callout whose number is callout_fail_id */
183 static int callout_extra; /* non-zero: callout() also lists the captured substrings */
184 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
185 static int callout_fail_id; /* the callout number that is counted towards a forced failure */
186 static int debug_lengths;
187 static int first_callout; /* non-zero: callout() echoes the subject; callout() resets it to 0 */
188 static int locale_set = 0; /* non-zero: PRINTHEX() uses isprint() instead of PRINTABLE() */
189 static int show_malloc; /* non-zero: the malloc/free wrappers log each call to outfile */
190 static int use_utf8; /* non-zero: pchars() decodes its input as UTF-8 */
191 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
192 static const unsigned char *last_callout_mark = NULL; /* last cb->mark value reported by callout() */
193
194 /* The buffers grow automatically if very long input lines are encountered. */
195
196 static int buffer_size = 50000; /* current size of each buffer; doubled by extend_inputline() */
197 static uschar *buffer = NULL; /* input line buffer filled by extend_inputline() */
198 static uschar *dbuffer = NULL; /* reallocated with the others; contents not kept on expansion */
199 static uschar *pbuffer = NULL; /* copy of the input used by callout(); contents kept on expansion */
200
201 /* Textual explanations for runtime error codes */
202
203 static const char *errtexts[] = { /* NOTE(review): presumably indexed by the negated PCRE error code - confirm against the code that reads this table */
204 NULL, /* 0 is no error */
205 NULL, /* NOMATCH is handled specially */
206 "NULL argument passed", /* 2 */
207 "bad option value", /* 3 */
208 "magic number missing", /* 4 */
209 "unknown opcode - pattern overwritten?", /* 5 */
210 "no more memory", /* 6 */
211 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
212 "match limit exceeded", /* 8 */
213 "callout error code", /* 9 */
214 NULL, /* BADUTF8 is handled specially */
215 "bad UTF-8 offset", /* 11 */
216 NULL, /* PARTIAL is handled specially */
217 "not used - internal error", /* 13 */
218 "internal error - pattern overwritten?", /* 14 */
219 "bad count value", /* 15 */
220 "item unsupported for DFA matching", /* 16 */
221 "backreference condition or recursion test not supported for DFA matching", /* 17 */
222 "match limit not supported for DFA matching", /* 18 */
223 "workspace size exceeded in DFA matching", /* 19 */
224 "too much recursion for DFA matching", /* 20 */
225 "recursion limit exceeded", /* 21 */
226 "not used - internal error", /* 22 */
227 "invalid combination of newline options", /* 23 */
228 "bad offset value", /* 24 */
229 NULL, /* SHORTUTF8 is handled specially */
230 "nested recursion at the same subject position" /* 26 */
231 };
232
233
234 /*************************************************
235 * Alternate character tables *
236 *************************************************/
237
238 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
239 using the default tables of the library. However, the T option can be used to
240 select alternate sets of tables, for different kinds of testing. Note also that
241 the L (locale) option also adjusts the tables. */
242
243 /* This is the set of tables distributed as default with PCRE. It recognizes
244 only ASCII characters. */
245
246 static const unsigned char tables0[] = {
247
248 /* This table is a lower casing table. */
249
250 0, 1, 2, 3, 4, 5, 6, 7,
251 8, 9, 10, 11, 12, 13, 14, 15,
252 16, 17, 18, 19, 20, 21, 22, 23,
253 24, 25, 26, 27, 28, 29, 30, 31,
254 32, 33, 34, 35, 36, 37, 38, 39,
255 40, 41, 42, 43, 44, 45, 46, 47,
256 48, 49, 50, 51, 52, 53, 54, 55,
257 56, 57, 58, 59, 60, 61, 62, 63,
258 64, 97, 98, 99,100,101,102,103,
259 104,105,106,107,108,109,110,111,
260 112,113,114,115,116,117,118,119,
261 120,121,122, 91, 92, 93, 94, 95,
262 96, 97, 98, 99,100,101,102,103,
263 104,105,106,107,108,109,110,111,
264 112,113,114,115,116,117,118,119,
265 120,121,122,123,124,125,126,127,
266 128,129,130,131,132,133,134,135,
267 136,137,138,139,140,141,142,143,
268 144,145,146,147,148,149,150,151,
269 152,153,154,155,156,157,158,159,
270 160,161,162,163,164,165,166,167,
271 168,169,170,171,172,173,174,175,
272 176,177,178,179,180,181,182,183,
273 184,185,186,187,188,189,190,191,
274 192,193,194,195,196,197,198,199,
275 200,201,202,203,204,205,206,207,
276 208,209,210,211,212,213,214,215,
277 216,217,218,219,220,221,222,223,
278 224,225,226,227,228,229,230,231,
279 232,233,234,235,236,237,238,239,
280 240,241,242,243,244,245,246,247,
281 248,249,250,251,252,253,254,255,
282
283 /* This table is a case flipping table. */
284
285 0, 1, 2, 3, 4, 5, 6, 7,
286 8, 9, 10, 11, 12, 13, 14, 15,
287 16, 17, 18, 19, 20, 21, 22, 23,
288 24, 25, 26, 27, 28, 29, 30, 31,
289 32, 33, 34, 35, 36, 37, 38, 39,
290 40, 41, 42, 43, 44, 45, 46, 47,
291 48, 49, 50, 51, 52, 53, 54, 55,
292 56, 57, 58, 59, 60, 61, 62, 63,
293 64, 97, 98, 99,100,101,102,103,
294 104,105,106,107,108,109,110,111,
295 112,113,114,115,116,117,118,119,
296 120,121,122, 91, 92, 93, 94, 95,
297 96, 65, 66, 67, 68, 69, 70, 71,
298 72, 73, 74, 75, 76, 77, 78, 79,
299 80, 81, 82, 83, 84, 85, 86, 87,
300 88, 89, 90,123,124,125,126,127,
301 128,129,130,131,132,133,134,135,
302 136,137,138,139,140,141,142,143,
303 144,145,146,147,148,149,150,151,
304 152,153,154,155,156,157,158,159,
305 160,161,162,163,164,165,166,167,
306 168,169,170,171,172,173,174,175,
307 176,177,178,179,180,181,182,183,
308 184,185,186,187,188,189,190,191,
309 192,193,194,195,196,197,198,199,
310 200,201,202,203,204,205,206,207,
311 208,209,210,211,212,213,214,215,
312 216,217,218,219,220,221,222,223,
313 224,225,226,227,228,229,230,231,
314 232,233,234,235,236,237,238,239,
315 240,241,242,243,244,245,246,247,
316 248,249,250,251,252,253,254,255,
317
318 /* This table contains bit maps for various character classes. Each map is 32
319 bytes long and the bits run from the least significant end of each byte. The
320 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
321 graph, print, punct, and cntrl. Other classes are built from combinations. */
322
323 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
327
328 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
329 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
330 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
332
333 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347
348 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
349 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
350 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
352
353 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
354 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
355 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357
358 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
359 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
360 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362
363 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
364 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367
368 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
372
373 /* This table identifies various classes of character by individual bits:
374 0x01 white space character
375 0x02 letter
376 0x04 decimal digit
377 0x08 hexadecimal digit
378 0x10 alphanumeric or '_'
379 0x80 regular expression metacharacter or binary zero
380 */
381
382 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
383 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
384 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
385 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
386 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
387 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
388 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
389 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
390 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
391 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
392 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
393 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
394 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
395 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
396 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
397 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
414
415 /* This is a set of tables that came originally from a Windows user. It seems to
416 be at least an approximation of ISO 8859. In particular, there are characters
417 greater than 128 that are marked as spaces, letters, etc. */
418
419 static const unsigned char tables1[] = {
420 0,1,2,3,4,5,6,7,
421 8,9,10,11,12,13,14,15,
422 16,17,18,19,20,21,22,23,
423 24,25,26,27,28,29,30,31,
424 32,33,34,35,36,37,38,39,
425 40,41,42,43,44,45,46,47,
426 48,49,50,51,52,53,54,55,
427 56,57,58,59,60,61,62,63,
428 64,97,98,99,100,101,102,103,
429 104,105,106,107,108,109,110,111,
430 112,113,114,115,116,117,118,119,
431 120,121,122,91,92,93,94,95,
432 96,97,98,99,100,101,102,103,
433 104,105,106,107,108,109,110,111,
434 112,113,114,115,116,117,118,119,
435 120,121,122,123,124,125,126,127,
436 128,129,130,131,132,133,134,135,
437 136,137,138,139,140,141,142,143,
438 144,145,146,147,148,149,150,151,
439 152,153,154,155,156,157,158,159,
440 160,161,162,163,164,165,166,167,
441 168,169,170,171,172,173,174,175,
442 176,177,178,179,180,181,182,183,
443 184,185,186,187,188,189,190,191,
444 224,225,226,227,228,229,230,231,
445 232,233,234,235,236,237,238,239,
446 240,241,242,243,244,245,246,215,
447 248,249,250,251,252,253,254,223,
448 224,225,226,227,228,229,230,231,
449 232,233,234,235,236,237,238,239,
450 240,241,242,243,244,245,246,247,
451 248,249,250,251,252,253,254,255,
452 0,1,2,3,4,5,6,7,
453 8,9,10,11,12,13,14,15,
454 16,17,18,19,20,21,22,23,
455 24,25,26,27,28,29,30,31,
456 32,33,34,35,36,37,38,39,
457 40,41,42,43,44,45,46,47,
458 48,49,50,51,52,53,54,55,
459 56,57,58,59,60,61,62,63,
460 64,97,98,99,100,101,102,103,
461 104,105,106,107,108,109,110,111,
462 112,113,114,115,116,117,118,119,
463 120,121,122,91,92,93,94,95,
464 96,65,66,67,68,69,70,71,
465 72,73,74,75,76,77,78,79,
466 80,81,82,83,84,85,86,87,
467 88,89,90,123,124,125,126,127,
468 128,129,130,131,132,133,134,135,
469 136,137,138,139,140,141,142,143,
470 144,145,146,147,148,149,150,151,
471 152,153,154,155,156,157,158,159,
472 160,161,162,163,164,165,166,167,
473 168,169,170,171,172,173,174,175,
474 176,177,178,179,180,181,182,183,
475 184,185,186,187,188,189,190,191,
476 224,225,226,227,228,229,230,231,
477 232,233,234,235,236,237,238,239,
478 240,241,242,243,244,245,246,215,
479 248,249,250,251,252,253,254,223,
480 192,193,194,195,196,197,198,199,
481 200,201,202,203,204,205,206,207,
482 208,209,210,211,212,213,214,247,
483 216,217,218,219,220,221,222,255,
484 0,62,0,0,1,0,0,0,
485 0,0,0,0,0,0,0,0,
486 32,0,0,0,1,0,0,0,
487 0,0,0,0,0,0,0,0,
488 0,0,0,0,0,0,255,3,
489 126,0,0,0,126,0,0,0,
490 0,0,0,0,0,0,0,0,
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,255,3,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,12,2,
495 0,0,0,0,0,0,0,0,
496 0,0,0,0,0,0,0,0,
497 254,255,255,7,0,0,0,0,
498 0,0,0,0,0,0,0,0,
499 255,255,127,127,0,0,0,0,
500 0,0,0,0,0,0,0,0,
501 0,0,0,0,254,255,255,7,
502 0,0,0,0,0,4,32,4,
503 0,0,0,128,255,255,127,255,
504 0,0,0,0,0,0,255,3,
505 254,255,255,135,254,255,255,7,
506 0,0,0,0,0,4,44,6,
507 255,255,127,255,255,255,127,255,
508 0,0,0,0,254,255,255,255,
509 255,255,255,255,255,255,255,127,
510 0,0,0,0,254,255,255,255,
511 255,255,255,255,255,255,255,255,
512 0,2,0,0,255,255,255,255,
513 255,255,255,255,255,255,255,127,
514 0,0,0,0,255,255,255,255,
515 255,255,255,255,255,255,255,255,
516 0,0,0,0,254,255,0,252,
517 1,0,0,248,1,0,0,120,
518 0,0,0,0,254,255,255,255,
519 0,0,128,0,0,0,128,0,
520 255,255,255,255,0,0,0,0,
521 0,0,0,0,0,0,0,128,
522 255,255,255,255,0,0,0,0,
523 0,0,0,0,0,0,0,0,
524 128,0,0,0,0,0,0,0,
525 0,1,1,0,1,1,0,0,
526 0,0,0,0,0,0,0,0,
527 0,0,0,0,0,0,0,0,
528 1,0,0,0,128,0,0,0,
529 128,128,128,128,0,0,128,0,
530 28,28,28,28,28,28,28,28,
531 28,28,0,0,0,0,0,128,
532 0,26,26,26,26,26,26,18,
533 18,18,18,18,18,18,18,18,
534 18,18,18,18,18,18,18,18,
535 18,18,18,128,128,0,128,16,
536 0,26,26,26,26,26,26,18,
537 18,18,18,18,18,18,18,18,
538 18,18,18,18,18,18,18,18,
539 18,18,18,128,128,0,0,0,
540 0,0,0,0,0,1,0,0,
541 0,0,0,0,0,0,0,0,
542 0,0,0,0,0,0,0,0,
543 0,0,0,0,0,0,0,0,
544 1,0,0,0,0,0,0,0,
545 0,0,18,0,0,0,0,0,
546 0,0,20,20,0,18,0,0,
547 0,20,18,0,0,0,0,0,
548 18,18,18,18,18,18,18,18,
549 18,18,18,18,18,18,18,18,
550 18,18,18,18,18,18,18,0,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,18,
553 18,18,18,18,18,18,18,18,
554 18,18,18,18,18,18,18,0,
555 18,18,18,18,18,18,18,18
556 };
557
558
559
560
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror() on old-fashioned systems (e.g. SunOS4) whose
libraries lack it but do export the sys_nerr/sys_errlist pair.

Argument:
  n           an error number

Returns:      sys_errlist[n] when n is in the range 0 to sys_nerr - 1,
              otherwise the fixed text "unknown error number"
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  char *text = "unknown error number";

  if (n >= 0 && n < sys_nerr)
    text = sys_errlist[n];

  return text;
}
#endif /* HAVE_STRERROR */
580
581
582
583
584 /*************************************************
585 * Read or extend an input line *
586 *************************************************/
587
588 /* Input lines are read into buffer, but both patterns and data lines can be
589 continued over multiple input lines. In addition, if the buffer fills up, we
590 want to automatically expand it so as to be able to handle extremely large
591 lines that are needed for certain stress tests. When the input buffer is
592 expanded, the other two buffers must also be expanded likewise, and the
593 contents of pbuffer, which are a copy of the input for callouts, must be
594 preserved (for when expansion happens for a data line). This is not the most
595 optimal way of handling this, but hey, this is just a test program!
596
597 Arguments:
598 f the file to read
599 start where in buffer to start (this *must* be within buffer)
600 prompt for stdin or readline()
601
602 Returns: pointer to the start of new data
603 could be a copy of start, or could be moved
604 NULL if no data read and EOF reached
605 */
606
607 static uschar *
608 extend_inputline(FILE *f, uschar *start, const char *prompt)
609 {
610 uschar *here = start;
611
612 for (;;)
613 {
614 int rlen = (int)(buffer_size - (here - buffer));
615
616 if (rlen > 1000)
617 {
618 int dlen;
619
620 /* If libreadline support is required, use readline() to read a line if the
621 input is a terminal. Note that readline() removes the trailing newline, so
622 we must put it back again, to be compatible with fgets(). */
623
624 #ifdef SUPPORT_LIBREADLINE
625 if (isatty(fileno(f)))
626 {
627 size_t len;
628 char *s = readline(prompt);
629 if (s == NULL) return (here == start)? NULL : start;
630 len = strlen(s);
631 if (len > 0) add_history(s);
632 if (len > rlen - 1) len = rlen - 1;
633 memcpy(here, s, len);
634 here[len] = '\n';
635 here[len+1] = 0;
636 free(s);
637 }
638 else
639 #endif
640
641 /* Read the next line by normal means, prompting if the file is stdin. */
642
643 {
644 if (f == stdin) printf("%s", prompt);
645 if (fgets((char *)here, rlen, f) == NULL)
646 return (here == start)? NULL : start;
647 }
648
649 dlen = (int)strlen((char *)here);
650 if (dlen > 0 && here[dlen - 1] == '\n') return start;
651 here += dlen;
652 }
653
654 else
655 {
656 int new_buffer_size = 2*buffer_size;
657 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
658 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
659 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
660
661 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
662 {
663 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
664 exit(1);
665 }
666
667 memcpy(new_buffer, buffer, buffer_size);
668 memcpy(new_pbuffer, pbuffer, buffer_size);
669
670 buffer_size = new_buffer_size;
671
672 start = new_buffer + (start - buffer);
673 here = new_buffer + (here - buffer);
674
675 free(buffer);
676 free(dbuffer);
677 free(pbuffer);
678
679 buffer = new_buffer;
680 dbuffer = new_dbuffer;
681 pbuffer = new_pbuffer;
682 }
683 }
684
685 return NULL; /* Control never gets here */
686 }
687
688
689
690
691
692
693
694 /*************************************************
695 * Read number from string *
696 *************************************************/
697
/* strtoul() is not used because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the job is done by hand. It is only used
for unpicking arguments, so it is kept simple: leading white space is skipped,
then a run of decimal digits is accumulated. No overflow check is made.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (the first byte not consumed)

Returns:      the value of the digits as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *p = str;
  int value = 0;

  for (; *p != 0 && isspace(*p); p++)
    ;

  for (; isdigit(*p); p++)
    value = value * 10 + (int)(*p - '0');

  *endptr = p;
  return value;
}
718
719
720
721
722 /*************************************************
723 * Convert UTF-8 string to value *
724 *************************************************/
725
726 /* This function takes one or more bytes that represents a UTF-8 character,
727 and returns the value of the character.
728
729 Argument:
730 utf8bytes a pointer to the byte vector
731 vptr a pointer to an int to receive the value
732
733 Returns: > 0 => the number of bytes consumed
734 -6 to 0 => malformed UTF-8 character at offset = (-return)
735 */
736
737 #if !defined NOUTF8
738
739 static int
740 utf82ord(unsigned char *utf8bytes, int *vptr)
741 {
742 int c = *utf8bytes++;
743 int d = c;
744 int i, j, s;
745
746 for (i = -1; i < 6; i++) /* i is number of additional bytes */
747 {
748 if ((d & 0x80) == 0) break;
749 d <<= 1;
750 }
751
752 if (i == -1) { *vptr = c; return 1; } /* ascii character */
753 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
754
755 /* i now has a value in the range 1-5 */
756
757 s = 6*i;
758 d = (c & utf8_table3[i]) << s;
759
760 for (j = 0; j < i; j++)
761 {
762 c = *utf8bytes++;
763 if ((c & 0xc0) != 0x80) return -(j+1);
764 s -= 6;
765 d |= (c & 0x3f) << s;
766 }
767
768 /* Check that encoding was the correct unique one */
769
770 for (j = 0; j < utf8_table1_size; j++)
771 if (d <= utf8_table1[j]) break;
772 if (j != i) return -(i+1);
773
774 /* Valid value */
775
776 *vptr = d;
777 return i+1;
778 }
779
780 #endif
781
782
783
784 /*************************************************
785 * Convert character value to UTF-8 *
786 *************************************************/
787
788 /* This function takes an integer value in the range 0 - 0x7fffffff
789 and encodes it as a UTF-8 character in 0 to 6 bytes.
790
791 Arguments:
792 cvalue the character value
793 utf8bytes pointer to buffer for result - at least 6 bytes long
794
795 Returns: number of characters placed in the buffer
796 */
797
798 #if !defined NOUTF8
799
800 static int
801 ord2utf8(int cvalue, uschar *utf8bytes)
802 {
803 register int i, j;
804 for (i = 0; i < utf8_table1_size; i++)
805 if (cvalue <= utf8_table1[i]) break;
806 utf8bytes += i;
807 for (j = i; j > 0; j--)
808 {
809 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
810 cvalue >>= 6;
811 }
812 *utf8bytes = utf8_table2[i] | cvalue;
813 return i + 1;
814 }
815
816 #endif
817
818
819
820 /*************************************************
821 * Print character string *
822 *************************************************/
823
824 /* Character string printing function. Must handle UTF-8 strings in utf8
825 mode. Yields number of characters printed. If handed a NULL file, just counts
826 chars without printing. */
827
828 static int pchars(unsigned char *p, int length, FILE *f)
829 {
830 int c = 0;
831 int yield = 0;
832
833 while (length-- > 0)
834 {
835 #if !defined NOUTF8
836 if (use_utf8)
837 {
838 int rc = utf82ord(p, &c);
839
840 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
841 {
842 length -= rc - 1;
843 p += rc;
844 if (PRINTHEX(c))
845 {
846 if (f != NULL) fprintf(f, "%c", c);
847 yield++;
848 }
849 else
850 {
851 int n = 4;
852 if (f != NULL) fprintf(f, "\\x{%02x}", c);
853 yield += (n <= 0x000000ff)? 2 :
854 (n <= 0x00000fff)? 3 :
855 (n <= 0x0000ffff)? 4 :
856 (n <= 0x000fffff)? 5 : 6;
857 }
858 continue;
859 }
860 }
861 #endif
862
863 /* Not UTF-8, or malformed UTF-8 */
864
865 c = *p++;
866 if (PRINTHEX(c))
867 {
868 if (f != NULL) fprintf(f, "%c", c);
869 yield++;
870 }
871 else
872 {
873 if (f != NULL) fprintf(f, "\\x%02x", c);
874 yield += 4;
875 }
876 }
877
878 return yield;
879 }
880
881
882
883 /*************************************************
884 * Callout function *
885 *************************************************/
886
887 /* Called from PCRE as a result of the (?C) item. We print out where we are in
888 the match. Yield zero unless more callouts than the fail count, or the callout
889 data is not zero. */
890
891 static int callout(pcre_callout_block *cb)
892 {
893 FILE *f = (first_callout | callout_extra)? outfile : NULL;
894 int i, pre_start, post_start, subject_length;
895
896 if (callout_extra)
897 {
898 fprintf(f, "Callout %d: last capture = %d\n",
899 cb->callout_number, cb->capture_last);
900
901 for (i = 0; i < cb->capture_top * 2; i += 2)
902 {
903 if (cb->offset_vector[i] < 0)
904 fprintf(f, "%2d: <unset>\n", i/2);
905 else
906 {
907 fprintf(f, "%2d: ", i/2);
908 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
909 cb->offset_vector[i+1] - cb->offset_vector[i], f);
910 fprintf(f, "\n");
911 }
912 }
913 }
914
915 /* Re-print the subject in canonical form, the first time or if giving full
916 datails. On subsequent calls in the same match, we use pchars just to find the
917 printed lengths of the substrings. */
918
919 if (f != NULL) fprintf(f, "--->");
920
921 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
922 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
923 cb->current_position - cb->start_match, f);
924
925 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
926
927 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
928 cb->subject_length - cb->current_position, f);
929
930 if (f != NULL) fprintf(f, "\n");
931
932 /* Always print appropriate indicators, with callout number if not already
933 shown. For automatic callouts, show the pattern offset. */
934
935 if (cb->callout_number == 255)
936 {
937 fprintf(outfile, "%+3d ", cb->pattern_position);
938 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
939 }
940 else
941 {
942 if (callout_extra) fprintf(outfile, " ");
943 else fprintf(outfile, "%3d ", cb->callout_number);
944 }
945
946 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
947 fprintf(outfile, "^");
948
949 if (post_start > 0)
950 {
951 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
952 fprintf(outfile, "^");
953 }
954
955 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
956 fprintf(outfile, " ");
957
958 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
959 pbuffer + cb->pattern_position);
960
961 fprintf(outfile, "\n");
962 first_callout = 0;
963
964 if (cb->mark != last_callout_mark)
965 {
966 fprintf(outfile, "Latest Mark: %s\n",
967 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
968 last_callout_mark = cb->mark;
969 }
970
971 if (cb->callout_data != NULL)
972 {
973 int callout_data = *((int *)(cb->callout_data));
974 if (callout_data != 0)
975 {
976 fprintf(outfile, "Callout data = %d\n", callout_data);
977 return callout_data;
978 }
979 }
980
981 return (cb->callout_number != callout_fail_id)? 0 :
982 (++callout_count >= callout_fail_count)? 1 : 0;
983 }
984
985
986 /*************************************************
987 * Local malloc functions *
988 *************************************************/
989
990 /* Alternative malloc function, to test functionality and show the size of the
991 compiled re. */
992
993 static void *new_malloc(size_t size)
994 {
995 void *block = malloc(size);
996 gotten_store = size;
997 if (show_malloc)
998 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
999 return block;
1000 }
1001
1002 static void new_free(void *block)
1003 {
1004 if (show_malloc)
1005 fprintf(outfile, "free %p\n", block);
1006 free(block);
1007 }
1008
1009
1010 /* For recursion malloc/free, to test stacking calls */
1011
1012 static void *stack_malloc(size_t size)
1013 {
1014 void *block = malloc(size);
1015 if (show_malloc)
1016 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1017 return block;
1018 }
1019
1020 static void stack_free(void *block)
1021 {
1022 if (show_malloc)
1023 fprintf(outfile, "stack_free %p\n", block);
1024 free(block);
1025 }
1026
1027
1028 /*************************************************
1029 * Call pcre_fullinfo() *
1030 *************************************************/
1031
1032 /* Get one piece of information from the pcre_fullinfo() function */
1033
1034 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1035 {
1036 int rc;
1037 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1038 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1039 }
1040
1041
1042
1043 /*************************************************
1044 * Byte flipping function *
1045 *************************************************/
1046
/* Reverse the byte order of a 16-bit or 32-bit quantity held in an unsigned
long.

Arguments:
  value     the quantity to flip
  n         2 selects a 16-bit swap; any other value selects a 32-bit swap

Returns:    the flipped value; bits of the input above the swapped width do
            not contribute to the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  const unsigned long int b0 = value & 0xffUL;
  const unsigned long int b1 = (value >> 8) & 0xffUL;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xffUL;
  b3 = (value >> 24) & 0xffUL;
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1056
1057
1058
1059
1060 /*************************************************
1061 * Check match or recursion limit *
1062 *************************************************/
1063
1064 static int
1065 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1066 int start_offset, int options, int *use_offsets, int use_size_offsets,
1067 int flag, unsigned long int *limit, int errnumber, const char *msg)
1068 {
1069 int count;
1070 int min = 0;
1071 int mid = 64;
1072 int max = -1;
1073
1074 extra->flags |= flag;
1075
1076 for (;;)
1077 {
1078 *limit = mid;
1079
1080 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1081 use_offsets, use_size_offsets);
1082
1083 if (count == errnumber)
1084 {
1085 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1086 min = mid;
1087 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1088 }
1089
1090 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1091 count == PCRE_ERROR_PARTIAL)
1092 {
1093 if (mid == min + 1)
1094 {
1095 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1096 break;
1097 }
1098 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099 max = mid;
1100 mid = (min + mid)/2;
1101 }
1102 else break; /* Some other error */
1103 }
1104
1105 extra->flags &= ~flag;
1106 return count;
1107 }
1108
1109
1110
1111 /*************************************************
1112 * Case-independent strncmp() function *
1113 *************************************************/
1114
1115 /*
1116 Arguments:
1117 s first string
1118 t second string
1119 n number of characters to compare
1120
1121 Returns: < 0, = 0, or > 0, according to the comparison
1122 */
1123
1124 static int
1125 strncmpic(uschar *s, uschar *t, int n)
1126 {
1127 while (n--)
1128 {
1129 int c = tolower(*s++) - tolower(*t++);
1130 if (c) return c;
1131 }
1132 return 0;
1133 }
1134
1135
1136
1137 /*************************************************
1138 * Check newline indicator *
1139 *************************************************/
1140
1141 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1142 a message and return 0 if there is no match.
1143
1144 Arguments:
1145 p points after the leading '<'
1146 f file for error message
1147
1148 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1149 */
1150
1151 static int
1152 check_newline(uschar *p, FILE *f)
1153 {
1154 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1155 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1156 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1157 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1158 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1159 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1160 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1161 fprintf(f, "Unknown newline type at: <%s\n", p);
1162 return 0;
1163 }
1164
1165
1166
1167 /*************************************************
1168 * Usage function *
1169 *************************************************/
1170
/* Write the command-line summary to stdout. The readline note and the -dfa
and -p lines depend on how the program was built (SUPPORT_LIBREADLINE, NODFA
and NOPOSIX respectively). None of the strings contains a conversion
specification, so they are written with fputs(). */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
    stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
    stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied\n"
        " -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
    stdout);
}
1204
1205
1206
1207 /*************************************************
1208 * Main Program *
1209 *************************************************/
1210
1211 /* Read lines from named file or stdin and write to named file or stdout; lines
1212 consist of a regular expression, in delimiters and optionally followed by
1213 options, followed by a set of test data, terminated by an empty line. */
1214
1215 int main(int argc, char **argv)
1216 {
1217 FILE *infile = stdin;
1218 int options = 0;
1219 int study_options = 0;
1220 int default_find_match_limit = FALSE;
1221 int op = 1;
1222 int timeit = 0;
1223 int timeitm = 0;
1224 int showinfo = 0;
1225 int showstore = 0;
1226 int force_study = 0;
1227 int quiet = 0;
1228 int size_offsets = 45;
1229 int size_offsets_max;
1230 int *offsets = NULL;
1231 #if !defined NOPOSIX
1232 int posix = 0;
1233 #endif
1234 int debug = 0;
1235 int done = 0;
1236 int all_use_dfa = 0;
1237 int yield = 0;
1238 int stack_size;
1239
1240 /* These vectors store, end-to-end, a list of captured substring names. Assume
1241 that 1024 is plenty long enough for the few names we'll be testing. */
1242
1243 uschar copynames[1024];
1244 uschar getnames[1024];
1245
1246 uschar *copynamesptr;
1247 uschar *getnamesptr;
1248
1249 /* Get buffers from malloc() so that Electric Fence will check their misuse
1250 when I am debugging. They grow automatically when very long lines are read. */
1251
1252 buffer = (unsigned char *)malloc(buffer_size);
1253 dbuffer = (unsigned char *)malloc(buffer_size);
1254 pbuffer = (unsigned char *)malloc(buffer_size);
1255
1256 /* The outfile variable is static so that new_malloc can use it. */
1257
1258 outfile = stdout;
1259
1260 /* The following _setmode() stuff is some Windows magic that tells its runtime
1261 library to translate CRLF into a single LF character. At least, that's what
1262 I've been told: never having used Windows I take this all on trust. Originally
1263 it set 0x8000, but then I was advised that _O_BINARY was better. */
1264
1265 #if defined(_WIN32) || defined(WIN32)
1266 _setmode( _fileno( stdout ), _O_BINARY );
1267 #endif
1268
1269 /* Scan options */
1270
1271 while (argc > 1 && argv[op][0] == '-')
1272 {
1273 unsigned char *endptr;
1274
1275 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1276 else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1277 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1278 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1279 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1280 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1281 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1282 #if !defined NODFA
1283 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1284 #endif
1285 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1286 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1287 *endptr == 0))
1288 {
1289 op++;
1290 argc--;
1291 }
1292 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1293 {
1294 int both = argv[op][2] == 0;
1295 int temp;
1296 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1297 *endptr == 0))
1298 {
1299 timeitm = temp;
1300 op++;
1301 argc--;
1302 }
1303 else timeitm = LOOPREPEAT;
1304 if (both) timeit = timeitm;
1305 }
1306 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1307 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1308 *endptr == 0))
1309 {
1310 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1311 printf("PCRE: -S not supported on this OS\n");
1312 exit(1);
1313 #else
1314 int rc;
1315 struct rlimit rlim;
1316 getrlimit(RLIMIT_STACK, &rlim);
1317 rlim.rlim_cur = stack_size * 1024 * 1024;
1318 rc = setrlimit(RLIMIT_STACK, &rlim);
1319 if (rc != 0)
1320 {
1321 printf("PCRE: setrlimit() failed with error %d\n", rc);
1322 exit(1);
1323 }
1324 op++;
1325 argc--;
1326 #endif
1327 }
1328 #if !defined NOPOSIX
1329 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1330 #endif
1331 else if (strcmp(argv[op], "-C") == 0)
1332 {
1333 int rc;
1334 unsigned long int lrc;
1335 printf("PCRE version %s\n", pcre_version());
1336 printf("Compiled with\n");
1337 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1338 printf(" %sUTF-8 support\n", rc? "" : "No ");
1339 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1340 printf(" %sUnicode properties support\n", rc? "" : "No ");
1341 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1342 /* Note that these values are always the ASCII values, even
1343 in EBCDIC environments. CR is 13 and NL is 10. */
1344 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1345 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1346 (rc == -2)? "ANYCRLF" :
1347 (rc == -1)? "ANY" : "???");
1348 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1349 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1350 "all Unicode newlines");
1351 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1352 printf(" Internal link size = %d\n", rc);
1353 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1354 printf(" POSIX malloc threshold = %d\n", rc);
1355 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1356 printf(" Default match limit = %ld\n", lrc);
1357 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1358 printf(" Default recursion depth limit = %ld\n", lrc);
1359 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1360 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1361 goto EXIT;
1362 }
1363 else if (strcmp(argv[op], "-help") == 0 ||
1364 strcmp(argv[op], "--help") == 0)
1365 {
1366 usage();
1367 goto EXIT;
1368 }
1369 else
1370 {
1371 printf("** Unknown or malformed option %s\n", argv[op]);
1372 usage();
1373 yield = 1;
1374 goto EXIT;
1375 }
1376 op++;
1377 argc--;
1378 }
1379
1380 /* Get the store for the offsets vector, and remember what it was */
1381
1382 size_offsets_max = size_offsets;
1383 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1384 if (offsets == NULL)
1385 {
1386 printf("** Failed to get %d bytes of memory for offsets vector\n",
1387 (int)(size_offsets_max * sizeof(int)));
1388 yield = 1;
1389 goto EXIT;
1390 }
1391
1392 /* Sort out the input and output files */
1393
1394 if (argc > 1)
1395 {
1396 infile = fopen(argv[op], INPUT_MODE);
1397 if (infile == NULL)
1398 {
1399 printf("** Failed to open %s\n", argv[op]);
1400 yield = 1;
1401 goto EXIT;
1402 }
1403 }
1404
1405 if (argc > 2)
1406 {
1407 outfile = fopen(argv[op+1], OUTPUT_MODE);
1408 if (outfile == NULL)
1409 {
1410 printf("** Failed to open %s\n", argv[op+1]);
1411 yield = 1;
1412 goto EXIT;
1413 }
1414 }
1415
1416 /* Set alternative malloc function */
1417
1418 pcre_malloc = new_malloc;
1419 pcre_free = new_free;
1420 pcre_stack_malloc = stack_malloc;
1421 pcre_stack_free = stack_free;
1422
1423 /* Heading line unless quiet, then prompt for first regex if stdin */
1424
1425 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1426
1427 /* Main loop */
1428
1429 while (!done)
1430 {
1431 pcre *re = NULL;
1432 pcre_extra *extra = NULL;
1433
1434 #if !defined NOPOSIX /* There are still compilers that require no indent */
1435 regex_t preg;
1436 int do_posix = 0;
1437 #endif
1438
1439 const char *error;
1440 unsigned char *markptr;
1441 unsigned char *p, *pp, *ppp;
1442 unsigned char *to_file = NULL;
1443 const unsigned char *tables = NULL;
1444 unsigned long int true_size, true_study_size = 0;
1445 size_t size, regex_gotten_store;
1446 int do_allcaps = 0;
1447 int do_mark = 0;
1448 int do_study = 0;
1449 int no_force_study = 0;
1450 int do_debug = debug;
1451 int do_G = 0;
1452 int do_g = 0;
1453 int do_showinfo = showinfo;
1454 int do_showrest = 0;
1455 int do_showcaprest = 0;
1456 int do_flip = 0;
1457 int erroroffset, len, delimiter, poffset;
1458
1459 use_utf8 = 0;
1460 debug_lengths = 1;
1461
1462 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1463 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1464 fflush(outfile);
1465
1466 p = buffer;
1467 while (isspace(*p)) p++;
1468 if (*p == 0) continue;
1469
1470 /* See if the pattern is to be loaded pre-compiled from a file. */
1471
1472 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1473 {
1474 unsigned long int magic, get_options;
1475 uschar sbuf[8];
1476 FILE *f;
1477
1478 p++;
1479 pp = p + (int)strlen((char *)p);
1480 while (isspace(pp[-1])) pp--;
1481 *pp = 0;
1482
1483 f = fopen((char *)p, "rb");
1484 if (f == NULL)
1485 {
1486 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1487 continue;
1488 }
1489
1490 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1491
1492 true_size =
1493 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1494 true_study_size =
1495 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1496
1497 re = (real_pcre *)new_malloc(true_size);
1498 regex_gotten_store = gotten_store;
1499
1500 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1501
1502 magic = ((real_pcre *)re)->magic_number;
1503 if (magic != MAGIC_NUMBER)
1504 {
1505 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1506 {
1507 do_flip = 1;
1508 }
1509 else
1510 {
1511 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1512 fclose(f);
1513 continue;
1514 }
1515 }
1516
1517 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1518 do_flip? " (byte-inverted)" : "", p);
1519
1520 /* Need to know if UTF-8 for printing data strings */
1521
1522 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1523 use_utf8 = (get_options & PCRE_UTF8) != 0;
1524
1525 /* Now see if there is any following study data. */
1526
1527 if (true_study_size != 0)
1528 {
1529 pcre_study_data *psd;
1530
1531 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1532 extra->flags = PCRE_EXTRA_STUDY_DATA;
1533
1534 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1535 extra->study_data = psd;
1536
1537 if (fread(psd, 1, true_study_size, f) != true_study_size)
1538 {
1539 FAIL_READ:
1540 fprintf(outfile, "Failed to read data from %s\n", p);
1541 if (extra != NULL) new_free(extra);
1542 if (re != NULL) new_free(re);
1543 fclose(f);
1544 continue;
1545 }
1546 fprintf(outfile, "Study data loaded from %s\n", p);
1547 do_study = 1; /* To get the data output if requested */
1548 }
1549 else fprintf(outfile, "No study data\n");
1550
1551 fclose(f);
1552 goto SHOW_INFO;
1553 }
1554
1555 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1556 the pattern; if it isn't complete, read more. */
1557
1558 delimiter = *p++;
1559
1560 if (isalnum(delimiter) || delimiter == '\\')
1561 {
1562 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1563 goto SKIP_DATA;
1564 }
1565
1566 pp = p;
1567 poffset = (int)(p - buffer);
1568
1569 for(;;)
1570 {
1571 while (*pp != 0)
1572 {
1573 if (*pp == '\\' && pp[1] != 0) pp++;
1574 else if (*pp == delimiter) break;
1575 pp++;
1576 }
1577 if (*pp != 0) break;
1578 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1579 {
1580 fprintf(outfile, "** Unexpected EOF\n");
1581 done = 1;
1582 goto CONTINUE;
1583 }
1584 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1585 }
1586
1587 /* The buffer may have moved while being extended; reset the start of data
1588 pointer to the correct relative point in the buffer. */
1589
1590 p = buffer + poffset;
1591
1592 /* If the first character after the delimiter is backslash, make
1593 the pattern end with backslash. This is purely to provide a way
1594 of testing for the error message when a pattern ends with backslash. */
1595
1596 if (pp[1] == '\\') *pp++ = '\\';
1597
1598 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1599 for callouts. */
1600
1601 *pp++ = 0;
1602 strcpy((char *)pbuffer, (char *)p);
1603
1604 /* Look for options after final delimiter */
1605
1606 options = 0;
1607 study_options = 0;
1608 log_store = showstore; /* default from command line */
1609
1610 while (*pp != 0)
1611 {
1612 switch (*pp++)
1613 {
1614 case 'f': options |= PCRE_FIRSTLINE; break;
1615 case 'g': do_g = 1; break;
1616 case 'i': options |= PCRE_CASELESS; break;
1617 case 'm': options |= PCRE_MULTILINE; break;
1618 case 's': options |= PCRE_DOTALL; break;
1619 case 'x': options |= PCRE_EXTENDED; break;
1620
1621 case '+':
1622 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1623 break;
1624
1625 case '=': do_allcaps = 1; break;
1626 case 'A': options |= PCRE_ANCHORED; break;
1627 case 'B': do_debug = 1; break;
1628 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1629 case 'D': do_debug = do_showinfo = 1; break;
1630 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1631 case 'F': do_flip = 1; break;
1632 case 'G': do_G = 1; break;
1633 case 'I': do_showinfo = 1; break;
1634 case 'J': options |= PCRE_DUPNAMES; break;
1635 case 'K': do_mark = 1; break;
1636 case 'M': log_store = 1; break;
1637 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1638
1639 #if !defined NOPOSIX
1640 case 'P': do_posix = 1; break;
1641 #endif
1642
1643 case 'S':
1644 if (do_study == 0) do_study = 1; else
1645 {
1646 do_study = 0;
1647 no_force_study = 1;
1648 }
1649 break;
1650
1651 case 'U': options |= PCRE_UNGREEDY; break;
1652 case 'W': options |= PCRE_UCP; break;
1653 case 'X': options |= PCRE_EXTRA; break;
1654 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1655 case 'Z': debug_lengths = 0; break;
1656 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1657 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1658
1659 case 'T':
1660 switch (*pp++)
1661 {
1662 case '0': tables = tables0; break;
1663 case '1': tables = tables1; break;
1664
1665 case '\r':
1666 case '\n':
1667 case ' ':
1668 case 0:
1669 fprintf(outfile, "** Missing table number after /T\n");
1670 goto SKIP_DATA;
1671
1672 default:
1673 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1674 goto SKIP_DATA;
1675 }
1676 break;
1677
1678 case 'L':
1679 ppp = pp;
1680 /* The '\r' test here is so that it works on Windows. */
1681 /* The '0' test is just in case this is an unterminated line. */
1682 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1683 *ppp = 0;
1684 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1685 {
1686 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1687 goto SKIP_DATA;
1688 }
1689 locale_set = 1;
1690 tables = pcre_maketables();
1691 pp = ppp;
1692 break;
1693
1694 case '>':
1695 to_file = pp;
1696 while (*pp != 0) pp++;
1697 while (isspace(pp[-1])) pp--;
1698 *pp = 0;
1699 break;
1700
1701 case '<':
1702 {
1703 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1704 {
1705 options |= PCRE_JAVASCRIPT_COMPAT;
1706 pp += 3;
1707 }
1708 else
1709 {
1710 int x = check_newline(pp, outfile);
1711 if (x == 0) goto SKIP_DATA;
1712 options |= x;
1713 while (*pp++ != '>');
1714 }
1715 }
1716 break;
1717
1718 case '\r': /* So that it works in Windows */
1719 case '\n':
1720 case ' ':
1721 break;
1722
1723 default:
1724 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1725 goto SKIP_DATA;
1726 }
1727 }
1728
1729 /* Handle compiling via the POSIX interface, which doesn't support the
1730 timing, showing, or debugging options, nor the ability to pass over
1731 local character tables. */
1732
1733 #if !defined NOPOSIX
1734 if (posix || do_posix)
1735 {
1736 int rc;
1737 int cflags = 0;
1738
1739 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1740 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1741 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1742 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1743 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1744 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1745 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1746
1747 rc = regcomp(&preg, (char *)p, cflags);
1748
1749 /* Compilation failed; go back for another re, skipping to blank line
1750 if non-interactive. */
1751
1752 if (rc != 0)
1753 {
1754 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1755 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1756 goto SKIP_DATA;
1757 }
1758 }
1759
1760 /* Handle compiling via the native interface */
1761
1762 else
1763 #endif /* !defined NOPOSIX */
1764
1765 {
1766 unsigned long int get_options;
1767
1768 if (timeit > 0)
1769 {
1770 register int i;
1771 clock_t time_taken;
1772 clock_t start_time = clock();
1773 for (i = 0; i < timeit; i++)
1774 {
1775 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1776 if (re != NULL) free(re);
1777 }
1778 time_taken = clock() - start_time;
1779 fprintf(outfile, "Compile time %.4f milliseconds\n",
1780 (((double)time_taken * 1000.0) / (double)timeit) /
1781 (double)CLOCKS_PER_SEC);
1782 }
1783
1784 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1785
1786 /* Compilation failed; go back for another re, skipping to blank line
1787 if non-interactive. */
1788
1789 if (re == NULL)
1790 {
1791 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1792 SKIP_DATA:
1793 if (infile != stdin)
1794 {
1795 for (;;)
1796 {
1797 if (extend_inputline(infile, buffer, NULL) == NULL)
1798 {
1799 done = 1;
1800 goto CONTINUE;
1801 }
1802 len = (int)strlen((char *)buffer);
1803 while (len > 0 && isspace(buffer[len-1])) len--;
1804 if (len == 0) break;
1805 }
1806 fprintf(outfile, "\n");
1807 }
1808 goto CONTINUE;
1809 }
1810
1811 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1812 within the regex; check for this so that we know how to process the data
1813 lines. */
1814
1815 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1816 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1817
1818 /* Print information if required. There are now two info-returning
1819 functions. The old one has a limited interface and returns only limited
1820 data. Check that it agrees with the newer one. */
1821
1822 if (log_store)
1823 fprintf(outfile, "Memory allocation (code space): %d\n",
1824 (int)(gotten_store -
1825 sizeof(real_pcre) -
1826 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1827
1828 /* Extract the size for possible writing before possibly flipping it,
1829 and remember the store that was got. */
1830
1831 true_size = ((real_pcre *)re)->size;
1832 regex_gotten_store = gotten_store;
1833
1834 /* If -s or /S was present, study the regex to generate additional info to
1835 help with the matching, unless the pattern has the SS option, which
1836 suppresses the effect of /S (used for a few test patterns where studying is
1837 never sensible). */
1838
1839 if (do_study || (force_study && !no_force_study))
1840 {
1841 if (timeit > 0)
1842 {
1843 register int i;
1844 clock_t time_taken;
1845 clock_t start_time = clock();
1846 for (i = 0; i < timeit; i++)
1847 extra = pcre_study(re, study_options, &error);
1848 time_taken = clock() - start_time;
1849 if (extra != NULL) free(extra);
1850 fprintf(outfile, " Study time %.4f milliseconds\n",
1851 (((double)time_taken * 1000.0) / (double)timeit) /
1852 (double)CLOCKS_PER_SEC);
1853 }
1854 extra = pcre_study(re, study_options, &error);
1855 if (error != NULL)
1856 fprintf(outfile, "Failed to study: %s\n", error);
1857 else if (extra != NULL)
1858 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1859 }
1860
1861 /* If /K was present, we set up for handling MARK data. */
1862
1863 if (do_mark)
1864 {
1865 if (extra == NULL)
1866 {
1867 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1868 extra->flags = 0;
1869 }
1870 extra->mark = &markptr;
1871 extra->flags |= PCRE_EXTRA_MARK;
1872 }
1873
1874 /* If the 'F' option was present, we flip the bytes of all the integer
1875 fields in the regex data block and the study block. This is to make it
1876 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1877 compiled on a different architecture. */
1878
1879 if (do_flip)
1880 {
1881 real_pcre *rre = (real_pcre *)re;
1882 rre->magic_number =
1883 byteflip(rre->magic_number, sizeof(rre->magic_number));
1884 rre->size = byteflip(rre->size, sizeof(rre->size));
1885 rre->options = byteflip(rre->options, sizeof(rre->options));
1886 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1887 rre->top_bracket =
1888 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1889 rre->top_backref =
1890 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1891 rre->first_byte =
1892 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1893 rre->req_byte =
1894 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1895 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1896 sizeof(rre->name_table_offset));
1897 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1898 sizeof(rre->name_entry_size));
1899 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1900 sizeof(rre->name_count));
1901
1902 if (extra != NULL)
1903 {
1904 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1905 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1906 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1907 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1908 }
1909 }
1910
1911 /* Extract information from the compiled data if required */
1912
1913 SHOW_INFO:
1914
1915 if (do_debug)
1916 {
1917 fprintf(outfile, "------------------------------------------------------------------\n");
1918 pcre_printint(re, outfile, debug_lengths);
1919 }
1920
1921 /* We already have the options in get_options (see above) */
1922
1923 if (do_showinfo)
1924 {
1925 unsigned long int all_options;
1926 #if !defined NOINFOCHECK
1927 int old_first_char, old_options, old_count;
1928 #endif
1929 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1930 hascrorlf;
1931 int nameentrysize, namecount;
1932 const uschar *nametable;
1933
1934 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1935 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1936 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1937 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1938 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1939 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1940 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1941 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1942 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1943 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1944 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1945
1946 #if !defined NOINFOCHECK
1947 old_count = pcre_info(re, &old_options, &old_first_char);
1948 if (count < 0) fprintf(outfile,
1949 "Error %d from pcre_info()\n", count);
1950 else
1951 {
1952 if (old_count != count) fprintf(outfile,
1953 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1954 old_count);
1955
1956 if (old_first_char != first_char) fprintf(outfile,
1957 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1958 first_char, old_first_char);
1959
1960 if (old_options != (int)get_options) fprintf(outfile,
1961 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1962 get_options, old_options);
1963 }
1964 #endif
1965
1966 if (size != regex_gotten_store) fprintf(outfile,
1967 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1968 (int)size, (int)regex_gotten_store);
1969
1970 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1971 if (backrefmax > 0)
1972 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1973
1974 if (namecount > 0)
1975 {
1976 fprintf(outfile, "Named capturing subpatterns:\n");
1977 while (namecount-- > 0)
1978 {
1979 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1980 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1981 GET2(nametable, 0));
1982 nametable += nameentrysize;
1983 }
1984 }
1985
1986 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1987 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1988
1989 all_options = ((real_pcre *)re)->options;
1990 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1991
1992 if (get_options == 0) fprintf(outfile, "No options\n");
1993 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1994 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1995 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1996 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1997 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1998 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1999 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2000 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2001 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2002 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2003 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2004 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2005 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2006 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2007 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2008 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2009 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2010 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2011
2012 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2013
2014 switch (get_options & PCRE_NEWLINE_BITS)
2015 {
2016 case PCRE_NEWLINE_CR:
2017 fprintf(outfile, "Forced newline sequence: CR\n");
2018 break;
2019
2020 case PCRE_NEWLINE_LF:
2021 fprintf(outfile, "Forced newline sequence: LF\n");
2022 break;
2023
2024 case PCRE_NEWLINE_CRLF:
2025 fprintf(outfile, "Forced newline sequence: CRLF\n");
2026 break;
2027
2028 case PCRE_NEWLINE_ANYCRLF:
2029 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2030 break;
2031
2032 case PCRE_NEWLINE_ANY:
2033 fprintf(outfile, "Forced newline sequence: ANY\n");
2034 break;
2035
2036 default:
2037 break;
2038 }
2039
2040 if (first_char == -1)
2041 {
2042 fprintf(outfile, "First char at start or follows newline\n");
2043 }
2044 else if (first_char < 0)
2045 {
2046 fprintf(outfile, "No first char\n");
2047 }
2048 else
2049 {
2050 int ch = first_char & 255;
2051 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2052 "" : " (caseless)";
2053 if (PRINTHEX(ch))
2054 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2055 else
2056 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2057 }
2058
2059 if (need_char < 0)
2060 {
2061 fprintf(outfile, "No need char\n");
2062 }
2063 else
2064 {
2065 int ch = need_char & 255;
2066 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2067 "" : " (caseless)";
2068 if (PRINTHEX(ch))
2069 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2070 else
2071 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2072 }
2073
2074 /* Don't output study size; at present it is in any case a fixed
2075 value, but it varies, depending on the computer architecture, and
2076 so messes up the test suite. (And with the /F option, it might be
2077 flipped.) If study was forced by an external -s, don't show this
2078 information unless -i or -d was also present. This means that, except
2079 when auto-callouts are involved, the output from runs with and without
2080 -s should be identical. */
2081
2082 if (do_study || (force_study && showinfo && !no_force_study))
2083 {
2084 if (extra == NULL)
2085 fprintf(outfile, "Study returned NULL\n");
2086 else
2087 {
2088 uschar *start_bits = NULL;
2089 int minlength;
2090
2091 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2092 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2093
2094 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2095 if (start_bits == NULL)
2096 fprintf(outfile, "No set of starting bytes\n");
2097 else
2098 {
2099 int i;
2100 int c = 24;
2101 fprintf(outfile, "Starting byte set: ");
2102 for (i = 0; i < 256; i++)
2103 {
2104 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2105 {
2106 if (c > 75)
2107 {
2108 fprintf(outfile, "\n ");
2109 c = 2;
2110 }
2111 if (PRINTHEX(i) && i != ' ')
2112 {
2113 fprintf(outfile, "%c ", i);
2114 c += 2;
2115 }
2116 else
2117 {
2118 fprintf(outfile, "\\x%02x ", i);
2119 c += 5;
2120 }
2121 }
2122 }
2123 fprintf(outfile, "\n");
2124 }
2125 }
2126 }
2127 }
2128
2129 /* If the '>' option was present, we write out the regex to a file, and
2130 that is all. The first 8 bytes of the file are the regex length and then
2131 the study length, in big-endian order. */
2132
2133 if (to_file != NULL)
2134 {
2135 FILE *f = fopen((char *)to_file, "wb");
2136 if (f == NULL)
2137 {
2138 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2139 }
2140 else
2141 {
2142 uschar sbuf[8];
2143 sbuf[0] = (uschar)((true_size >> 24) & 255);
2144 sbuf[1] = (uschar)((true_size >> 16) & 255);
2145 sbuf[2] = (uschar)((true_size >> 8) & 255);
2146 sbuf[3] = (uschar)((true_size) & 255);
2147
2148 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2149 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2150 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2151 sbuf[7] = (uschar)((true_study_size) & 255);
2152
2153 if (fwrite(sbuf, 1, 8, f) < 8 ||
2154 fwrite(re, 1, true_size, f) < true_size)
2155 {
2156 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2157 }
2158 else
2159 {
2160 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2161
2162 /* If there is study data, write it. */
2163
2164 if (extra != NULL)
2165 {
2166 if (fwrite(extra->study_data, 1, true_study_size, f) <
2167 true_study_size)
2168 {
2169 fprintf(outfile, "Write error on %s: %s\n", to_file,
2170 strerror(errno));
2171 }
2172 else fprintf(outfile, "Study data written to %s\n", to_file);
2173 }
2174 }
2175 fclose(f);
2176 }
2177
2178 new_free(re);
2179 if (extra != NULL) new_free(extra);
2180 if (locale_set)
2181 {
2182 new_free((void *)tables);
2183 setlocale(LC_CTYPE, "C");
2184 locale_set = 0;
2185 }
2186 continue; /* With next regex */
2187 }
2188 } /* End of non-POSIX compile */
2189
2190 /* Read data lines and test them */
2191
2192 for (;;)
2193 {
2194 uschar *q;
2195 uschar *bptr;
2196 int *use_offsets = offsets;
2197 int use_size_offsets = size_offsets;
2198 int callout_data = 0;
2199 int callout_data_set = 0;
2200 int count, c;
2201 int copystrings = 0;
2202 int find_match_limit = default_find_match_limit;
2203 int getstrings = 0;
2204 int getlist = 0;
2205 int gmatched = 0;
2206 int start_offset = 0;
2207 int start_offset_sign = 1;
2208 int g_notempty = 0;
2209 int use_dfa = 0;
2210
2211 options = 0;
2212
2213 *copynames = 0;
2214 *getnames = 0;
2215
2216 copynamesptr = copynames;
2217 getnamesptr = getnames;
2218
2219 pcre_callout = callout;
2220 first_callout = 1;
2221 last_callout_mark = NULL;
2222 callout_extra = 0;
2223 callout_count = 0;
2224 callout_fail_count = 999999;
2225 callout_fail_id = -1;
2226 show_malloc = 0;
2227
2228 if (extra != NULL) extra->flags &=
2229 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2230
2231 len = 0;
2232 for (;;)
2233 {
2234 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2235 {
2236 if (len > 0) /* Reached EOF without hitting a newline */
2237 {
2238 fprintf(outfile, "\n");
2239 break;
2240 }
2241 done = 1;
2242 goto CONTINUE;
2243 }
2244 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2245 len = (int)strlen((char *)buffer);
2246 if (buffer[len-1] == '\n') break;
2247 }
2248
2249 while (len > 0 && isspace(buffer[len-1])) len--;
2250 buffer[len] = 0;
2251 if (len == 0) break;
2252
2253 p = buffer;
2254 while (isspace(*p)) p++;
2255
2256 bptr = q = dbuffer;
2257 while ((c = *p++) != 0)
2258 {
2259 int i = 0;
2260 int n = 0;
2261
2262 if (c == '\\') switch ((c = *p++))
2263 {
2264 case 'a': c = 7; break;
2265 case 'b': c = '\b'; break;
2266 case 'e': c = 27; break;
2267 case 'f': c = '\f'; break;
2268 case 'n': c = '\n'; break;
2269 case 'r': c = '\r'; break;
2270 case 't': c = '\t'; break;
2271 case 'v': c = '\v'; break;
2272
2273 case '0': case '1': case '2': case '3':
2274 case '4': case '5': case '6': case '7':
2275 c -= '0';
2276 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2277 c = c * 8 + *p++ - '0';
2278
2279 #if !defined NOUTF8
2280 if (use_utf8 && c > 255)
2281 {
2282 unsigned char buff8[8];
2283 int ii, utn;
2284 utn = ord2utf8(c, buff8);
2285 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2286 c = buff8[ii]; /* Last byte */
2287 }
2288 #endif
2289 break;
2290
2291 case 'x':
2292
2293 /* Handle \x{..} specially - new Perl thing for utf8 */
2294
2295 #if !defined NOUTF8
2296 if (*p == '{')
2297 {
2298 unsigned char *pt = p;
2299 c = 0;
2300 while (isxdigit(*(++pt)))
2301 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2302 if (*pt == '}')
2303 {
2304 unsigned char buff8[8];
2305 int ii, utn;
2306 if (use_utf8)
2307 {
2308 utn = ord2utf8(c, buff8);
2309 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2310 c = buff8[ii]; /* Last byte */
2311 }
2312 else
2313 {
2314 if (c > 255)
2315 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2316 "UTF-8 mode is not enabled.\n"
2317 "** Truncation will probably give the wrong result.\n", c);
2318 }
2319 p = pt + 1;
2320 break;
2321 }
2322 /* Not correct form; fall through */
2323 }
2324 #endif
2325
2326 /* Ordinary \x */
2327
2328 c = 0;
2329 while (i++ < 2 && isxdigit(*p))
2330 {
2331 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2332 p++;
2333 }
2334 break;
2335
2336 case 0: /* \ followed by EOF allows for an empty line */
2337 p--;
2338 continue;
2339
2340 case '>':
2341 if (*p == '-')
2342 {
2343 start_offset_sign = -1;
2344 p++;
2345 }
2346 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2347 start_offset *= start_offset_sign;
2348 continue;
2349
2350 case 'A': /* Option setting */
2351 options |= PCRE_ANCHORED;
2352 continue;
2353
2354 case 'B':
2355 options |= PCRE_NOTBOL;
2356 continue;
2357
2358 case 'C':
2359 if (isdigit(*p)) /* Set copy string */
2360 {
2361 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2362 copystrings |= 1 << n;
2363 }
2364 else if (isalnum(*p))
2365 {
2366 uschar *npp = copynamesptr;
2367 while (isalnum(*p)) *npp++ = *p++;
2368 *npp++ = 0;
2369 *npp = 0;
2370 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2371 if (n < 0)
2372 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2373 copynamesptr = npp;
2374 }
2375 else if (*p == '+')
2376 {
2377 callout_extra = 1;
2378 p++;
2379 }
2380 else if (*p == '-')
2381 {
2382 pcre_callout = NULL;
2383 p++;
2384 }
2385 else if (*p == '!')
2386 {
2387 callout_fail_id = 0;
2388 p++;
2389 while(isdigit(*p))
2390 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2391 callout_fail_count = 0;
2392 if (*p == '!')
2393 {
2394 p++;
2395 while(isdigit(*p))
2396 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2397 }
2398 }
2399 else if (*p == '*')
2400 {
2401 int sign = 1;
2402 callout_data = 0;
2403 if (*(++p) == '-') { sign = -1; p++; }
2404 while(isdigit(*p))
2405 callout_data = callout_data * 10 + *p++ - '0';
2406 callout_data *= sign;
2407 callout_data_set = 1;
2408 }
2409 continue;
2410
2411 #if !defined NODFA
2412 case 'D':
2413 #if !defined NOPOSIX
2414 if (posix || do_posix)
2415 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2416 else
2417 #endif
2418 use_dfa = 1;
2419 continue;
2420 #endif
2421
2422 #if !defined NODFA
2423 case 'F':
2424 options |= PCRE_DFA_SHORTEST;
2425 continue;
2426 #endif
2427
2428 case 'G':
2429 if (isdigit(*p))
2430 {
2431 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2432 getstrings |= 1 << n;
2433 }
2434 else if (isalnum(*p))
2435 {
2436 uschar *npp = getnamesptr;
2437 while (isalnum(*p)) *npp++ = *p++;
2438 *npp++ = 0;
2439 *npp = 0;
2440 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2441 if (n < 0)
2442 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2443 getnamesptr = npp;
2444 }
2445 continue;
2446
2447 case 'L':
2448 getlist = 1;
2449 continue;
2450
2451 case 'M':
2452 find_match_limit = 1;
2453 continue;
2454
2455 case 'N':
2456 if ((options & PCRE_NOTEMPTY) != 0)
2457 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2458 else
2459 options |= PCRE_NOTEMPTY;
2460 continue;
2461
2462 case 'O':
2463 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2464 if (n > size_offsets_max)
2465 {
2466 size_offsets_max = n;
2467 free(offsets);
2468 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2469 if (offsets == NULL)
2470 {
2471 printf("** Failed to get %d bytes of memory for offsets vector\n",
2472 (int)(size_offsets_max * sizeof(int)));
2473 yield = 1;
2474 goto EXIT;
2475 }
2476 }
2477 use_size_offsets = n;
2478 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2479 continue;
2480
2481 case 'P':
2482 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2483 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2484 continue;
2485
2486 case 'Q':
2487 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2488 if (extra == NULL)
2489 {
2490 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2491 extra->flags = 0;
2492 }
2493 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2494 extra->match_limit_recursion = n;
2495 continue;
2496
2497 case 'q':
2498 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2499 if (extra == NULL)
2500 {
2501 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2502 extra->flags = 0;
2503 }
2504 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2505 extra->match_limit = n;
2506 continue;
2507
2508 #if !defined NODFA
2509 case 'R':
2510 options |= PCRE_DFA_RESTART;
2511 continue;
2512 #endif
2513
2514 case 'S':
2515 show_malloc = 1;
2516 continue;
2517
2518 case 'Y':
2519 options |= PCRE_NO_START_OPTIMIZE;
2520 continue;
2521
2522 case 'Z':
2523 options |= PCRE_NOTEOL;
2524 continue;
2525
2526 case '?':
2527 options |= PCRE_NO_UTF8_CHECK;
2528 continue;
2529
2530 case '<':
2531 {
2532 int x = check_newline(p, outfile);
2533 if (x == 0) goto NEXT_DATA;
2534 options |= x;
2535 while (*p++ != '>');
2536 }
2537 continue;
2538 }
2539 *q++ = c;
2540 }
2541 *q = 0;
2542 len = (int)(q - dbuffer);
2543
2544 /* Move the data to the end of the buffer so that a read over the end of
2545 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2546 we are using the POSIX interface, we must include the terminating zero. */
2547
2548 #if !defined NOPOSIX
2549 if (posix || do_posix)
2550 {
2551 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2552 bptr += buffer_size - len - 1;
2553 }
2554 else
2555 #endif
2556 {
2557 memmove(bptr + buffer_size - len, bptr, len);
2558 bptr += buffer_size - len;
2559 }
2560
2561 if ((all_use_dfa || use_dfa) && find_match_limit)
2562 {
2563 printf("**Match limit not relevant for DFA matching: ignored\n");
2564 find_match_limit = 0;
2565 }
2566
2567 /* Handle matching via the POSIX interface, which does not
2568 support timing or playing with the match limit or callout data. */
2569
2570 #if !defined NOPOSIX
2571 if (posix || do_posix)
2572 {
2573 int rc;
2574 int eflags = 0;
2575 regmatch_t *pmatch = NULL;
2576 if (use_size_offsets > 0)
2577 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2578 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2579 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2580 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2581
2582 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2583
2584 if (rc != 0)
2585 {
2586 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2587 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2588 }
2589 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2590 != 0)
2591 {
2592 fprintf(outfile, "Matched with REG_NOSUB\n");
2593 }
2594 else
2595 {
2596 size_t i;
2597 for (i = 0; i < (size_t)use_size_offsets; i++)
2598 {
2599 if (pmatch[i].rm_so >= 0)
2600 {
2601 fprintf(outfile, "%2d: ", (int)i);
2602 (void)pchars(dbuffer + pmatch[i].rm_so,
2603 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2604 fprintf(outfile, "\n");
2605 if (do_showcaprest || (i == 0 && do_showrest))
2606 {
2607 fprintf(outfile, "%2d+ ", (int)i);
2608 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2609 outfile);
2610 fprintf(outfile, "\n");
2611 }
2612 }
2613 }
2614 }
2615 free(pmatch);
2616 }
2617
2618 /* Handle matching via the native interface - repeats for /g and /G */
2619
2620 else
2621 #endif /* !defined NOPOSIX */
2622
2623 for (;; gmatched++) /* Loop for /g or /G */
2624 {
2625 markptr = NULL;
2626
2627 if (timeitm > 0)
2628 {
2629 register int i;
2630 clock_t time_taken;
2631 clock_t start_time = clock();
2632
2633 #if !defined NODFA
2634 if (all_use_dfa || use_dfa)
2635 {
2636 int workspace[1000];
2637 for (i = 0; i < timeitm; i++)
2638 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639 options | g_notempty, use_offsets, use_size_offsets, workspace,
2640 sizeof(workspace)/sizeof(int));
2641 }
2642 else
2643 #endif
2644
2645 for (i = 0; i < timeitm; i++)
2646 count = pcre_exec(re, extra, (char *)bptr, len,
2647 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2648
2649 time_taken = clock() - start_time;
2650 fprintf(outfile, "Execute time %.4f milliseconds\n",
2651 (((double)time_taken * 1000.0) / (double)timeitm) /
2652 (double)CLOCKS_PER_SEC);
2653 }
2654
2655 /* If find_match_limit is set, we want to do repeated matches with
2656 varying limits in order to find the minimum value for the match limit and
2657 for the recursion limit. */
2658
2659 if (find_match_limit)
2660 {
2661 if (extra == NULL)
2662 {
2663 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2664 extra->flags = 0;
2665 }
2666
2667 (void)check_match_limit(re, extra, bptr, len, start_offset,
2668 options|g_notempty, use_offsets, use_size_offsets,
2669 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2670 PCRE_ERROR_MATCHLIMIT, "match()");
2671
2672 count = check_match_limit(re, extra, bptr, len, start_offset,
2673 options|g_notempty, use_offsets, use_size_offsets,
2674 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2675 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2676 }
2677
2678 /* If callout_data is set, use the interface with additional data */
2679
2680 else if (callout_data_set)
2681 {
2682 if (extra == NULL)
2683 {
2684 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2685 extra->flags = 0;
2686 }
2687 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2688 extra->callout_data = &callout_data;
2689 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2690 options | g_notempty, use_offsets, use_size_offsets);
2691 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2692 }
2693
2694 /* The normal case is just to do the match once, with the default
2695 value of match_limit. */
2696
2697 #if !defined NODFA
2698 else if (all_use_dfa || use_dfa)
2699 {
2700 int workspace[1000];
2701 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2702 options | g_notempty, use_offsets, use_size_offsets, workspace,
2703 sizeof(workspace)/sizeof(int));
2704 if (count == 0)
2705 {
2706 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2707 count = use_size_offsets/2;
2708 }
2709 }
2710 #endif
2711
2712 else
2713 {
2714 count = pcre_exec(re, extra, (char *)bptr, len,
2715 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2716 if (count == 0)
2717 {
2718 fprintf(outfile, "Matched, but too many substrings\n");
2719 count = use_size_offsets/3;
2720 }
2721 }
2722
2723 /* Matched */
2724
2725 if (count >= 0)
2726 {
2727 int i, maxcount;
2728
2729 #if !defined NODFA
2730 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2731 #endif
2732 maxcount = use_size_offsets/3;
2733
2734 /* This is a check against a lunatic return value. */
2735
2736 if (count > maxcount)
2737 {
2738 fprintf(outfile,
2739 "** PCRE error: returned count %d is too big for offset size %d\n",
2740 count, use_size_offsets);
2741 count = use_size_offsets/3;
2742 if (do_g || do_G)
2743 {
2744 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2745 do_g = do_G = FALSE; /* Break g/G loop */
2746 }
2747 }
2748
2749 /* do_allcaps requests showing of all captures in the pattern, to check
2750 unset ones at the end. */
2751
2752 if (do_allcaps)
2753 {
2754 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2755 count++; /* Allow for full match */
2756 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2757 }
2758
2759 /* Output the captured substrings */
2760
2761 for (i = 0; i < count * 2; i += 2)
2762 {
2763 if (use_offsets[i] < 0)
2764 {
2765 if (use_offsets[i] != -1)
2766 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2767 use_offsets[i], i);
2768 if (use_offsets[i+1] != -1)
2769 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2770 use_offsets[i+1], i+1);
2771 fprintf(outfile, "%2d: <unset>\n", i/2);
2772 }
2773 else
2774 {
2775 fprintf(outfile, "%2d: ", i/2);
2776 (void)pchars(bptr + use_offsets[i],
2777 use_offsets[i+1] - use_offsets[i], outfile);
2778 fprintf(outfile, "\n");
2779 if (do_showcaprest || (i == 0 && do_showrest))
2780 {
2781 fprintf(outfile, "%2d+ ", i/2);
2782 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2783 outfile);
2784 fprintf(outfile, "\n");
2785 }
2786 }
2787 }
2788
2789 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2790
2791 for (i = 0; i < 32; i++)
2792 {
2793 if ((copystrings & (1 << i)) != 0)
2794 {
2795 char copybuffer[256];
2796 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2797 i, copybuffer, sizeof(copybuffer));
2798 if (rc < 0)
2799 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2800 else
2801 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2802 }
2803 }
2804
2805 for (copynamesptr = copynames;
2806 *copynamesptr != 0;
2807 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2808 {
2809 char copybuffer[256];
2810 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2811 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2812 if (rc < 0)
2813 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2814 else
2815 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2816 }
2817
2818 for (i = 0; i < 32; i++)
2819 {
2820 if ((getstrings & (1 << i)) != 0)
2821 {
2822 const char *substring;
2823 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2824 i, &substring);
2825 if (rc < 0)
2826 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2827 else
2828 {
2829 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2830 pcre_free_substring(substring);
2831 }
2832 }
2833 }
2834
2835 for (getnamesptr = getnames;
2836 *getnamesptr != 0;
2837 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2838 {
2839 const char *substring;
2840 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2841 count, (char *)getnamesptr, &substring);
2842 if (rc < 0)
2843 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2844 else
2845 {
2846 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2847 pcre_free_substring(substring);
2848 }
2849 }
2850
2851 if (getlist)
2852 {
2853 const char **stringlist;
2854 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2855 &stringlist);
2856 if (rc < 0)
2857 fprintf(outfile, "get substring list failed %d\n", rc);
2858 else
2859 {
2860 for (i = 0; i < count; i++)
2861 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2862 if (stringlist[i] != NULL)
2863 fprintf(outfile, "string list not terminated by NULL\n");
2864 /* free((void *)stringlist); */
2865 pcre_free_substring_list(stringlist);
2866 }
2867 }
2868 }
2869
2870 /* There was a partial match */
2871
2872 else if (count == PCRE_ERROR_PARTIAL)
2873 {
2874 if (markptr == NULL) fprintf(outfile, "Partial match");
2875 else fprintf(outfile, "Partial match, mark=%s", markptr);
2876 if (use_size_offsets > 1)
2877 {
2878 fprintf(outfile, ": ");
2879 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2880 outfile);
2881 }
2882 fprintf(outfile, "\n");
2883 break; /* Out of the /g loop */
2884 }
2885
2886 /* Failed to match. If this is a /g or /G loop and we previously set
2887 g_notempty after a null match, this is not necessarily the end. We want
2888 to advance the start offset, and continue. We won't be at the end of the
2889 string - that was checked before setting g_notempty.
2890
2891 Complication arises in the case when the newline convention is "any",
2892 "crlf", or "anycrlf". If the previous match was at the end of a line
2893 terminated by CRLF, an advance of one character just passes the \r,
2894 whereas we should prefer the longer newline sequence, as does the code in
2895 pcre_exec(). Fudge the offset value to achieve this. We check for a
2896 newline setting in the pattern; if none was set, use pcre_config() to
2897 find the default.
2898
2899 Otherwise, in the case of UTF-8 matching, the advance must be one
2900 character, not one byte. */
2901
2902 else
2903 {
2904 if (g_notempty != 0)
2905 {
2906 int onechar = 1;
2907 unsigned int obits = ((real_pcre *)re)->options;
2908 use_offsets[0] = start_offset;
2909 if ((obits & PCRE_NEWLINE_BITS) == 0)
2910 {
2911 int d;
2912 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2913 /* Note that these values are always the ASCII ones, even in
2914 EBCDIC environments. CR = 13, NL = 10. */
2915 obits = (d == 13)? PCRE_NEWLINE_CR :
2916 (d == 10)? PCRE_NEWLINE_LF :
2917 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2918 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2919 (d == -1)? PCRE_NEWLINE_ANY : 0;
2920 }
2921 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2922 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2923 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2924 &&
2925 start_offset < len - 1 &&
2926 bptr[start_offset] == '\r' &&
2927 bptr[start_offset+1] == '\n')
2928 onechar++;
2929 else if (use_utf8)
2930 {
2931 while (start_offset + onechar < len)
2932 {
2933 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2934 onechar++;
2935 }
2936 }
2937 use_offsets[1] = start_offset + onechar;
2938 }
2939 else
2940 {
2941 switch(count)
2942 {
2943 case PCRE_ERROR_NOMATCH:
2944 if (gmatched == 0)
2945 {
2946 if (markptr == NULL) fprintf(outfile, "No match\n");
2947 else fprintf(outfile, "No match, mark = %s\n", markptr);
2948 }
2949 break;
2950
2951 case PCRE_ERROR_BADUTF8:
2952 case PCRE_ERROR_SHORTUTF8:
2953 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2954 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2955 if (use_size_offsets >= 2)
2956 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2957 use_offsets[1]);
2958 fprintf(outfile, "\n");
2959 break;
2960
2961 default:
2962 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2963 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2964 else
2965 fprintf(outfile, "Error %d (Unexpected value)\n", count);
2966 break;
2967 }
2968
2969 break; /* Out of the /g loop */
2970 }
2971 }
2972
2973 /* If not /g or /G we are done */
2974
2975 if (!do_g && !do_G) break;
2976
2977 /* If we have matched an empty string, first check to see if we are at
2978 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2979 Perl's /g option does. This turns out to be rather cunning. First we set
2980 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2981 same point. If this fails (picked up above) we advance to the next
2982 character. */
2983
2984 g_notempty = 0;
2985
2986 if (use_offsets[0] == use_offsets[1])
2987 {
2988 if (use_offsets[0] == len) break;
2989 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2990 }
2991
2992 /* For /g, update the start offset, leaving the rest alone */
2993
2994 if (do_g) start_offset = use_offsets[1];
2995
2996 /* For /G, update the pointer and length */
2997
2998 else
2999 {
3000 bptr += use_offsets[1];
3001 len -= use_offsets[1];
3002 }
3003 } /* End of loop for /g and /G */
3004
3005 NEXT_DATA: continue;
3006 } /* End of loop for data lines */
3007
3008 CONTINUE:
3009
3010 #if !defined NOPOSIX
3011 if (posix || do_posix) regfree(&preg);
3012 #endif
3013
3014 if (re != NULL) new_free(re);
3015 if (extra != NULL) new_free(extra);
3016 if (locale_set)
3017 {
3018 new_free((void *)tables);
3019 setlocale(LC_CTYPE, "C");
3020 locale_set = 0;
3021 }
3022 }
3023
3024 if (infile == stdin) fprintf(outfile, "\n");
3025
3026 EXIT:
3027
3028 if (infile != NULL && infile != stdin) fclose(infile);
3029 if (outfile != NULL && outfile != stdout) fclose(outfile);
3030
3031 free(buffer);
3032 free(dbuffer);
3033 free(pbuffer);
3034 free(offsets);
3035
3036 return yield;
3037 }
3038
3039 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5