/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 775 - (show annotations)
Thu Dec 1 10:35:30 2011 UTC (7 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 94106 byte(s)
Error occurred while calculating annotation data.
Fix pcretest not forgetting JIT after it has been used once.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utf8_char_sizes utf8_char_sizes
122 #define _pcre_utt utt
123 #define _pcre_utt_size utt_size
124 #define _pcre_utt_names utt_names
125 #define _pcre_OP_lengths OP_lengths
126
127 #include "pcre_tables.c"
128
129 /* We also need the pcre_printint() function for printing out compiled
130 patterns. This function is in a separate file so that it can be included in
131 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 know which case is being compiled. */
133
134 #define COMPILING_PCRETEST
135 #include "pcre_printint.src"
136
137 /* The definition of the macro PRINTABLE, which determines whether to print an
138 output character as-is or as a hex value when showing compiled patterns, is
139 contained in the printint.src file. We use it here also, in cases when the
140 locale has not been explicitly changed, so as to get consistent output from
141 systems that differ in their output from isprint() even in the "C" locale. */
142
143 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144
145 /* It is possible to compile this test program without including support for
146 testing the POSIX interface, though this is not available via the standard
147 Makefile. */
148
149 #if !defined NOPOSIX
150 #include "pcreposix.h"
151 #endif
152
153 /* It is also possible, for the benefit of the version currently imported into
154 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155 interface to the DFA matcher (NODFA), and without the doublecheck of the old
156 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157 UTF8 support if PCRE is built without it. */
158
159 #ifndef SUPPORT_UTF8
160 #ifndef NOUTF8
161 #define NOUTF8
162 #endif
163 #endif
164
165
166 /* Other parameters */
167
168 #ifndef CLOCKS_PER_SEC
169 #ifdef CLK_TCK
170 #define CLOCKS_PER_SEC CLK_TCK
171 #else
172 #define CLOCKS_PER_SEC 100
173 #endif
174 #endif
175
176 /* This is the default loop count for timing. */
177
178 #define LOOPREPEAT 500000
179
180 /* Static variables */
181
182 static FILE *outfile; /* where callout() and the malloc/free wrappers write */
183 static int log_store = 0;
184 static int callout_count; /* incremented by callout() for callouts numbered callout_fail_id */
185 static int callout_extra; /* non-zero: callout() also lists the captured substrings */
186 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this */
187 static int callout_fail_id; /* callout number that is subject to the fail count */
188 static int debug_lengths;
189 static int first_callout; /* non-zero until callout() has re-printed the subject once */
190 static int locale_set = 0; /* selects isprint() rather than PRINTABLE() in PRINTHEX */
191 static int show_malloc; /* non-zero: the malloc/free wrappers trace to outfile */
192 static int use_utf8; /* non-zero: pchars() decodes its input as UTF-8 */
193 static size_t gotten_store; /* size of the latest new_malloc() request */
194 static size_t first_gotten_store = 0; /* size of the first new_malloc() request seen while this was 0 */
195 static const unsigned char *last_callout_mark = NULL; /* mark most recently reported by callout() */
196
197 /* The buffers grow automatically if very long input lines are encountered. */
198
199 static int buffer_size = 50000; /* current size of each of the three buffers */
200 static uschar *buffer = NULL; /* input line buffer, filled by extend_inputline() */
201 static uschar *dbuffer = NULL; /* resized with buffer; contents are not kept when it grows */
202 static uschar *pbuffer = NULL; /* copy of the input used by callout(); kept when it grows */
203
204 /* Textual explanations for runtime error codes */
205
/* One entry per runtime error code, in code order starting from "no error".
NOTE(review): presumably indexed by the negated PCRE_ERROR_xxx value returned
by the matching functions - confirm against the code that reads this table.
A NULL entry marks a code that, according to its comment, is either reported
by dedicated code elsewhere or never returned at all. */
206 static const char *errtexts[] = {
207 NULL, /* 0 is no error */
208 NULL, /* NOMATCH is handled specially */
209 "NULL argument passed",
210 "bad option value",
211 "magic number missing",
212 "unknown opcode - pattern overwritten?",
213 "no more memory",
214 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
215 "match limit exceeded",
216 "callout error code",
217 NULL, /* BADUTF8 is handled specially */
218 "bad UTF-8 offset",
219 NULL, /* PARTIAL is handled specially */
220 "not used - internal error",
221 "internal error - pattern overwritten?",
222 "bad count value",
223 "item unsupported for DFA matching",
224 "backreference condition or recursion test not supported for DFA matching",
225 "match limit not supported for DFA matching",
226 "workspace size exceeded in DFA matching",
227 "too much recursion for DFA matching",
228 "recursion limit exceeded",
229 "not used - internal error",
230 "invalid combination of newline options",
231 "bad offset value",
232 NULL, /* SHORTUTF8 is handled specially */
233 "nested recursion at the same subject position",
234 "JIT stack limit reached"
235 };
236
237
238 /*************************************************
239 * Alternate character tables *
240 *************************************************/
241
242 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
243 using the default tables of the library. However, the T option can be used to
244 select alternate sets of tables, for different kinds of testing. Note also that
245 the L (locale) option also adjusts the tables. */
246
247 /* This is the set of tables distributed as default with PCRE. It recognizes
248 only ASCII characters. */
249
/* Layout of tables0, 1088 bytes in all, in this order:
     256 bytes  lower-casing table, indexed by character value
     256 bytes  case-flipping table, indexed by character value
     320 bytes  ten 32-byte class bit maps (one bit per character value)
     256 bytes  character-type flags, indexed by character value
Only values below 128 have any case mapping, class membership or type flag
(the single exception is that the graph, print and cntrl maps as written
cover or exclude some values by whole bytes - see the data itself). */
250 static const unsigned char tables0[] = {
251
252 /* This table is a lower casing table. */
253
254 0, 1, 2, 3, 4, 5, 6, 7,
255 8, 9, 10, 11, 12, 13, 14, 15,
256 16, 17, 18, 19, 20, 21, 22, 23,
257 24, 25, 26, 27, 28, 29, 30, 31,
258 32, 33, 34, 35, 36, 37, 38, 39,
259 40, 41, 42, 43, 44, 45, 46, 47,
260 48, 49, 50, 51, 52, 53, 54, 55,
261 56, 57, 58, 59, 60, 61, 62, 63,
262 64, 97, 98, 99,100,101,102,103,
263 104,105,106,107,108,109,110,111,
264 112,113,114,115,116,117,118,119,
265 120,121,122, 91, 92, 93, 94, 95,
266 96, 97, 98, 99,100,101,102,103,
267 104,105,106,107,108,109,110,111,
268 112,113,114,115,116,117,118,119,
269 120,121,122,123,124,125,126,127,
270 128,129,130,131,132,133,134,135,
271 136,137,138,139,140,141,142,143,
272 144,145,146,147,148,149,150,151,
273 152,153,154,155,156,157,158,159,
274 160,161,162,163,164,165,166,167,
275 168,169,170,171,172,173,174,175,
276 176,177,178,179,180,181,182,183,
277 184,185,186,187,188,189,190,191,
278 192,193,194,195,196,197,198,199,
279 200,201,202,203,204,205,206,207,
280 208,209,210,211,212,213,214,215,
281 216,217,218,219,220,221,222,223,
282 224,225,226,227,228,229,230,231,
283 232,233,234,235,236,237,238,239,
284 240,241,242,243,244,245,246,247,
285 248,249,250,251,252,253,254,255,
286
287 /* This table is a case flipping table. */
288
289 0, 1, 2, 3, 4, 5, 6, 7,
290 8, 9, 10, 11, 12, 13, 14, 15,
291 16, 17, 18, 19, 20, 21, 22, 23,
292 24, 25, 26, 27, 28, 29, 30, 31,
293 32, 33, 34, 35, 36, 37, 38, 39,
294 40, 41, 42, 43, 44, 45, 46, 47,
295 48, 49, 50, 51, 52, 53, 54, 55,
296 56, 57, 58, 59, 60, 61, 62, 63,
297 64, 97, 98, 99,100,101,102,103,
298 104,105,106,107,108,109,110,111,
299 112,113,114,115,116,117,118,119,
300 120,121,122, 91, 92, 93, 94, 95,
301 96, 65, 66, 67, 68, 69, 70, 71,
302 72, 73, 74, 75, 76, 77, 78, 79,
303 80, 81, 82, 83, 84, 85, 86, 87,
304 88, 89, 90,123,124,125,126,127,
305 128,129,130,131,132,133,134,135,
306 136,137,138,139,140,141,142,143,
307 144,145,146,147,148,149,150,151,
308 152,153,154,155,156,157,158,159,
309 160,161,162,163,164,165,166,167,
310 168,169,170,171,172,173,174,175,
311 176,177,178,179,180,181,182,183,
312 184,185,186,187,188,189,190,191,
313 192,193,194,195,196,197,198,199,
314 200,201,202,203,204,205,206,207,
315 208,209,210,211,212,213,214,215,
316 216,217,218,219,220,221,222,223,
317 224,225,226,227,228,229,230,231,
318 232,233,234,235,236,237,238,239,
319 240,241,242,243,244,245,246,247,
320 248,249,250,251,252,253,254,255,
321
322 /* This table contains bit maps for various character classes. Each map is 32
323 bytes long and the bits run from the least significant end of each byte. The
324 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
325 graph, print, punct, and cntrl. Other classes are built from combinations. */
326
327 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331
332 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
333 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336
337 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341
342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346
347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351
352 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
353 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356
357 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
358 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361
362 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
363 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366
367 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
368 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371
372 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
376
377 /* This table identifies various classes of character by individual bits:
378 0x01 white space character
379 0x02 letter
380 0x04 decimal digit
381 0x08 hexadecimal digit
382 0x10 alphanumeric or '_'
383 0x80 regular expression metacharacter or binary zero
384 */
385
386 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
387 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
388 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
389 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
390 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
391 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
392 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
393 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
394 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
395 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
396 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
397 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
398 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
399 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
400 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
401 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
414 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
415 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
416 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
417 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
418
419 /* This is a set of tables that came originally from a Windows user. It seems to
420 be at least an approximation of ISO 8859. In particular, there are characters
421 greater than 128 that are marked as spaces, letters, etc. */
422
/* tables1 holds 1088 bytes, the same total as tables0. The original has no
markers between its sub-tables; the section comments below were placed by size
(256 + 256 + 320 + 256 bytes) and by comparing the values with tables0.
NOTE(review): confirm the boundaries against the layout PCRE expects. */
423 static const unsigned char tables1[] = {
/* Lower-casing table (256 bytes); unlike tables0 it also maps 192-222,
apart from 215, to the value 32 higher. */
424 0,1,2,3,4,5,6,7,
425 8,9,10,11,12,13,14,15,
426 16,17,18,19,20,21,22,23,
427 24,25,26,27,28,29,30,31,
428 32,33,34,35,36,37,38,39,
429 40,41,42,43,44,45,46,47,
430 48,49,50,51,52,53,54,55,
431 56,57,58,59,60,61,62,63,
432 64,97,98,99,100,101,102,103,
433 104,105,106,107,108,109,110,111,
434 112,113,114,115,116,117,118,119,
435 120,121,122,91,92,93,94,95,
436 96,97,98,99,100,101,102,103,
437 104,105,106,107,108,109,110,111,
438 112,113,114,115,116,117,118,119,
439 120,121,122,123,124,125,126,127,
440 128,129,130,131,132,133,134,135,
441 136,137,138,139,140,141,142,143,
442 144,145,146,147,148,149,150,151,
443 152,153,154,155,156,157,158,159,
444 160,161,162,163,164,165,166,167,
445 168,169,170,171,172,173,174,175,
446 176,177,178,179,180,181,182,183,
447 184,185,186,187,188,189,190,191,
448 224,225,226,227,228,229,230,231,
449 232,233,234,235,236,237,238,239,
450 240,241,242,243,244,245,246,215,
451 248,249,250,251,252,253,254,223,
452 224,225,226,227,228,229,230,231,
453 232,233,234,235,236,237,238,239,
454 240,241,242,243,244,245,246,247,
455 248,249,250,251,252,253,254,255,
/* Case-flipping table (256 bytes). */
456 0,1,2,3,4,5,6,7,
457 8,9,10,11,12,13,14,15,
458 16,17,18,19,20,21,22,23,
459 24,25,26,27,28,29,30,31,
460 32,33,34,35,36,37,38,39,
461 40,41,42,43,44,45,46,47,
462 48,49,50,51,52,53,54,55,
463 56,57,58,59,60,61,62,63,
464 64,97,98,99,100,101,102,103,
465 104,105,106,107,108,109,110,111,
466 112,113,114,115,116,117,118,119,
467 120,121,122,91,92,93,94,95,
468 96,65,66,67,68,69,70,71,
469 72,73,74,75,76,77,78,79,
470 80,81,82,83,84,85,86,87,
471 88,89,90,123,124,125,126,127,
472 128,129,130,131,132,133,134,135,
473 136,137,138,139,140,141,142,143,
474 144,145,146,147,148,149,150,151,
475 152,153,154,155,156,157,158,159,
476 160,161,162,163,164,165,166,167,
477 168,169,170,171,172,173,174,175,
478 176,177,178,179,180,181,182,183,
479 184,185,186,187,188,189,190,191,
480 224,225,226,227,228,229,230,231,
481 232,233,234,235,236,237,238,239,
482 240,241,242,243,244,245,246,215,
483 248,249,250,251,252,253,254,223,
484 192,193,194,195,196,197,198,199,
485 200,201,202,203,204,205,206,207,
486 208,209,210,211,212,213,214,247,
487 216,217,218,219,220,221,222,255,
/* Class bit maps (320 bytes): ten maps of 32 bytes, four lines each.
Presumably in the same class order as tables0 (space, xdigit, digit, upper,
lower, word, graph, print, punct, cntrl) - TODO confirm. */
488 0,62,0,0,1,0,0,0,
489 0,0,0,0,0,0,0,0,
490 32,0,0,0,1,0,0,0,
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,255,3,
493 126,0,0,0,126,0,0,0,
494 0,0,0,0,0,0,0,0,
495 0,0,0,0,0,0,0,0,
496 0,0,0,0,0,0,255,3,
497 0,0,0,0,0,0,0,0,
498 0,0,0,0,0,0,12,2,
499 0,0,0,0,0,0,0,0,
500 0,0,0,0,0,0,0,0,
501 254,255,255,7,0,0,0,0,
502 0,0,0,0,0,0,0,0,
503 255,255,127,127,0,0,0,0,
504 0,0,0,0,0,0,0,0,
505 0,0,0,0,254,255,255,7,
506 0,0,0,0,0,4,32,4,
507 0,0,0,128,255,255,127,255,
508 0,0,0,0,0,0,255,3,
509 254,255,255,135,254,255,255,7,
510 0,0,0,0,0,4,44,6,
511 255,255,127,255,255,255,127,255,
512 0,0,0,0,254,255,255,255,
513 255,255,255,255,255,255,255,127,
514 0,0,0,0,254,255,255,255,
515 255,255,255,255,255,255,255,255,
516 0,2,0,0,255,255,255,255,
517 255,255,255,255,255,255,255,127,
518 0,0,0,0,255,255,255,255,
519 255,255,255,255,255,255,255,255,
520 0,0,0,0,254,255,0,252,
521 1,0,0,248,1,0,0,120,
522 0,0,0,0,254,255,255,255,
523 0,0,128,0,0,0,128,0,
524 255,255,255,255,0,0,0,0,
525 0,0,0,0,0,0,0,128,
526 255,255,255,255,0,0,0,0,
527 0,0,0,0,0,0,0,0,
/* Character-type flags (256 bytes), one byte per character value; the values
for 0-127 equal those of tables0 written in decimal. */
528 128,0,0,0,0,0,0,0,
529 0,1,1,0,1,1,0,0,
530 0,0,0,0,0,0,0,0,
531 0,0,0,0,0,0,0,0,
532 1,0,0,0,128,0,0,0,
533 128,128,128,128,0,0,128,0,
534 28,28,28,28,28,28,28,28,
535 28,28,0,0,0,0,0,128,
536 0,26,26,26,26,26,26,18,
537 18,18,18,18,18,18,18,18,
538 18,18,18,18,18,18,18,18,
539 18,18,18,128,128,0,128,16,
540 0,26,26,26,26,26,26,18,
541 18,18,18,18,18,18,18,18,
542 18,18,18,18,18,18,18,18,
543 18,18,18,128,128,0,0,0,
544 0,0,0,0,0,1,0,0,
545 0,0,0,0,0,0,0,0,
546 0,0,0,0,0,0,0,0,
547 0,0,0,0,0,0,0,0,
548 1,0,0,0,0,0,0,0,
549 0,0,18,0,0,0,0,0,
550 0,0,20,20,0,18,0,0,
551 0,20,18,0,0,0,0,0,
552 18,18,18,18,18,18,18,18,
553 18,18,18,18,18,18,18,18,
554 18,18,18,18,18,18,18,0,
555 18,18,18,18,18,18,18,18,
556 18,18,18,18,18,18,18,18,
557 18,18,18,18,18,18,18,18,
558 18,18,18,18,18,18,18,0,
559 18,18,18,18,18,18,18,18
560 };
561
562
563
564
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for C libraries that have no strerror() of their own (SunOS4 is
the example the original author gives). The text is taken from the
sys_errlist[] table, whose number of entries is sys_nerr; both are declared
here as external objects and are not defined in this file.

Argument:
  n           the error number to describe

Returns:      sys_errlist[n], or the fixed text "unknown error number" when
              n is negative or not below sys_nerr
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
int in_table = (n >= 0 && n < sys_nerr);
return in_table? sys_errlist[n] : "unknown error number";
}
#endif  /* HAVE_STRERROR */
584
585
586 /*************************************************
587 * JIT memory callback *
588 *************************************************/
589
590 static pcre_jit_stack* jit_callback(void *arg)
591 {
592 return (pcre_jit_stack *)arg;
593 }
594
595
596 /*************************************************
597 * Read or extend an input line *
598 *************************************************/
599
600 /* Input lines are read into buffer, but both patterns and data lines can be
601 continued over multiple input lines. In addition, if the buffer fills up, we
602 want to automatically expand it so as to be able to handle extremely large
603 lines that are needed for certain stress tests. When the input buffer is
604 expanded, the other two buffers must also be expanded likewise, and the
605 contents of pbuffer, which are a copy of the input for callouts, must be
606 preserved (for when expansion happens for a data line). This is not the most
607 optimal way of handling this, but hey, this is just a test program!
608
609 Arguments:
610 f the file to read
611 start where in buffer to start (this *must* be within buffer)
612 prompt for stdin or readline()
613
614 Returns: pointer to the start of new data
615 could be a copy of start, or could be moved
616 NULL if no data read and EOF reached
617 */
618
619 static uschar *
620 extend_inputline(FILE *f, uschar *start, const char *prompt)
621 {
622 uschar *here = start;
623
624 for (;;)
625 {
626 int rlen = (int)(buffer_size - (here - buffer));
627
628 if (rlen > 1000)
629 {
630 int dlen;
631
632 /* If libreadline support is required, use readline() to read a line if the
633 input is a terminal. Note that readline() removes the trailing newline, so
634 we must put it back again, to be compatible with fgets(). */
635
636 #ifdef SUPPORT_LIBREADLINE
637 if (isatty(fileno(f)))
638 {
639 size_t len;
640 char *s = readline(prompt);
641 if (s == NULL) return (here == start)? NULL : start;
642 len = strlen(s);
643 if (len > 0) add_history(s);
644 if (len > rlen - 1) len = rlen - 1;
645 memcpy(here, s, len);
646 here[len] = '\n';
647 here[len+1] = 0;
648 free(s);
649 }
650 else
651 #endif
652
653 /* Read the next line by normal means, prompting if the file is stdin. */
654
655 {
656 if (f == stdin) printf("%s", prompt);
657 if (fgets((char *)here, rlen, f) == NULL)
658 return (here == start)? NULL : start;
659 }
660
661 dlen = (int)strlen((char *)here);
662 if (dlen > 0 && here[dlen - 1] == '\n') return start;
663 here += dlen;
664 }
665
666 else
667 {
668 int new_buffer_size = 2*buffer_size;
669 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
670 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
671 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
672
673 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
674 {
675 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
676 exit(1);
677 }
678
679 memcpy(new_buffer, buffer, buffer_size);
680 memcpy(new_pbuffer, pbuffer, buffer_size);
681
682 buffer_size = new_buffer_size;
683
684 start = new_buffer + (start - buffer);
685 here = new_buffer + (here - buffer);
686
687 free(buffer);
688 free(dbuffer);
689 free(pbuffer);
690
691 buffer = new_buffer;
692 dbuffer = new_dbuffer;
693 pbuffer = new_pbuffer;
694 }
695 }
696
697 return NULL; /* Control never gets here */
698 }
699
700
701
702
703
704
705
706 /*************************************************
707 * Read number from string *
708 *************************************************/
709
710 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
711 around with conditional compilation, just do the job by hand. It is only used
712 for unpicking arguments, so just keep it simple.
713
714 Arguments:
715 str string to be converted
716 endptr where to put the end pointer
717
718 Returns: the value, as an int
719 */
720
/* Converts an unsigned decimal number at the start of a string. Leading white
space is skipped, then digits are accumulated until the first non-digit. If no
digit is present the result is 0. A number too large for an int yields the
largest int value instead of overflowing; the remaining digits are still
consumed.

Arguments:
  str         string to be converted
  endptr      receives a pointer to the first character after the digits

Returns:      the value, as an int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
const int limit = (int)(~0u >> 1);   /* largest int, without needing <limits.h> */
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int exactly when result is no greater than
  (limit - digit)/10. Once saturated at limit the test keeps failing, so the
  result stays there. */

  if (result > (limit - digit) / 10) result = limit;
    else result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
730
731
732
733
734 /*************************************************
735 * Convert UTF-8 string to value *
736 *************************************************/
737
738 /* This function takes one or more bytes that represents a UTF-8 character,
739 and returns the value of the character.
740
741 Argument:
742 utf8bytes a pointer to the byte vector
743 vptr a pointer to an int to receive the value
744
745 Returns: > 0 => the number of bytes consumed
746 -6 to 0 => malformed UTF-8 character at offset = (-return)
747 */
748
749 #if !defined NOUTF8
750
751 static int
752 utf82ord(unsigned char *utf8bytes, int *vptr)
753 {
754 int c = *utf8bytes++;
755 int d = c;
756 int i, j, s;
757
758 for (i = -1; i < 6; i++) /* i is number of additional bytes */
759 {
760 if ((d & 0x80) == 0) break;
761 d <<= 1;
762 }
763
764 if (i == -1) { *vptr = c; return 1; } /* ascii character */
765 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
766
767 /* i now has a value in the range 1-5 */
768
769 s = 6*i;
770 d = (c & utf8_table3[i]) << s;
771
772 for (j = 0; j < i; j++)
773 {
774 c = *utf8bytes++;
775 if ((c & 0xc0) != 0x80) return -(j+1);
776 s -= 6;
777 d |= (c & 0x3f) << s;
778 }
779
780 /* Check that encoding was the correct unique one */
781
782 for (j = 0; j < utf8_table1_size; j++)
783 if (d <= utf8_table1[j]) break;
784 if (j != i) return -(i+1);
785
786 /* Valid value */
787
788 *vptr = d;
789 return i+1;
790 }
791
792 #endif
793
794
795
796 /*************************************************
797 * Convert character value to UTF-8 *
798 *************************************************/
799
800 /* This function takes an integer value in the range 0 - 0x7fffffff
801 and encodes it as a UTF-8 character in 1 to 6 bytes.
802
803 Arguments:
804 cvalue the character value
805 utf8bytes pointer to buffer for result - at least 6 bytes long
806
807 Returns: number of bytes placed in the buffer
808 */
809
810 #if !defined NOUTF8
811
812 static int
813 ord2utf8(int cvalue, uschar *utf8bytes)
814 {
815 register int i, j;
816 for (i = 0; i < utf8_table1_size; i++)
817 if (cvalue <= utf8_table1[i]) break;
818 utf8bytes += i;
819 for (j = i; j > 0; j--)
820 {
821 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
822 cvalue >>= 6;
823 }
824 *utf8bytes = utf8_table2[i] | cvalue;
825 return i + 1;
826 }
827
828 #endif
829
830
831
832 /*************************************************
833 * Print character string *
834 *************************************************/
835
836 /* Character string printing function. Must handle UTF-8 strings in utf8
837 mode. Yields number of characters printed. If handed a NULL file, just counts
838 chars without printing. */
839
840 static int pchars(unsigned char *p, int length, FILE *f)
841 {
842 int c = 0;
843 int yield = 0;
844
845 while (length-- > 0)
846 {
847 #if !defined NOUTF8
848 if (use_utf8)
849 {
850 int rc = utf82ord(p, &c);
851
852 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
853 {
854 length -= rc - 1;
855 p += rc;
856 if (PRINTHEX(c))
857 {
858 if (f != NULL) fprintf(f, "%c", c);
859 yield++;
860 }
861 else
862 {
863 int n = 4;
864 if (f != NULL) fprintf(f, "\\x{%02x}", c);
865 yield += (n <= 0x000000ff)? 2 :
866 (n <= 0x00000fff)? 3 :
867 (n <= 0x0000ffff)? 4 :
868 (n <= 0x000fffff)? 5 : 6;
869 }
870 continue;
871 }
872 }
873 #endif
874
875 /* Not UTF-8, or malformed UTF-8 */
876
877 c = *p++;
878 if (PRINTHEX(c))
879 {
880 if (f != NULL) fprintf(f, "%c", c);
881 yield++;
882 }
883 else
884 {
885 if (f != NULL) fprintf(f, "\\x%02x", c);
886 yield += 4;
887 }
888 }
889
890 return yield;
891 }
892
893
894
895 /*************************************************
896 * Callout function *
897 *************************************************/
898
899 /* Called from PCRE as a result of the (?C) item. We print out where we are in
900 the match. Yield zero unless more callouts than the fail count, or the callout
901 data is not zero. */
902
903 static int callout(pcre_callout_block *cb)
904 {
905 FILE *f = (first_callout | callout_extra)? outfile : NULL;
906 int i, pre_start, post_start, subject_length;
907
908 if (callout_extra)
909 {
910 fprintf(f, "Callout %d: last capture = %d\n",
911 cb->callout_number, cb->capture_last);
912
913 for (i = 0; i < cb->capture_top * 2; i += 2)
914 {
915 if (cb->offset_vector[i] < 0)
916 fprintf(f, "%2d: <unset>\n", i/2);
917 else
918 {
919 fprintf(f, "%2d: ", i/2);
920 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
921 cb->offset_vector[i+1] - cb->offset_vector[i], f);
922 fprintf(f, "\n");
923 }
924 }
925 }
926
927 /* Re-print the subject in canonical form, the first time or if giving full
928 datails. On subsequent calls in the same match, we use pchars just to find the
929 printed lengths of the substrings. */
930
931 if (f != NULL) fprintf(f, "--->");
932
933 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
934 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
935 cb->current_position - cb->start_match, f);
936
937 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
938
939 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
940 cb->subject_length - cb->current_position, f);
941
942 if (f != NULL) fprintf(f, "\n");
943
944 /* Always print appropriate indicators, with callout number if not already
945 shown. For automatic callouts, show the pattern offset. */
946
947 if (cb->callout_number == 255)
948 {
949 fprintf(outfile, "%+3d ", cb->pattern_position);
950 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
951 }
952 else
953 {
954 if (callout_extra) fprintf(outfile, " ");
955 else fprintf(outfile, "%3d ", cb->callout_number);
956 }
957
958 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
959 fprintf(outfile, "^");
960
961 if (post_start > 0)
962 {
963 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
964 fprintf(outfile, "^");
965 }
966
967 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
968 fprintf(outfile, " ");
969
970 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
971 pbuffer + cb->pattern_position);
972
973 fprintf(outfile, "\n");
974 first_callout = 0;
975
976 if (cb->mark != last_callout_mark)
977 {
978 fprintf(outfile, "Latest Mark: %s\n",
979 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
980 last_callout_mark = cb->mark;
981 }
982
983 if (cb->callout_data != NULL)
984 {
985 int callout_data = *((int *)(cb->callout_data));
986 if (callout_data != 0)
987 {
988 fprintf(outfile, "Callout data = %d\n", callout_data);
989 return callout_data;
990 }
991 }
992
993 return (cb->callout_number != callout_fail_id)? 0 :
994 (++callout_count >= callout_fail_count)? 1 : 0;
995 }
996
997
998 /*************************************************
999 * Local malloc functions *
1000 *************************************************/
1001
1002 /* Alternative malloc function, to test functionality and save the size of a
1003 compiled re, which is the first store request that pcre_compile() makes. The
1004 show_malloc variable is set only during matching. */
1005
1006 static void *new_malloc(size_t size)
1007 {
1008 void *block = malloc(size);
1009 gotten_store = size;
1010 if (first_gotten_store == 0) first_gotten_store = size;
1011 if (show_malloc)
1012 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1013 return block;
1014 }
1015
1016 static void new_free(void *block)
1017 {
1018 if (show_malloc)
1019 fprintf(outfile, "free %p\n", block);
1020 free(block);
1021 }
1022
1023 /* For recursion malloc/free, to test stacking calls */
1024
1025 static void *stack_malloc(size_t size)
1026 {
1027 void *block = malloc(size);
1028 if (show_malloc)
1029 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1030 return block;
1031 }
1032
1033 static void stack_free(void *block)
1034 {
1035 if (show_malloc)
1036 fprintf(outfile, "stack_free %p\n", block);
1037 free(block);
1038 }
1039
1040
1041 /*************************************************
1042 * Call pcre_fullinfo() *
1043 *************************************************/
1044
1045 /* Get one piece of information from the pcre_fullinfo() function */
1046
1047 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1048 {
1049 int rc;
1050 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1051 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1052 }
1053
1054
1055
1056 /*************************************************
1057 * Byte flipping function *
1058 *************************************************/
1059
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
             four low-order bytes

Returns:   the flipped value; bytes of the input above those that take part
             in the flip do not appear in the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2;
unsigned long int b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1069
1070
1071
1072
1073 /*************************************************
1074 * Check match or recursion limit *
1075 *************************************************/
1076
1077 static int
1078 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1079 int start_offset, int options, int *use_offsets, int use_size_offsets,
1080 int flag, unsigned long int *limit, int errnumber, const char *msg)
1081 {
1082 int count;
1083 int min = 0;
1084 int mid = 64;
1085 int max = -1;
1086
1087 extra->flags |= flag;
1088
1089 for (;;)
1090 {
1091 *limit = mid;
1092
1093 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1094 use_offsets, use_size_offsets);
1095
1096 if (count == errnumber)
1097 {
1098 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099 min = mid;
1100 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1101 }
1102
1103 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1104 count == PCRE_ERROR_PARTIAL)
1105 {
1106 if (mid == min + 1)
1107 {
1108 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1109 break;
1110 }
1111 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1112 max = mid;
1113 mid = (min + mid)/2;
1114 }
1115 else break; /* Some other error */
1116 }
1117
1118 extra->flags &= ~flag;
1119 return count;
1120 }
1121
1122
1123
1124 /*************************************************
1125 * Case-independent strncmp() function *
1126 *************************************************/
1127
1128 /*
1129 Arguments:
1130 s first string
1131 t second string
1132 n number of characters to compare
1133
1134 Returns: < 0, = 0, or > 0, according to the comparison
1135 */
1136
1137 static int
1138 strncmpic(uschar *s, uschar *t, int n)
1139 {
1140 while (n--)
1141 {
1142 int c = tolower(*s++) - tolower(*t++);
1143 if (c) return c;
1144 }
1145 return 0;
1146 }
1147
1148
1149
1150 /*************************************************
1151 * Check newline indicator *
1152 *************************************************/
1153
1154 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1155 a message and return 0 if there is no match.
1156
1157 Arguments:
1158 p points after the leading '<'
1159 f file for error message
1160
1161 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1162 */
1163
1164 static int
1165 check_newline(uschar *p, FILE *f)
1166 {
1167 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1168 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1169 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1170 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1171 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1172 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1173 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1174 fprintf(f, "Unknown newline type at: <%s\n", p);
1175 return 0;
1176 }
1177
1178
1179
1180 /*************************************************
1181 * Usage function *
1182 *************************************************/
1183
/* Write the command-line summary to stdout. The readline line depends on
SUPPORT_LIBREADLINE, and the -dfa and -p entries are omitted when NODFA or
NOPOSIX, respectively, is defined. None of the text contains printf
conversions, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1218
1219
1220
1221 /*************************************************
1222 * Main Program *
1223 *************************************************/
1224
1225 /* Read lines from named file or stdin and write to named file or stdout; lines
1226 consist of a regular expression, in delimiters and optionally followed by
1227 options, followed by a set of test data, terminated by an empty line. */
1228
1229 int main(int argc, char **argv)
1230 {
1231 FILE *infile = stdin;
1232 int options = 0;
1233 int study_options = 0;
1234 int default_find_match_limit = FALSE;
1235 int op = 1;
1236 int timeit = 0;
1237 int timeitm = 0;
1238 int showinfo = 0;
1239 int showstore = 0;
1240 int force_study = -1;
1241 int force_study_options = 0;
1242 int quiet = 0;
1243 int size_offsets = 45;
1244 int size_offsets_max;
1245 int *offsets = NULL;
1246 #if !defined NOPOSIX
1247 int posix = 0;
1248 #endif
1249 int debug = 0;
1250 int done = 0;
1251 int all_use_dfa = 0;
1252 int yield = 0;
1253 int stack_size;
1254
1255 pcre_jit_stack *jit_stack = NULL;
1256
1257
1258 /* These vectors store, end-to-end, a list of captured substring names. Assume
1259 that 1024 is plenty long enough for the few names we'll be testing. */
1260
1261 uschar copynames[1024];
1262 uschar getnames[1024];
1263
1264 uschar *copynamesptr;
1265 uschar *getnamesptr;
1266
1267 /* Get buffers from malloc() so that Electric Fence will check their misuse
1268 when I am debugging. They grow automatically when very long lines are read. */
1269
1270 buffer = (unsigned char *)malloc(buffer_size);
1271 dbuffer = (unsigned char *)malloc(buffer_size);
1272 pbuffer = (unsigned char *)malloc(buffer_size);
1273
1274 /* The outfile variable is static so that new_malloc can use it. */
1275
1276 outfile = stdout;
1277
1278 /* The following _setmode() stuff is some Windows magic that tells its runtime
1279 library to translate CRLF into a single LF character. At least, that's what
1280 I've been told: never having used Windows I take this all on trust. Originally
1281 it set 0x8000, but then I was advised that _O_BINARY was better. */
1282
1283 #if defined(_WIN32) || defined(WIN32)
1284 _setmode( _fileno( stdout ), _O_BINARY );
1285 #endif
1286
1287 /* Scan options */
1288
1289 while (argc > 1 && argv[op][0] == '-')
1290 {
1291 unsigned char *endptr;
1292
1293 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1294 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1295 else if (strcmp(argv[op], "-s+") == 0)
1296 {
1297 force_study = 1;
1298 force_study_options = PCRE_STUDY_JIT_COMPILE;
1299 }
1300 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1301 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1302 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1303 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1304 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1305 #if !defined NODFA
1306 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1307 #endif
1308 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1309 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1310 *endptr == 0))
1311 {
1312 op++;
1313 argc--;
1314 }
1315 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1316 {
1317 int both = argv[op][2] == 0;
1318 int temp;
1319 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1320 *endptr == 0))
1321 {
1322 timeitm = temp;
1323 op++;
1324 argc--;
1325 }
1326 else timeitm = LOOPREPEAT;
1327 if (both) timeit = timeitm;
1328 }
1329 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1330 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1331 *endptr == 0))
1332 {
1333 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1334 printf("PCRE: -S not supported on this OS\n");
1335 exit(1);
1336 #else
1337 int rc;
1338 struct rlimit rlim;
1339 getrlimit(RLIMIT_STACK, &rlim);
1340 rlim.rlim_cur = stack_size * 1024 * 1024;
1341 rc = setrlimit(RLIMIT_STACK, &rlim);
1342 if (rc != 0)
1343 {
1344 printf("PCRE: setrlimit() failed with error %d\n", rc);
1345 exit(1);
1346 }
1347 op++;
1348 argc--;
1349 #endif
1350 }
1351 #if !defined NOPOSIX
1352 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1353 #endif
1354 else if (strcmp(argv[op], "-C") == 0)
1355 {
1356 int rc;
1357 unsigned long int lrc;
1358 printf("PCRE version %s\n", pcre_version());
1359 printf("Compiled with\n");
1360 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1361 printf(" %sUTF-8 support\n", rc? "" : "No ");
1362 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1363 printf(" %sUnicode properties support\n", rc? "" : "No ");
1364 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1365 if (rc)
1366 printf(" Just-in-time compiler support\n");
1367 else
1368 printf(" No just-in-time compiler support\n");
1369 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1370 /* Note that these values are always the ASCII values, even
1371 in EBCDIC environments. CR is 13 and NL is 10. */
1372 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1373 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1374 (rc == -2)? "ANYCRLF" :
1375 (rc == -1)? "ANY" : "???");
1376 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1377 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1378 "all Unicode newlines");
1379 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1380 printf(" Internal link size = %d\n", rc);
1381 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1382 printf(" POSIX malloc threshold = %d\n", rc);
1383 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1384 printf(" Default match limit = %ld\n", lrc);
1385 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1386 printf(" Default recursion depth limit = %ld\n", lrc);
1387 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1388 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1389 goto EXIT;
1390 }
1391 else if (strcmp(argv[op], "-help") == 0 ||
1392 strcmp(argv[op], "--help") == 0)
1393 {
1394 usage();
1395 goto EXIT;
1396 }
1397 else
1398 {
1399 printf("** Unknown or malformed option %s\n", argv[op]);
1400 usage();
1401 yield = 1;
1402 goto EXIT;
1403 }
1404 op++;
1405 argc--;
1406 }
1407
1408 /* Get the store for the offsets vector, and remember what it was */
1409
1410 size_offsets_max = size_offsets;
1411 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1412 if (offsets == NULL)
1413 {
1414 printf("** Failed to get %d bytes of memory for offsets vector\n",
1415 (int)(size_offsets_max * sizeof(int)));
1416 yield = 1;
1417 goto EXIT;
1418 }
1419
1420 /* Sort out the input and output files */
1421
1422 if (argc > 1)
1423 {
1424 infile = fopen(argv[op], INPUT_MODE);
1425 if (infile == NULL)
1426 {
1427 printf("** Failed to open %s\n", argv[op]);
1428 yield = 1;
1429 goto EXIT;
1430 }
1431 }
1432
1433 if (argc > 2)
1434 {
1435 outfile = fopen(argv[op+1], OUTPUT_MODE);
1436 if (outfile == NULL)
1437 {
1438 printf("** Failed to open %s\n", argv[op+1]);
1439 yield = 1;
1440 goto EXIT;
1441 }
1442 }
1443
1444 /* Set alternative malloc function */
1445
1446 pcre_malloc = new_malloc;
1447 pcre_free = new_free;
1448 pcre_stack_malloc = stack_malloc;
1449 pcre_stack_free = stack_free;
1450
1451 /* Heading line unless quiet, then prompt for first regex if stdin */
1452
1453 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1454
1455 /* Main loop */
1456
1457 while (!done)
1458 {
1459 pcre *re = NULL;
1460 pcre_extra *extra = NULL;
1461
1462 #if !defined NOPOSIX /* There are still compilers that require no indent */
1463 regex_t preg;
1464 int do_posix = 0;
1465 #endif
1466
1467 const char *error;
1468 unsigned char *markptr;
1469 unsigned char *p, *pp, *ppp;
1470 unsigned char *to_file = NULL;
1471 const unsigned char *tables = NULL;
1472 unsigned long int true_size, true_study_size = 0;
1473 size_t size, regex_gotten_store;
1474 int do_allcaps = 0;
1475 int do_mark = 0;
1476 int do_study = 0;
1477 int no_force_study = 0;
1478 int do_debug = debug;
1479 int do_G = 0;
1480 int do_g = 0;
1481 int do_showinfo = showinfo;
1482 int do_showrest = 0;
1483 int do_showcaprest = 0;
1484 int do_flip = 0;
1485 int erroroffset, len, delimiter, poffset;
1486
1487 use_utf8 = 0;
1488 debug_lengths = 1;
1489
1490 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1491 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1492 fflush(outfile);
1493
1494 p = buffer;
1495 while (isspace(*p)) p++;
1496 if (*p == 0) continue;
1497
1498 /* See if the pattern is to be loaded pre-compiled from a file. */
1499
1500 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1501 {
1502 unsigned long int magic, get_options;
1503 uschar sbuf[8];
1504 FILE *f;
1505
1506 p++;
1507 pp = p + (int)strlen((char *)p);
1508 while (isspace(pp[-1])) pp--;
1509 *pp = 0;
1510
1511 f = fopen((char *)p, "rb");
1512 if (f == NULL)
1513 {
1514 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1515 continue;
1516 }
1517
1518 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1519
1520 true_size =
1521 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1522 true_study_size =
1523 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1524
1525 re = (real_pcre *)new_malloc(true_size);
1526 regex_gotten_store = first_gotten_store;
1527
1528 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1529
1530 magic = ((real_pcre *)re)->magic_number;
1531 if (magic != MAGIC_NUMBER)
1532 {
1533 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1534 {
1535 do_flip = 1;
1536 }
1537 else
1538 {
1539 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1540 fclose(f);
1541 continue;
1542 }
1543 }
1544
1545 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1546 do_flip? " (byte-inverted)" : "", p);
1547
1548 /* Need to know if UTF-8 for printing data strings */
1549
1550 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1551 use_utf8 = (get_options & PCRE_UTF8) != 0;
1552
1553 /* Now see if there is any following study data. */
1554
1555 if (true_study_size != 0)
1556 {
1557 pcre_study_data *psd;
1558
1559 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1560 extra->flags = PCRE_EXTRA_STUDY_DATA;
1561
1562 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1563 extra->study_data = psd;
1564
1565 if (fread(psd, 1, true_study_size, f) != true_study_size)
1566 {
1567 FAIL_READ:
1568 fprintf(outfile, "Failed to read data from %s\n", p);
1569 if (extra != NULL) pcre_free_study(extra);
1570 if (re != NULL) new_free(re);
1571 fclose(f);
1572 continue;
1573 }
1574 fprintf(outfile, "Study data loaded from %s\n", p);
1575 do_study = 1; /* To get the data output if requested */
1576 }
1577 else fprintf(outfile, "No study data\n");
1578
1579 fclose(f);
1580 goto SHOW_INFO;
1581 }
1582
1583 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1584 the pattern; if it isn't complete, read more. */
1585
1586 delimiter = *p++;
1587
1588 if (isalnum(delimiter) || delimiter == '\\')
1589 {
1590 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1591 goto SKIP_DATA;
1592 }
1593
1594 pp = p;
1595 poffset = (int)(p - buffer);
1596
1597 for(;;)
1598 {
1599 while (*pp != 0)
1600 {
1601 if (*pp == '\\' && pp[1] != 0) pp++;
1602 else if (*pp == delimiter) break;
1603 pp++;
1604 }
1605 if (*pp != 0) break;
1606 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1607 {
1608 fprintf(outfile, "** Unexpected EOF\n");
1609 done = 1;
1610 goto CONTINUE;
1611 }
1612 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1613 }
1614
1615 /* The buffer may have moved while being extended; reset the start of data
1616 pointer to the correct relative point in the buffer. */
1617
1618 p = buffer + poffset;
1619
1620 /* If the first character after the delimiter is backslash, make
1621 the pattern end with backslash. This is purely to provide a way
1622 of testing for the error message when a pattern ends with backslash. */
1623
1624 if (pp[1] == '\\') *pp++ = '\\';
1625
1626 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1627 for callouts. */
1628
1629 *pp++ = 0;
1630 strcpy((char *)pbuffer, (char *)p);
1631
1632 /* Look for options after final delimiter */
1633
1634 options = 0;
1635 study_options = 0;
1636 log_store = showstore; /* default from command line */
1637
1638 while (*pp != 0)
1639 {
1640 switch (*pp++)
1641 {
1642 case 'f': options |= PCRE_FIRSTLINE; break;
1643 case 'g': do_g = 1; break;
1644 case 'i': options |= PCRE_CASELESS; break;
1645 case 'm': options |= PCRE_MULTILINE; break;
1646 case 's': options |= PCRE_DOTALL; break;
1647 case 'x': options |= PCRE_EXTENDED; break;
1648
1649 case '+':
1650 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1651 break;
1652
1653 case '=': do_allcaps = 1; break;
1654 case 'A': options |= PCRE_ANCHORED; break;
1655 case 'B': do_debug = 1; break;
1656 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1657 case 'D': do_debug = do_showinfo = 1; break;
1658 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1659 case 'F': do_flip = 1; break;
1660 case 'G': do_G = 1; break;
1661 case 'I': do_showinfo = 1; break;
1662 case 'J': options |= PCRE_DUPNAMES; break;
1663 case 'K': do_mark = 1; break;
1664 case 'M': log_store = 1; break;
1665 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1666
1667 #if !defined NOPOSIX
1668 case 'P': do_posix = 1; break;
1669 #endif
1670
1671 case 'S':
1672 if (do_study == 0)
1673 {
1674 do_study = 1;
1675 if (*pp == '+')
1676 {
1677 study_options |= PCRE_STUDY_JIT_COMPILE;
1678 pp++;
1679 }
1680 }
1681 else
1682 {
1683 do_study = 0;
1684 no_force_study = 1;
1685 }
1686 break;
1687
1688 case 'U': options |= PCRE_UNGREEDY; break;
1689 case 'W': options |= PCRE_UCP; break;
1690 case 'X': options |= PCRE_EXTRA; break;
1691 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1692 case 'Z': debug_lengths = 0; break;
1693 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1694 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1695
1696 case 'T':
1697 switch (*pp++)
1698 {
1699 case '0': tables = tables0; break;
1700 case '1': tables = tables1; break;
1701
1702 case '\r':
1703 case '\n':
1704 case ' ':
1705 case 0:
1706 fprintf(outfile, "** Missing table number after /T\n");
1707 goto SKIP_DATA;
1708
1709 default:
1710 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1711 goto SKIP_DATA;
1712 }
1713 break;
1714
1715 case 'L':
1716 ppp = pp;
1717 /* The '\r' test here is so that it works on Windows. */
1718 /* The '0' test is just in case this is an unterminated line. */
1719 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1720 *ppp = 0;
1721 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1722 {
1723 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1724 goto SKIP_DATA;
1725 }
1726 locale_set = 1;
1727 tables = pcre_maketables();
1728 pp = ppp;
1729 break;
1730
1731 case '>':
1732 to_file = pp;
1733 while (*pp != 0) pp++;
1734 while (isspace(pp[-1])) pp--;
1735 *pp = 0;
1736 break;
1737
1738 case '<':
1739 {
1740 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1741 {
1742 options |= PCRE_JAVASCRIPT_COMPAT;
1743 pp += 3;
1744 }
1745 else
1746 {
1747 int x = check_newline(pp, outfile);
1748 if (x == 0) goto SKIP_DATA;
1749 options |= x;
1750 while (*pp++ != '>');
1751 }
1752 }
1753 break;
1754
1755 case '\r': /* So that it works in Windows */
1756 case '\n':
1757 case ' ':
1758 break;
1759
1760 default:
1761 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1762 goto SKIP_DATA;
1763 }
1764 }
1765
1766 /* Handle compiling via the POSIX interface, which doesn't support the
1767 timing, showing, or debugging options, nor the ability to pass over
1768 local character tables. */
1769
1770 #if !defined NOPOSIX
1771 if (posix || do_posix)
1772 {
1773 int rc;
1774 int cflags = 0;
1775
1776 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1777 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1778 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1779 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1780 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1781 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1782 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1783
1784 first_gotten_store = 0;
1785 rc = regcomp(&preg, (char *)p, cflags);
1786
1787 /* Compilation failed; go back for another re, skipping to blank line
1788 if non-interactive. */
1789
1790 if (rc != 0)
1791 {
1792 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1793 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1794 goto SKIP_DATA;
1795 }
1796 }
1797
1798 /* Handle compiling via the native interface */
1799
1800 else
1801 #endif /* !defined NOPOSIX */
1802
1803 {
1804 unsigned long int get_options;
1805
1806 if (timeit > 0)
1807 {
1808 register int i;
1809 clock_t time_taken;
1810 clock_t start_time = clock();
1811 for (i = 0; i < timeit; i++)
1812 {
1813 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1814 if (re != NULL) free(re);
1815 }
1816 time_taken = clock() - start_time;
1817 fprintf(outfile, "Compile time %.4f milliseconds\n",
1818 (((double)time_taken * 1000.0) / (double)timeit) /
1819 (double)CLOCKS_PER_SEC);
1820 }
1821
1822 first_gotten_store = 0;
1823 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1824
1825 /* Compilation failed; go back for another re, skipping to blank line
1826 if non-interactive. */
1827
1828 if (re == NULL)
1829 {
1830 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1831 SKIP_DATA:
1832 if (infile != stdin)
1833 {
1834 for (;;)
1835 {
1836 if (extend_inputline(infile, buffer, NULL) == NULL)
1837 {
1838 done = 1;
1839 goto CONTINUE;
1840 }
1841 len = (int)strlen((char *)buffer);
1842 while (len > 0 && isspace(buffer[len-1])) len--;
1843 if (len == 0) break;
1844 }
1845 fprintf(outfile, "\n");
1846 }
1847 goto CONTINUE;
1848 }
1849
1850 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1851 within the regex; check for this so that we know how to process the data
1852 lines. */
1853
1854 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1855 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1856
1857 /* Print information if required. There are now two info-returning
1858 functions. The old one has a limited interface and returns only limited
1859 data. Check that it agrees with the newer one. */
1860
1861 if (log_store)
1862 fprintf(outfile, "Memory allocation (code space): %d\n",
1863 (int)(first_gotten_store -
1864 sizeof(real_pcre) -
1865 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1866
1867 /* Extract the size for possible writing before possibly flipping it,
1868 and remember the store that was got. */
1869
1870 true_size = ((real_pcre *)re)->size;
1871 regex_gotten_store = first_gotten_store;
1872
1873 /* If -s or /S was present, study the regex to generate additional info to
1874 help with the matching, unless the pattern has the SS option, which
1875 suppresses the effect of /S (used for a few test patterns where studying is
1876 never sensible). */
1877
1878 if (do_study || (force_study >= 0 && !no_force_study))
1879 {
1880 if (timeit > 0)
1881 {
1882 register int i;
1883 clock_t time_taken;
1884 clock_t start_time = clock();
1885 for (i = 0; i < timeit; i++)
1886 extra = pcre_study(re, study_options | force_study_options, &error);
1887 time_taken = clock() - start_time;
1888 if (extra != NULL) pcre_free_study(extra);
1889 fprintf(outfile, " Study time %.4f milliseconds\n",
1890 (((double)time_taken * 1000.0) / (double)timeit) /
1891 (double)CLOCKS_PER_SEC);
1892 }
1893 extra = pcre_study(re, study_options | force_study_options, &error);
1894 if (error != NULL)
1895 fprintf(outfile, "Failed to study: %s\n", error);
1896 else if (extra != NULL)
1897 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1898 }
1899
1900 /* If /K was present, we set up for handling MARK data. */
1901
1902 if (do_mark)
1903 {
1904 if (extra == NULL)
1905 {
1906 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1907 extra->flags = 0;
1908 }
1909 extra->mark = &markptr;
1910 extra->flags |= PCRE_EXTRA_MARK;
1911 }
1912
1913 /* If the 'F' option was present, we flip the bytes of all the integer
1914 fields in the regex data block and the study block. This is to make it
1915 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1916 compiled on a different architecture. */
1917
1918 if (do_flip)
1919 {
1920 real_pcre *rre = (real_pcre *)re;
1921 rre->magic_number =
1922 byteflip(rre->magic_number, sizeof(rre->magic_number));
1923 rre->size = byteflip(rre->size, sizeof(rre->size));
1924 rre->options = byteflip(rre->options, sizeof(rre->options));
1925 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1926 rre->top_bracket =
1927 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1928 rre->top_backref =
1929 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1930 rre->first_byte =
1931 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1932 rre->req_byte =
1933 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1934 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1935 sizeof(rre->name_table_offset));
1936 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1937 sizeof(rre->name_entry_size));
1938 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1939 sizeof(rre->name_count));
1940
1941 if (extra != NULL)
1942 {
1943 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1944 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1945 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1946 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1947 }
1948 }
1949
1950 /* Extract information from the compiled data if required */
1951
1952 SHOW_INFO:
1953
1954 if (do_debug)
1955 {
1956 fprintf(outfile, "------------------------------------------------------------------\n");
1957 pcre_printint(re, outfile, debug_lengths);
1958 }
1959
1960 /* We already have the options in get_options (see above) */
1961
1962 if (do_showinfo)
1963 {
1964 unsigned long int all_options;
1965 #if !defined NOINFOCHECK
1966 int old_first_char, old_options, old_count;
1967 #endif
1968 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1969 hascrorlf;
1970 int nameentrysize, namecount;
1971 const uschar *nametable;
1972
1973 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1974 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1975 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1976 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1977 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1978 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1979 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1980 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1981 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1982 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1983 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1984
1985 #if !defined NOINFOCHECK
1986 old_count = pcre_info(re, &old_options, &old_first_char);
1987 if (count < 0) fprintf(outfile,
1988 "Error %d from pcre_info()\n", count);
1989 else
1990 {
1991 if (old_count != count) fprintf(outfile,
1992 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1993 old_count);
1994
1995 if (old_first_char != first_char) fprintf(outfile,
1996 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1997 first_char, old_first_char);
1998
1999 if (old_options != (int)get_options) fprintf(outfile,
2000 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2001 get_options, old_options);
2002 }
2003 #endif
2004
2005 if (size != regex_gotten_store) fprintf(outfile,
2006 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2007 (int)size, (int)regex_gotten_store);
2008
2009 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2010 if (backrefmax > 0)
2011 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2012
2013 if (namecount > 0)
2014 {
2015 fprintf(outfile, "Named capturing subpatterns:\n");
2016 while (namecount-- > 0)
2017 {
2018 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2019 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2020 GET2(nametable, 0));
2021 nametable += nameentrysize;
2022 }
2023 }
2024
2025 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2026 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2027
2028 all_options = ((real_pcre *)re)->options;
2029 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2030
2031 if (get_options == 0) fprintf(outfile, "No options\n");
2032 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2033 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2034 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2035 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2036 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2037 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2038 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2039 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2040 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2041 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2042 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2043 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2044 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2045 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2046 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2047 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2048 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2049 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2050
2051 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2052
2053 switch (get_options & PCRE_NEWLINE_BITS)
2054 {
2055 case PCRE_NEWLINE_CR:
2056 fprintf(outfile, "Forced newline sequence: CR\n");
2057 break;
2058
2059 case PCRE_NEWLINE_LF:
2060 fprintf(outfile, "Forced newline sequence: LF\n");
2061 break;
2062
2063 case PCRE_NEWLINE_CRLF:
2064 fprintf(outfile, "Forced newline sequence: CRLF\n");
2065 break;
2066
2067 case PCRE_NEWLINE_ANYCRLF:
2068 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2069 break;
2070
2071 case PCRE_NEWLINE_ANY:
2072 fprintf(outfile, "Forced newline sequence: ANY\n");
2073 break;
2074
2075 default:
2076 break;
2077 }
2078
2079 if (first_char == -1)
2080 {
2081 fprintf(outfile, "First char at start or follows newline\n");
2082 }
2083 else if (first_char < 0)
2084 {
2085 fprintf(outfile, "No first char\n");
2086 }
2087 else
2088 {
2089 int ch = first_char & 255;
2090 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2091 "" : " (caseless)";
2092 if (PRINTHEX(ch))
2093 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2094 else
2095 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2096 }
2097
2098 if (need_char < 0)
2099 {
2100 fprintf(outfile, "No need char\n");
2101 }
2102 else
2103 {
2104 int ch = need_char & 255;
2105 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2106 "" : " (caseless)";
2107 if (PRINTHEX(ch))
2108 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2109 else
2110 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2111 }
2112
2113 /* Don't output study size; at present it is in any case a fixed
2114 value, but it varies, depending on the computer architecture, and
2115 so messes up the test suite. (And with the /F option, it might be
2116 flipped.) If study was forced by an external -s, don't show this
2117 information unless -i or -d was also present. This means that, except
2118 when auto-callouts are involved, the output from runs with and without
2119 -s should be identical. */
2120
2121 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2122 {
2123 if (extra == NULL)
2124 fprintf(outfile, "Study returned NULL\n");
2125 else
2126 {
2127 uschar *start_bits = NULL;
2128 int minlength;
2129
2130 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2131 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2132
2133 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2134 if (start_bits == NULL)
2135 fprintf(outfile, "No set of starting bytes\n");
2136 else
2137 {
2138 int i;
2139 int c = 24;
2140 fprintf(outfile, "Starting byte set: ");
2141 for (i = 0; i < 256; i++)
2142 {
2143 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2144 {
2145 if (c > 75)
2146 {
2147 fprintf(outfile, "\n ");
2148 c = 2;
2149 }
2150 if (PRINTHEX(i) && i != ' ')
2151 {
2152 fprintf(outfile, "%c ", i);
2153 c += 2;
2154 }
2155 else
2156 {
2157 fprintf(outfile, "\\x%02x ", i);
2158 c += 5;
2159 }
2160 }
2161 }
2162 fprintf(outfile, "\n");
2163 }
2164 }
2165
2166 /* Show this only if the JIT was set by /S, not by -s. */
2167
2168 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2169 {
2170 int jit;
2171 new_info(re, extra, PCRE_INFO_JIT, &jit);
2172 if (jit)
2173 fprintf(outfile, "JIT study was successful\n");
2174 else
2175 #ifdef SUPPORT_JIT
2176 fprintf(outfile, "JIT study was not successful\n");
2177 #else
2178 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2179 #endif
2180 }
2181 }
2182 }
2183
2184 /* If the '>' option was present, we write out the regex to a file, and
2185 that is all. The first 8 bytes of the file are the regex length and then
2186 the study length, in big-endian order. */
2187
2188 if (to_file != NULL)
2189 {
2190 FILE *f = fopen((char *)to_file, "wb");
2191 if (f == NULL)
2192 {
2193 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2194 }
2195 else
2196 {
2197 uschar sbuf[8];
2198 sbuf[0] = (uschar)((true_size >> 24) & 255);
2199 sbuf[1] = (uschar)((true_size >> 16) & 255);
2200 sbuf[2] = (uschar)((true_size >> 8) & 255);
2201 sbuf[3] = (uschar)((true_size) & 255);
2202
2203 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2204 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2205 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2206 sbuf[7] = (uschar)((true_study_size) & 255);
2207
2208 if (fwrite(sbuf, 1, 8, f) < 8 ||
2209 fwrite(re, 1, true_size, f) < true_size)
2210 {
2211 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2212 }
2213 else
2214 {
2215 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2216
2217 /* If there is study data, write it. */
2218
2219 if (extra != NULL)
2220 {
2221 if (fwrite(extra->study_data, 1, true_study_size, f) <
2222 true_study_size)
2223 {
2224 fprintf(outfile, "Write error on %s: %s\n", to_file,
2225 strerror(errno));
2226 }
2227 else fprintf(outfile, "Study data written to %s\n", to_file);
2228 }
2229 }
2230 fclose(f);
2231 }
2232
2233 new_free(re);
2234 if (extra != NULL) pcre_free_study(extra);
2235 if (locale_set)
2236 {
2237 new_free((void *)tables);
2238 setlocale(LC_CTYPE, "C");
2239 locale_set = 0;
2240 }
2241 continue; /* With next regex */
2242 }
2243 } /* End of non-POSIX compile */
2244
2245 /* Read data lines and test them */
2246
2247 for (;;)
2248 {
2249 uschar *q;
2250 uschar *bptr;
2251 int *use_offsets = offsets;
2252 int use_size_offsets = size_offsets;
2253 int callout_data = 0;
2254 int callout_data_set = 0;
2255 int count, c;
2256 int copystrings = 0;
2257 int find_match_limit = default_find_match_limit;
2258 int getstrings = 0;
2259 int getlist = 0;
2260 int gmatched = 0;
2261 int start_offset = 0;
2262 int start_offset_sign = 1;
2263 int g_notempty = 0;
2264 int use_dfa = 0;
2265
2266 options = 0;
2267
2268 *copynames = 0;
2269 *getnames = 0;
2270
2271 copynamesptr = copynames;
2272 getnamesptr = getnames;
2273
2274 pcre_callout = callout;
2275 first_callout = 1;
2276 last_callout_mark = NULL;
2277 callout_extra = 0;
2278 callout_count = 0;
2279 callout_fail_count = 999999;
2280 callout_fail_id = -1;
2281 show_malloc = 0;
2282
2283 if (extra != NULL) extra->flags &=
2284 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2285
2286 len = 0;
2287 for (;;)
2288 {
2289 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2290 {
2291 if (len > 0) /* Reached EOF without hitting a newline */
2292 {
2293 fprintf(outfile, "\n");
2294 break;
2295 }
2296 done = 1;
2297 goto CONTINUE;
2298 }
2299 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2300 len = (int)strlen((char *)buffer);
2301 if (buffer[len-1] == '\n') break;
2302 }
2303
2304 while (len > 0 && isspace(buffer[len-1])) len--;
2305 buffer[len] = 0;
2306 if (len == 0) break;
2307
2308 p = buffer;
2309 while (isspace(*p)) p++;
2310
2311 bptr = q = dbuffer;
2312 while ((c = *p++) != 0)
2313 {
2314 int i = 0;
2315 int n = 0;
2316
2317 if (c == '\\') switch ((c = *p++))
2318 {
2319 case 'a': c = 7; break;
2320 case 'b': c = '\b'; break;
2321 case 'e': c = 27; break;
2322 case 'f': c = '\f'; break;
2323 case 'n': c = '\n'; break;
2324 case 'r': c = '\r'; break;
2325 case 't': c = '\t'; break;
2326 case 'v': c = '\v'; break;
2327
2328 case '0': case '1': case '2': case '3':
2329 case '4': case '5': case '6': case '7':
2330 c -= '0';
2331 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2332 c = c * 8 + *p++ - '0';
2333
2334 #if !defined NOUTF8
2335 if (use_utf8 && c > 255)
2336 {
2337 unsigned char buff8[8];
2338 int ii, utn;
2339 utn = ord2utf8(c, buff8);
2340 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2341 c = buff8[ii]; /* Last byte */
2342 }
2343 #endif
2344 break;
2345
2346 case 'x':
2347
2348 /* Handle \x{..} specially - new Perl thing for utf8 */
2349
2350 #if !defined NOUTF8
2351 if (*p == '{')
2352 {
2353 unsigned char *pt = p;
2354 c = 0;
2355
2356 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2357 when isxdigit() is a macro that refers to its argument more than
2358 once. This is banned by the C Standard, but apparently happens in at
2359 least one MacOS environment. */
2360
2361 for (pt++; isxdigit(*pt); pt++)
2362 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2363 if (*pt == '}')
2364 {
2365 unsigned char buff8[8];
2366 int ii, utn;
2367 if (use_utf8)
2368 {
2369 utn = ord2utf8(c, buff8);
2370 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2371 c = buff8[ii]; /* Last byte */
2372 }
2373 else
2374 {
2375 if (c > 255)
2376 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2377 "UTF-8 mode is not enabled.\n"
2378 "** Truncation will probably give the wrong result.\n", c);
2379 }
2380 p = pt + 1;
2381 break;
2382 }
2383 /* Not correct form; fall through */
2384 }
2385 #endif
2386
2387 /* Ordinary \x */
2388
2389 c = 0;
2390 while (i++ < 2 && isxdigit(*p))
2391 {
2392 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2393 p++;
2394 }
2395 break;
2396
2397 case 0: /* \ followed by EOF allows for an empty line */
2398 p--;
2399 continue;
2400
2401 case '>':
2402 if (*p == '-')
2403 {
2404 start_offset_sign = -1;
2405 p++;
2406 }
2407 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2408 start_offset *= start_offset_sign;
2409 continue;
2410
2411 case 'A': /* Option setting */
2412 options |= PCRE_ANCHORED;
2413 continue;
2414
2415 case 'B':
2416 options |= PCRE_NOTBOL;
2417 continue;
2418
2419 case 'C':
2420 if (isdigit(*p)) /* Set copy string */
2421 {
2422 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2423 copystrings |= 1 << n;
2424 }
2425 else if (isalnum(*p))
2426 {
2427 uschar *npp = copynamesptr;
2428 while (isalnum(*p)) *npp++ = *p++;
2429 *npp++ = 0;
2430 *npp = 0;
2431 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2432 if (n < 0)
2433 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2434 copynamesptr = npp;
2435 }
2436 else if (*p == '+')
2437 {
2438 callout_extra = 1;
2439 p++;
2440 }
2441 else if (*p == '-')
2442 {
2443 pcre_callout = NULL;
2444 p++;
2445 }
2446 else if (*p == '!')
2447 {
2448 callout_fail_id = 0;
2449 p++;
2450 while(isdigit(*p))
2451 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2452 callout_fail_count = 0;
2453 if (*p == '!')
2454 {
2455 p++;
2456 while(isdigit(*p))
2457 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2458 }
2459 }
2460 else if (*p == '*')
2461 {
2462 int sign = 1;
2463 callout_data = 0;
2464 if (*(++p) == '-') { sign = -1; p++; }
2465 while(isdigit(*p))
2466 callout_data = callout_data * 10 + *p++ - '0';
2467 callout_data *= sign;
2468 callout_data_set = 1;
2469 }
2470 continue;
2471
2472 #if !defined NODFA
2473 case 'D':
2474 #if !defined NOPOSIX
2475 if (posix || do_posix)
2476 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2477 else
2478 #endif
2479 use_dfa = 1;
2480 continue;
2481 #endif
2482
2483 #if !defined NODFA
2484 case 'F':
2485 options |= PCRE_DFA_SHORTEST;
2486 continue;
2487 #endif
2488
2489 case 'G':
2490 if (isdigit(*p))
2491 {
2492 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2493 getstrings |= 1 << n;
2494 }
2495 else if (isalnum(*p))
2496 {
2497 uschar *npp = getnamesptr;
2498 while (isalnum(*p)) *npp++ = *p++;
2499 *npp++ = 0;
2500 *npp = 0;
2501 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2502 if (n < 0)
2503 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2504 getnamesptr = npp;
2505 }
2506 continue;
2507
2508 case 'J':
2509 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2510 if (extra != NULL
2511 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2512 && extra->executable_jit != NULL)
2513 {
2514 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2515 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2516 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2517 }
2518 continue;
2519
2520 case 'L':
2521 getlist = 1;
2522 continue;
2523
2524 case 'M':
2525 find_match_limit = 1;
2526 continue;
2527
2528 case 'N':
2529 if ((options & PCRE_NOTEMPTY) != 0)
2530 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2531 else
2532 options |= PCRE_NOTEMPTY;
2533 continue;
2534
2535 case 'O':
2536 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2537 if (n > size_offsets_max)
2538 {
2539 size_offsets_max = n;
2540 free(offsets);
2541 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2542 if (offsets == NULL)
2543 {
2544 printf("** Failed to get %d bytes of memory for offsets vector\n",
2545 (int)(size_offsets_max * sizeof(int)));
2546 yield = 1;
2547 goto EXIT;
2548 }
2549 }
2550 use_size_offsets = n;
2551 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2552 continue;
2553
2554 case 'P':
2555 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2556 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2557 continue;
2558
2559 case 'Q':
2560 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2561 if (extra == NULL)
2562 {
2563 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2564 extra->flags = 0;
2565 }
2566 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2567 extra->match_limit_recursion = n;
2568 continue;
2569
2570 case 'q':
2571 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2572 if (extra == NULL)
2573 {
2574 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2575 extra->flags = 0;
2576 }
2577 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2578 extra->match_limit = n;
2579 continue;
2580
2581 #if !defined NODFA
2582 case 'R':
2583 options |= PCRE_DFA_RESTART;
2584 continue;
2585 #endif
2586
2587 case 'S':
2588 show_malloc = 1;
2589 continue;
2590
2591 case 'Y':
2592 options |= PCRE_NO_START_OPTIMIZE;
2593 continue;
2594
2595 case 'Z':
2596 options |= PCRE_NOTEOL;
2597 continue;
2598
2599 case '?':
2600 options |= PCRE_NO_UTF8_CHECK;
2601 continue;
2602
2603 case '<':
2604 {
2605 int x = check_newline(p, outfile);
2606 if (x == 0) goto NEXT_DATA;
2607 options |= x;
2608 while (*p++ != '>');
2609 }
2610 continue;
2611 }
2612 *q++ = c;
2613 }
2614 *q = 0;
2615 len = (int)(q - dbuffer);
2616
2617 /* Move the data to the end of the buffer so that a read over the end of
2618 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2619 we are using the POSIX interface, we must include the terminating zero. */
2620
2621 #if !defined NOPOSIX
2622 if (posix || do_posix)
2623 {
2624 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2625 bptr += buffer_size - len - 1;
2626 }
2627 else
2628 #endif
2629 {
2630 memmove(bptr + buffer_size - len, bptr, len);
2631 bptr += buffer_size - len;
2632 }
2633
2634 if ((all_use_dfa || use_dfa) && find_match_limit)
2635 {
2636 printf("**Match limit not relevant for DFA matching: ignored\n");
2637 find_match_limit = 0;
2638 }
2639
2640 /* Handle matching via the POSIX interface, which does not
2641 support timing or playing with the match limit or callout data. */
2642
2643 #if !defined NOPOSIX
2644 if (posix || do_posix)
2645 {
2646 int rc;
2647 int eflags = 0;
2648 regmatch_t *pmatch = NULL;
2649 if (use_size_offsets > 0)
2650 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2651 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2652 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2653 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2654
2655 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2656
2657 if (rc != 0)
2658 {
2659 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2660 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2661 }
2662 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2663 != 0)
2664 {
2665 fprintf(outfile, "Matched with REG_NOSUB\n");
2666 }
2667 else
2668 {
2669 size_t i;
2670 for (i = 0; i < (size_t)use_size_offsets; i++)
2671 {
2672 if (pmatch[i].rm_so >= 0)
2673 {
2674 fprintf(outfile, "%2d: ", (int)i);
2675 (void)pchars(dbuffer + pmatch[i].rm_so,
2676 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2677 fprintf(outfile, "\n");
2678 if (do_showcaprest || (i == 0 && do_showrest))
2679 {
2680 fprintf(outfile, "%2d+ ", (int)i);
2681 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2682 outfile);
2683 fprintf(outfile, "\n");
2684 }
2685 }
2686 }
2687 }
2688 free(pmatch);
2689 }
2690
2691 /* Handle matching via the native interface - repeats for /g and /G */
2692
2693 else
2694 #endif /* !defined NOPOSIX */
2695
2696 for (;; gmatched++) /* Loop for /g or /G */
2697 {
2698 markptr = NULL;
2699
2700 if (timeitm > 0)
2701 {
2702 register int i;
2703 clock_t time_taken;
2704 clock_t start_time = clock();
2705
2706 #if !defined NODFA
2707 if (all_use_dfa || use_dfa)
2708 {
2709 int workspace[1000];
2710 for (i = 0; i < timeitm; i++)
2711 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2712 options | g_notempty, use_offsets, use_size_offsets, workspace,
2713 sizeof(workspace)/sizeof(int));
2714 }
2715 else
2716 #endif
2717
2718 for (i = 0; i < timeitm; i++)
2719 count = pcre_exec(re, extra, (char *)bptr, len,
2720 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2721
2722 time_taken = clock() - start_time;
2723 fprintf(outfile, "Execute time %.4f milliseconds\n",
2724 (((double)time_taken * 1000.0) / (double)timeitm) /
2725 (double)CLOCKS_PER_SEC);
2726 }
2727
2728 /* If find_match_limit is set, we want to do repeated matches with
2729 varying limits in order to find the minimum value for the match limit and
2730 for the recursion limit. The match limits are relevant only to the normal
2731 running of pcre_exec(), so disable the JIT optimization. This makes it
2732 possible to run the same set of tests with and without JIT externally
2733 requested. */
2734
2735 if (find_match_limit)
2736 {
2737 if (extra == NULL)
2738 {
2739 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2740 extra->flags = 0;
2741 }
2742 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2743
2744 (void)check_match_limit(re, extra, bptr, len, start_offset,
2745 options|g_notempty, use_offsets, use_size_offsets,
2746 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2747 PCRE_ERROR_MATCHLIMIT, "match()");
2748
2749 count = check_match_limit(re, extra, bptr, len, start_offset,
2750 options|g_notempty, use_offsets, use_size_offsets,
2751 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2752 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2753 }
2754
2755 /* If callout_data is set, use the interface with additional data */
2756
2757 else if (callout_data_set)
2758 {
2759 if (extra == NULL)
2760 {
2761 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2762 extra->flags = 0;
2763 }
2764 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2765 extra->callout_data = &callout_data;
2766 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2767 options | g_notempty, use_offsets, use_size_offsets);
2768 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2769 }
2770
2771 /* The normal case is just to do the match once, with the default
2772 value of match_limit. */
2773
2774 #if !defined NODFA
2775 else if (all_use_dfa || use_dfa)
2776 {
2777 int workspace[1000];
2778 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2779 options | g_notempty, use_offsets, use_size_offsets, workspace,
2780 sizeof(workspace)/sizeof(int));
2781 if (count == 0)
2782 {
2783 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2784 count = use_size_offsets/2;
2785 }
2786 }
2787 #endif
2788
2789 else
2790 {
2791 count = pcre_exec(re, extra, (char *)bptr, len,
2792 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2793 if (count == 0)
2794 {
2795 fprintf(outfile, "Matched, but too many substrings\n");
2796 count = use_size_offsets/3;
2797 }
2798 }
2799
2800 /* Matched */
2801
2802 if (count >= 0)
2803 {
2804 int i, maxcount;
2805
2806 #if !defined NODFA
2807 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2808 #endif
2809 maxcount = use_size_offsets/3;
2810
2811 /* This is a check against a lunatic return value. */
2812
2813 if (count > maxcount)
2814 {
2815 fprintf(outfile,
2816 "** PCRE error: returned count %d is too big for offset size %d\n",
2817 count, use_size_offsets);
2818 count = use_size_offsets/3;
2819 if (do_g || do_G)
2820 {
2821 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2822 do_g = do_G = FALSE; /* Break g/G loop */
2823 }
2824 }
2825
2826 /* do_allcaps requests showing of all captures in the pattern, to check
2827 unset ones at the end. */
2828
2829 if (do_allcaps)
2830 {
2831 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2832 count++; /* Allow for full match */
2833 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2834 }
2835
2836 /* Output the captured substrings */
2837
2838 for (i = 0; i < count * 2; i += 2)
2839 {
2840 if (use_offsets[i] < 0)
2841 {
2842 if (use_offsets[i] != -1)
2843 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2844 use_offsets[i], i);
2845 if (use_offsets[i+1] != -1)
2846 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2847 use_offsets[i+1], i+1);
2848 fprintf(outfile, "%2d: <unset>\n", i/2);
2849 }
2850 else
2851 {
2852 fprintf(outfile, "%2d: ", i/2);
2853 (void)pchars(bptr + use_offsets[i],
2854 use_offsets[i+1] - use_offsets[i], outfile);
2855 fprintf(outfile, "\n");
2856 if (do_showcaprest || (i == 0 && do_showrest))
2857 {
2858 fprintf(outfile, "%2d+ ", i/2);
2859 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2860 outfile);
2861 fprintf(outfile, "\n");
2862 }
2863 }
2864 }
2865
2866 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2867
2868 for (i = 0; i < 32; i++)
2869 {
2870 if ((copystrings & (1 << i)) != 0)
2871 {
2872 char copybuffer[256];
2873 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2874 i, copybuffer, sizeof(copybuffer));
2875 if (rc < 0)
2876 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2877 else
2878 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2879 }
2880 }
2881
2882 for (copynamesptr = copynames;
2883 *copynamesptr != 0;
2884 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2885 {
2886 char copybuffer[256];
2887 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2888 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2889 if (rc < 0)
2890 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2891 else
2892 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2893 }
2894
2895 for (i = 0; i < 32; i++)
2896 {
2897 if ((getstrings & (1 << i)) != 0)
2898 {
2899 const char *substring;
2900 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2901 i, &substring);
2902 if (rc < 0)
2903 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2904 else
2905 {
2906 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2907 pcre_free_substring(substring);
2908 }
2909 }
2910 }
2911
2912 for (getnamesptr = getnames;
2913 *getnamesptr != 0;
2914 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2915 {
2916 const char *substring;
2917 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2918 count, (char *)getnamesptr, &substring);
2919 if (rc < 0)
2920 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2921 else
2922 {
2923 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2924 pcre_free_substring(substring);
2925 }
2926 }
2927
2928 if (getlist)
2929 {
2930 const char **stringlist;
2931 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2932 &stringlist);
2933 if (rc < 0)
2934 fprintf(outfile, "get substring list failed %d\n", rc);
2935 else
2936 {
2937 for (i = 0; i < count; i++)
2938 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2939 if (stringlist[i] != NULL)
2940 fprintf(outfile, "string list not terminated by NULL\n");
2941 pcre_free_substring_list(stringlist);
2942 }
2943 }
2944 }
2945
2946 /* There was a partial match */
2947
2948 else if (count == PCRE_ERROR_PARTIAL)
2949 {
2950 if (markptr == NULL) fprintf(outfile, "Partial match");
2951 else fprintf(outfile, "Partial match, mark=%s", markptr);
2952 if (use_size_offsets > 1)
2953 {
2954 fprintf(outfile, ": ");
2955 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2956 outfile);
2957 }
2958 fprintf(outfile, "\n");
2959 break; /* Out of the /g loop */
2960 }
2961
2962 /* Failed to match. If this is a /g or /G loop and we previously set
2963 g_notempty after a null match, this is not necessarily the end. We want
2964 to advance the start offset, and continue. We won't be at the end of the
2965 string - that was checked before setting g_notempty.
2966
2967 Complication arises in the case when the newline convention is "any",
2968 "crlf", or "anycrlf". If the previous match was at the end of a line
2969 terminated by CRLF, an advance of one character just passes the \r,
2970 whereas we should prefer the longer newline sequence, as does the code in
2971 pcre_exec(). Fudge the offset value to achieve this. We check for a
2972 newline setting in the pattern; if none was set, use pcre_config() to
2973 find the default.
2974
2975 Otherwise, in the case of UTF-8 matching, the advance must be one
2976 character, not one byte. */
2977
2978 else
2979 {
2980 if (g_notempty != 0)
2981 {
2982 int onechar = 1;
2983 unsigned int obits = ((real_pcre *)re)->options;
2984 use_offsets[0] = start_offset;
2985 if ((obits & PCRE_NEWLINE_BITS) == 0)
2986 {
2987 int d;
2988 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2989 /* Note that these values are always the ASCII ones, even in
2990 EBCDIC environments. CR = 13, NL = 10. */
2991 obits = (d == 13)? PCRE_NEWLINE_CR :
2992 (d == 10)? PCRE_NEWLINE_LF :
2993 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2994 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2995 (d == -1)? PCRE_NEWLINE_ANY : 0;
2996 }
2997 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2998 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2999 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3000 &&
3001 start_offset < len - 1 &&
3002 bptr[start_offset] == '\r' &&
3003 bptr[start_offset+1] == '\n')
3004 onechar++;
3005 else if (use_utf8)
3006 {
3007 while (start_offset + onechar < len)
3008 {
3009 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3010 onechar++;
3011 }
3012 }
3013 use_offsets[1] = start_offset + onechar;
3014 }
3015 else
3016 {
3017 switch(count)
3018 {
3019 case PCRE_ERROR_NOMATCH:
3020 if (gmatched == 0)
3021 {
3022 if (markptr == NULL) fprintf(outfile, "No match\n");
3023 else fprintf(outfile, "No match, mark = %s\n", markptr);
3024 }
3025 break;
3026
3027 case PCRE_ERROR_BADUTF8:
3028 case PCRE_ERROR_SHORTUTF8:
3029 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3030 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3031 if (use_size_offsets >= 2)
3032 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3033 use_offsets[1]);
3034 fprintf(outfile, "\n");
3035 break;
3036
3037 default:
3038 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3039 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3040 else
3041 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3042 break;
3043 }
3044
3045 break; /* Out of the /g loop */
3046 }
3047 }
3048
3049 /* If not /g or /G we are done */
3050
3051 if (!do_g && !do_G) break;
3052
3053 /* If we have matched an empty string, first check to see if we are at
3054 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3055 Perl's /g option does. This turns out to be rather cunning. First we set
3056 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3057 same point. If this fails (picked up above) we advance to the next
3058 character. */
3059
3060 g_notempty = 0;
3061
3062 if (use_offsets[0] == use_offsets[1])
3063 {
3064 if (use_offsets[0] == len) break;
3065 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3066 }
3067
3068 /* For /g, update the start offset, leaving the rest alone */
3069
3070 if (do_g) start_offset = use_offsets[1];
3071
3072 /* For /G, update the pointer and length */
3073
3074 else
3075 {
3076 bptr += use_offsets[1];
3077 len -= use_offsets[1];
3078 }
3079 } /* End of loop for /g and /G */
3080
3081 NEXT_DATA: continue;
3082 } /* End of loop for data lines */
3083
3084 CONTINUE:
3085
3086 #if !defined NOPOSIX
3087 if (posix || do_posix) regfree(&preg);
3088 #endif
3089
3090 if (re != NULL) new_free(re);
3091 if (extra != NULL) pcre_free_study(extra);
3092 if (locale_set)
3093 {
3094 new_free((void *)tables);
3095 setlocale(LC_CTYPE, "C");
3096 locale_set = 0;
3097 }
3098 if (jit_stack != NULL)
3099 {
3100 pcre_jit_stack_free(jit_stack);
3101 jit_stack = NULL;
3102 }
3103 }
3104
3105 if (infile == stdin) fprintf(outfile, "\n");
3106
3107 EXIT:
3108
3109 if (infile != NULL && infile != stdin) fclose(infile);
3110 if (outfile != NULL && outfile != stdout) fclose(outfile);
3111
3112 free(buffer);
3113 free(dbuffer);
3114 free(pbuffer);
3115 free(offsets);
3116
3117 return yield;
3118 }
3119
3120 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5