/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 580 - (show annotations)
Fri Nov 26 11:16:43 2010 UTC (4 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 87639 byte(s)
Error occurred while calculating annotation data.
Patches to avoid build problems in some Borland environments.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile;            /* stream that callout() and the malloc wrappers write their output to */
181 static int log_store = 0;        /* NOTE(review): not referenced in the visible part of the file - purpose to be confirmed */
182 static int callout_count;        /* incremented by callout() each time the callout number equals callout_fail_id */
183 static int callout_extra;        /* non-zero makes callout() also list the captured substrings and always re-print the subject */
184 static int callout_fail_count;   /* callout() returns 1 once callout_count has reached this value */
185 static int callout_fail_id;      /* the callout number that callout() counts towards callout_fail_count */
186 static int debug_lengths;        /* NOTE(review): not referenced in the visible part of the file - purpose to be confirmed */
187 static int first_callout;        /* non-zero makes callout() print the subject; callout() resets it to zero */
188 static int locale_set = 0;       /* non-zero makes PRINTHEX use isprint() instead of PRINTABLE() */
189 static int show_malloc;          /* non-zero makes the malloc/free wrappers trace each call on outfile */
190 static int use_utf8;             /* non-zero makes pchars() decode its input as UTF-8 */
191 static size_t gotten_store;      /* size passed to the most recent new_malloc() call */
192
193 /* The buffers grow automatically if very long input lines are encountered. */
194
195 static int buffer_size = 50000;  /* current size of each of the three buffers; doubled by extend_inputline() */
196 static uschar *buffer = NULL;    /* input lines are read into this buffer */
197 static uschar *dbuffer = NULL;   /* replaced by a fresh, uninitialized block whenever the buffers grow */
198 static uschar *pbuffer = NULL;   /* callout() prints pattern text from here; contents are kept when the buffers grow */
199
200
201 /*************************************************
202 * Alternate character tables *
203 *************************************************/
204
205 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
206 using the default tables of the library. However, the T option can be used to
207 select alternate sets of tables, for different kinds of testing. Note also that
208 the L (locale) option also adjusts the tables. */
209
210 /* This is the set of tables distributed as default with PCRE. It recognizes
211 only ASCII characters. */
212
213 static const unsigned char tables0[] = {
/* Layout: four tables stored back to back - 256 bytes of lower casing, 256
bytes of case flipping, 320 bytes of character class bit maps (ten maps of 32
bytes), and 256 bytes of character type flags. */
214
215 /* This table is a lower casing table. It maps A-Z (65-90) to a-z (97-122) and
every other value to itself. */
216
217 0, 1, 2, 3, 4, 5, 6, 7,
218 8, 9, 10, 11, 12, 13, 14, 15,
219 16, 17, 18, 19, 20, 21, 22, 23,
220 24, 25, 26, 27, 28, 29, 30, 31,
221 32, 33, 34, 35, 36, 37, 38, 39,
222 40, 41, 42, 43, 44, 45, 46, 47,
223 48, 49, 50, 51, 52, 53, 54, 55,
224 56, 57, 58, 59, 60, 61, 62, 63,
225 64, 97, 98, 99,100,101,102,103,
226 104,105,106,107,108,109,110,111,
227 112,113,114,115,116,117,118,119,
228 120,121,122, 91, 92, 93, 94, 95,
229 96, 97, 98, 99,100,101,102,103,
230 104,105,106,107,108,109,110,111,
231 112,113,114,115,116,117,118,119,
232 120,121,122,123,124,125,126,127,
233 128,129,130,131,132,133,134,135,
234 136,137,138,139,140,141,142,143,
235 144,145,146,147,148,149,150,151,
236 152,153,154,155,156,157,158,159,
237 160,161,162,163,164,165,166,167,
238 168,169,170,171,172,173,174,175,
239 176,177,178,179,180,181,182,183,
240 184,185,186,187,188,189,190,191,
241 192,193,194,195,196,197,198,199,
242 200,201,202,203,204,205,206,207,
243 208,209,210,211,212,213,214,215,
244 216,217,218,219,220,221,222,223,
245 224,225,226,227,228,229,230,231,
246 232,233,234,235,236,237,238,239,
247 240,241,242,243,244,245,246,247,
248 248,249,250,251,252,253,254,255,
249
250 /* This table is a case flipping table. It swaps A-Z with a-z and maps every
other value to itself. */
251
252 0, 1, 2, 3, 4, 5, 6, 7,
253 8, 9, 10, 11, 12, 13, 14, 15,
254 16, 17, 18, 19, 20, 21, 22, 23,
255 24, 25, 26, 27, 28, 29, 30, 31,
256 32, 33, 34, 35, 36, 37, 38, 39,
257 40, 41, 42, 43, 44, 45, 46, 47,
258 48, 49, 50, 51, 52, 53, 54, 55,
259 56, 57, 58, 59, 60, 61, 62, 63,
260 64, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122, 91, 92, 93, 94, 95,
264 96, 65, 66, 67, 68, 69, 70, 71,
265 72, 73, 74, 75, 76, 77, 78, 79,
266 80, 81, 82, 83, 84, 85, 86, 87,
267 88, 89, 90,123,124,125,126,127,
268 128,129,130,131,132,133,134,135,
269 136,137,138,139,140,141,142,143,
270 144,145,146,147,148,149,150,151,
271 152,153,154,155,156,157,158,159,
272 160,161,162,163,164,165,166,167,
273 168,169,170,171,172,173,174,175,
274 176,177,178,179,180,181,182,183,
275 184,185,186,187,188,189,190,191,
276 192,193,194,195,196,197,198,199,
277 200,201,202,203,204,205,206,207,
278 208,209,210,211,212,213,214,215,
279 216,217,218,219,220,221,222,223,
280 224,225,226,227,228,229,230,231,
281 232,233,234,235,236,237,238,239,
282 240,241,242,243,244,245,246,247,
283 248,249,250,251,252,253,254,255,
284
285 /* This table contains bit maps for various character classes. Each map is 32
286 bytes long and the bits run from the least significant end of each byte. The
287 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
288 graph, print, punct, and cntrl. Other classes are built from combinations. */
289
290 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
291 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294
295 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
296 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
297 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
299
300 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
304
305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
306 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
309
310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
311 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
314
315 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
316 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
319
320 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
321 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324
325 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
326 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
327 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329
330 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
331 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334
335 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339
340 /* This table identifies various classes of character by individual bits:
341 0x01 white space character
342 0x02 letter
343 0x04 decimal digit
344 0x08 hexadecimal digit
345 0x10 alphanumeric or '_'
346 0x80 regular expression metacharacter or binary zero
347 */
348
349 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
350 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
351 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
352 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
353 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
354 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
355 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
356 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
357 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
358 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
359 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
360 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
361 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
362 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
363 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
364 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
366 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
375 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
376 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
377 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
378 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
379 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
380 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
381
382 /* This is a set of tables that came originally from a Windows user. It seems to
383 be at least an approximation of ISO 8859. In particular, there are characters
384 greater than 128 that are marked as spaces, letters, etc. */
385
386 static const unsigned char tables1[] = {
/* The data below has the same sizes as the four tables in tables0: 256 bytes
of lower casing, 256 bytes of case flipping, 320 bytes of character class bit
maps and 256 bytes of character type flags. */

/* Lower casing table. As well as A-Z, the values 192-222 other than 215 are
mapped to the value 32 higher. */
387 0,1,2,3,4,5,6,7,
388 8,9,10,11,12,13,14,15,
389 16,17,18,19,20,21,22,23,
390 24,25,26,27,28,29,30,31,
391 32,33,34,35,36,37,38,39,
392 40,41,42,43,44,45,46,47,
393 48,49,50,51,52,53,54,55,
394 56,57,58,59,60,61,62,63,
395 64,97,98,99,100,101,102,103,
396 104,105,106,107,108,109,110,111,
397 112,113,114,115,116,117,118,119,
398 120,121,122,91,92,93,94,95,
399 96,97,98,99,100,101,102,103,
400 104,105,106,107,108,109,110,111,
401 112,113,114,115,116,117,118,119,
402 120,121,122,123,124,125,126,127,
403 128,129,130,131,132,133,134,135,
404 136,137,138,139,140,141,142,143,
405 144,145,146,147,148,149,150,151,
406 152,153,154,155,156,157,158,159,
407 160,161,162,163,164,165,166,167,
408 168,169,170,171,172,173,174,175,
409 176,177,178,179,180,181,182,183,
410 184,185,186,187,188,189,190,191,
411 224,225,226,227,228,229,230,231,
412 232,233,234,235,236,237,238,239,
413 240,241,242,243,244,245,246,215,
414 248,249,250,251,252,253,254,223,
415 224,225,226,227,228,229,230,231,
416 232,233,234,235,236,237,238,239,
417 240,241,242,243,244,245,246,247,
418 248,249,250,251,252,253,254,255,
/* Case flipping table. A-Z and a-z are swapped, and so are 192-222 and 224-254;
the values 215, 223, 247 and 255 map to themselves. */
419 0,1,2,3,4,5,6,7,
420 8,9,10,11,12,13,14,15,
421 16,17,18,19,20,21,22,23,
422 24,25,26,27,28,29,30,31,
423 32,33,34,35,36,37,38,39,
424 40,41,42,43,44,45,46,47,
425 48,49,50,51,52,53,54,55,
426 56,57,58,59,60,61,62,63,
427 64,97,98,99,100,101,102,103,
428 104,105,106,107,108,109,110,111,
429 112,113,114,115,116,117,118,119,
430 120,121,122,91,92,93,94,95,
431 96,65,66,67,68,69,70,71,
432 72,73,74,75,76,77,78,79,
433 80,81,82,83,84,85,86,87,
434 88,89,90,123,124,125,126,127,
435 128,129,130,131,132,133,134,135,
436 136,137,138,139,140,141,142,143,
437 144,145,146,147,148,149,150,151,
438 152,153,154,155,156,157,158,159,
439 160,161,162,163,164,165,166,167,
440 168,169,170,171,172,173,174,175,
441 176,177,178,179,180,181,182,183,
442 184,185,186,187,188,189,190,191,
443 224,225,226,227,228,229,230,231,
444 232,233,234,235,236,237,238,239,
445 240,241,242,243,244,245,246,215,
446 248,249,250,251,252,253,254,223,
447 192,193,194,195,196,197,198,199,
448 200,201,202,203,204,205,206,207,
449 208,209,210,211,212,213,214,247,
450 216,217,218,219,220,221,222,255,
/* Character class bit maps: ten maps of 32 bytes (four lines each).
NOTE(review): presumably in the same order as in tables0 - space, xdigit,
digit, upper, lower, word, graph, print, punct, cntrl - the ASCII part of each
map is consistent with that order. */
451 0,62,0,0,1,0,0,0,
452 0,0,0,0,0,0,0,0,
453 32,0,0,0,1,0,0,0,
454 0,0,0,0,0,0,0,0,
455 0,0,0,0,0,0,255,3,
456 126,0,0,0,126,0,0,0,
457 0,0,0,0,0,0,0,0,
458 0,0,0,0,0,0,0,0,
459 0,0,0,0,0,0,255,3,
460 0,0,0,0,0,0,0,0,
461 0,0,0,0,0,0,12,2,
462 0,0,0,0,0,0,0,0,
463 0,0,0,0,0,0,0,0,
464 254,255,255,7,0,0,0,0,
465 0,0,0,0,0,0,0,0,
466 255,255,127,127,0,0,0,0,
467 0,0,0,0,0,0,0,0,
468 0,0,0,0,254,255,255,7,
469 0,0,0,0,0,4,32,4,
470 0,0,0,128,255,255,127,255,
471 0,0,0,0,0,0,255,3,
472 254,255,255,135,254,255,255,7,
473 0,0,0,0,0,4,44,6,
474 255,255,127,255,255,255,127,255,
475 0,0,0,0,254,255,255,255,
476 255,255,255,255,255,255,255,127,
477 0,0,0,0,254,255,255,255,
478 255,255,255,255,255,255,255,255,
479 0,2,0,0,255,255,255,255,
480 255,255,255,255,255,255,255,127,
481 0,0,0,0,255,255,255,255,
482 255,255,255,255,255,255,255,255,
483 0,0,0,0,254,255,0,252,
484 1,0,0,248,1,0,0,120,
485 0,0,0,0,254,255,255,255,
486 0,0,128,0,0,0,128,0,
487 255,255,255,255,0,0,0,0,
488 0,0,0,0,0,0,0,128,
489 255,255,255,255,0,0,0,0,
490 0,0,0,0,0,0,0,0,
/* Character type flags, one byte per character value, written in decimal. The
ASCII half holds the same values as the corresponding part of tables0. */
491 128,0,0,0,0,0,0,0,
492 0,1,1,0,1,1,0,0,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0,
495 1,0,0,0,128,0,0,0,
496 128,128,128,128,0,0,128,0,
497 28,28,28,28,28,28,28,28,
498 28,28,0,0,0,0,0,128,
499 0,26,26,26,26,26,26,18,
500 18,18,18,18,18,18,18,18,
501 18,18,18,18,18,18,18,18,
502 18,18,18,128,128,0,128,16,
503 0,26,26,26,26,26,26,18,
504 18,18,18,18,18,18,18,18,
505 18,18,18,18,18,18,18,18,
506 18,18,18,128,128,0,0,0,
507 0,0,0,0,0,1,0,0,
508 0,0,0,0,0,0,0,0,
509 0,0,0,0,0,0,0,0,
510 0,0,0,0,0,0,0,0,
511 1,0,0,0,0,0,0,0,
512 0,0,18,0,0,0,0,0,
513 0,0,20,20,0,18,0,0,
514 0,20,18,0,0,0,0,0,
515 18,18,18,18,18,18,18,18,
516 18,18,18,18,18,18,18,18,
517 18,18,18,18,18,18,18,0,
518 18,18,18,18,18,18,18,18,
519 18,18,18,18,18,18,18,18,
520 18,18,18,18,18,18,18,18,
521 18,18,18,18,18,18,18,0,
522 18,18,18,18,18,18,18,18
523 };
524
525
526
527
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is one example). The
message is looked up in the library's sys_errlist[] table, whose length is
given by sys_nerr. A number outside the table yields a fixed string.

Argument:   n   the error number
Returns:        the message text for n
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
547
548
549
550
551 /*************************************************
552 * Read or extend an input line *
553 *************************************************/
554
555 /* Input lines are read into buffer, but both patterns and data lines can be
556 continued over multiple input lines. In addition, if the buffer fills up, we
557 want to automatically expand it so as to be able to handle extremely large
558 lines that are needed for certain stress tests. When the input buffer is
559 expanded, the other two buffers must also be expanded likewise, and the
560 contents of pbuffer, which are a copy of the input for callouts, must be
561 preserved (for when expansion happens for a data line). This is not the most
562 optimal way of handling this, but hey, this is just a test program!
563
564 Arguments:
565 f the file to read
566 start where in buffer to start (this *must* be within buffer)
567 prompt for stdin or readline()
568
569 Returns: pointer to the start of new data
570 could be a copy of start, or could be moved
571 NULL if no data read and EOF reached
572 */
573
574 static uschar *
575 extend_inputline(FILE *f, uschar *start, const char *prompt)
576 {
577 uschar *here = start;
578
579 for (;;)
580 {
581 int rlen = (int)(buffer_size - (here - buffer));
582
583 if (rlen > 1000)
584 {
585 int dlen;
586
587 /* If libreadline support is required, use readline() to read a line if the
588 input is a terminal. Note that readline() removes the trailing newline, so
589 we must put it back again, to be compatible with fgets(). */
590
591 #ifdef SUPPORT_LIBREADLINE
592 if (isatty(fileno(f)))
593 {
594 size_t len;
595 char *s = readline(prompt);
596 if (s == NULL) return (here == start)? NULL : start;
597 len = strlen(s);
598 if (len > 0) add_history(s);
599 if (len > rlen - 1) len = rlen - 1;
600 memcpy(here, s, len);
601 here[len] = '\n';
602 here[len+1] = 0;
603 free(s);
604 }
605 else
606 #endif
607
608 /* Read the next line by normal means, prompting if the file is stdin. */
609
610 {
611 if (f == stdin) printf("%s", prompt);
612 if (fgets((char *)here, rlen, f) == NULL)
613 return (here == start)? NULL : start;
614 }
615
616 dlen = (int)strlen((char *)here);
617 if (dlen > 0 && here[dlen - 1] == '\n') return start;
618 here += dlen;
619 }
620
621 else
622 {
623 int new_buffer_size = 2*buffer_size;
624 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
625 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
626 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
627
628 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
629 {
630 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
631 exit(1);
632 }
633
634 memcpy(new_buffer, buffer, buffer_size);
635 memcpy(new_pbuffer, pbuffer, buffer_size);
636
637 buffer_size = new_buffer_size;
638
639 start = new_buffer + (start - buffer);
640 here = new_buffer + (here - buffer);
641
642 free(buffer);
643 free(dbuffer);
644 free(pbuffer);
645
646 buffer = new_buffer;
647 dbuffer = new_dbuffer;
648 pbuffer = new_pbuffer;
649 }
650 }
651
652 return NULL; /* Control never gets here */
653 }
654
655
656
657
658
659
660
661 /*************************************************
662 * Read number from string *
663 *************************************************/
664
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A value
that does not fit in an int is clamped to INT_MAX instead of overflowing
(signed overflow is undefined behaviour); the digits are still consumed. If
there are no digits the result is zero.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value, as an int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  for (; isdigit(*str); str++)
    {
    int digit = *str - '0';

    /* Test before multiplying, so that the multiplication cannot overflow. */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
685
686
687
688
689 /*************************************************
690 * Convert UTF-8 string to value *
691 *************************************************/
692
693 /* This function takes one or more bytes that represents a UTF-8 character,
694 and returns the value of the character.
695
696 Argument:
697 utf8bytes a pointer to the byte vector
698 vptr a pointer to an int to receive the value
699
700 Returns: > 0 => the number of bytes consumed
701 -6 to 0 => malformed UTF-8 character at offset = (-return)
702 */
703
704 #if !defined NOUTF8
705
706 static int
707 utf82ord(unsigned char *utf8bytes, int *vptr)
708 {
709 int c = *utf8bytes++;
710 int d = c;
711 int i, j, s;
712
713 for (i = -1; i < 6; i++) /* i is number of additional bytes */
714 {
715 if ((d & 0x80) == 0) break;
716 d <<= 1;
717 }
718
719 if (i == -1) { *vptr = c; return 1; } /* ascii character */
720 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
721
722 /* i now has a value in the range 1-5 */
723
724 s = 6*i;
725 d = (c & utf8_table3[i]) << s;
726
727 for (j = 0; j < i; j++)
728 {
729 c = *utf8bytes++;
730 if ((c & 0xc0) != 0x80) return -(j+1);
731 s -= 6;
732 d |= (c & 0x3f) << s;
733 }
734
735 /* Check that encoding was the correct unique one */
736
737 for (j = 0; j < utf8_table1_size; j++)
738 if (d <= utf8_table1[j]) break;
739 if (j != i) return -(i+1);
740
741 /* Valid value */
742
743 *vptr = d;
744 return i+1;
745 }
746
747 #endif
748
749
750
751 /*************************************************
752 * Convert character value to UTF-8 *
753 *************************************************/
754
755 /* This function takes an integer value in the range 0 - 0x7fffffff
756 and encodes it as a UTF-8 character in 0 to 6 bytes.
757
758 Arguments:
759 cvalue the character value
760 utf8bytes pointer to buffer for result - at least 6 bytes long
761
762 Returns: number of characters placed in the buffer
763 */
764
765 #if !defined NOUTF8
766
767 static int
768 ord2utf8(int cvalue, uschar *utf8bytes)
769 {
770 register int i, j;
771 for (i = 0; i < utf8_table1_size; i++)
772 if (cvalue <= utf8_table1[i]) break;
773 utf8bytes += i;
774 for (j = i; j > 0; j--)
775 {
776 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
777 cvalue >>= 6;
778 }
779 *utf8bytes = utf8_table2[i] | cvalue;
780 return i + 1;
781 }
782
783 #endif
784
785
786
787 /*************************************************
788 * Print character string *
789 *************************************************/
790
791 /* Character string printing function. Must handle UTF-8 strings in utf8
792 mode. Yields number of characters printed. If handed a NULL file, just counts
793 chars without printing. */
794
795 static int pchars(unsigned char *p, int length, FILE *f)
796 {
797 int c = 0;
798 int yield = 0;
799
800 while (length-- > 0)
801 {
802 #if !defined NOUTF8
803 if (use_utf8)
804 {
805 int rc = utf82ord(p, &c);
806
807 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
808 {
809 length -= rc - 1;
810 p += rc;
811 if (PRINTHEX(c))
812 {
813 if (f != NULL) fprintf(f, "%c", c);
814 yield++;
815 }
816 else
817 {
818 int n = 4;
819 if (f != NULL) fprintf(f, "\\x{%02x}", c);
820 yield += (n <= 0x000000ff)? 2 :
821 (n <= 0x00000fff)? 3 :
822 (n <= 0x0000ffff)? 4 :
823 (n <= 0x000fffff)? 5 : 6;
824 }
825 continue;
826 }
827 }
828 #endif
829
830 /* Not UTF-8, or malformed UTF-8 */
831
832 c = *p++;
833 if (PRINTHEX(c))
834 {
835 if (f != NULL) fprintf(f, "%c", c);
836 yield++;
837 }
838 else
839 {
840 if (f != NULL) fprintf(f, "\\x%02x", c);
841 yield += 4;
842 }
843 }
844
845 return yield;
846 }
847
848
849
850 /*************************************************
851 * Callout function *
852 *************************************************/
853
854 /* Called from PCRE as a result of the (?C) item. We print out where we are in
855 the match. Yield zero unless more callouts than the fail count, or the callout
856 data is not zero. */
857
858 static int callout(pcre_callout_block *cb)
859 {
860 FILE *f = (first_callout | callout_extra)? outfile : NULL;
861 int i, pre_start, post_start, subject_length;
862
863 if (callout_extra)
864 {
865 fprintf(f, "Callout %d: last capture = %d\n",
866 cb->callout_number, cb->capture_last);
867
868 for (i = 0; i < cb->capture_top * 2; i += 2)
869 {
870 if (cb->offset_vector[i] < 0)
871 fprintf(f, "%2d: <unset>\n", i/2);
872 else
873 {
874 fprintf(f, "%2d: ", i/2);
875 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
876 cb->offset_vector[i+1] - cb->offset_vector[i], f);
877 fprintf(f, "\n");
878 }
879 }
880 }
881
882 /* Re-print the subject in canonical form, the first time or if giving full
883 datails. On subsequent calls in the same match, we use pchars just to find the
884 printed lengths of the substrings. */
885
886 if (f != NULL) fprintf(f, "--->");
887
888 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
889 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
890 cb->current_position - cb->start_match, f);
891
892 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
893
894 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
895 cb->subject_length - cb->current_position, f);
896
897 if (f != NULL) fprintf(f, "\n");
898
899 /* Always print appropriate indicators, with callout number if not already
900 shown. For automatic callouts, show the pattern offset. */
901
902 if (cb->callout_number == 255)
903 {
904 fprintf(outfile, "%+3d ", cb->pattern_position);
905 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
906 }
907 else
908 {
909 if (callout_extra) fprintf(outfile, " ");
910 else fprintf(outfile, "%3d ", cb->callout_number);
911 }
912
913 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
914 fprintf(outfile, "^");
915
916 if (post_start > 0)
917 {
918 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
919 fprintf(outfile, "^");
920 }
921
922 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
923 fprintf(outfile, " ");
924
925 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
926 pbuffer + cb->pattern_position);
927
928 fprintf(outfile, "\n");
929 first_callout = 0;
930
931 if (cb->callout_data != NULL)
932 {
933 int callout_data = *((int *)(cb->callout_data));
934 if (callout_data != 0)
935 {
936 fprintf(outfile, "Callout data = %d\n", callout_data);
937 return callout_data;
938 }
939 }
940
941 return (cb->callout_number != callout_fail_id)? 0 :
942 (++callout_count >= callout_fail_count)? 1 : 0;
943 }
944
945
946 /*************************************************
947 * Local malloc functions *
948 *************************************************/
949
950 /* Alternative malloc function, to test functionality and show the size of the
951 compiled re. */
952
953 static void *new_malloc(size_t size)
954 {
955 void *block = malloc(size);
956 gotten_store = size;
957 if (show_malloc)
958 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
959 return block;
960 }
961
962 static void new_free(void *block)
963 {
964 if (show_malloc)
965 fprintf(outfile, "free %p\n", block);
966 free(block);
967 }
968
969
970 /* For recursion malloc/free, to test stacking calls */
971
972 static void *stack_malloc(size_t size)
973 {
974 void *block = malloc(size);
975 if (show_malloc)
976 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
977 return block;
978 }
979
980 static void stack_free(void *block)
981 {
982 if (show_malloc)
983 fprintf(outfile, "stack_free %p\n", block);
984 free(block);
985 }
986
987
988 /*************************************************
989 * Call pcre_fullinfo() *
990 *************************************************/
991
992 /* Get one piece of information from the pcre_fullinfo() function */
993
994 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
995 {
996 int rc;
997 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
998 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
999 }
1000
1001
1002
1003 /*************************************************
1004 * Byte flipping function *
1005 *************************************************/
1006
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes; any other value reverses the
              four low-order bytes

Returns:    the flipped value; bits above the bytes that were flipped are
            discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;           /* least significant byte */
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2;
  unsigned long int b3;

  if (n == 2)
  {
    return (b0 << 8) | b1;
  }

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1016
1017
1018
1019
1020 /*************************************************
1021 * Check match or recursion limit *
1022 *************************************************/
1023
1024 static int
1025 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1026 int start_offset, int options, int *use_offsets, int use_size_offsets,
1027 int flag, unsigned long int *limit, int errnumber, const char *msg)
1028 {
1029 int count;
1030 int min = 0;
1031 int mid = 64;
1032 int max = -1;
1033
1034 extra->flags |= flag;
1035
1036 for (;;)
1037 {
1038 *limit = mid;
1039
1040 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1041 use_offsets, use_size_offsets);
1042
1043 if (count == errnumber)
1044 {
1045 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1046 min = mid;
1047 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1048 }
1049
1050 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1051 count == PCRE_ERROR_PARTIAL)
1052 {
1053 if (mid == min + 1)
1054 {
1055 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1056 break;
1057 }
1058 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1059 max = mid;
1060 mid = (min + mid)/2;
1061 }
1062 else break; /* Some other error */
1063 }
1064
1065 extra->flags &= ~flag;
1066 return count;
1067 }
1068
1069
1070
1071 /*************************************************
1072 * Case-independent strncmp() function *
1073 *************************************************/
1074
1075 /*
1076 Arguments:
1077 s first string
1078 t second string
1079 n number of characters to compare
1080
1081 Returns: < 0, = 0, or > 0, according to the comparison
1082 */
1083
1084 static int
1085 strncmpic(uschar *s, uschar *t, int n)
1086 {
1087 while (n--)
1088 {
1089 int c = tolower(*s++) - tolower(*t++);
1090 if (c) return c;
1091 }
1092 return 0;
1093 }
1094
1095
1096
1097 /*************************************************
1098 * Check newline indicator *
1099 *************************************************/
1100
1101 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1102 a message and return 0 if there is no match.
1103
1104 Arguments:
1105 p points after the leading '<'
1106 f file for error message
1107
1108 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1109 */
1110
1111 static int
1112 check_newline(uschar *p, FILE *f)
1113 {
1114 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1115 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1116 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1117 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1118 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1119 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1120 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1121 fprintf(f, "Unknown newline type at: <%s\n", p);
1122 return 0;
1123 }
1124
1125
1126
1127 /*************************************************
1128 * Usage function *
1129 *************************************************/
1130
/* Write the command-line help text to stdout. The lines for readline, DFA
matching and the POSIX interface depend on SUPPORT_LIBREADLINE, NODFA and
NOPOSIX respectively. None of the text needs formatting, so it is written with
fputs() in groups of adjacent lines. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#ifndef NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#ifndef NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
1164
1165
1166
1167 /*************************************************
1168 * Main Program *
1169 *************************************************/
1170
1171 /* Read lines from named file or stdin and write to named file or stdout; lines
1172 consist of a regular expression, in delimiters and optionally followed by
1173 options, followed by a set of test data, terminated by an empty line. */
1174
1175 int main(int argc, char **argv)
1176 {
1177 FILE *infile = stdin;
1178 int options = 0;
1179 int study_options = 0;
1180 int default_find_match_limit = FALSE;
1181 int op = 1;
1182 int timeit = 0;
1183 int timeitm = 0;
1184 int showinfo = 0;
1185 int showstore = 0;
1186 int quiet = 0;
1187 int size_offsets = 45;
1188 int size_offsets_max;
1189 int *offsets = NULL;
1190 #if !defined NOPOSIX
1191 int posix = 0;
1192 #endif
1193 int debug = 0;
1194 int done = 0;
1195 int all_use_dfa = 0;
1196 int yield = 0;
1197 int stack_size;
1198
1199 /* These vectors store, end-to-end, a list of captured substring names. Assume
1200 that 1024 is plenty long enough for the few names we'll be testing. */
1201
1202 uschar copynames[1024];
1203 uschar getnames[1024];
1204
1205 uschar *copynamesptr;
1206 uschar *getnamesptr;
1207
1208 /* Get buffers from malloc() so that Electric Fence will check their misuse
1209 when I am debugging. They grow automatically when very long lines are read. */
1210
1211 buffer = (unsigned char *)malloc(buffer_size);
1212 dbuffer = (unsigned char *)malloc(buffer_size);
1213 pbuffer = (unsigned char *)malloc(buffer_size);
1214
1215 /* The outfile variable is static so that new_malloc can use it. */
1216
1217 outfile = stdout;
1218
1219 /* The following _setmode() stuff is some Windows magic that tells its runtime
1220 library to translate CRLF into a single LF character. At least, that's what
1221 I've been told: never having used Windows I take this all on trust. Originally
1222 it set 0x8000, but then I was advised that _O_BINARY was better. */
1223
1224 #if defined(_WIN32) || defined(WIN32)
1225 _setmode( _fileno( stdout ), _O_BINARY );
1226 #endif
1227
1228 /* Scan options */
1229
1230 while (argc > 1 && argv[op][0] == '-')
1231 {
1232 unsigned char *endptr;
1233
1234 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1235 showstore = 1;
1236 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1237 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1238 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1239 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1240 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1241 #if !defined NODFA
1242 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1243 #endif
1244 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1245 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1246 *endptr == 0))
1247 {
1248 op++;
1249 argc--;
1250 }
1251 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1252 {
1253 int both = argv[op][2] == 0;
1254 int temp;
1255 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1256 *endptr == 0))
1257 {
1258 timeitm = temp;
1259 op++;
1260 argc--;
1261 }
1262 else timeitm = LOOPREPEAT;
1263 if (both) timeit = timeitm;
1264 }
1265 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1266 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1267 *endptr == 0))
1268 {
1269 #if defined(_WIN32) || defined(WIN32)
1270 printf("PCRE: -S not supported on this OS\n");
1271 exit(1);
1272 #else
1273 int rc;
1274 struct rlimit rlim;
1275 getrlimit(RLIMIT_STACK, &rlim);
1276 rlim.rlim_cur = stack_size * 1024 * 1024;
1277 rc = setrlimit(RLIMIT_STACK, &rlim);
1278 if (rc != 0)
1279 {
1280 printf("PCRE: setrlimit() failed with error %d\n", rc);
1281 exit(1);
1282 }
1283 op++;
1284 argc--;
1285 #endif
1286 }
1287 #if !defined NOPOSIX
1288 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1289 #endif
1290 else if (strcmp(argv[op], "-C") == 0)
1291 {
1292 int rc;
1293 unsigned long int lrc;
1294 printf("PCRE version %s\n", pcre_version());
1295 printf("Compiled with\n");
1296 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1297 printf(" %sUTF-8 support\n", rc? "" : "No ");
1298 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1299 printf(" %sUnicode properties support\n", rc? "" : "No ");
1300 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1301 /* Note that these values are always the ASCII values, even
1302 in EBCDIC environments. CR is 13 and NL is 10. */
1303 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1304 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1305 (rc == -2)? "ANYCRLF" :
1306 (rc == -1)? "ANY" : "???");
1307 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1308 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1309 "all Unicode newlines");
1310 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1311 printf(" Internal link size = %d\n", rc);
1312 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1313 printf(" POSIX malloc threshold = %d\n", rc);
1314 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1315 printf(" Default match limit = %ld\n", lrc);
1316 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1317 printf(" Default recursion depth limit = %ld\n", lrc);
1318 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1319 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1320 goto EXIT;
1321 }
1322 else if (strcmp(argv[op], "-help") == 0 ||
1323 strcmp(argv[op], "--help") == 0)
1324 {
1325 usage();
1326 goto EXIT;
1327 }
1328 else
1329 {
1330 printf("** Unknown or malformed option %s\n", argv[op]);
1331 usage();
1332 yield = 1;
1333 goto EXIT;
1334 }
1335 op++;
1336 argc--;
1337 }
1338
1339 /* Get the store for the offsets vector, and remember what it was */
1340
1341 size_offsets_max = size_offsets;
1342 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1343 if (offsets == NULL)
1344 {
1345 printf("** Failed to get %d bytes of memory for offsets vector\n",
1346 (int)(size_offsets_max * sizeof(int)));
1347 yield = 1;
1348 goto EXIT;
1349 }
1350
1351 /* Sort out the input and output files */
1352
1353 if (argc > 1)
1354 {
1355 infile = fopen(argv[op], INPUT_MODE);
1356 if (infile == NULL)
1357 {
1358 printf("** Failed to open %s\n", argv[op]);
1359 yield = 1;
1360 goto EXIT;
1361 }
1362 }
1363
1364 if (argc > 2)
1365 {
1366 outfile = fopen(argv[op+1], OUTPUT_MODE);
1367 if (outfile == NULL)
1368 {
1369 printf("** Failed to open %s\n", argv[op+1]);
1370 yield = 1;
1371 goto EXIT;
1372 }
1373 }
1374
1375 /* Set alternative malloc function */
1376
1377 pcre_malloc = new_malloc;
1378 pcre_free = new_free;
1379 pcre_stack_malloc = stack_malloc;
1380 pcre_stack_free = stack_free;
1381
1382 /* Heading line unless quiet, then prompt for first regex if stdin */
1383
1384 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1385
1386 /* Main loop */
1387
1388 while (!done)
1389 {
1390 pcre *re = NULL;
1391 pcre_extra *extra = NULL;
1392
1393 #if !defined NOPOSIX /* There are still compilers that require no indent */
1394 regex_t preg;
1395 int do_posix = 0;
1396 #endif
1397
1398 const char *error;
1399 unsigned char *markptr;
1400 unsigned char *p, *pp, *ppp;
1401 unsigned char *to_file = NULL;
1402 const unsigned char *tables = NULL;
1403 unsigned long int true_size, true_study_size = 0;
1404 size_t size, regex_gotten_store;
1405 int do_mark = 0;
1406 int do_study = 0;
1407 int do_debug = debug;
1408 int do_G = 0;
1409 int do_g = 0;
1410 int do_showinfo = showinfo;
1411 int do_showrest = 0;
1412 int do_flip = 0;
1413 int erroroffset, len, delimiter, poffset;
1414
1415 use_utf8 = 0;
1416 debug_lengths = 1;
1417
1418 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1419 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1420 fflush(outfile);
1421
1422 p = buffer;
1423 while (isspace(*p)) p++;
1424 if (*p == 0) continue;
1425
1426 /* See if the pattern is to be loaded pre-compiled from a file. */
1427
1428 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1429 {
1430 unsigned long int magic, get_options;
1431 uschar sbuf[8];
1432 FILE *f;
1433
1434 p++;
1435 pp = p + (int)strlen((char *)p);
1436 while (isspace(pp[-1])) pp--;
1437 *pp = 0;
1438
1439 f = fopen((char *)p, "rb");
1440 if (f == NULL)
1441 {
1442 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1443 continue;
1444 }
1445
1446 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1447
1448 true_size =
1449 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1450 true_study_size =
1451 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1452
1453 re = (real_pcre *)new_malloc(true_size);
1454 regex_gotten_store = gotten_store;
1455
1456 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1457
1458 magic = ((real_pcre *)re)->magic_number;
1459 if (magic != MAGIC_NUMBER)
1460 {
1461 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1462 {
1463 do_flip = 1;
1464 }
1465 else
1466 {
1467 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1468 fclose(f);
1469 continue;
1470 }
1471 }
1472
1473 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1474 do_flip? " (byte-inverted)" : "", p);
1475
1476 /* Need to know if UTF-8 for printing data strings */
1477
1478 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1479 use_utf8 = (get_options & PCRE_UTF8) != 0;
1480
1481 /* Now see if there is any following study data */
1482
1483 if (true_study_size != 0)
1484 {
1485 pcre_study_data *psd;
1486
1487 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1488 extra->flags = PCRE_EXTRA_STUDY_DATA;
1489
1490 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1491 extra->study_data = psd;
1492
1493 if (fread(psd, 1, true_study_size, f) != true_study_size)
1494 {
1495 FAIL_READ:
1496 fprintf(outfile, "Failed to read data from %s\n", p);
1497 if (extra != NULL) new_free(extra);
1498 if (re != NULL) new_free(re);
1499 fclose(f);
1500 continue;
1501 }
1502 fprintf(outfile, "Study data loaded from %s\n", p);
1503 do_study = 1; /* To get the data output if requested */
1504 }
1505 else fprintf(outfile, "No study data\n");
1506
1507 fclose(f);
1508 goto SHOW_INFO;
1509 }
1510
1511 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1512 the pattern; if it isn't complete, read more. */
1513
1514 delimiter = *p++;
1515
1516 if (isalnum(delimiter) || delimiter == '\\')
1517 {
1518 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1519 goto SKIP_DATA;
1520 }
1521
1522 pp = p;
1523 poffset = (int)(p - buffer);
1524
1525 for(;;)
1526 {
1527 while (*pp != 0)
1528 {
1529 if (*pp == '\\' && pp[1] != 0) pp++;
1530 else if (*pp == delimiter) break;
1531 pp++;
1532 }
1533 if (*pp != 0) break;
1534 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1535 {
1536 fprintf(outfile, "** Unexpected EOF\n");
1537 done = 1;
1538 goto CONTINUE;
1539 }
1540 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1541 }
1542
1543 /* The buffer may have moved while being extended; reset the start of data
1544 pointer to the correct relative point in the buffer. */
1545
1546 p = buffer + poffset;
1547
1548 /* If the first character after the delimiter is backslash, make
1549 the pattern end with backslash. This is purely to provide a way
1550 of testing for the error message when a pattern ends with backslash. */
1551
1552 if (pp[1] == '\\') *pp++ = '\\';
1553
1554 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1555 for callouts. */
1556
1557 *pp++ = 0;
1558 strcpy((char *)pbuffer, (char *)p);
1559
1560 /* Look for options after final delimiter */
1561
1562 options = 0;
1563 study_options = 0;
1564 log_store = showstore; /* default from command line */
1565
1566 while (*pp != 0)
1567 {
1568 switch (*pp++)
1569 {
1570 case 'f': options |= PCRE_FIRSTLINE; break;
1571 case 'g': do_g = 1; break;
1572 case 'i': options |= PCRE_CASELESS; break;
1573 case 'm': options |= PCRE_MULTILINE; break;
1574 case 's': options |= PCRE_DOTALL; break;
1575 case 'x': options |= PCRE_EXTENDED; break;
1576
1577 case '+': do_showrest = 1; break;
1578 case 'A': options |= PCRE_ANCHORED; break;
1579 case 'B': do_debug = 1; break;
1580 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1581 case 'D': do_debug = do_showinfo = 1; break;
1582 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1583 case 'F': do_flip = 1; break;
1584 case 'G': do_G = 1; break;
1585 case 'I': do_showinfo = 1; break;
1586 case 'J': options |= PCRE_DUPNAMES; break;
1587 case 'K': do_mark = 1; break;
1588 case 'M': log_store = 1; break;
1589 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1590
1591 #if !defined NOPOSIX
1592 case 'P': do_posix = 1; break;
1593 #endif
1594
1595 case 'S': do_study = 1; break;
1596 case 'U': options |= PCRE_UNGREEDY; break;
1597 case 'W': options |= PCRE_UCP; break;
1598 case 'X': options |= PCRE_EXTRA; break;
1599 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1600 case 'Z': debug_lengths = 0; break;
1601 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1602 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1603
1604 case 'T':
1605 switch (*pp++)
1606 {
1607 case '0': tables = tables0; break;
1608 case '1': tables = tables1; break;
1609
1610 case '\r':
1611 case '\n':
1612 case ' ':
1613 case 0:
1614 fprintf(outfile, "** Missing table number after /T\n");
1615 goto SKIP_DATA;
1616
1617 default:
1618 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1619 goto SKIP_DATA;
1620 }
1621 break;
1622
1623 case 'L':
1624 ppp = pp;
1625 /* The '\r' test here is so that it works on Windows. */
1626 /* The '0' test is just in case this is an unterminated line. */
1627 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1628 *ppp = 0;
1629 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1630 {
1631 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1632 goto SKIP_DATA;
1633 }
1634 locale_set = 1;
1635 tables = pcre_maketables();
1636 pp = ppp;
1637 break;
1638
1639 case '>':
1640 to_file = pp;
1641 while (*pp != 0) pp++;
1642 while (isspace(pp[-1])) pp--;
1643 *pp = 0;
1644 break;
1645
1646 case '<':
1647 {
1648 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1649 {
1650 options |= PCRE_JAVASCRIPT_COMPAT;
1651 pp += 3;
1652 }
1653 else
1654 {
1655 int x = check_newline(pp, outfile);
1656 if (x == 0) goto SKIP_DATA;
1657 options |= x;
1658 while (*pp++ != '>');
1659 }
1660 }
1661 break;
1662
1663 case '\r': /* So that it works in Windows */
1664 case '\n':
1665 case ' ':
1666 break;
1667
1668 default:
1669 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1670 goto SKIP_DATA;
1671 }
1672 }
1673
1674 /* Handle compiling via the POSIX interface, which doesn't support the
1675 timing, showing, or debugging options, nor the ability to pass over
1676 local character tables. */
1677
1678 #if !defined NOPOSIX
1679 if (posix || do_posix)
1680 {
1681 int rc;
1682 int cflags = 0;
1683
1684 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1685 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1686 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1687 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1688 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1689 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1690 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1691
1692 rc = regcomp(&preg, (char *)p, cflags);
1693
1694 /* Compilation failed; go back for another re, skipping to blank line
1695 if non-interactive. */
1696
1697 if (rc != 0)
1698 {
1699 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1700 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1701 goto SKIP_DATA;
1702 }
1703 }
1704
1705 /* Handle compiling via the native interface */
1706
1707 else
1708 #endif /* !defined NOPOSIX */
1709
1710 {
1711 unsigned long int get_options;
1712
1713 if (timeit > 0)
1714 {
1715 register int i;
1716 clock_t time_taken;
1717 clock_t start_time = clock();
1718 for (i = 0; i < timeit; i++)
1719 {
1720 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1721 if (re != NULL) free(re);
1722 }
1723 time_taken = clock() - start_time;
1724 fprintf(outfile, "Compile time %.4f milliseconds\n",
1725 (((double)time_taken * 1000.0) / (double)timeit) /
1726 (double)CLOCKS_PER_SEC);
1727 }
1728
1729 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1730
1731 /* Compilation failed; go back for another re, skipping to blank line
1732 if non-interactive. */
1733
1734 if (re == NULL)
1735 {
1736 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1737 SKIP_DATA:
1738 if (infile != stdin)
1739 {
1740 for (;;)
1741 {
1742 if (extend_inputline(infile, buffer, NULL) == NULL)
1743 {
1744 done = 1;
1745 goto CONTINUE;
1746 }
1747 len = (int)strlen((char *)buffer);
1748 while (len > 0 && isspace(buffer[len-1])) len--;
1749 if (len == 0) break;
1750 }
1751 fprintf(outfile, "\n");
1752 }
1753 goto CONTINUE;
1754 }
1755
1756 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1757 within the regex; check for this so that we know how to process the data
1758 lines. */
1759
1760 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1761 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1762
1763 /* Print information if required. There are now two info-returning
1764 functions. The old one has a limited interface and returns only limited
1765 data. Check that it agrees with the newer one. */
1766
1767 if (log_store)
1768 fprintf(outfile, "Memory allocation (code space): %d\n",
1769 (int)(gotten_store -
1770 sizeof(real_pcre) -
1771 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1772
1773 /* Extract the size for possible writing before possibly flipping it,
1774 and remember the store that was got. */
1775
1776 true_size = ((real_pcre *)re)->size;
1777 regex_gotten_store = gotten_store;
1778
1779 /* If /S was present, study the regexp to generate additional info to
1780 help with the matching. */
1781
1782 if (do_study)
1783 {
1784 if (timeit > 0)
1785 {
1786 register int i;
1787 clock_t time_taken;
1788 clock_t start_time = clock();
1789 for (i = 0; i < timeit; i++)
1790 extra = pcre_study(re, study_options, &error);
1791 time_taken = clock() - start_time;
1792 if (extra != NULL) free(extra);
1793 fprintf(outfile, " Study time %.4f milliseconds\n",
1794 (((double)time_taken * 1000.0) / (double)timeit) /
1795 (double)CLOCKS_PER_SEC);
1796 }
1797 extra = pcre_study(re, study_options, &error);
1798 if (error != NULL)
1799 fprintf(outfile, "Failed to study: %s\n", error);
1800 else if (extra != NULL)
1801 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1802 }
1803
1804 /* If /K was present, we set up for handling MARK data. */
1805
1806 if (do_mark)
1807 {
1808 if (extra == NULL)
1809 {
1810 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1811 extra->flags = 0;
1812 }
1813 extra->mark = &markptr;
1814 extra->flags |= PCRE_EXTRA_MARK;
1815 }
1816
1817 /* If the 'F' option was present, we flip the bytes of all the integer
1818 fields in the regex data block and the study block. This is to make it
1819 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1820 compiled on a different architecture. */
1821
1822 if (do_flip)
1823 {
1824 real_pcre *rre = (real_pcre *)re;
1825 rre->magic_number =
1826 byteflip(rre->magic_number, sizeof(rre->magic_number));
1827 rre->size = byteflip(rre->size, sizeof(rre->size));
1828 rre->options = byteflip(rre->options, sizeof(rre->options));
1829 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1830 rre->top_bracket =
1831 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1832 rre->top_backref =
1833 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1834 rre->first_byte =
1835 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1836 rre->req_byte =
1837 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1838 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1839 sizeof(rre->name_table_offset));
1840 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1841 sizeof(rre->name_entry_size));
1842 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1843 sizeof(rre->name_count));
1844
1845 if (extra != NULL)
1846 {
1847 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1848 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1849 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1850 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1851 }
1852 }
1853
1854 /* Extract information from the compiled data if required */
1855
1856 SHOW_INFO:
1857
1858 if (do_debug)
1859 {
1860 fprintf(outfile, "------------------------------------------------------------------\n");
1861 pcre_printint(re, outfile, debug_lengths);
1862 }
1863
1864 /* We already have the options in get_options (see above) */
1865
1866 if (do_showinfo)
1867 {
1868 unsigned long int all_options;
1869 #if !defined NOINFOCHECK
1870 int old_first_char, old_options, old_count;
1871 #endif
1872 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1873 hascrorlf;
1874 int nameentrysize, namecount;
1875 const uschar *nametable;
1876
1877 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1878 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1879 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1880 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1881 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1882 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1883 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1884 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1885 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1886 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1887 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1888
1889 #if !defined NOINFOCHECK
1890 old_count = pcre_info(re, &old_options, &old_first_char);
1891 if (count < 0) fprintf(outfile,
1892 "Error %d from pcre_info()\n", count);
1893 else
1894 {
1895 if (old_count != count) fprintf(outfile,
1896 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1897 old_count);
1898
1899 if (old_first_char != first_char) fprintf(outfile,
1900 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1901 first_char, old_first_char);
1902
1903 if (old_options != (int)get_options) fprintf(outfile,
1904 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1905 get_options, old_options);
1906 }
1907 #endif
1908
1909 if (size != regex_gotten_store) fprintf(outfile,
1910 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1911 (int)size, (int)regex_gotten_store);
1912
1913 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1914 if (backrefmax > 0)
1915 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1916
1917 if (namecount > 0)
1918 {
1919 fprintf(outfile, "Named capturing subpatterns:\n");
1920 while (namecount-- > 0)
1921 {
1922 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1923 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1924 GET2(nametable, 0));
1925 nametable += nameentrysize;
1926 }
1927 }
1928
1929 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1930 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1931
1932 all_options = ((real_pcre *)re)->options;
1933 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1934
1935 if (get_options == 0) fprintf(outfile, "No options\n");
1936 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1937 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1938 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1939 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1940 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1941 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1942 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1943 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1944 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1945 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1946 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1947 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1948 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1949 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1950 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1951 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1952 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1953 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1954
1955 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1956
1957 switch (get_options & PCRE_NEWLINE_BITS)
1958 {
1959 case PCRE_NEWLINE_CR:
1960 fprintf(outfile, "Forced newline sequence: CR\n");
1961 break;
1962
1963 case PCRE_NEWLINE_LF:
1964 fprintf(outfile, "Forced newline sequence: LF\n");
1965 break;
1966
1967 case PCRE_NEWLINE_CRLF:
1968 fprintf(outfile, "Forced newline sequence: CRLF\n");
1969 break;
1970
1971 case PCRE_NEWLINE_ANYCRLF:
1972 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1973 break;
1974
1975 case PCRE_NEWLINE_ANY:
1976 fprintf(outfile, "Forced newline sequence: ANY\n");
1977 break;
1978
1979 default:
1980 break;
1981 }
1982
1983 if (first_char == -1)
1984 {
1985 fprintf(outfile, "First char at start or follows newline\n");
1986 }
1987 else if (first_char < 0)
1988 {
1989 fprintf(outfile, "No first char\n");
1990 }
1991 else
1992 {
1993 int ch = first_char & 255;
1994 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1995 "" : " (caseless)";
1996 if (PRINTHEX(ch))
1997 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1998 else
1999 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2000 }
2001
2002 if (need_char < 0)
2003 {
2004 fprintf(outfile, "No need char\n");
2005 }
2006 else
2007 {
2008 int ch = need_char & 255;
2009 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2010 "" : " (caseless)";
2011 if (PRINTHEX(ch))
2012 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2013 else
2014 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2015 }
2016
2017 /* Don't output study size; at present it is in any case a fixed
2018 value, but it varies, depending on the computer architecture, and
2019 so messes up the test suite. (And with the /F option, it might be
2020 flipped.) */
2021
2022 if (do_study)
2023 {
2024 if (extra == NULL)
2025 fprintf(outfile, "Study returned NULL\n");
2026 else
2027 {
2028 uschar *start_bits = NULL;
2029 int minlength;
2030
2031 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2032 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2033
2034 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2035 if (start_bits == NULL)
2036 fprintf(outfile, "No set of starting bytes\n");
2037 else
2038 {
2039 int i;
2040 int c = 24;
2041 fprintf(outfile, "Starting byte set: ");
2042 for (i = 0; i < 256; i++)
2043 {
2044 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2045 {
2046 if (c > 75)
2047 {
2048 fprintf(outfile, "\n ");
2049 c = 2;
2050 }
2051 if (PRINTHEX(i) && i != ' ')
2052 {
2053 fprintf(outfile, "%c ", i);
2054 c += 2;
2055 }
2056 else
2057 {
2058 fprintf(outfile, "\\x%02x ", i);
2059 c += 5;
2060 }
2061 }
2062 }
2063 fprintf(outfile, "\n");
2064 }
2065 }
2066 }
2067 }
2068
2069 /* If the '>' option was present, we write out the regex to a file, and
2070 that is all. The first 8 bytes of the file are the regex length and then
2071 the study length, in big-endian order. */
2072
2073 if (to_file != NULL)
2074 {
2075 FILE *f = fopen((char *)to_file, "wb");
2076 if (f == NULL)
2077 {
2078 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2079 }
2080 else
2081 {
2082 uschar sbuf[8];
2083 sbuf[0] = (uschar)((true_size >> 24) & 255);
2084 sbuf[1] = (uschar)((true_size >> 16) & 255);
2085 sbuf[2] = (uschar)((true_size >> 8) & 255);
2086 sbuf[3] = (uschar)((true_size) & 255);
2087
2088 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2089 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2090 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2091 sbuf[7] = (uschar)((true_study_size) & 255);
2092
2093 if (fwrite(sbuf, 1, 8, f) < 8 ||
2094 fwrite(re, 1, true_size, f) < true_size)
2095 {
2096 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2097 }
2098 else
2099 {
2100 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2101 if (extra != NULL)
2102 {
2103 if (fwrite(extra->study_data, 1, true_study_size, f) <
2104 true_study_size)
2105 {
2106 fprintf(outfile, "Write error on %s: %s\n", to_file,
2107 strerror(errno));
2108 }
2109 else fprintf(outfile, "Study data written to %s\n", to_file);
2110
2111 }
2112 }
2113 fclose(f);
2114 }
2115
2116 new_free(re);
2117 if (extra != NULL) new_free(extra);
2118 if (locale_set)
2119 {
2120 new_free((void *)tables);
2121 setlocale(LC_CTYPE, "C");
2122 locale_set = 0;
2123 }
2124 continue; /* With next regex */
2125 }
2126 } /* End of non-POSIX compile */
2127
2128 /* Read data lines and test them */
2129
2130 for (;;)
2131 {
2132 uschar *q;
2133 uschar *bptr;
2134 int *use_offsets = offsets;
2135 int use_size_offsets = size_offsets;
2136 int callout_data = 0;
2137 int callout_data_set = 0;
2138 int count, c;
2139 int copystrings = 0;
2140 int find_match_limit = default_find_match_limit;
2141 int getstrings = 0;
2142 int getlist = 0;
2143 int gmatched = 0;
2144 int start_offset = 0;
2145 int start_offset_sign = 1;
2146 int g_notempty = 0;
2147 int use_dfa = 0;
2148
2149 options = 0;
2150
2151 *copynames = 0;
2152 *getnames = 0;
2153
2154 copynamesptr = copynames;
2155 getnamesptr = getnames;
2156
2157 pcre_callout = callout;
2158 first_callout = 1;
2159 callout_extra = 0;
2160 callout_count = 0;
2161 callout_fail_count = 999999;
2162 callout_fail_id = -1;
2163 show_malloc = 0;
2164
2165 if (extra != NULL) extra->flags &=
2166 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2167
2168 len = 0;
2169 for (;;)
2170 {
2171 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2172 {
2173 if (len > 0) /* Reached EOF without hitting a newline */
2174 {
2175 fprintf(outfile, "\n");
2176 break;
2177 }
2178 done = 1;
2179 goto CONTINUE;
2180 }
2181 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2182 len = (int)strlen((char *)buffer);
2183 if (buffer[len-1] == '\n') break;
2184 }
2185
2186 while (len > 0 && isspace(buffer[len-1])) len--;
2187 buffer[len] = 0;
2188 if (len == 0) break;
2189
2190 p = buffer;
2191 while (isspace(*p)) p++;
2192
2193 bptr = q = dbuffer;
2194 while ((c = *p++) != 0)
2195 {
2196 int i = 0;
2197 int n = 0;
2198
2199 if (c == '\\') switch ((c = *p++))
2200 {
2201 case 'a': c = 7; break;
2202 case 'b': c = '\b'; break;
2203 case 'e': c = 27; break;
2204 case 'f': c = '\f'; break;
2205 case 'n': c = '\n'; break;
2206 case 'r': c = '\r'; break;
2207 case 't': c = '\t'; break;
2208 case 'v': c = '\v'; break;
2209
2210 case '0': case '1': case '2': case '3':
2211 case '4': case '5': case '6': case '7':
2212 c -= '0';
2213 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2214 c = c * 8 + *p++ - '0';
2215
2216 #if !defined NOUTF8
2217 if (use_utf8 && c > 255)
2218 {
2219 unsigned char buff8[8];
2220 int ii, utn;
2221 utn = ord2utf8(c, buff8);
2222 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2223 c = buff8[ii]; /* Last byte */
2224 }
2225 #endif
2226 break;
2227
2228 case 'x':
2229
2230 /* Handle \x{..} specially - new Perl thing for utf8 */
2231
2232 #if !defined NOUTF8
2233 if (*p == '{')
2234 {
2235 unsigned char *pt = p;
2236 c = 0;
2237 while (isxdigit(*(++pt)))
2238 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2239 if (*pt == '}')
2240 {
2241 unsigned char buff8[8];
2242 int ii, utn;
2243 if (use_utf8)
2244 {
2245 utn = ord2utf8(c, buff8);
2246 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2247 c = buff8[ii]; /* Last byte */
2248 }
2249 else
2250 {
2251 if (c > 255)
2252 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2253 "UTF-8 mode is not enabled.\n"
2254 "** Truncation will probably give the wrong result.\n", c);
2255 }
2256 p = pt + 1;
2257 break;
2258 }
2259 /* Not correct form; fall through */
2260 }
2261 #endif
2262
2263 /* Ordinary \x */
2264
2265 c = 0;
2266 while (i++ < 2 && isxdigit(*p))
2267 {
2268 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2269 p++;
2270 }
2271 break;
2272
2273 case 0: /* \ followed by EOF allows for an empty line */
2274 p--;
2275 continue;
2276
2277 case '>':
2278 if (*p == '-')
2279 {
2280 start_offset_sign = -1;
2281 p++;
2282 }
2283 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2284 start_offset *= start_offset_sign;
2285 continue;
2286
2287 case 'A': /* Option setting */
2288 options |= PCRE_ANCHORED;
2289 continue;
2290
2291 case 'B':
2292 options |= PCRE_NOTBOL;
2293 continue;
2294
2295 case 'C':
2296 if (isdigit(*p)) /* Set copy string */
2297 {
2298 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2299 copystrings |= 1 << n;
2300 }
2301 else if (isalnum(*p))
2302 {
2303 uschar *npp = copynamesptr;
2304 while (isalnum(*p)) *npp++ = *p++;
2305 *npp++ = 0;
2306 *npp = 0;
2307 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2308 if (n < 0)
2309 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2310 copynamesptr = npp;
2311 }
2312 else if (*p == '+')
2313 {
2314 callout_extra = 1;
2315 p++;
2316 }
2317 else if (*p == '-')
2318 {
2319 pcre_callout = NULL;
2320 p++;
2321 }
2322 else if (*p == '!')
2323 {
2324 callout_fail_id = 0;
2325 p++;
2326 while(isdigit(*p))
2327 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2328 callout_fail_count = 0;
2329 if (*p == '!')
2330 {
2331 p++;
2332 while(isdigit(*p))
2333 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2334 }
2335 }
2336 else if (*p == '*')
2337 {
2338 int sign = 1;
2339 callout_data = 0;
2340 if (*(++p) == '-') { sign = -1; p++; }
2341 while(isdigit(*p))
2342 callout_data = callout_data * 10 + *p++ - '0';
2343 callout_data *= sign;
2344 callout_data_set = 1;
2345 }
2346 continue;
2347
2348 #if !defined NODFA
2349 case 'D':
2350 #if !defined NOPOSIX
2351 if (posix || do_posix)
2352 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2353 else
2354 #endif
2355 use_dfa = 1;
2356 continue;
2357 #endif
2358
2359 #if !defined NODFA
2360 case 'F':
2361 options |= PCRE_DFA_SHORTEST;
2362 continue;
2363 #endif
2364
2365 case 'G':
2366 if (isdigit(*p))
2367 {
2368 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2369 getstrings |= 1 << n;
2370 }
2371 else if (isalnum(*p))
2372 {
2373 uschar *npp = getnamesptr;
2374 while (isalnum(*p)) *npp++ = *p++;
2375 *npp++ = 0;
2376 *npp = 0;
2377 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2378 if (n < 0)
2379 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2380 getnamesptr = npp;
2381 }
2382 continue;
2383
2384 case 'L':
2385 getlist = 1;
2386 continue;
2387
2388 case 'M':
2389 find_match_limit = 1;
2390 continue;
2391
2392 case 'N':
2393 if ((options & PCRE_NOTEMPTY) != 0)
2394 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2395 else
2396 options |= PCRE_NOTEMPTY;
2397 continue;
2398
2399 case 'O':
2400 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2401 if (n > size_offsets_max)
2402 {
2403 size_offsets_max = n;
2404 free(offsets);
2405 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2406 if (offsets == NULL)
2407 {
2408 printf("** Failed to get %d bytes of memory for offsets vector\n",
2409 (int)(size_offsets_max * sizeof(int)));
2410 yield = 1;
2411 goto EXIT;
2412 }
2413 }
2414 use_size_offsets = n;
2415 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2416 continue;
2417
2418 case 'P':
2419 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2420 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2421 continue;
2422
2423 case 'Q':
2424 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2425 if (extra == NULL)
2426 {
2427 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2428 extra->flags = 0;
2429 }
2430 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2431 extra->match_limit_recursion = n;
2432 continue;
2433
2434 case 'q':
2435 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2436 if (extra == NULL)
2437 {
2438 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2439 extra->flags = 0;
2440 }
2441 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2442 extra->match_limit = n;
2443 continue;
2444
2445 #if !defined NODFA
2446 case 'R':
2447 options |= PCRE_DFA_RESTART;
2448 continue;
2449 #endif
2450
2451 case 'S':
2452 show_malloc = 1;
2453 continue;
2454
2455 case 'Y':
2456 options |= PCRE_NO_START_OPTIMIZE;
2457 continue;
2458
2459 case 'Z':
2460 options |= PCRE_NOTEOL;
2461 continue;
2462
2463 case '?':
2464 options |= PCRE_NO_UTF8_CHECK;
2465 continue;
2466
2467 case '<':
2468 {
2469 int x = check_newline(p, outfile);
2470 if (x == 0) goto NEXT_DATA;
2471 options |= x;
2472 while (*p++ != '>');
2473 }
2474 continue;
2475 }
2476 *q++ = c;
2477 }
2478 *q = 0;
2479 len = (int)(q - dbuffer);
2480
2481 /* Move the data to the end of the buffer so that a read over the end of
2482 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2483 we are using the POSIX interface, we must include the terminating zero. */
2484
2485 #if !defined NOPOSIX
2486 if (posix || do_posix)
2487 {
2488 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2489 bptr += buffer_size - len - 1;
2490 }
2491 else
2492 #endif
2493 {
2494 memmove(bptr + buffer_size - len, bptr, len);
2495 bptr += buffer_size - len;
2496 }
2497
2498 if ((all_use_dfa || use_dfa) && find_match_limit)
2499 {
2500 printf("**Match limit not relevant for DFA matching: ignored\n");
2501 find_match_limit = 0;
2502 }
2503
2504 /* Handle matching via the POSIX interface, which does not
2505 support timing or playing with the match limit or callout data. */
2506
2507 #if !defined NOPOSIX
2508 if (posix || do_posix)
2509 {
2510 int rc;
2511 int eflags = 0;
2512 regmatch_t *pmatch = NULL;
2513 if (use_size_offsets > 0)
2514 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2515 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2516 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2517 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2518
2519 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2520
2521 if (rc != 0)
2522 {
2523 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2524 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2525 }
2526 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2527 != 0)
2528 {
2529 fprintf(outfile, "Matched with REG_NOSUB\n");
2530 }
2531 else
2532 {
2533 size_t i;
2534 for (i = 0; i < (size_t)use_size_offsets; i++)
2535 {
2536 if (pmatch[i].rm_so >= 0)
2537 {
2538 fprintf(outfile, "%2d: ", (int)i);
2539 (void)pchars(dbuffer + pmatch[i].rm_so,
2540 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2541 fprintf(outfile, "\n");
2542 if (i == 0 && do_showrest)
2543 {
2544 fprintf(outfile, " 0+ ");
2545 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2546 outfile);
2547 fprintf(outfile, "\n");
2548 }
2549 }
2550 }
2551 }
2552 free(pmatch);
2553 }
2554
2555 /* Handle matching via the native interface - repeats for /g and /G */
2556
2557 else
2558 #endif /* !defined NOPOSIX */
2559
2560 for (;; gmatched++) /* Loop for /g or /G */
2561 {
2562 markptr = NULL;
2563
2564 if (timeitm > 0)
2565 {
2566 register int i;
2567 clock_t time_taken;
2568 clock_t start_time = clock();
2569
2570 #if !defined NODFA
2571 if (all_use_dfa || use_dfa)
2572 {
2573 int workspace[1000];
2574 for (i = 0; i < timeitm; i++)
2575 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2576 options | g_notempty, use_offsets, use_size_offsets, workspace,
2577 sizeof(workspace)/sizeof(int));
2578 }
2579 else
2580 #endif
2581
2582 for (i = 0; i < timeitm; i++)
2583 count = pcre_exec(re, extra, (char *)bptr, len,
2584 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2585
2586 time_taken = clock() - start_time;
2587 fprintf(outfile, "Execute time %.4f milliseconds\n",
2588 (((double)time_taken * 1000.0) / (double)timeitm) /
2589 (double)CLOCKS_PER_SEC);
2590 }
2591
2592 /* If find_match_limit is set, we want to do repeated matches with
2593 varying limits in order to find the minimum value for the match limit and
2594 for the recursion limit. */
2595
2596 if (find_match_limit)
2597 {
2598 if (extra == NULL)
2599 {
2600 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2601 extra->flags = 0;
2602 }
2603
2604 (void)check_match_limit(re, extra, bptr, len, start_offset,
2605 options|g_notempty, use_offsets, use_size_offsets,
2606 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2607 PCRE_ERROR_MATCHLIMIT, "match()");
2608
2609 count = check_match_limit(re, extra, bptr, len, start_offset,
2610 options|g_notempty, use_offsets, use_size_offsets,
2611 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2612 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2613 }
2614
2615 /* If callout_data is set, use the interface with additional data */
2616
2617 else if (callout_data_set)
2618 {
2619 if (extra == NULL)
2620 {
2621 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2622 extra->flags = 0;
2623 }
2624 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2625 extra->callout_data = &callout_data;
2626 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2627 options | g_notempty, use_offsets, use_size_offsets);
2628 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2629 }
2630
2631 /* The normal case is just to do the match once, with the default
2632 value of match_limit. */
2633
2634 #if !defined NODFA
2635 else if (all_use_dfa || use_dfa)
2636 {
2637 int workspace[1000];
2638 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639 options | g_notempty, use_offsets, use_size_offsets, workspace,
2640 sizeof(workspace)/sizeof(int));
2641 if (count == 0)
2642 {
2643 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2644 count = use_size_offsets/2;
2645 }
2646 }
2647 #endif
2648
2649 else
2650 {
2651 count = pcre_exec(re, extra, (char *)bptr, len,
2652 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2653 if (count == 0)
2654 {
2655 fprintf(outfile, "Matched, but too many substrings\n");
2656 count = use_size_offsets/3;
2657 }
2658 }
2659
2660 /* Matched */
2661
2662 if (count >= 0)
2663 {
2664 int i, maxcount;
2665
2666 #if !defined NODFA
2667 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2668 #endif
2669 maxcount = use_size_offsets/3;
2670
2671 /* This is a check against a lunatic return value. */
2672
2673 if (count > maxcount)
2674 {
2675 fprintf(outfile,
2676 "** PCRE error: returned count %d is too big for offset size %d\n",
2677 count, use_size_offsets);
2678 count = use_size_offsets/3;
2679 if (do_g || do_G)
2680 {
2681 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2682 do_g = do_G = FALSE; /* Break g/G loop */
2683 }
2684 }
2685
2686 for (i = 0; i < count * 2; i += 2)
2687 {
2688 if (use_offsets[i] < 0)
2689 fprintf(outfile, "%2d: <unset>\n", i/2);
2690 else
2691 {
2692 fprintf(outfile, "%2d: ", i/2);
2693 (void)pchars(bptr + use_offsets[i],
2694 use_offsets[i+1] - use_offsets[i], outfile);
2695 fprintf(outfile, "\n");
2696 if (i == 0)
2697 {
2698 if (do_showrest)
2699 {
2700 fprintf(outfile, " 0+ ");
2701 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2702 outfile);
2703 fprintf(outfile, "\n");
2704 }
2705 }
2706 }
2707 }
2708
2709 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2710
2711 for (i = 0; i < 32; i++)
2712 {
2713 if ((copystrings & (1 << i)) != 0)
2714 {
2715 char copybuffer[256];
2716 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2717 i, copybuffer, sizeof(copybuffer));
2718 if (rc < 0)
2719 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2720 else
2721 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2722 }
2723 }
2724
2725 for (copynamesptr = copynames;
2726 *copynamesptr != 0;
2727 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2728 {
2729 char copybuffer[256];
2730 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2731 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2732 if (rc < 0)
2733 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2734 else
2735 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2736 }
2737
2738 for (i = 0; i < 32; i++)
2739 {
2740 if ((getstrings & (1 << i)) != 0)
2741 {
2742 const char *substring;
2743 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2744 i, &substring);
2745 if (rc < 0)
2746 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2747 else
2748 {
2749 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2750 pcre_free_substring(substring);
2751 }
2752 }
2753 }
2754
2755 for (getnamesptr = getnames;
2756 *getnamesptr != 0;
2757 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2758 {
2759 const char *substring;
2760 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2761 count, (char *)getnamesptr, &substring);
2762 if (rc < 0)
2763 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2764 else
2765 {
2766 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2767 pcre_free_substring(substring);
2768 }
2769 }
2770
2771 if (getlist)
2772 {
2773 const char **stringlist;
2774 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2775 &stringlist);
2776 if (rc < 0)
2777 fprintf(outfile, "get substring list failed %d\n", rc);
2778 else
2779 {
2780 for (i = 0; i < count; i++)
2781 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2782 if (stringlist[i] != NULL)
2783 fprintf(outfile, "string list not terminated by NULL\n");
2784 /* free((void *)stringlist); */
2785 pcre_free_substring_list(stringlist);
2786 }
2787 }
2788 }
2789
2790 /* There was a partial match */
2791
2792 else if (count == PCRE_ERROR_PARTIAL)
2793 {
2794 if (markptr == NULL) fprintf(outfile, "Partial match");
2795 else fprintf(outfile, "Partial match, mark=%s", markptr);
2796 if (use_size_offsets > 1)
2797 {
2798 fprintf(outfile, ": ");
2799 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2800 outfile);
2801 }
2802 fprintf(outfile, "\n");
2803 break; /* Out of the /g loop */
2804 }
2805
2806 /* Failed to match. If this is a /g or /G loop and we previously set
2807 g_notempty after a null match, this is not necessarily the end. We want
2808 to advance the start offset, and continue. We won't be at the end of the
2809 string - that was checked before setting g_notempty.
2810
2811 Complication arises in the case when the newline convention is "any",
2812 "crlf", or "anycrlf". If the previous match was at the end of a line
2813 terminated by CRLF, an advance of one character just passes the \r,
2814 whereas we should prefer the longer newline sequence, as does the code in
2815 pcre_exec(). Fudge the offset value to achieve this. We check for a
2816 newline setting in the pattern; if none was set, use pcre_config() to
2817 find the default.
2818
2819 Otherwise, in the case of UTF-8 matching, the advance must be one
2820 character, not one byte. */
2821
2822 else
2823 {
2824 if (g_notempty != 0)
2825 {
2826 int onechar = 1;
2827 unsigned int obits = ((real_pcre *)re)->options;
2828 use_offsets[0] = start_offset;
2829 if ((obits & PCRE_NEWLINE_BITS) == 0)
2830 {
2831 int d;
2832 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2833 /* Note that these values are always the ASCII ones, even in
2834 EBCDIC environments. CR = 13, NL = 10. */
2835 obits = (d == 13)? PCRE_NEWLINE_CR :
2836 (d == 10)? PCRE_NEWLINE_LF :
2837 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2838 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2839 (d == -1)? PCRE_NEWLINE_ANY : 0;
2840 }
2841 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2842 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2843 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2844 &&
2845 start_offset < len - 1 &&
2846 bptr[start_offset] == '\r' &&
2847 bptr[start_offset+1] == '\n')
2848 onechar++;
2849 else if (use_utf8)
2850 {
2851 while (start_offset + onechar < len)
2852 {
2853 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2854 onechar++;
2855 }
2856 }
2857 use_offsets[1] = start_offset + onechar;
2858 }
2859 else
2860 {
2861 if (count == PCRE_ERROR_NOMATCH)
2862 {
2863 if (gmatched == 0)
2864 {
2865 if (markptr == NULL) fprintf(outfile, "No match\n");
2866 else fprintf(outfile, "No match, mark = %s\n", markptr);
2867 }
2868 }
2869 else fprintf(outfile, "Error %d\n", count);
2870 break; /* Out of the /g loop */
2871 }
2872 }
2873
2874 /* If not /g or /G we are done */
2875
2876 if (!do_g && !do_G) break;
2877
2878 /* If we have matched an empty string, first check to see if we are at
2879 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2880 Perl's /g option does. This turns out to be rather cunning. First we set
2881 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2882 same point. If this fails (picked up above) we advance to the next
2883 character. */
2884
2885 g_notempty = 0;
2886
2887 if (use_offsets[0] == use_offsets[1])
2888 {
2889 if (use_offsets[0] == len) break;
2890 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2891 }
2892
2893 /* For /g, update the start offset, leaving the rest alone */
2894
2895 if (do_g) start_offset = use_offsets[1];
2896
2897 /* For /G, update the pointer and length */
2898
2899 else
2900 {
2901 bptr += use_offsets[1];
2902 len -= use_offsets[1];
2903 }
2904 } /* End of loop for /g and /G */
2905
2906 NEXT_DATA: continue;
2907 } /* End of loop for data lines */
2908
2909 CONTINUE:
2910
2911 #if !defined NOPOSIX
2912 if (posix || do_posix) regfree(&preg);
2913 #endif
2914
2915 if (re != NULL) new_free(re);
2916 if (extra != NULL) new_free(extra);
2917 if (locale_set)
2918 {
2919 new_free((void *)tables);
2920 setlocale(LC_CTYPE, "C");
2921 locale_set = 0;
2922 }
2923 }
2924
2925 if (infile == stdin) fprintf(outfile, "\n");
2926
2927 EXIT:
2928
2929 if (infile != NULL && infile != stdin) fclose(infile);
2930 if (outfile != NULL && outfile != stdout) fclose(outfile);
2931
2932 free(buffer);
2933 free(dbuffer);
2934 free(pbuffer);
2935 free(offsets);
2936
2937 return yield;
2938 }
2939
2940 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5