/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 580 by ph10, Fri Nov 26 11:16:43 2010 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88  /* Use the internal info for displaying the results of pcre_study(). */  /* Not Windows */
89    
90  #include "internal.h"  #else
91    #include <sys/time.h>          /* These two includes are needed */
92    #include <sys/resource.h>      /* for setrlimit(). */
93    #define INPUT_MODE   "rb"
94    #define OUTPUT_MODE  "wb"
95    #endif
96    
97    
98    /* We have to include pcre_internal.h because we need the internal info for
99    displaying the results of pcre_study() and we also need to know about the
100    internal macros, structures, and other internal data values; pcretest has
101    "inside information" compared to a program that strictly follows the PCRE API.
102    
103    Although pcre_internal.h does itself include pcre.h, we explicitly include it
104    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105    appropriately for an application, not for building PCRE. */
106    
107    #include "pcre.h"
108    #include "pcre_internal.h"
109    
110    /* We need access to some of the data tables that PCRE uses. So as not to have
111    to keep two copies, we include the source file here, changing the names of the
112    external symbols to prevent clashes. */
113    
114    #define _pcre_ucp_gentype      ucp_gentype
115    #define _pcre_utf8_table1      utf8_table1
116    #define _pcre_utf8_table1_size utf8_table1_size
117    #define _pcre_utf8_table2      utf8_table2
118    #define _pcre_utf8_table3      utf8_table3
119    #define _pcre_utf8_table4      utf8_table4
120    #define _pcre_utt              utt
121    #define _pcre_utt_size         utt_size
122    #define _pcre_utt_names        utt_names
123    #define _pcre_OP_lengths       OP_lengths
124    
125    #include "pcre_tables.c"
126    
127    /* We also need the pcre_printint() function for printing out compiled
128    patterns. This function is in a separate file so that it can be included in
129    pcre_compile.c when that module is compiled with debugging enabled. It needs to
130    know which case is being compiled. */
131    
132    #define COMPILING_PCRETEST
133    #include "pcre_printint.src"
134    
135    /* The definition of the macro PRINTABLE, which determines whether to print an
136    output character as-is or as a hex value when showing compiled patterns, is
137    contained in the printint.src file. We use it here also, in cases when the
138    locale has not been explicitly changed, so as to get consistent output from
139    systems that differ in their output from isprint() even in the "C" locale. */
140    
141    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142    
143  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
144  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 148  Makefile. */
148  #include "pcreposix.h"  #include "pcreposix.h"
149  #endif  #endif
150    
151    /* It is also possible, for the benefit of the version currently imported into
152    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153    interface to the DFA matcher (NODFA), and without the doublecheck of the old
154    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155    UTF8 support if PCRE is built without it. */
156    
157    #ifndef SUPPORT_UTF8
158    #ifndef NOUTF8
159    #define NOUTF8
160    #endif
161    #endif
162    
163    
164    /* Other parameters */
165    
166  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
167  #ifdef CLK_TCK  #ifdef CLK_TCK
168  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 171  Makefile. */
171  #endif  #endif
172  #endif  #endif
173    
174  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
175    
176    #define LOOPREPEAT 500000
177    
178    /* Static variables */
179    
180  static FILE *outfile;  static FILE *outfile;
181  static int log_store = 0;  static int log_store = 0;
182    static int callout_count;
183    static int callout_extra;
184    static int callout_fail_count;
185    static int callout_fail_id;
186    static int debug_lengths;
187    static int first_callout;
188    static int locale_set = 0;
189    static int show_malloc;
190    static int use_utf8;
191  static size_t gotten_store;  static size_t gotten_store;
192    
193    /* The buffers grow automatically if very long input lines are encountered. */
194    
195    static int buffer_size = 50000;
196    static uschar *buffer = NULL;
197    static uschar *dbuffer = NULL;
198    static uschar *pbuffer = NULL;
199    
200    
201  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
202  code as contained in pcre.c under the DEBUG macro. */  *         Alternate character tables             *
203    *************************************************/
204    
205  static const char *OP_names[] = {  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
206    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  using the default tables of the library. However, the T option can be used to
207    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  select alternate sets of tables, for different kinds of testing. Note also that
208    "Opt", "^", "$", "Any", "chars", "not",  the L (locale) option also adjusts the tables. */
209    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
210    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  /* This is the set of tables distributed as default with PCRE. It recognizes
211    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  only ASCII characters. */
212    "*", "*?", "+", "+?", "?", "??", "{", "{",  
213    "class", "Ref", "Recurse",  static const unsigned char tables0[] = {
214    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
215    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  /* This table is a lower casing table. */
216    "Brazero", "Braminzero", "Bra"  
217        0,  1,  2,  3,  4,  5,  6,  7,
218        8,  9, 10, 11, 12, 13, 14, 15,
219       16, 17, 18, 19, 20, 21, 22, 23,
220       24, 25, 26, 27, 28, 29, 30, 31,
221       32, 33, 34, 35, 36, 37, 38, 39,
222       40, 41, 42, 43, 44, 45, 46, 47,
223       48, 49, 50, 51, 52, 53, 54, 55,
224       56, 57, 58, 59, 60, 61, 62, 63,
225       64, 97, 98, 99,100,101,102,103,
226      104,105,106,107,108,109,110,111,
227      112,113,114,115,116,117,118,119,
228      120,121,122, 91, 92, 93, 94, 95,
229       96, 97, 98, 99,100,101,102,103,
230      104,105,106,107,108,109,110,111,
231      112,113,114,115,116,117,118,119,
232      120,121,122,123,124,125,126,127,
233      128,129,130,131,132,133,134,135,
234      136,137,138,139,140,141,142,143,
235      144,145,146,147,148,149,150,151,
236      152,153,154,155,156,157,158,159,
237      160,161,162,163,164,165,166,167,
238      168,169,170,171,172,173,174,175,
239      176,177,178,179,180,181,182,183,
240      184,185,186,187,188,189,190,191,
241      192,193,194,195,196,197,198,199,
242      200,201,202,203,204,205,206,207,
243      208,209,210,211,212,213,214,215,
244      216,217,218,219,220,221,222,223,
245      224,225,226,227,228,229,230,231,
246      232,233,234,235,236,237,238,239,
247      240,241,242,243,244,245,246,247,
248      248,249,250,251,252,253,254,255,
249    
250    /* This table is a case flipping table. */
251    
252        0,  1,  2,  3,  4,  5,  6,  7,
253        8,  9, 10, 11, 12, 13, 14, 15,
254       16, 17, 18, 19, 20, 21, 22, 23,
255       24, 25, 26, 27, 28, 29, 30, 31,
256       32, 33, 34, 35, 36, 37, 38, 39,
257       40, 41, 42, 43, 44, 45, 46, 47,
258       48, 49, 50, 51, 52, 53, 54, 55,
259       56, 57, 58, 59, 60, 61, 62, 63,
260       64, 97, 98, 99,100,101,102,103,
261      104,105,106,107,108,109,110,111,
262      112,113,114,115,116,117,118,119,
263      120,121,122, 91, 92, 93, 94, 95,
264       96, 65, 66, 67, 68, 69, 70, 71,
265       72, 73, 74, 75, 76, 77, 78, 79,
266       80, 81, 82, 83, 84, 85, 86, 87,
267       88, 89, 90,123,124,125,126,127,
268      128,129,130,131,132,133,134,135,
269      136,137,138,139,140,141,142,143,
270      144,145,146,147,148,149,150,151,
271      152,153,154,155,156,157,158,159,
272      160,161,162,163,164,165,166,167,
273      168,169,170,171,172,173,174,175,
274      176,177,178,179,180,181,182,183,
275      184,185,186,187,188,189,190,191,
276      192,193,194,195,196,197,198,199,
277      200,201,202,203,204,205,206,207,
278      208,209,210,211,212,213,214,215,
279      216,217,218,219,220,221,222,223,
280      224,225,226,227,228,229,230,231,
281      232,233,234,235,236,237,238,239,
282      240,241,242,243,244,245,246,247,
283      248,249,250,251,252,253,254,255,
284    
285    /* This table contains bit maps for various character classes. Each map is 32
286    bytes long and the bits run from the least significant end of each byte. The
287    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
288    graph, print, punct, and cntrl. Other classes are built from combinations. */
289    
290      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
291      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294    
295      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
296      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
299    
300      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
301      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
304    
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
307      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
309    
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
312      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
314    
315      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
316      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
317      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
319    
320      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
321      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
322      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324    
325      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
326      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
327      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
330      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
331      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
332      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
335      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
336      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
337      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
340    /* This table identifies various classes of character by individual bits:
341      0x01   white space character
342      0x02   letter
343      0x04   decimal digit
344      0x08   hexadecimal digit
345      0x10   alphanumeric or '_'
346      0x80   regular expression metacharacter or binary zero
347    */
348    
349      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
350      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
353      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
354      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
355      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
356      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
357      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
358      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
359      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
360      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
361      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
362      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
363      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
364      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
375      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
378      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
379      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
380      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
381    
382    /* This is a set of tables that came originally from a Windows user. It seems to
383    be at least an approximation of ISO 8859. In particular, there are characters
384    greater than 128 that are marked as spaces, letters, etc. */
385    
386    static const unsigned char tables1[] = {
387    0,1,2,3,4,5,6,7,
388    8,9,10,11,12,13,14,15,
389    16,17,18,19,20,21,22,23,
390    24,25,26,27,28,29,30,31,
391    32,33,34,35,36,37,38,39,
392    40,41,42,43,44,45,46,47,
393    48,49,50,51,52,53,54,55,
394    56,57,58,59,60,61,62,63,
395    64,97,98,99,100,101,102,103,
396    104,105,106,107,108,109,110,111,
397    112,113,114,115,116,117,118,119,
398    120,121,122,91,92,93,94,95,
399    96,97,98,99,100,101,102,103,
400    104,105,106,107,108,109,110,111,
401    112,113,114,115,116,117,118,119,
402    120,121,122,123,124,125,126,127,
403    128,129,130,131,132,133,134,135,
404    136,137,138,139,140,141,142,143,
405    144,145,146,147,148,149,150,151,
406    152,153,154,155,156,157,158,159,
407    160,161,162,163,164,165,166,167,
408    168,169,170,171,172,173,174,175,
409    176,177,178,179,180,181,182,183,
410    184,185,186,187,188,189,190,191,
411    224,225,226,227,228,229,230,231,
412    232,233,234,235,236,237,238,239,
413    240,241,242,243,244,245,246,215,
414    248,249,250,251,252,253,254,223,
415    224,225,226,227,228,229,230,231,
416    232,233,234,235,236,237,238,239,
417    240,241,242,243,244,245,246,247,
418    248,249,250,251,252,253,254,255,
419    0,1,2,3,4,5,6,7,
420    8,9,10,11,12,13,14,15,
421    16,17,18,19,20,21,22,23,
422    24,25,26,27,28,29,30,31,
423    32,33,34,35,36,37,38,39,
424    40,41,42,43,44,45,46,47,
425    48,49,50,51,52,53,54,55,
426    56,57,58,59,60,61,62,63,
427    64,97,98,99,100,101,102,103,
428    104,105,106,107,108,109,110,111,
429    112,113,114,115,116,117,118,119,
430    120,121,122,91,92,93,94,95,
431    96,65,66,67,68,69,70,71,
432    72,73,74,75,76,77,78,79,
433    80,81,82,83,84,85,86,87,
434    88,89,90,123,124,125,126,127,
435    128,129,130,131,132,133,134,135,
436    136,137,138,139,140,141,142,143,
437    144,145,146,147,148,149,150,151,
438    152,153,154,155,156,157,158,159,
439    160,161,162,163,164,165,166,167,
440    168,169,170,171,172,173,174,175,
441    176,177,178,179,180,181,182,183,
442    184,185,186,187,188,189,190,191,
443    224,225,226,227,228,229,230,231,
444    232,233,234,235,236,237,238,239,
445    240,241,242,243,244,245,246,215,
446    248,249,250,251,252,253,254,223,
447    192,193,194,195,196,197,198,199,
448    200,201,202,203,204,205,206,207,
449    208,209,210,211,212,213,214,247,
450    216,217,218,219,220,221,222,255,
451    0,62,0,0,1,0,0,0,
452    0,0,0,0,0,0,0,0,
453    32,0,0,0,1,0,0,0,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,255,3,
456    126,0,0,0,126,0,0,0,
457    0,0,0,0,0,0,0,0,
458    0,0,0,0,0,0,0,0,
459    0,0,0,0,0,0,255,3,
460    0,0,0,0,0,0,0,0,
461    0,0,0,0,0,0,12,2,
462    0,0,0,0,0,0,0,0,
463    0,0,0,0,0,0,0,0,
464    254,255,255,7,0,0,0,0,
465    0,0,0,0,0,0,0,0,
466    255,255,127,127,0,0,0,0,
467    0,0,0,0,0,0,0,0,
468    0,0,0,0,254,255,255,7,
469    0,0,0,0,0,4,32,4,
470    0,0,0,128,255,255,127,255,
471    0,0,0,0,0,0,255,3,
472    254,255,255,135,254,255,255,7,
473    0,0,0,0,0,4,44,6,
474    255,255,127,255,255,255,127,255,
475    0,0,0,0,254,255,255,255,
476    255,255,255,255,255,255,255,127,
477    0,0,0,0,254,255,255,255,
478    255,255,255,255,255,255,255,255,
479    0,2,0,0,255,255,255,255,
480    255,255,255,255,255,255,255,127,
481    0,0,0,0,255,255,255,255,
482    255,255,255,255,255,255,255,255,
483    0,0,0,0,254,255,0,252,
484    1,0,0,248,1,0,0,120,
485    0,0,0,0,254,255,255,255,
486    0,0,128,0,0,0,128,0,
487    255,255,255,255,0,0,0,0,
488    0,0,0,0,0,0,0,128,
489    255,255,255,255,0,0,0,0,
490    0,0,0,0,0,0,0,0,
491    128,0,0,0,0,0,0,0,
492    0,1,1,0,1,1,0,0,
493    0,0,0,0,0,0,0,0,
494    0,0,0,0,0,0,0,0,
495    1,0,0,0,128,0,0,0,
496    128,128,128,128,0,0,128,0,
497    28,28,28,28,28,28,28,28,
498    28,28,0,0,0,0,0,128,
499    0,26,26,26,26,26,26,18,
500    18,18,18,18,18,18,18,18,
501    18,18,18,18,18,18,18,18,
502    18,18,18,128,128,0,128,16,
503    0,26,26,26,26,26,26,18,
504    18,18,18,18,18,18,18,18,
505    18,18,18,18,18,18,18,18,
506    18,18,18,128,128,0,0,0,
507    0,0,0,0,0,1,0,0,
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,0,0,
510    0,0,0,0,0,0,0,0,
511    1,0,0,0,0,0,0,0,
512    0,0,18,0,0,0,0,0,
513    0,0,20,20,0,18,0,0,
514    0,20,18,0,0,0,0,0,
515    18,18,18,18,18,18,18,18,
516    18,18,18,18,18,18,18,18,
517    18,18,18,18,18,18,18,0,
518    18,18,18,18,18,18,18,18,
519    18,18,18,18,18,18,18,18,
520    18,18,18,18,18,18,18,18,
521    18,18,18,18,18,18,18,0,
522    18,18,18,18,18,18,18,18
523  };  };
524    
525    
526  static void print_internals(pcre *re)  
527    
528    #ifndef HAVE_STRERROR
529    /*************************************************
530    *     Provide strerror() for non-ANSI libraries  *
531    *************************************************/
532    
533    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
534    in their libraries, but can provide the same facility by this simple
535    alternative function. */
536    
537    extern int   sys_nerr;
538    extern char *sys_errlist[];
539    
540    char *
541    strerror(int n)
542  {  {
543  unsigned char *code = ((real_pcre *)re)->code;  if (n < 0 || n >= sys_nerr) return "unknown error number";
544    return sys_errlist[n];
545    }
546    #endif /* HAVE_STRERROR */
547    
 fprintf(outfile, "------------------------------------------------------------------\n");  
548    
 for(;;)  
   {  
   int c;  
   int charlength;  
549    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
550    
551    if (*code >= OP_BRA)  /*************************************************
552    *        Read or extend an input line            *
553    *************************************************/
554    
555    /* Input lines are read into buffer, but both patterns and data lines can be
556    continued over multiple input lines. In addition, if the buffer fills up, we
557    want to automatically expand it so as to be able to handle extremely large
558    lines that are needed for certain stress tests. When the input buffer is
559    expanded, the other two buffers must also be expanded likewise, and the
560    contents of pbuffer, which are a copy of the input for callouts, must be
561    preserved (for when expansion happens for a data line). This is not the most
562    optimal way of handling this, but hey, this is just a test program!
563    
564    Arguments:
565      f            the file to read
566      start        where in buffer to start (this *must* be within buffer)
567      prompt       for stdin or readline()
568    
569    Returns:       pointer to the start of new data
570                   could be a copy of start, or could be moved
571                   NULL if no data read and EOF reached
572    */
573    
574    static uschar *
575    extend_inputline(FILE *f, uschar *start, const char *prompt)
576    {
577    uschar *here = start;
578    
579    for (;;)
580      {
581      int rlen = (int)(buffer_size - (here - buffer));
582    
583      if (rlen > 1000)
584      {      {
585      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      int dlen;
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
586    
587        CLASS_REF_REPEAT:      /* If libreadline support is required, use readline() to read a line if the
588        input is a terminal. Note that readline() removes the trailing newline, so
589        we must put it back again, to be compatible with fgets(). */
590    
591        switch(*code)  #ifdef SUPPORT_LIBREADLINE
592          {      if (isatty(fileno(f)))
593          case OP_CRSTAR:        {
594          case OP_CRMINSTAR:        size_t len;
595          case OP_CRPLUS:        char *s = readline(prompt);
596          case OP_CRMINPLUS:        if (s == NULL) return (here == start)? NULL : start;
597          case OP_CRQUERY:        len = strlen(s);
598          case OP_CRMINQUERY:        if (len > 0) add_history(s);
599          fprintf(outfile, "%s", OP_names[*code]);        if (len > rlen - 1) len = rlen - 1;
600          break;        memcpy(here, s, len);
601          here[len] = '\n';
602          here[len+1] = 0;
603          free(s);
604          }
605        else
606    #endif
607    
608          case OP_CRRANGE:      /* Read the next line by normal means, prompting if the file is stdin. */
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
609    
610          default:        {
611          code--;        if (f == stdin) printf("%s", prompt);
612          }        if (fgets((char *)here, rlen,  f) == NULL)
613            return (here == start)? NULL : start;
614        }        }
     break;  
615    
616      /* Anything else is just a one-node item */      dlen = (int)strlen((char *)here);
617        if (dlen > 0 && here[dlen - 1] == '\n') return start;
618        here += dlen;
619        }
620    
621      else
622        {
623        int new_buffer_size = 2*buffer_size;
624        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
625        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
626        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
627    
628        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
629          {
630          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
631          exit(1);
632          }
633    
634        memcpy(new_buffer, buffer, buffer_size);
635        memcpy(new_pbuffer, pbuffer, buffer_size);
636    
637        buffer_size = new_buffer_size;
638    
639      default:      start = new_buffer + (start - buffer);
640      fprintf(outfile, "    %s", OP_names[*code]);      here = new_buffer + (here - buffer);
641      break;  
642        free(buffer);
643        free(dbuffer);
644        free(pbuffer);
645    
646        buffer = new_buffer;
647        dbuffer = new_dbuffer;
648        pbuffer = new_pbuffer;
649      }      }
650      }
651    
652    return NULL;  /* Control never gets here */
653    }
654    
655    
656    
657    
658    
659    
660    
661    /*************************************************
662    *          Read number from string               *
663    *************************************************/
664    
665    code++;  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
666    fprintf(outfile, "\n");  around with conditional compilation, just do the job by hand. It is only used
667    for unpicking arguments, so just keep it simple.
668    
669    Arguments:
670      str           string to be converted
671      endptr        where to put the end pointer
672    
673    Returns:        the converted value, as an int
674    */
675    
/* Convert a decimal string to an int by hand. Leading white space is skipped,
then digits are accumulated up to the first non-digit. A digit string that is
too large for an int saturates at the largest representable value instead of
overflowing, because overflow of a signed int is undefined behaviour.

Arguments:
  str           string to be converted
  endptr        receives a pointer to the first character not consumed

Returns:        the converted value (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest int value, derived without needing <limits.h>. This assumes that
int and unsigned int differ only by the sign bit. */

const unsigned int limit = ~0u >> 1;
unsigned int result = 0;

while (isspace(*str)) str++;

while (isdigit(*str))
  {
  unsigned int digit = (unsigned int)(*str++ - '0');
  if (result > (limit - digit) / 10) result = limit;   /* Saturate */
    else result = result * 10 + digit;
  }

*endptr = str;
return (int)result;
}
685    
686    
687    
688    
689    /*************************************************
690    *            Convert UTF-8 string to value       *
691    *************************************************/
692    
693    /* This function takes one or more bytes that represents a UTF-8 character,
694    and returns the value of the character.
695    
696    Argument:
697      utf8bytes   a pointer to the byte vector
698      vptr        a pointer to an int to receive the value
699    
700    Returns:      >  0 => the number of bytes consumed
701                  -6 to 0 => malformed UTF-8 character at offset = (-return)
702    */
703    
704    #if !defined NOUTF8
705    
706    static int
707    utf82ord(unsigned char *utf8bytes, int *vptr)
708    {
709    int c = *utf8bytes++;
710    int d = c;
711    int i, j, s;
712    
713    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
714      {
715      if ((d & 0x80) == 0) break;
716      d <<= 1;
717      }
718    
719    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
720    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
721    
722    /* i now has a value in the range 1-5 */
723    
724    s = 6*i;
725    d = (c & utf8_table3[i]) << s;
726    
727    for (j = 0; j < i; j++)
728      {
729      c = *utf8bytes++;
730      if ((c & 0xc0) != 0x80) return -(j+1);
731      s -= 6;
732      d |= (c & 0x3f) << s;
733    }    }
734    
735    /* Check that encoding was the correct unique one */
736    
737    for (j = 0; j < utf8_table1_size; j++)
738      if (d <= utf8_table1[j]) break;
739    if (j != i) return -(i+1);
740    
741    /* Valid value */
742    
743    *vptr = d;
744    return i+1;
745    }
746    
747    #endif
748    
749    
750    
751    /*************************************************
752    *       Convert character value to UTF-8         *
753    *************************************************/
754    
755    /* This function takes an integer value in the range 0 - 0x7fffffff
756    and encodes it as a UTF-8 character in 1 to 6 bytes.
757    
758    Arguments:
759      cvalue     the character value
760      utf8bytes  pointer to buffer for result - at least 6 bytes long
761    
762    Returns:     number of bytes placed in the buffer
763    */
764    
765    #if !defined NOUTF8
766    
767    static int
768    ord2utf8(int cvalue, uschar *utf8bytes)
769    {
770    register int i, j;
771    for (i = 0; i < utf8_table1_size; i++)
772      if (cvalue <= utf8_table1[i]) break;
773    utf8bytes += i;
774    for (j = i; j > 0; j--)
775     {
776     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
777     cvalue >>= 6;
778     }
779    *utf8bytes = utf8_table2[i] | cvalue;
780    return i + 1;
781  }  }
782    
783    #endif
784    
785    
 /* Character string printing function. */  
786    
787  static void pchars(unsigned char *p, int length)  /*************************************************
788    *             Print character string             *
789    *************************************************/
790    
791    /* Character string printing function. Must handle UTF-8 strings in utf8
792    mode. Yields number of characters printed. If handed a NULL file, just counts
793    chars without printing. */
794    
795    static int pchars(unsigned char *p, int length, FILE *f)
796  {  {
797  int c;  int c = 0;
798    int yield = 0;
799    
800  while (length-- > 0)  while (length-- > 0)
801    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
802      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
803      if (use_utf8)
804        {
805        int rc = utf82ord(p, &c);
806    
807        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
808          {
809          length -= rc - 1;
810          p += rc;
811          if (PRINTHEX(c))
812            {
813            if (f != NULL) fprintf(f, "%c", c);
814            yield++;
815            }
816          else
817            {
818            int n = 4;
819            if (f != NULL) fprintf(f, "\\x{%02x}", c);
820            yield += (n <= 0x000000ff)? 2 :
821                     (n <= 0x00000fff)? 3 :
822                     (n <= 0x0000ffff)? 4 :
823                     (n <= 0x000fffff)? 5 : 6;
824            }
825          continue;
826          }
827        }
828    #endif
829    
830       /* Not UTF-8, or malformed UTF-8  */
831    
832      c = *p++;
833      if (PRINTHEX(c))
834        {
835        if (f != NULL) fprintf(f, "%c", c);
836        yield++;
837        }
838      else
839        {
840        if (f != NULL) fprintf(f, "\\x%02x", c);
841        yield += 4;
842        }
843      }
844    
845    return yield;
846  }  }
847    
848    
849    
850    /*************************************************
851    *              Callout function                  *
852    *************************************************/
853    
854    /* Called from PCRE as a result of the (?C) item. We print out where we are in
855    the match. Yield zero unless more callouts than the fail count, or the callout
856    data is not zero. */
857    
858    static int callout(pcre_callout_block *cb)
859    {
860    FILE *f = (first_callout | callout_extra)? outfile : NULL;
861    int i, pre_start, post_start, subject_length;
862    
863    if (callout_extra)
864      {
865      fprintf(f, "Callout %d: last capture = %d\n",
866        cb->callout_number, cb->capture_last);
867    
868      for (i = 0; i < cb->capture_top * 2; i += 2)
869        {
870        if (cb->offset_vector[i] < 0)
871          fprintf(f, "%2d: <unset>\n", i/2);
872        else
873          {
874          fprintf(f, "%2d: ", i/2);
875          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
876            cb->offset_vector[i+1] - cb->offset_vector[i], f);
877          fprintf(f, "\n");
878          }
879        }
880      }
881    
882    /* Re-print the subject in canonical form, the first time or if giving full
883    datails. On subsequent calls in the same match, we use pchars just to find the
884    printed lengths of the substrings. */
885    
886    if (f != NULL) fprintf(f, "--->");
887    
888    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
889    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
890      cb->current_position - cb->start_match, f);
891    
892    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
893    
894    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
895      cb->subject_length - cb->current_position, f);
896    
897    if (f != NULL) fprintf(f, "\n");
898    
899    /* Always print appropriate indicators, with callout number if not already
900    shown. For automatic callouts, show the pattern offset. */
901    
902    if (cb->callout_number == 255)
903      {
904      fprintf(outfile, "%+3d ", cb->pattern_position);
905      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
906      }
907    else
908      {
909      if (callout_extra) fprintf(outfile, "    ");
910        else fprintf(outfile, "%3d ", cb->callout_number);
911      }
912    
913    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
914    fprintf(outfile, "^");
915    
916    if (post_start > 0)
917      {
918      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
919      fprintf(outfile, "^");
920      }
921    
922    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
923      fprintf(outfile, " ");
924    
925    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
926      pbuffer + cb->pattern_position);
927    
928    fprintf(outfile, "\n");
929    first_callout = 0;
930    
931    if (cb->callout_data != NULL)
932      {
933      int callout_data = *((int *)(cb->callout_data));
934      if (callout_data != 0)
935        {
936        fprintf(outfile, "Callout data = %d\n", callout_data);
937        return callout_data;
938        }
939      }
940    
941    return (cb->callout_number != callout_fail_id)? 0 :
942           (++callout_count >= callout_fail_count)? 1 : 0;
943    }
944    
945    
946    /*************************************************
947    *            Local malloc functions              *
948    *************************************************/
949    
950  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
951  compiled re. */  compiled re. */
952    
953  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
954  {  {
955    void *block = malloc(size);
956  gotten_store = size;  gotten_store = size;
957  if (log_store)  if (show_malloc)
958    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
959      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
 return malloc(size);  
960  }  }
961    
962    static void new_free(void *block)
963    {
964    if (show_malloc)
965      fprintf(outfile, "free             %p\n", block);
966    free(block);
967    }
968    
969    
970    /* For recursion malloc/free, to test stacking calls */
971    
972    static void *stack_malloc(size_t size)
973    {
974    void *block = malloc(size);
975    if (show_malloc)
976      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
977    return block;
978    }
979    
980    static void stack_free(void *block)
981    {
982    if (show_malloc)
983      fprintf(outfile, "stack_free       %p\n", block);
984    free(block);
985    }
986    
987    
988    /*************************************************
989    *          Call pcre_fullinfo()                  *
990    *************************************************/
991    
992  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
993    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 1000  if ((rc = pcre_fullinfo(re, study, optio
1000    
1001    
1002    
1003    /*************************************************
1004    *         Byte flipping function                 *
1005    *************************************************/
1006    
/* Reverse the byte order of a 2-byte value (n == 2) or of a 4-byte value (any
other n). Only the low-order 2 or 4 bytes of the argument contribute to the
result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;            /* Least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1016    
1017    
1018    
1019    
1020    /*************************************************
1021    *        Check match or recursion limit          *
1022    *************************************************/
1023    
1024    static int
1025    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1026      int start_offset, int options, int *use_offsets, int use_size_offsets,
1027      int flag, unsigned long int *limit, int errnumber, const char *msg)
1028    {
1029    int count;
1030    int min = 0;
1031    int mid = 64;
1032    int max = -1;
1033    
1034    extra->flags |= flag;
1035    
1036    for (;;)
1037      {
1038      *limit = mid;
1039    
1040      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1041        use_offsets, use_size_offsets);
1042    
1043      if (count == errnumber)
1044        {
1045        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1046        min = mid;
1047        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1048        }
1049    
1050      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1051                             count == PCRE_ERROR_PARTIAL)
1052        {
1053        if (mid == min + 1)
1054          {
1055          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1056          break;
1057          }
1058        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1059        max = mid;
1060        mid = (min + mid)/2;
1061        }
1062      else break;    /* Some other error */
1063      }
1064    
1065    extra->flags &= ~flag;
1066    return count;
1067    }
1068    
1069    
1070    
1071    /*************************************************
1072    *         Case-independent strncmp() function    *
1073    *************************************************/
1074    
1075    /*
1076    Arguments:
1077      s         first string
1078      t         second string
1079      n         number of characters to compare
1080    
1081    Returns:    < 0, = 0, or > 0, according to the comparison
1082    */
1083    
1084    static int
1085    strncmpic(uschar *s, uschar *t, int n)
1086    {
1087    while (n--)
1088      {
1089      int c = tolower(*s++) - tolower(*t++);
1090      if (c) return c;
1091      }
1092    return 0;
1093    }
1094    
1095    
1096    
1097    /*************************************************
1098    *         Check newline indicator                *
1099    *************************************************/
1100    
1101    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1102    a message and return 0 if there is no match.
1103    
1104    Arguments:
1105      p           points after the leading '<'
1106      f           file for error message
1107    
1108    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1109    */
1110    
1111    static int
1112    check_newline(uschar *p, FILE *f)
1113    {
1114    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1115    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1116    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1117    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1118    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1119    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1120    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1121    fprintf(f, "Unknown newline type at: <%s\n", p);
1122    return 0;
1123    }
1124    
1125    
1126    
1127    /*************************************************
1128    *             Usage function                     *
1129    *************************************************/
1130    
/* Write the command line summary to stdout. Lines that describe optional
features appear only when the feature is compiled in. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n", stdout);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1164    
1165    
1166    
1167    /*************************************************
1168    *                Main Program                    *
1169    *************************************************/
1170    
1171  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
1172  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 313  int main(int argc, char **argv) Line 1177  int main(int argc, char **argv)
1177  FILE *infile = stdin;  FILE *infile = stdin;
1178  int options = 0;  int options = 0;
1179  int study_options = 0;  int study_options = 0;
1180    int default_find_match_limit = FALSE;
1181  int op = 1;  int op = 1;
1182  int timeit = 0;  int timeit = 0;
1183    int timeitm = 0;
1184  int showinfo = 0;  int showinfo = 0;
1185  int showstore = 0;  int showstore = 0;
1186    int quiet = 0;
1187    int size_offsets = 45;
1188    int size_offsets_max;
1189    int *offsets = NULL;
1190    #if !defined NOPOSIX
1191  int posix = 0;  int posix = 0;
1192    #endif
1193  int debug = 0;  int debug = 0;
1194  int done = 0;  int done = 0;
1195  unsigned char buffer[30000];  int all_use_dfa = 0;
1196  unsigned char dbuffer[1024];  int yield = 0;
1197    int stack_size;
1198    
1199    /* These vectors store, end-to-end, a list of captured substring names. Assume
1200    that 1024 is plenty long enough for the few names we'll be testing. */
1201    
1202  /* Static so that new_malloc can use it. */  uschar copynames[1024];
1203    uschar getnames[1024];
1204    
1205    uschar *copynamesptr;
1206    uschar *getnamesptr;
1207    
1208    /* Get buffers from malloc() so that Electric Fence will check their misuse
1209    when I am debugging. They grow automatically when very long lines are read. */
1210    
1211    buffer = (unsigned char *)malloc(buffer_size);
1212    dbuffer = (unsigned char *)malloc(buffer_size);
1213    pbuffer = (unsigned char *)malloc(buffer_size);
1214    
1215    /* The outfile variable is static so that new_malloc can use it. */
1216    
1217  outfile = stdout;  outfile = stdout;
1218    
1219    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1220    library to translate CRLF into a single LF character. At least, that's what
1221    I've been told: never having used Windows I take this all on trust. Originally
1222    it set 0x8000, but then I was advised that _O_BINARY was better. */
1223    
1224    #if defined(_WIN32) || defined(WIN32)
1225    _setmode( _fileno( stdout ), _O_BINARY );
1226    #endif
1227    
1228  /* Scan options */  /* Scan options */
1229    
1230  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1231    {    {
1232      unsigned char *endptr;
1233    
1234    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1235      showstore = 1;      showstore = 1;
1236    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1237      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1238    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1239    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1240      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1241    #if !defined NODFA
1242      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1243    #endif
1244      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1245          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1246            *endptr == 0))
1247        {
1248        op++;
1249        argc--;
1250        }
1251      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1252        {
1253        int both = argv[op][2] == 0;
1254        int temp;
1255        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1256                         *endptr == 0))
1257          {
1258          timeitm = temp;
1259          op++;
1260          argc--;
1261          }
1262        else timeitm = LOOPREPEAT;
1263        if (both) timeit = timeitm;
1264        }
1265      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1266          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1267            *endptr == 0))
1268        {
1269    #if defined(_WIN32) || defined(WIN32)
1270        printf("PCRE: -S not supported on this OS\n");
1271        exit(1);
1272    #else
1273        int rc;
1274        struct rlimit rlim;
1275        getrlimit(RLIMIT_STACK, &rlim);
1276        rlim.rlim_cur = stack_size * 1024 * 1024;
1277        rc = setrlimit(RLIMIT_STACK, &rlim);
1278        if (rc != 0)
1279          {
1280        printf("PCRE: setrlimit() failed with error %d\n", rc);
1281        exit(1);
1282          }
1283        op++;
1284        argc--;
1285    #endif
1286        }
1287    #if !defined NOPOSIX
1288    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1289    #endif
1290      else if (strcmp(argv[op], "-C") == 0)
1291        {
1292        int rc;
1293        unsigned long int lrc;
1294        printf("PCRE version %s\n", pcre_version());
1295        printf("Compiled with\n");
1296        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1297        printf("  %sUTF-8 support\n", rc? "" : "No ");
1298        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1299        printf("  %sUnicode properties support\n", rc? "" : "No ");
1300        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1301        /* Note that these values are always the ASCII values, even
1302        in EBCDIC environments. CR is 13 and NL is 10. */
1303        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1304          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1305          (rc == -2)? "ANYCRLF" :
1306          (rc == -1)? "ANY" : "???");
1307        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1308        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1309                                         "all Unicode newlines");
1310        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1311        printf("  Internal link size = %d\n", rc);
1312        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1313        printf("  POSIX malloc threshold = %d\n", rc);
1314        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1315        printf("  Default match limit = %ld\n", lrc);
1316        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1317        printf("  Default recursion depth limit = %ld\n", lrc);
1318        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1319        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1320        goto EXIT;
1321        }
1322      else if (strcmp(argv[op], "-help") == 0 ||
1323               strcmp(argv[op], "--help") == 0)
1324        {
1325        usage();
1326        goto EXIT;
1327        }
1328    else    else
1329      {      {
1330      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1331      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1332      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
1333             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
1334      }      }
1335    op++;    op++;
1336    argc--;    argc--;
1337    }    }
1338    
1339    /* Get the store for the offsets vector, and remember what it was */
1340    
1341    size_offsets_max = size_offsets;
1342    offsets = (int *)malloc(size_offsets_max * sizeof(int));
1343    if (offsets == NULL)
1344      {
1345      printf("** Failed to get %d bytes of memory for offsets vector\n",
1346        (int)(size_offsets_max * sizeof(int)));
1347      yield = 1;
1348      goto EXIT;
1349      }
1350    
1351  /* Sort out the input and output files */  /* Sort out the input and output files */
1352    
1353  if (argc > 1)  if (argc > 1)
1354    {    {
1355    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1356    if (infile == NULL)    if (infile == NULL)
1357      {      {
1358      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1359      return 1;      yield = 1;
1360        goto EXIT;
1361      }      }
1362    }    }
1363    
1364  if (argc > 2)  if (argc > 2)
1365    {    {
1366    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1367    if (outfile == NULL)    if (outfile == NULL)
1368      {      {
1369      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1370      return 1;      yield = 1;
1371        goto EXIT;
1372      }      }
1373    }    }
1374    
1375  /* Set alternative malloc function */  /* Set alternative malloc function */
1376    
1377  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1378    pcre_free = new_free;
1379    pcre_stack_malloc = stack_malloc;
1380    pcre_stack_free = stack_free;
1381    
1382  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1383    
1384  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1385    
1386  /* Main loop */  /* Main loop */
1387    
# Line 391  while (!done) Line 1392  while (!done)
1392    
1393  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
1394    regex_t preg;    regex_t preg;
1395      int do_posix = 0;
1396  #endif  #endif
1397    
1398    const char *error;    const char *error;
1399      unsigned char *markptr;
1400    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1401    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
1402      const unsigned char *tables = NULL;
1403      unsigned long int true_size, true_study_size = 0;
1404      size_t size, regex_gotten_store;
1405      int do_mark = 0;
1406    int do_study = 0;    int do_study = 0;
1407    int do_debug = debug;    int do_debug = debug;
1408    int do_G = 0;    int do_G = 0;
1409    int do_g = 0;    int do_g = 0;
1410    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1411    int do_showrest = 0;    int do_showrest = 0;
1412    int do_posix = 0;    int do_flip = 0;
1413    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1414    
1415      use_utf8 = 0;
1416      debug_lengths = 1;
1417    
1418    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1419    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1420      fflush(outfile);
1421    
1422    p = buffer;    p = buffer;
1423    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1424    if (*p == 0) continue;    if (*p == 0) continue;
1425    
1426    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1427    complete, read more. */  
1428      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1429        {
1430        unsigned long int magic, get_options;
1431        uschar sbuf[8];
1432        FILE *f;
1433    
1434        p++;
1435        pp = p + (int)strlen((char *)p);
1436        while (isspace(pp[-1])) pp--;
1437        *pp = 0;
1438    
1439        f = fopen((char *)p, "rb");
1440        if (f == NULL)
1441          {
1442          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1443          continue;
1444          }
1445    
1446        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1447    
1448        true_size =
1449          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1450        true_study_size =
1451          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1452    
1453        re = (real_pcre *)new_malloc(true_size);
1454        regex_gotten_store = gotten_store;
1455    
1456        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1457    
1458        magic = ((real_pcre *)re)->magic_number;
1459        if (magic != MAGIC_NUMBER)
1460          {
1461          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1462            {
1463            do_flip = 1;
1464            }
1465          else
1466            {
1467            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1468            fclose(f);
1469            continue;
1470            }
1471          }
1472    
1473        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1474          do_flip? " (byte-inverted)" : "", p);
1475    
1476        /* Need to know if UTF-8 for printing data strings */
1477    
1478        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1479        use_utf8 = (get_options & PCRE_UTF8) != 0;
1480    
1481        /* Now see if there is any following study data */
1482    
1483        if (true_study_size != 0)
1484          {
1485          pcre_study_data *psd;
1486    
1487          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1488          extra->flags = PCRE_EXTRA_STUDY_DATA;
1489    
1490          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1491          extra->study_data = psd;
1492    
1493          if (fread(psd, 1, true_study_size, f) != true_study_size)
1494            {
1495            FAIL_READ:
1496            fprintf(outfile, "Failed to read data from %s\n", p);
1497            if (extra != NULL) new_free(extra);
1498            if (re != NULL) new_free(re);
1499            fclose(f);
1500            continue;
1501            }
1502          fprintf(outfile, "Study data loaded from %s\n", p);
1503          do_study = 1;     /* To get the data output if requested */
1504          }
1505        else fprintf(outfile, "No study data\n");
1506    
1507        fclose(f);
1508        goto SHOW_INFO;
1509        }
1510    
1511      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1512      the pattern; if it isn't complete, read more. */
1513    
1514    delimiter = *p++;    delimiter = *p++;
1515    
1516    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1517      {      {
1518      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1519      goto SKIP_DATA;      goto SKIP_DATA;
1520      }      }
1521    
1522    pp = p;    pp = p;
1523      poffset = (int)(p - buffer);
1524    
1525    for(;;)    for(;;)
1526      {      {
# Line 435  while (!done) Line 1531  while (!done)
1531        pp++;        pp++;
1532        }        }
1533      if (*pp != 0) break;      if (*pp != 0) break;
1534        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1535        {        {
1536        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1537        done = 1;        done = 1;
# Line 453  while (!done) Line 1540  while (!done)
1540      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1541      }      }
1542    
1543      /* The buffer may have moved while being extended; reset the start of data
1544      pointer to the correct relative point in the buffer. */
1545    
1546      p = buffer + poffset;
1547    
1548    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1549    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1550    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1551    
1552    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1553    
1554    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1555      for callouts. */
1556    
1557    *pp++ = 0;    *pp++ = 0;
1558      strcpy((char *)pbuffer, (char *)p);
1559    
1560    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1561    
# Line 473  while (!done) Line 1567  while (!done)
1567      {      {
1568      switch (*pp++)      switch (*pp++)
1569        {        {
1570          case 'f': options |= PCRE_FIRSTLINE; break;
1571        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1572        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1573        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 481  while (!done) Line 1576  while (!done)
1576    
1577        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1578        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1579          case 'B': do_debug = 1; break;
1580          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1581        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1582        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1583          case 'F': do_flip = 1; break;
1584        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1585        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1586          case 'J': options |= PCRE_DUPNAMES; break;
1587          case 'K': do_mark = 1; break;
1588        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1589          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1590    
1591  #if !defined NOPOSIX  #if !defined NOPOSIX
1592        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 493  while (!done) Line 1594  while (!done)
1594    
1595        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1596        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1597          case 'W': options |= PCRE_UCP; break;
1598        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1599          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1600          case 'Z': debug_lengths = 0; break;
1601          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1602          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1603    
1604          case 'T':
1605          switch (*pp++)
1606            {
1607            case '0': tables = tables0; break;
1608            case '1': tables = tables1; break;
1609    
1610            case '\r':
1611            case '\n':
1612            case ' ':
1613            case 0:
1614            fprintf(outfile, "** Missing table number after /T\n");
1615            goto SKIP_DATA;
1616    
1617            default:
1618            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1619            goto SKIP_DATA;
1620            }
1621          break;
1622    
1623        case 'L':        case 'L':
1624        ppp = pp;        ppp = pp;
1625        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1626          /* The 0 (NUL) test is just in case this is an unterminated line. */
1627          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1628        *ppp = 0;        *ppp = 0;
1629        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1630          {          {
1631          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1632          goto SKIP_DATA;          goto SKIP_DATA;
1633          }          }
1634          locale_set = 1;
1635        tables = pcre_maketables();        tables = pcre_maketables();
1636        pp = ppp;        pp = ppp;
1637        break;        break;
1638    
1639        case '\n': case ' ': break;        case '>':
1640          to_file = pp;
1641          while (*pp != 0) pp++;
1642          while (isspace(pp[-1])) pp--;
1643          *pp = 0;
1644          break;
1645    
1646          case '<':
1647            {
1648            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1649              {
1650              options |= PCRE_JAVASCRIPT_COMPAT;
1651              pp += 3;
1652              }
1653            else
1654              {
1655              int x = check_newline(pp, outfile);
1656              if (x == 0) goto SKIP_DATA;
1657              options |= x;
1658              while (*pp++ != '>');
1659              }
1660            }
1661          break;
1662    
1663          case '\r':                      /* So that it works in Windows */
1664          case '\n':
1665          case ' ':
1666          break;
1667    
1668        default:        default:
1669        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1670        goto SKIP_DATA;        goto SKIP_DATA;
# Line 524  while (!done) Line 1680  while (!done)
1680      {      {
1681      int rc;      int rc;
1682      int cflags = 0;      int cflags = 0;
1683    
1684      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1685      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1686        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1687        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1688        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1689        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1690        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1691    
1692      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1693    
1694      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 1696  while (!done)
1696    
1697      if (rc != 0)      if (rc != 0)
1698        {        {
1699        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1700        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1701        goto SKIP_DATA;        goto SKIP_DATA;
1702        }        }
# Line 545  while (!done) Line 1708  while (!done)
1708  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1709    
1710      {      {
1711      if (timeit)      unsigned long int get_options;
1712    
1713        if (timeit > 0)
1714        {        {
1715        register int i;        register int i;
1716        clock_t time_taken;        clock_t time_taken;
1717        clock_t start_time = clock();        clock_t start_time = clock();
1718        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1719          {          {
1720          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1721          if (re != NULL) free(re);          if (re != NULL) free(re);
1722          }          }
1723        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1724        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1725          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1726          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1727        }        }
1728    
1729      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 574  while (!done) Line 1739  while (!done)
1739          {          {
1740          for (;;)          for (;;)
1741            {            {
1742            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1743              {              {
1744              done = 1;              done = 1;
1745              goto CONTINUE;              goto CONTINUE;
# Line 588  while (!done) Line 1753  while (!done)
1753        goto CONTINUE;        goto CONTINUE;
1754        }        }
1755    
1756      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1757      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1758      returns only limited data. Check that it agrees with the newer one. */      lines. */
1759    
1760        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1761        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1762    
1763        /* Print information if required. There are now two info-returning
1764        functions. The old one has a limited interface and returns only limited
1765        data. Check that it agrees with the newer one. */
1766    
1767        if (log_store)
1768          fprintf(outfile, "Memory allocation (code space): %d\n",
1769            (int)(gotten_store -
1770                  sizeof(real_pcre) -
1771                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1772    
1773        /* Extract the size for possible writing before possibly flipping it,
1774        and remember the store that was got. */
1775    
1776        true_size = ((real_pcre *)re)->size;
1777        regex_gotten_store = gotten_store;
1778    
1779        /* If /S was present, study the regexp to generate additional info to
1780        help with the matching. */
1781    
1782        if (do_study)
1783          {
1784          if (timeit > 0)
1785            {
1786            register int i;
1787            clock_t time_taken;
1788            clock_t start_time = clock();
1789            for (i = 0; i < timeit; i++)
1790              extra = pcre_study(re, study_options, &error);
1791            time_taken = clock() - start_time;
1792            if (extra != NULL) free(extra);
1793            fprintf(outfile, "  Study time %.4f milliseconds\n",
1794              (((double)time_taken * 1000.0) / (double)timeit) /
1795                (double)CLOCKS_PER_SEC);
1796            }
1797          extra = pcre_study(re, study_options, &error);
1798          if (error != NULL)
1799            fprintf(outfile, "Failed to study: %s\n", error);
1800          else if (extra != NULL)
1801            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1802          }
1803    
1804        /* If /K was present, we set up for handling MARK data. */
1805    
1806        if (do_mark)
1807          {
1808          if (extra == NULL)
1809            {
1810            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1811            extra->flags = 0;
1812            }
1813          extra->mark = &markptr;
1814          extra->flags |= PCRE_EXTRA_MARK;
1815          }
1816    
1817        /* If the 'F' option was present, we flip the bytes of all the integer
1818        fields in the regex data block and the study block. This is to make it
1819        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1820        compiled on a different architecture. */
1821    
1822        if (do_flip)
1823          {
1824          real_pcre *rre = (real_pcre *)re;
1825          rre->magic_number =
1826            byteflip(rre->magic_number, sizeof(rre->magic_number));
1827          rre->size = byteflip(rre->size, sizeof(rre->size));
1828          rre->options = byteflip(rre->options, sizeof(rre->options));
1829          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1830          rre->top_bracket =
1831            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1832          rre->top_backref =
1833            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1834          rre->first_byte =
1835            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1836          rre->req_byte =
1837            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1838          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1839            sizeof(rre->name_table_offset));
1840          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1841            sizeof(rre->name_entry_size));
1842          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1843            sizeof(rre->name_count));
1844    
1845          if (extra != NULL)
1846            {
1847            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1848            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1849            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1850            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1851            }
1852          }
1853    
1854        /* Extract information from the compiled data if required */
1855    
1856        SHOW_INFO:
1857    
1858        if (do_debug)
1859          {
1860          fprintf(outfile, "------------------------------------------------------------------\n");
1861          pcre_printint(re, outfile, debug_lengths);
1862          }
1863    
1864        /* We already have the options in get_options (see above) */
1865    
1866      if (do_showinfo)      if (do_showinfo)
1867        {        {
1868          unsigned long int all_options;
1869    #if !defined NOINFOCHECK
1870        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1871        int count, backrefmax, first_char, need_char;  #endif
1872        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1873            hascrorlf;
1874        if (do_debug) print_internals(re);        int nameentrysize, namecount;
1875          const uschar *nametable;
1876    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &options);  
1877        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1878        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1879        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1880        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1881        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1882          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1883          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1884          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1885          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1886          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1887          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1888    
1889    #if !defined NOINFOCHECK
1890        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1891        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1892          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 620  while (!done) Line 1900  while (!done)
1900            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1901              first_char, old_first_char);              first_char, old_first_char);
1902    
1903          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
1904            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1905              old_options);              get_options, old_options);
1906          }          }
1907    #endif
1908    
1909        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1910          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1911          size, gotten_store);          (int)size, (int)regex_gotten_store);
1912    
1913        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1914        if (backrefmax > 0)        if (backrefmax > 0)
1915          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
1916    
1917        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
1918          fprintf(outfile, "Case state changes\n");          {
1919            fprintf(outfile, "Named capturing subpatterns:\n");
1920            while (namecount-- > 0)
1921              {
1922              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1923                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1924                GET2(nametable, 0));
1925              nametable += nameentrysize;
1926              }
1927            }
1928    
1929          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1930          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1931    
1932          all_options = ((real_pcre *)re)->options;
1933          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1934    
1935          if (get_options == 0) fprintf(outfile, "No options\n");
1936            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1937              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1938              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1939              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1940              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1941              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1942              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1943              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1944              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1945              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1946              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1947              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1948              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1949              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1950              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1951              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1952              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1953              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1954    
1955          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1956    
1957          switch (get_options & PCRE_NEWLINE_BITS)
1958            {
1959            case PCRE_NEWLINE_CR:
1960            fprintf(outfile, "Forced newline sequence: CR\n");
1961            break;
1962    
1963            case PCRE_NEWLINE_LF:
1964            fprintf(outfile, "Forced newline sequence: LF\n");
1965            break;
1966    
1967            case PCRE_NEWLINE_CRLF:
1968            fprintf(outfile, "Forced newline sequence: CRLF\n");
1969            break;
1970    
1971            case PCRE_NEWLINE_ANYCRLF:
1972            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1973            break;
1974    
1975            case PCRE_NEWLINE_ANY:
1976            fprintf(outfile, "Forced newline sequence: ANY\n");
1977            break;
1978    
1979            default:
1980            break;
1981            }
1982    
1983        if (first_char == -1)        if (first_char == -1)
1984          {          {
1985          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1986          }          }
1987        else if (first_char < 0)        else if (first_char < 0)
1988          {          {
# Line 656  while (!done) Line 1990  while (!done)
1990          }          }
1991        else        else
1992          {          {
1993          if (isprint(first_char))          int ch = first_char & 255;
1994            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1995              "" : " (caseless)";
1996            if (PRINTHEX(ch))
1997              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1998          else          else
1999            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
2000          }          }
2001    
2002        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 2005  while (!done)
2005          }          }
2006        else        else
2007          {          {
2008          if (isprint(need_char))          int ch = need_char & 255;
2009            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2010              "" : " (caseless)";
2011            if (PRINTHEX(ch))
2012              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2013          else          else
2014            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2015          }          }
       }  
2016    
2017      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
2018      help with the matching. */        value, but it varies, depending on the computer architecture, and
2019          so messes up the test suite. (And with the /F option, it might be
2020          flipped.) */
2021    
2022      if (do_study)        if (do_study)
       {  
       if (timeit)  
2023          {          {
2024          register int i;          if (extra == NULL)
2025          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
2026          clock_t start_time = clock();          else
2027          for (i = 0; i < LOOPREPEAT; i++)            {
2028            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
2029          time_taken = clock() - start_time;            int minlength;
2030          if (extra != NULL) free(extra);  
2031          fprintf(outfile, "  Study time %.3f milliseconds\n",            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2032            ((double)time_taken * 1000.0)/            fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2033            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
2034              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2035              if (start_bits == NULL)
2036                fprintf(outfile, "No set of starting bytes\n");
2037              else
2038                {
2039                int i;
2040                int c = 24;
2041                fprintf(outfile, "Starting byte set: ");
2042                for (i = 0; i < 256; i++)
2043                  {
2044                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2045                    {
2046                    if (c > 75)
2047                      {
2048                      fprintf(outfile, "\n  ");
2049                      c = 2;
2050                      }
2051                    if (PRINTHEX(i) && i != ' ')
2052                      {
2053                      fprintf(outfile, "%c ", i);
2054                      c += 2;
2055                      }
2056                    else
2057                      {
2058                      fprintf(outfile, "\\x%02x ", i);
2059                      c += 5;
2060                      }
2061                    }
2062                  }
2063                fprintf(outfile, "\n");
2064                }
2065              }
2066          }          }
2067          }
2068    
2069        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
2070        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
2071          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
2072    
2073        else if (do_showinfo)      if (to_file != NULL)
2074          {
2075          FILE *f = fopen((char *)to_file, "wb");
2076          if (f == NULL)
2077            {
2078            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2079            }
2080          else
2081          {          {
2082          uschar *start_bits = NULL;          uschar sbuf[8];
2083          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (uschar)((true_size >> 24) & 255);
2084          if (start_bits == NULL)          sbuf[1] = (uschar)((true_size >> 16) & 255);
2085            fprintf(outfile, "No starting character set\n");          sbuf[2] = (uschar)((true_size >>  8) & 255);
2086            sbuf[3] = (uschar)((true_size) & 255);
2087    
2088            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2089            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2090            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2091            sbuf[7] = (uschar)((true_study_size) & 255);
2092    
2093            if (fwrite(sbuf, 1, 8, f) < 8 ||
2094                fwrite(re, 1, true_size, f) < true_size)
2095              {
2096              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2097              }
2098          else          else
2099            {            {
2100            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
2101            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
2102              {              {
2103              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2104                    true_study_size)
2105                {                {
2106                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2107                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2108                }                }
2109                else fprintf(outfile, "Study data written to %s\n", to_file);
2110    
2111              }              }
           fprintf(outfile, "\n");  
2112            }            }
2113            fclose(f);
2114            }
2115    
2116          new_free(re);
2117          if (extra != NULL) new_free(extra);
2118          if (locale_set)
2119            {
2120            new_free((void *)tables);
2121            setlocale(LC_CTYPE, "C");
2122            locale_set = 0;
2123          }          }
2124          continue;  /* With next regex */
2125        }        }
2126      }      }        /* End of non-POSIX compile */
2127    
2128    /* Read data lines and test them */    /* Read data lines and test them */
2129    
2130    for (;;)    for (;;)
2131      {      {
2132      unsigned char *q;      uschar *q;
2133      unsigned char *bptr = dbuffer;      uschar *bptr;
2134        int *use_offsets = offsets;
2135        int use_size_offsets = size_offsets;
2136        int callout_data = 0;
2137        int callout_data_set = 0;
2138      int count, c;      int count, c;
2139      int copystrings = 0;      int copystrings = 0;
2140        int find_match_limit = default_find_match_limit;
2141      int getstrings = 0;      int getstrings = 0;
2142      int getlist = 0;      int getlist = 0;
2143      int gmatched = 0;      int gmatched = 0;
2144      int start_offset = 0;      int start_offset = 0;
2145        int start_offset_sign = 1;
2146      int g_notempty = 0;      int g_notempty = 0;
2147      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
2148    
2149      options = 0;      options = 0;
2150    
2151      if (infile == stdin) printf("data> ");      *copynames = 0;
2152      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2153    
2154        copynamesptr = copynames;
2155        getnamesptr = getnames;
2156    
2157        pcre_callout = callout;
2158        first_callout = 1;
2159        callout_extra = 0;
2160        callout_count = 0;
2161        callout_fail_count = 999999;
2162        callout_fail_id = -1;
2163        show_malloc = 0;
2164    
2165        if (extra != NULL) extra->flags &=
2166          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2167    
2168        len = 0;
2169        for (;;)
2170        {        {
2171        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2172        goto CONTINUE;          {
2173            if (len > 0)    /* Reached EOF without hitting a newline */
2174              {
2175              fprintf(outfile, "\n");
2176              break;
2177              }
2178            done = 1;
2179            goto CONTINUE;
2180            }
2181          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2182          len = (int)strlen((char *)buffer);
2183          if (buffer[len-1] == '\n') break;
2184        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2185    
     len = (int)strlen((char *)buffer);  
2186      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2187      buffer[len] = 0;      buffer[len] = 0;
2188      if (len == 0) break;      if (len == 0) break;
# Line 772  while (!done) Line 2190  while (!done)
2190      p = buffer;      p = buffer;
2191      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2192    
2193      q = dbuffer;      bptr = q = dbuffer;
2194      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2195        {        {
2196        int i = 0;        int i = 0;
2197        int n = 0;        int n = 0;
2198    
2199        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2200          {          {
2201          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 793  while (!done) Line 2212  while (!done)
2212          c -= '0';          c -= '0';
2213          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2214            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2215    
2216    #if !defined NOUTF8
2217            if (use_utf8 && c > 255)
2218              {
2219              unsigned char buff8[8];
2220              int ii, utn;
2221              utn = ord2utf8(c, buff8);
2222              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2223              c = buff8[ii];   /* Last byte */
2224              }
2225    #endif
2226          break;          break;
2227    
2228          case 'x':          case 'x':
2229    
2230            /* Handle \x{..} specially - new Perl thing for utf8 */
2231    
2232    #if !defined NOUTF8
2233            if (*p == '{')
2234              {
2235              unsigned char *pt = p;
2236              c = 0;
2237              while (isxdigit(*(++pt)))
2238                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2239              if (*pt == '}')
2240                {
2241                unsigned char buff8[8];
2242                int ii, utn;
2243                if (use_utf8)
2244                  {
2245                  utn = ord2utf8(c, buff8);
2246                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2247                  c = buff8[ii];   /* Last byte */
2248                  }
2249                else
2250                 {
2251                 if (c > 255)
2252                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2253                     "UTF-8 mode is not enabled.\n"
2254                     "** Truncation will probably give the wrong result.\n", c);
2255                 }
2256                p = pt + 1;
2257                break;
2258                }
2259              /* Not correct form; fall through */
2260              }
2261    #endif
2262    
2263            /* Ordinary \x */
2264    
2265          c = 0;          c = 0;
2266          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2267            {            {
# Line 804  while (!done) Line 2270  while (!done)
2270            }            }
2271          break;          break;
2272    
2273          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2274          p--;          p--;
2275          continue;          continue;
2276    
2277            case '>':
2278            if (*p == '-')
2279              {
2280              start_offset_sign = -1;
2281              p++;
2282              }
2283            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2284            start_offset *= start_offset_sign;
2285            continue;
2286    
2287          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2288          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2289          continue;          continue;
# Line 817  while (!done) Line 2293  while (!done)
2293          continue;          continue;
2294    
2295          case 'C':          case 'C':
2296          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
2297          copystrings |= 1 << n;            {
2298              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2299              copystrings |= 1 << n;
2300              }
2301            else if (isalnum(*p))
2302              {
2303              uschar *npp = copynamesptr;
2304              while (isalnum(*p)) *npp++ = *p++;
2305              *npp++ = 0;
2306              *npp = 0;
2307              n = pcre_get_stringnumber(re, (char *)copynamesptr);
2308              if (n < 0)
2309                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2310              copynamesptr = npp;
2311              }
2312            else if (*p == '+')
2313              {
2314              callout_extra = 1;
2315              p++;
2316              }
2317            else if (*p == '-')
2318              {
2319              pcre_callout = NULL;
2320              p++;
2321              }
2322            else if (*p == '!')
2323              {
2324              callout_fail_id = 0;
2325              p++;
2326              while(isdigit(*p))
2327                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2328              callout_fail_count = 0;
2329              if (*p == '!')
2330                {
2331                p++;
2332                while(isdigit(*p))
2333                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2334                }
2335              }
2336            else if (*p == '*')
2337              {
2338              int sign = 1;
2339              callout_data = 0;
2340              if (*(++p) == '-') { sign = -1; p++; }
2341              while(isdigit(*p))
2342                callout_data = callout_data * 10 + *p++ - '0';
2343              callout_data *= sign;
2344              callout_data_set = 1;
2345              }
2346            continue;
2347    
2348    #if !defined NODFA
2349            case 'D':
2350    #if !defined NOPOSIX
2351            if (posix || do_posix)
2352              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2353            else
2354    #endif
2355              use_dfa = 1;
2356            continue;
2357    #endif
2358    
2359    #if !defined NODFA
2360            case 'F':
2361            options |= PCRE_DFA_SHORTEST;
2362          continue;          continue;
2363    #endif
2364    
2365          case 'G':          case 'G':
2366          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
2367          getstrings |= 1 << n;            {
2368              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2369              getstrings |= 1 << n;
2370              }
2371            else if (isalnum(*p))
2372              {
2373              uschar *npp = getnamesptr;
2374              while (isalnum(*p)) *npp++ = *p++;
2375              *npp++ = 0;
2376              *npp = 0;
2377              n = pcre_get_stringnumber(re, (char *)getnamesptr);
2378              if (n < 0)
2379                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2380              getnamesptr = npp;
2381              }
2382          continue;          continue;
2383    
2384          case 'L':          case 'L':
2385          getlist = 1;          getlist = 1;
2386          continue;          continue;
2387    
2388            case 'M':
2389            find_match_limit = 1;
2390            continue;
2391    
2392          case 'N':          case 'N':
2393          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2394              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2395            else
2396              options |= PCRE_NOTEMPTY;
2397          continue;          continue;
2398    
2399          case 'O':          case 'O':
2400          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2401          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
2402              {
2403              size_offsets_max = n;
2404              free(offsets);
2405              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2406              if (offsets == NULL)
2407                {
2408                printf("** Failed to get %d bytes of memory for offsets vector\n",
2409                  (int)(size_offsets_max * sizeof(int)));
2410                yield = 1;
2411                goto EXIT;
2412                }
2413              }
2414            use_size_offsets = n;
2415            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2416            continue;
2417    
2418            case 'P':
2419            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2420              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2421            continue;
2422    
2423            case 'Q':
2424            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2425            if (extra == NULL)
2426              {
2427              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2428              extra->flags = 0;
2429              }
2430            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2431            extra->match_limit_recursion = n;
2432            continue;
2433    
2434            case 'q':
2435            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2436            if (extra == NULL)
2437              {
2438              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2439              extra->flags = 0;
2440              }
2441            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2442            extra->match_limit = n;
2443            continue;
2444    
2445    #if !defined NODFA
2446            case 'R':
2447            options |= PCRE_DFA_RESTART;
2448            continue;
2449    #endif
2450    
2451            case 'S':
2452            show_malloc = 1;
2453            continue;
2454    
2455            case 'Y':
2456            options |= PCRE_NO_START_OPTIMIZE;
2457          continue;          continue;
2458    
2459          case 'Z':          case 'Z':
2460          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2461          continue;          continue;
2462    
2463            case '?':
2464            options |= PCRE_NO_UTF8_CHECK;
2465            continue;
2466    
2467            case '<':
2468              {
2469              int x = check_newline(p, outfile);
2470              if (x == 0) goto NEXT_DATA;
2471              options |= x;
2472              while (*p++ != '>');
2473              }
2474            continue;
2475          }          }
2476        *q++ = c;        *q++ = c;
2477        }        }
2478      *q = 0;      *q = 0;
2479      len = q - dbuffer;      len = (int)(q - dbuffer);
2480    
2481        /* Move the data to the end of the buffer so that a read over the end of
2482        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2483        we are using the POSIX interface, we must include the terminating zero. */
2484    
2485    #if !defined NOPOSIX
2486        if (posix || do_posix)
2487          {
2488          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2489          bptr += buffer_size - len - 1;
2490          }
2491        else
2492    #endif
2493          {
2494          memmove(bptr + buffer_size - len, bptr, len);
2495          bptr += buffer_size - len;
2496          }
2497    
2498        if ((all_use_dfa || use_dfa) && find_match_limit)
2499          {
2500          printf("**Match limit not relevant for DFA matching: ignored\n");
2501          find_match_limit = 0;
2502          }
2503    
2504      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2505      support timing. */      support timing or playing with the match limit or callout data. */
2506    
2507  #if !defined NOPOSIX  #if !defined NOPOSIX
2508      if (posix || do_posix)      if (posix || do_posix)
2509        {        {
2510        int rc;        int rc;
2511        int eflags = 0;        int eflags = 0;
2512        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
2513          if (use_size_offsets > 0)
2514            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2515        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2516        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2517          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2518    
2519        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2520    
2521        if (rc != 0)        if (rc != 0)
2522          {          {
2523          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2524          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2525          }          }
2526          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2527                  != 0)
2528            {
2529            fprintf(outfile, "Matched with REG_NOSUB\n");
2530            }
2531        else        else
2532          {          {
2533          size_t i;          size_t i;
2534          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2535            {            {
2536            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2537              {              {
2538              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2539              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2540                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2541              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2542              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
2543                {                {
2544                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
2545                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2546                    outfile);
2547                fprintf(outfile, "\n");                fprintf(outfile, "\n");
2548                }                }
2549              }              }
2550            }            }
2551          }          }
2552          free(pmatch);
2553        }        }
2554    
2555      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 896  while (!done) Line 2559  while (!done)
2559    
2560      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2561        {        {
2562        if (timeit)        markptr = NULL;
2563    
2564          if (timeitm > 0)
2565          {          {
2566          register int i;          register int i;
2567          clock_t time_taken;          clock_t time_taken;
2568          clock_t start_time = clock();          clock_t start_time = clock();
2569          for (i = 0; i < LOOPREPEAT; i++)  
2570    #if !defined NODFA
2571            if (all_use_dfa || use_dfa)
2572              {
2573              int workspace[1000];
2574              for (i = 0; i < timeitm; i++)
2575                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2576                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2577                  sizeof(workspace)/sizeof(int));
2578              }
2579            else
2580    #endif
2581    
2582            for (i = 0; i < timeitm; i++)
2583            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2584              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2585    
2586          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2587          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2588            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2589            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2590            }
2591    
2592          /* If find_match_limit is set, we want to do repeated matches with
2593          varying limits in order to find the minimum value for the match limit and
2594          for the recursion limit. */
2595    
2596          if (find_match_limit)
2597            {
2598            if (extra == NULL)
2599              {
2600              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2601              extra->flags = 0;
2602              }
2603    
2604            (void)check_match_limit(re, extra, bptr, len, start_offset,
2605              options|g_notempty, use_offsets, use_size_offsets,
2606              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2607              PCRE_ERROR_MATCHLIMIT, "match()");
2608    
2609            count = check_match_limit(re, extra, bptr, len, start_offset,
2610              options|g_notempty, use_offsets, use_size_offsets,
2611              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2612              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2613          }          }
2614    
2615        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2616          start_offset, options | g_notempty, offsets, size_offsets);  
2617          else if (callout_data_set)
2618            {
2619            if (extra == NULL)
2620              {
2621              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2622              extra->flags = 0;
2623              }
2624            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2625            extra->callout_data = &callout_data;
2626            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2627              options | g_notempty, use_offsets, use_size_offsets);
2628            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2629            }
2630    
2631          /* The normal case is just to do the match once, with the default
2632          value of match_limit. */
2633    
2634    #if !defined NODFA
2635          else if (all_use_dfa || use_dfa)
2636            {
2637            int workspace[1000];
2638            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639              options | g_notempty, use_offsets, use_size_offsets, workspace,
2640              sizeof(workspace)/sizeof(int));
2641            if (count == 0)
2642              {
2643              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2644              count = use_size_offsets/2;
2645              }
2646            }
2647    #endif
2648    
2649        if (count == 0)        else
2650          {          {
2651          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2652          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2653            if (count == 0)
2654              {
2655              fprintf(outfile, "Matched, but too many substrings\n");
2656              count = use_size_offsets/3;
2657              }
2658          }          }
2659    
2660        /* Matched */        /* Matched */
2661    
2662        if (count >= 0)        if (count >= 0)
2663          {          {
2664          int i;          int i, maxcount;
2665    
2666    #if !defined NODFA
2667            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2668    #endif
2669              maxcount = use_size_offsets/3;
2670    
2671            /* This is a check against a lunatic return value. */
2672    
2673            if (count > maxcount)
2674              {
2675              fprintf(outfile,
2676                "** PCRE error: returned count %d is too big for offset size %d\n",
2677                count, use_size_offsets);
2678              count = use_size_offsets/3;
2679              if (do_g || do_G)
2680                {
2681                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2682                do_g = do_G = FALSE;        /* Break g/G loop */
2683                }
2684              }
2685    
2686          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2687            {            {
2688            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2689              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2690            else            else
2691              {              {
2692              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2693              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2694                  use_offsets[i+1] - use_offsets[i], outfile);
2695              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2696              if (i == 0)              if (i == 0)
2697                {                {
2698                if (do_showrest)                if (do_showrest)
2699                  {                  {
2700                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2701                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2702                      outfile);
2703                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2704                  }                  }
2705                }                }
2706              }              }
2707            }            }
2708    
2709            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2710    
2711          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2712            {            {
2713            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2714              {              {
2715              char copybuffer[16];              char copybuffer[256];
2716              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2717                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2718              if (rc < 0)              if (rc < 0)
2719                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 959  while (!done) Line 2722  while (!done)
2722              }              }
2723            }            }
2724    
2725            for (copynamesptr = copynames;
2726                 *copynamesptr != 0;
2727                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2728              {
2729              char copybuffer[256];
2730              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2731                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2732              if (rc < 0)
2733                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2734              else
2735                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2736              }
2737    
2738          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2739            {            {
2740            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2741              {              {
2742              const char *substring;              const char *substring;
2743              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2744                i, &substring);                i, &substring);
2745              if (rc < 0)              if (rc < 0)
2746                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2747              else              else
2748                {                {
2749                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2750                free((void *)substring);                pcre_free_substring(substring);
2751                }                }
2752              }              }
2753            }            }
2754    
2755            for (getnamesptr = getnames;
2756                 *getnamesptr != 0;
2757                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2758              {
2759              const char *substring;
2760              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2761                count, (char *)getnamesptr, &substring);
2762              if (rc < 0)
2763                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2764              else
2765                {
2766                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2767                pcre_free_substring(substring);
2768                }
2769              }
2770    
2771          if (getlist)          if (getlist)
2772            {            {
2773            const char **stringlist;            const char **stringlist;
2774            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2775              &stringlist);              &stringlist);
2776            if (rc < 0)            if (rc < 0)
2777              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 2781  while (!done)
2781                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2782              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2783                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2784              free((void *)stringlist);              /* free((void *)stringlist); */
2785                pcre_free_substring_list(stringlist);
2786              }              }
2787            }            }
2788          }          }
2789    
2790          /* There was a partial match */
2791    
2792          else if (count == PCRE_ERROR_PARTIAL)
2793            {
2794            if (markptr == NULL) fprintf(outfile, "Partial match");
2795              else fprintf(outfile, "Partial match, mark=%s", markptr);
2796            if (use_size_offsets > 1)
2797              {
2798              fprintf(outfile, ": ");
2799              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2800                outfile);
2801              }
2802            fprintf(outfile, "\n");
2803            break;  /* Out of the /g loop */
2804            }
2805    
2806        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2807        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2808        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2809        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2810        was checked before setting PCRE_NOTEMPTY. */  
2811          Complication arises in the case when the newline convention is "any",
2812          "crlf", or "anycrlf". If the previous match was at the end of a line
2813          terminated by CRLF, an advance of one character just passes the \r,
2814          whereas we should prefer the longer newline sequence, as does the code in
2815          pcre_exec(). Fudge the offset value to achieve this. We check for a
2816          newline setting in the pattern; if none was set, use pcre_config() to
2817          find the default.
2818    
2819          Otherwise, in the case of UTF-8 matching, the advance must be one
2820          character, not one byte. */
2821    
2822        else        else
2823          {          {
2824          if (g_notempty != 0)          if (g_notempty != 0)
2825            {            {
2826            offsets[0] = start_offset;            int onechar = 1;
2827            offsets[1] = start_offset + 1;            unsigned int obits = ((real_pcre *)re)->options;
2828              use_offsets[0] = start_offset;
2829              if ((obits & PCRE_NEWLINE_BITS) == 0)
2830                {
2831                int d;
2832                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2833                /* Note that these values are always the ASCII ones, even in
2834                EBCDIC environments. CR = 13, NL = 10. */
2835                obits = (d == 13)? PCRE_NEWLINE_CR :
2836                        (d == 10)? PCRE_NEWLINE_LF :
2837                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2838                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2839                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2840                }
2841              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2842                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2843                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2844                  &&
2845                  start_offset < len - 1 &&
2846                  bptr[start_offset] == '\r' &&
2847                  bptr[start_offset+1] == '\n')
2848                onechar++;
2849              else if (use_utf8)
2850                {
2851                while (start_offset + onechar < len)
2852                  {
2853                  if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2854                  onechar++;
2855                  }
2856                }
2857              use_offsets[1] = start_offset + onechar;
2858            }            }
2859          else          else
2860            {            {
2861            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2862              {              {
2863              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0)
2864                else fprintf(outfile, "Error %d\n", count);                {
2865                  if (markptr == NULL) fprintf(outfile, "No match\n");
2866                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2867                  }
2868              }              }
2869              else fprintf(outfile, "Error %d\n", count);
2870            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2871            }            }
2872          }          }
# Line 1023  while (!done) Line 2876  while (!done)
2876        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2877    
2878        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2879        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2880        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
2881        we set PCRE_NOTEMPTY and try the match again at the same point. If this        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2882        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
2883          character. */
2884    
2885        g_notempty = 0;        g_notempty = 0;
2886        if (offsets[0] == offsets[1])  
2887          if (use_offsets[0] == use_offsets[1])
2888          {          {
2889          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
2890          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2891          }          }
2892    
2893        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2894    
2895        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
2896    
2897        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2898    
2899        else        else
2900          {          {
2901          bptr += offsets[1];          bptr += use_offsets[1];
2902          len -= offsets[1];          len -= use_offsets[1];
2903          }          }
2904        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2905    
2906        NEXT_DATA: continue;
2907      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2908    
2909    CONTINUE:    CONTINUE:
# Line 1055  while (!done) Line 2912  while (!done)
2912    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2913  #endif  #endif
2914    
2915    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2916    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2917    if (tables != NULL)    if (locale_set)
2918      {      {
2919      free((void *)tables);      new_free((void *)tables);
2920      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2921        locale_set = 0;
2922      }      }
2923    }    }
2924    
2925  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2926  return 0;  
2927    EXIT:
2928    
2929    if (infile != NULL && infile != stdin) fclose(infile);
2930    if (outfile != NULL && outfile != stdout) fclose(outfile);
2931    
2932    free(buffer);
2933    free(dbuffer);
2934    free(pbuffer);
2935    free(offsets);
2936    
2937    return yield;
2938  }  }
2939    
2940  /* End */  /* End of pcretest.c */

Legend:
Removed from v.43  
changed lines
  Added in v.580

  ViewVC Help
Powered by ViewVC 1.1.5