/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 25 by nigel, Sat Feb 24 21:38:45 2007 UTC revision 545 by ph10, Wed Jun 16 10:51:15 2010 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51  /* Use the internal info for displaying the results of pcre_study(). */  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59  #include "internal.h"  
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    #else
83    #include <sys/time.h>          /* These two includes are needed */
84    #include <sys/resource.h>      /* for setrlimit(). */
85    #define INPUT_MODE   "rb"
86    #define OUTPUT_MODE  "wb"
87    #endif
88    
89    
90    /* We have to include pcre_internal.h because we need the internal info for
91    displaying the results of pcre_study() and we also need to know about the
92    internal macros, structures, and other internal data values; pcretest has
93    "inside information" compared to a program that strictly follows the PCRE API.
94    
95    Although pcre_internal.h does itself include pcre.h, we explicitly include it
96    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97    appropriately for an application, not for building PCRE. */
98    
99    #include "pcre.h"
100    #include "pcre_internal.h"
101    
102    /* We need access to some of the data tables that PCRE uses. So as not to have
103    to keep two copies, we include the source file here, changing the names of the
104    external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107    #define _pcre_utf8_table1      utf8_table1
108    #define _pcre_utf8_table1_size utf8_table1_size
109    #define _pcre_utf8_table2      utf8_table2
110    #define _pcre_utf8_table3      utf8_table3
111    #define _pcre_utf8_table4      utf8_table4
112    #define _pcre_utt              utt
113    #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115    #define _pcre_OP_lengths       OP_lengths
116    
117    #include "pcre_tables.c"
118    
119    /* We also need the pcre_printint() function for printing out compiled
120    patterns. This function is in a separate file so that it can be included in
121    pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
123    
124    #define COMPILING_PCRETEST
125    #include "pcre_printint.src"
126    
127    /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135    /* It is possible to compile this test program without including support for
136    testing the POSIX interface, though this is not available via the standard
137    Makefile. */
138    
139    #if !defined NOPOSIX
140  #include "pcreposix.h"  #include "pcreposix.h"
141    #endif
142    
143    /* It is also possible, for the benefit of the version currently imported into
144    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145    interface to the DFA matcher (NODFA), and without the doublecheck of the old
146    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147    UTF8 support if PCRE is built without it. */
148    
149    #ifndef SUPPORT_UTF8
150    #ifndef NOUTF8
151    #define NOUTF8
152    #endif
153    #endif
154    
155    
156    /* Other parameters */
157    
158  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
159  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 163 
163  #endif  #endif
164  #endif  #endif
165    
166  #define LOOPREPEAT 10000  /* This is the default loop count for timing. */
167    
168    #define LOOPREPEAT 500000
169    
170    /* Static variables */
171    
172  static FILE *outfile;  static FILE *outfile;
173  static int log_store = 0;  static int log_store = 0;
174    static int callout_count;
175    static int callout_extra;
176    static int callout_fail_count;
177    static int callout_fail_id;
178    static int debug_lengths;
179    static int first_callout;
180    static int locale_set = 0;
181    static int show_malloc;
182    static int use_utf8;
183    static size_t gotten_store;
184    
185    /* The buffers grow automatically if very long input lines are encountered. */
186    
187    static int buffer_size = 50000;
188    static uschar *buffer = NULL;
189    static uschar *dbuffer = NULL;
190    static uschar *pbuffer = NULL;
191    
192    
193    /*************************************************
194    *         Alternate character tables             *
195    *************************************************/
196    
197  /* Debugging function to print the internal form of the regex. This is the same  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198  code as contained in pcre.c under the DEBUG macro. */  using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200  static const char *OP_names[] = {  the L (locale) option also adjusts the tables. */
201    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
202    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  /* This is the set of tables distributed as default with PCRE. It recognizes
203    "Opt", "^", "$", "Any", "chars", "not",  only ASCII characters. */
204    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
205    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  static const unsigned char tables0[] = {
206    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
207    "*", "*?", "+", "+?", "?", "??", "{", "{",  /* This table is a lower casing table. */
208    "class", "Ref",  
209    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",      0,  1,  2,  3,  4,  5,  6,  7,
210    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",      8,  9, 10, 11, 12, 13, 14, 15,
211    "Brazero", "Braminzero", "Bra"     16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515  };  };
516    
517    
 static void print_internals(pcre *re, FILE *outfile)  
 {  
 unsigned char *code = ((real_pcre *)re)->code;  
518    
519  fprintf(outfile, "------------------------------------------------------------------\n");  /*************************************************
520    *        Read or extend an input line            *
521    *************************************************/
522    
523    /* Input lines are read into buffer, but both patterns and data lines can be
524    continued over multiple input lines. In addition, if the buffer fills up, we
525    want to automatically expand it so as to be able to handle extremely large
526    lines that are needed for certain stress tests. When the input buffer is
527    expanded, the other two buffers must also be expanded likewise, and the
528    contents of pbuffer, which are a copy of the input for callouts, must be
529    preserved (for when expansion happens for a data line). This is not the most
530    optimal way of handling this, but hey, this is just a test program!
531    
532    Arguments:
533      f            the file to read
534      start        where in buffer to start (this *must* be within buffer)
535      prompt       for stdin or readline()
536    
537    Returns:       pointer to the start of new data
538                   could be a copy of start, or could be moved
539                   NULL if no data read and EOF reached
540    */
541    
542  for(;;)  static uschar *
543    extend_inputline(FILE *f, uschar *start, const char *prompt)
544    {
545    uschar *here = start;
546    
547    for (;;)
548    {    {
549    int c;    int rlen = (int)(buffer_size - (here - buffer));
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
550    
551          case OP_CRRANGE:    if (rlen > 1000)
552          case OP_CRMINRANGE:      {
553          min = (code[1] << 8) + code[2];      int dlen;
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
554    
555          default:      /* If libreadline support is required, use readline() to read a line if the
556          code--;      input is a terminal. Note that readline() removes the trailing newline, so
557          }      we must put it back again, to be compatible with fgets(). */
558    
559    #ifdef SUPPORT_LIBREADLINE
560        if (isatty(fileno(f)))
561          {
562          size_t len;
563          char *s = readline(prompt);
564          if (s == NULL) return (here == start)? NULL : start;
565          len = strlen(s);
566          if (len > 0) add_history(s);
567          if (len > rlen - 1) len = rlen - 1;
568          memcpy(here, s, len);
569          here[len] = '\n';
570          here[len+1] = 0;
571          free(s);
572          }
573        else
574    #endif
575    
576        /* Read the next line by normal means, prompting if the file is stdin. */
577    
578          {
579          if (f == stdin) printf("%s", prompt);
580          if (fgets((char *)here, rlen,  f) == NULL)
581            return (here == start)? NULL : start;
582          }
583    
584        dlen = (int)strlen((char *)here);
585        if (dlen > 0 && here[dlen - 1] == '\n') return start;
586        here += dlen;
587        }
588    
589      else
590        {
591        int new_buffer_size = 2*buffer_size;
592        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
593        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
594        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
595    
596        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
597          {
598          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
599          exit(1);
600        }        }
     break;  
601    
602      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
603        memcpy(new_pbuffer, pbuffer, buffer_size);
604    
605      default:      buffer_size = new_buffer_size;
606      fprintf(outfile, "    %s", OP_names[*code]);  
607      break;      start = new_buffer + (start - buffer);
608        here = new_buffer + (here - buffer);
609    
610        free(buffer);
611        free(dbuffer);
612        free(pbuffer);
613    
614        buffer = new_buffer;
615        dbuffer = new_dbuffer;
616        pbuffer = new_pbuffer;
617      }      }
618      }
619    
620    return NULL;  /* Control never gets here */
621    }
622    
623    
624    
625    
626    
627    
628    code++;  
629    fprintf(outfile, "\n");  /*************************************************
630    *          Read number from string               *
631    *************************************************/
632    
633    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
634    around with conditional compilation, just do the job by hand. It is only used
635    for unpicking arguments, so just keep it simple.
636    
637    Arguments:
638      str           string to be converted
639      endptr        where to put the end pointer
640    
641    Returns:        the converted value, as an int
642    */
643    
/* Convert an unsigned decimal number at the start of a string. Leading white
space is skipped, then digits are accumulated until the first non-digit. The
address of that first unconsumed character is stored through endptr. If no
digits are present the result is zero. No overflow check is made. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;

/* Step over any leading white space */

for (; *str != 0 && isspace(*str); str++) ;

/* Accumulate the decimal digits */

for (; isdigit(*str); str++)
  total = total * 10 + (int)(*str - '0');

*endptr = str;
return total;
}
653    
654    
655    
656    
657    /*************************************************
658    *            Convert UTF-8 string to value       *
659    *************************************************/
660    
661    /* This function takes one or more bytes that represents a UTF-8 character,
662    and returns the value of the character.
663    
664    Argument:
665      utf8bytes   a pointer to the byte vector
666      vptr        a pointer to an int to receive the value
667    
668    Returns:      >  0 => the number of bytes consumed
669                  -6 to 0 => malformed UTF-8 character at offset = (-return)
670    */
671    
672    #if !defined NOUTF8
673    
674    static int
675    utf82ord(unsigned char *utf8bytes, int *vptr)
676    {
677    int c = *utf8bytes++;
678    int d = c;
679    int i, j, s;
680    
681    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
682      {
683      if ((d & 0x80) == 0) break;
684      d <<= 1;
685    }    }
686    
687    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
688    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
689    
690    /* i now has a value in the range 1-5 */
691    
692    s = 6*i;
693    d = (c & utf8_table3[i]) << s;
694    
695    for (j = 0; j < i; j++)
696      {
697      c = *utf8bytes++;
698      if ((c & 0xc0) != 0x80) return -(j+1);
699      s -= 6;
700      d |= (c & 0x3f) << s;
701      }
702    
703    /* Check that encoding was the correct unique one */
704    
705    for (j = 0; j < utf8_table1_size; j++)
706      if (d <= utf8_table1[j]) break;
707    if (j != i) return -(i+1);
708    
709    /* Valid value */
710    
711    *vptr = d;
712    return i+1;
713  }  }
714    
715    #endif
716    
717    
718    
719    /*************************************************
720    *       Convert character value to UTF-8         *
721    *************************************************/
722    
723    /* This function takes an integer value in the range 0 - 0x7fffffff
724    and encodes it as a UTF-8 character in 1 to 6 bytes.
725    
726    Arguments:
727      cvalue     the character value
728      utf8bytes  pointer to buffer for result - at least 6 bytes long
729    
730    Returns:     number of bytes placed in the buffer
731    */
732    
733    #if !defined NOUTF8
734    
735    static int
736    ord2utf8(int cvalue, uschar *utf8bytes)
737    {
738    register int i, j;
739    for (i = 0; i < utf8_table1_size; i++)
740      if (cvalue <= utf8_table1[i]) break;
741    utf8bytes += i;
742    for (j = i; j > 0; j--)
743     {
744     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
745     cvalue >>= 6;
746     }
747    *utf8bytes = utf8_table2[i] | cvalue;
748    return i + 1;
749    }
750    
751    #endif
752    
753    
754    
755  /* Character string printing function. */  /*************************************************
756    *             Print character string             *
757    *************************************************/
758    
759    /* Character string printing function. Must handle UTF-8 strings in utf8
760    mode. Yields number of characters printed. If handed a NULL file, just counts
761    chars without printing. */
762    
763  static void pchars(unsigned char *p, int length)  static int pchars(unsigned char *p, int length, FILE *f)
764  {  {
765  int c;  int c = 0;
766    int yield = 0;
767    
768  while (length-- > 0)  while (length-- > 0)
769    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
770      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
771      if (use_utf8)
772        {
773        int rc = utf82ord(p, &c);
774    
775        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
776          {
777          length -= rc - 1;
778          p += rc;
779          if (PRINTHEX(c))
780            {
781            if (f != NULL) fprintf(f, "%c", c);
782            yield++;
783            }
784          else
785            {
786            int n = 4;
787            if (f != NULL) fprintf(f, "\\x{%02x}", c);
788            yield += (n <= 0x000000ff)? 2 :
789                     (n <= 0x00000fff)? 3 :
790                     (n <= 0x0000ffff)? 4 :
791                     (n <= 0x000fffff)? 5 : 6;
792            }
793          continue;
794          }
795        }
796    #endif
797    
798       /* Not UTF-8, or malformed UTF-8  */
799    
800      c = *p++;
801      if (PRINTHEX(c))
802        {
803        if (f != NULL) fprintf(f, "%c", c);
804        yield++;
805        }
806      else
807        {
808        if (f != NULL) fprintf(f, "\\x%02x", c);
809        yield += 4;
810        }
811      }
812    
813    return yield;
814    }
815    
816    
817    
818    /*************************************************
819    *              Callout function                  *
820    *************************************************/
821    
822    /* Called from PCRE as a result of the (?C) item. We print out where we are in
823    the match. Yield zero unless more callouts than the fail count, or the callout
824    data is not zero. */
825    
826    static int callout(pcre_callout_block *cb)
827    {
828    FILE *f = (first_callout | callout_extra)? outfile : NULL;
829    int i, pre_start, post_start, subject_length;
830    
831    if (callout_extra)
832      {
833      fprintf(f, "Callout %d: last capture = %d\n",
834        cb->callout_number, cb->capture_last);
835    
836      for (i = 0; i < cb->capture_top * 2; i += 2)
837        {
838        if (cb->offset_vector[i] < 0)
839          fprintf(f, "%2d: <unset>\n", i/2);
840        else
841          {
842          fprintf(f, "%2d: ", i/2);
843          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
844            cb->offset_vector[i+1] - cb->offset_vector[i], f);
845          fprintf(f, "\n");
846          }
847        }
848      }
849    
850    /* Re-print the subject in canonical form, the first time or if giving full
851    datails. On subsequent calls in the same match, we use pchars just to find the
852    printed lengths of the substrings. */
853    
854    if (f != NULL) fprintf(f, "--->");
855    
856    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
857    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
858      cb->current_position - cb->start_match, f);
859    
860    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
861    
862    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
863      cb->subject_length - cb->current_position, f);
864    
865    if (f != NULL) fprintf(f, "\n");
866    
867    /* Always print appropriate indicators, with callout number if not already
868    shown. For automatic callouts, show the pattern offset. */
869    
870    if (cb->callout_number == 255)
871      {
872      fprintf(outfile, "%+3d ", cb->pattern_position);
873      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
874      }
875    else
876      {
877      if (callout_extra) fprintf(outfile, "    ");
878        else fprintf(outfile, "%3d ", cb->callout_number);
879      }
880    
881    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
882    fprintf(outfile, "^");
883    
884    if (post_start > 0)
885      {
886      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
887      fprintf(outfile, "^");
888      }
889    
890    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
891      fprintf(outfile, " ");
892    
893    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
894      pbuffer + cb->pattern_position);
895    
896    fprintf(outfile, "\n");
897    first_callout = 0;
898    
899    if (cb->callout_data != NULL)
900      {
901      int callout_data = *((int *)(cb->callout_data));
902      if (callout_data != 0)
903        {
904        fprintf(outfile, "Callout data = %d\n", callout_data);
905        return callout_data;
906        }
907      }
908    
909    return (cb->callout_number != callout_fail_id)? 0 :
910           (++callout_count >= callout_fail_count)? 1 : 0;
911  }  }
912    
913    
914    /*************************************************
915    *            Local malloc functions              *
916    *************************************************/
917    
918  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
919  compiled re. */  compiled re. */
920    
921  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
922  {  {
923  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
924  return malloc(size);  gotten_store = size;
925    if (show_malloc)
926      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
927    return block;
928    }
929    
930    static void new_free(void *block)
931    {
932    if (show_malloc)
933      fprintf(outfile, "free             %p\n", block);
934    free(block);
935    }
936    
937    
938    /* For recursion malloc/free, to test stacking calls */
939    
940    static void *stack_malloc(size_t size)
941    {
942    void *block = malloc(size);
943    if (show_malloc)
944      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
945    return block;
946    }
947    
948    static void stack_free(void *block)
949    {
950    if (show_malloc)
951      fprintf(outfile, "stack_free       %p\n", block);
952    free(block);
953    }
954    
955    
956    /*************************************************
957    *          Call pcre_fullinfo()                  *
958    *************************************************/
959    
960    /* Get one piece of information from the pcre_fullinfo() function */
961    
962    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
963    {
964    int rc;
965    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
966      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
967    }
968    
969    
970    
971    /*************************************************
972    *         Byte flipping function                 *
973    *************************************************/
974    
/* Reverse the byte order of a value. When n is 2 the two low-order bytes are
exchanged; for any other n the four low-order bytes are reversed. Bits above
those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
984    
985    
986    
987    
988    /*************************************************
989    *        Check match or recursion limit          *
990    *************************************************/
991    
992    static int
993    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
994      int start_offset, int options, int *use_offsets, int use_size_offsets,
995      int flag, unsigned long int *limit, int errnumber, const char *msg)
996    {
997    int count;
998    int min = 0;
999    int mid = 64;
1000    int max = -1;
1001    
1002    extra->flags |= flag;
1003    
1004    for (;;)
1005      {
1006      *limit = mid;
1007    
1008      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1009        use_offsets, use_size_offsets);
1010    
1011      if (count == errnumber)
1012        {
1013        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1014        min = mid;
1015        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1016        }
1017    
1018      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1019                             count == PCRE_ERROR_PARTIAL)
1020        {
1021        if (mid == min + 1)
1022          {
1023          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1024          break;
1025          }
1026        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1027        max = mid;
1028        mid = (min + mid)/2;
1029        }
1030      else break;    /* Some other error */
1031      }
1032    
1033    extra->flags &= ~flag;
1034    return count;
1035    }
1036    
1037    
1038    
1039    /*************************************************
1040    *         Case-independent strncmp() function    *
1041    *************************************************/
1042    
1043    /*
1044    Arguments:
1045      s         first string
1046      t         second string
1047      n         number of characters to compare
1048    
1049    Returns:    < 0, = 0, or > 0, according to the comparison
1050    */
1051    
1052    static int
1053    strncmpic(uschar *s, uschar *t, int n)
1054    {
1055    while (n--)
1056      {
1057      int c = tolower(*s++) - tolower(*t++);
1058      if (c) return c;
1059      }
1060    return 0;
1061    }
1062    
1063    
1064    
1065    /*************************************************
1066    *         Check newline indicator                *
1067    *************************************************/
1068    
1069    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1070    a message and return 0 if there is no match.
1071    
1072    Arguments:
1073      p           points after the leading '<'
1074      f           file for error message
1075    
1076    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1077    */
1078    
1079    static int
1080    check_newline(uschar *p, FILE *f)
1081    {
1082    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1083    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1084    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1085    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1086    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1087    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1088    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1089    fprintf(f, "Unknown newline type at: <%s\n", p);
1090    return 0;
1091    }
1092    
1093    
1094    
1095    /*************************************************
1096    *             Usage function                     *
1097    *************************************************/
1098    
/* Write the command line summary to stdout. The lines for readline, DFA
matching, and the POSIX interface depend on how the program was compiled. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1132    
1133    
1134    
1135    /*************************************************
1136    *                Main Program                    *
1137    *************************************************/
1138    
1139  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
1140  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
1141  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 289  int main(int argc, char **argv) Line 1145  int main(int argc, char **argv)
1145  FILE *infile = stdin;  FILE *infile = stdin;
1146  int options = 0;  int options = 0;
1147  int study_options = 0;  int study_options = 0;
1148    int default_find_match_limit = FALSE;
1149  int op = 1;  int op = 1;
1150  int timeit = 0;  int timeit = 0;
1151    int timeitm = 0;
1152  int showinfo = 0;  int showinfo = 0;
1153    int showstore = 0;
1154    int quiet = 0;
1155    int size_offsets = 45;
1156    int size_offsets_max;
1157    int *offsets = NULL;
1158    #if !defined NOPOSIX
1159  int posix = 0;  int posix = 0;
1160    #endif
1161  int debug = 0;  int debug = 0;
1162  int done = 0;  int done = 0;
1163  unsigned char buffer[30000];  int all_use_dfa = 0;
1164  unsigned char dbuffer[1024];  int yield = 0;
1165    int stack_size;
1166    
1167    /* These vectors store, end-to-end, a list of captured substring names. Assume
1168    that 1024 is plenty long enough for the few names we'll be testing. */
1169    
1170    uschar copynames[1024];
1171    uschar getnames[1024];
1172    
1173    uschar *copynamesptr;
1174    uschar *getnamesptr;
1175    
1176    /* Get buffers from malloc() so that Electric Fence will check their misuse
1177    when I am debugging. They grow automatically when very long lines are read. */
1178    
1179  /* Static so that new_malloc can use it. */  buffer = (unsigned char *)malloc(buffer_size);
1180    dbuffer = (unsigned char *)malloc(buffer_size);
1181    pbuffer = (unsigned char *)malloc(buffer_size);
1182    
1183    /* The outfile variable is static so that new_malloc can use it. */
1184    
1185  outfile = stdout;  outfile = stdout;
1186    
1187    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1188    library to translate CRLF into a single LF character. At least, that's what
1189    I've been told: never having used Windows I take this all on trust. Originally
1190    it set 0x8000, but then I was advised that _O_BINARY was better. */
1191    
1192    #if defined(_WIN32) || defined(WIN32)
1193    _setmode( _fileno( stdout ), _O_BINARY );
1194    #endif
1195    
1196  /* Scan options */  /* Scan options */
1197    
1198  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1199    {    {
1200    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
1201    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
1202      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1203        showstore = 1;
1204      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1205      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1206    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1207    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1208      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1209    #if !defined NODFA
1210      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1211    #endif
1212      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1213          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1214            *endptr == 0))
1215        {
1216        op++;
1217        argc--;
1218        }
1219      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1220        {
1221        int both = argv[op][2] == 0;
1222        int temp;
1223        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1224                         *endptr == 0))
1225          {
1226          timeitm = temp;
1227          op++;
1228          argc--;
1229          }
1230        else timeitm = LOOPREPEAT;
1231        if (both) timeit = timeitm;
1232        }
1233      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1234          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1235            *endptr == 0))
1236        {
1237    #if defined(_WIN32) || defined(WIN32)
1238        printf("PCRE: -S not supported on this OS\n");
1239        exit(1);
1240    #else
1241        int rc;
1242        struct rlimit rlim;
1243        getrlimit(RLIMIT_STACK, &rlim);
1244        rlim.rlim_cur = stack_size * 1024 * 1024;
1245        rc = setrlimit(RLIMIT_STACK, &rlim);
1246        if (rc != 0)
1247          {
1248        printf("PCRE: setrlimit() failed with error %d\n", rc);
1249        exit(1);
1250          }
1251        op++;
1252        argc--;
1253    #endif
1254        }
1255    #if !defined NOPOSIX
1256    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1257    #endif
1258      else if (strcmp(argv[op], "-C") == 0)
1259        {
1260        int rc;
1261        unsigned long int lrc;
1262        printf("PCRE version %s\n", pcre_version());
1263        printf("Compiled with\n");
1264        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1265        printf("  %sUTF-8 support\n", rc? "" : "No ");
1266        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1267        printf("  %sUnicode properties support\n", rc? "" : "No ");
1268        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1269        /* Note that these values are always the ASCII values, even
1270        in EBCDIC environments. CR is 13 and NL is 10. */
1271        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1272          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1273          (rc == -2)? "ANYCRLF" :
1274          (rc == -1)? "ANY" : "???");
1275        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1276        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1277                                         "all Unicode newlines");
1278        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1279        printf("  Internal link size = %d\n", rc);
1280        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1281        printf("  POSIX malloc threshold = %d\n", rc);
1282        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1283        printf("  Default match limit = %ld\n", lrc);
1284        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1285        printf("  Default recursion depth limit = %ld\n", lrc);
1286        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1287        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1288        goto EXIT;
1289        }
1290      else if (strcmp(argv[op], "-help") == 0 ||
1291               strcmp(argv[op], "--help") == 0)
1292        {
1293        usage();
1294        goto EXIT;
1295        }
1296    else    else
1297      {      {
1298      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1299      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1300      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
1301             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
1302      }      }
1303    op++;    op++;
1304    argc--;    argc--;
1305    }    }
1306    
1307    /* Get the store for the offsets vector, and remember what it was */
1308    
1309    size_offsets_max = size_offsets;
1310    offsets = (int *)malloc(size_offsets_max * sizeof(int));
1311    if (offsets == NULL)
1312      {
1313      printf("** Failed to get %d bytes of memory for offsets vector\n",
1314        (int)(size_offsets_max * sizeof(int)));
1315      yield = 1;
1316      goto EXIT;
1317      }
1318    
1319  /* Sort out the input and output files */  /* Sort out the input and output files */
1320    
1321  if (argc > 1)  if (argc > 1)
1322    {    {
1323    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1324    if (infile == NULL)    if (infile == NULL)
1325      {      {
1326      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1327      return 1;      yield = 1;
1328        goto EXIT;
1329      }      }
1330    }    }
1331    
1332  if (argc > 2)  if (argc > 2)
1333    {    {
1334    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1335    if (outfile == NULL)    if (outfile == NULL)
1336      {      {
1337      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1338      return 1;      yield = 1;
1339        goto EXIT;
1340      }      }
1341    }    }
1342    
1343  /* Set alternative malloc function */  /* Set alternative malloc function */
1344    
1345  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1346    pcre_free = new_free;
1347    pcre_stack_malloc = stack_malloc;
1348    pcre_stack_free = stack_free;
1349    
1350  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1351    
1352  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1353    
1354  /* Main loop */  /* Main loop */
1355    
# Line 362  while (!done) Line 1357  while (!done)
1357    {    {
1358    pcre *re = NULL;    pcre *re = NULL;
1359    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
1360    
1361    #if !defined NOPOSIX  /* There are still compilers that require no indent */
1362    regex_t preg;    regex_t preg;
1363      int do_posix = 0;
1364    #endif
1365    
1366    const char *error;    const char *error;
1367      unsigned char *markptr;
1368    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1369    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
1370      const unsigned char *tables = NULL;
1371      unsigned long int true_size, true_study_size = 0;
1372      size_t size, regex_gotten_store;
1373      int do_mark = 0;
1374    int do_study = 0;    int do_study = 0;
1375    int do_debug = debug;    int do_debug = debug;
1376      int do_G = 0;
1377      int do_g = 0;
1378    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1379    int do_posix = 0;    int do_showrest = 0;
1380    int erroroffset, len, delimiter;    int do_flip = 0;
1381      int erroroffset, len, delimiter, poffset;
1382    
1383      use_utf8 = 0;
1384      debug_lengths = 1;
1385    
1386    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1387    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1388      fflush(outfile);
1389    
1390    p = buffer;    p = buffer;
1391    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1392    if (*p == 0) continue;    if (*p == 0) continue;
1393    
1394    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1395    complete, read more. */  
1396      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1397        {
1398        unsigned long int magic, get_options;
1399        uschar sbuf[8];
1400        FILE *f;
1401    
1402        p++;
1403        pp = p + (int)strlen((char *)p);
1404        while (isspace(pp[-1])) pp--;
1405        *pp = 0;
1406    
1407        f = fopen((char *)p, "rb");
1408        if (f == NULL)
1409          {
1410          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1411          continue;
1412          }
1413    
1414        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1415    
1416        true_size =
1417          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1418        true_study_size =
1419          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1420    
1421        re = (real_pcre *)new_malloc(true_size);
1422        regex_gotten_store = gotten_store;
1423    
1424        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1425    
1426        magic = ((real_pcre *)re)->magic_number;
1427        if (magic != MAGIC_NUMBER)
1428          {
1429          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1430            {
1431            do_flip = 1;
1432            }
1433          else
1434            {
1435            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1436            fclose(f);
1437            continue;
1438            }
1439          }
1440    
1441        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1442          do_flip? " (byte-inverted)" : "", p);
1443    
1444        /* Need to know if UTF-8 for printing data strings */
1445    
1446        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1447        use_utf8 = (get_options & PCRE_UTF8) != 0;
1448    
1449        /* Now see if there is any following study data */
1450    
1451        if (true_study_size != 0)
1452          {
1453          pcre_study_data *psd;
1454    
1455          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1456          extra->flags = PCRE_EXTRA_STUDY_DATA;
1457    
1458          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1459          extra->study_data = psd;
1460    
1461          if (fread(psd, 1, true_study_size, f) != true_study_size)
1462            {
1463            FAIL_READ:
1464            fprintf(outfile, "Failed to read data from %s\n", p);
1465            if (extra != NULL) new_free(extra);
1466            if (re != NULL) new_free(re);
1467            fclose(f);
1468            continue;
1469            }
1470          fprintf(outfile, "Study data loaded from %s\n", p);
1471          do_study = 1;     /* To get the data output if requested */
1472          }
1473        else fprintf(outfile, "No study data\n");
1474    
1475        fclose(f);
1476        goto SHOW_INFO;
1477        }
1478    
1479      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1480      the pattern; if it isn't complete, read more. */
1481    
1482    delimiter = *p++;    delimiter = *p++;
1483    
1484    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1485      {      {
1486      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1487      goto SKIP_DATA;      goto SKIP_DATA;
1488      }      }
1489    
1490    pp = p;    pp = p;
1491      poffset = (int)(p - buffer);
1492    
1493    for(;;)    for(;;)
1494      {      {
1495      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1496        {        {
1497        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1498        goto SKIP_DATA;          else if (*pp == delimiter) break;
1499          pp++;
1500        }        }
1501        if (*pp != 0) break;
1502      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if (fgets((char *)pp, len, infile) == NULL)  
1503        {        {
1504        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1505        done = 1;        done = 1;
# Line 415  while (!done) Line 1508  while (!done)
1508      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1509      }      }
1510    
1511    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1512      pointer to the correct relative point in the buffer. */
1513    
1514      p = buffer + poffset;
1515    
1516      /* If the first character after the delimiter is backslash, make
1517      the pattern end with backslash. This is purely to provide a way
1518      of testing for the error message when a pattern ends with backslash. */
1519    
1520      if (pp[1] == '\\') *pp++ = '\\';
1521    
1522      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1523      for callouts. */
1524    
1525    *pp++ = 0;    *pp++ = 0;
1526      strcpy((char *)pbuffer, (char *)p);
1527    
1528    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1529    
1530    options = 0;    options = 0;
1531    study_options = 0;    study_options = 0;
1532      log_store = showstore;  /* default from command line */
1533    
1534    while (*pp != 0)    while (*pp != 0)
1535      {      {
1536      switch (*pp++)      switch (*pp++)
1537        {        {
1538          case 'f': options |= PCRE_FIRSTLINE; break;
1539          case 'g': do_g = 1; break;
1540        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1541        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1542        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1543        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1544    
1545          case '+': do_showrest = 1; break;
1546        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1547          case 'B': do_debug = 1; break;
1548          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1549        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1550        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1551          case 'F': do_flip = 1; break;
1552          case 'G': do_G = 1; break;
1553        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1554          case 'J': options |= PCRE_DUPNAMES; break;
1555          case 'K': do_mark = 1; break;
1556          case 'M': log_store = 1; break;
1557          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1558    
1559    #if !defined NOPOSIX
1560        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1561    #endif
1562    
1563        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1564        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1565          case 'W': options |= PCRE_UCP; break;
1566        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1567          case 'Z': debug_lengths = 0; break;
1568          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1569          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1570    
1571          case 'T':
1572          switch (*pp++)
1573            {
1574            case '0': tables = tables0; break;
1575            case '1': tables = tables1; break;
1576    
1577            case '\r':
1578            case '\n':
1579            case ' ':
1580            case 0:
1581            fprintf(outfile, "** Missing table number after /T\n");
1582            goto SKIP_DATA;
1583    
1584            default:
1585            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1586            goto SKIP_DATA;
1587            }
1588          break;
1589    
1590        case 'L':        case 'L':
1591        ppp = pp;        ppp = pp;
1592        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1593          /* The 0 (NUL) test is just in case this is an unterminated line. */
1594          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1595        *ppp = 0;        *ppp = 0;
1596        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1597          {          {
1598          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1599          goto SKIP_DATA;          goto SKIP_DATA;
1600          }          }
1601          locale_set = 1;
1602        tables = pcre_maketables();        tables = pcre_maketables();
1603        pp = ppp;        pp = ppp;
1604        break;        break;
1605    
1606        case '\n': case ' ': break;        case '>':
1607          to_file = pp;
1608          while (*pp != 0) pp++;
1609          while (isspace(pp[-1])) pp--;
1610          *pp = 0;
1611          break;
1612    
1613          case '<':
1614            {
1615            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1616              {
1617              options |= PCRE_JAVASCRIPT_COMPAT;
1618              pp += 3;
1619              }
1620            else
1621              {
1622              int x = check_newline(pp, outfile);
1623              if (x == 0) goto SKIP_DATA;
1624              options |= x;
1625              while (*pp++ != '>');
1626              }
1627            }
1628          break;
1629    
1630          case '\r':                      /* So that it works in Windows */
1631          case '\n':
1632          case ' ':
1633          break;
1634    
1635        default:        default:
1636        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1637        goto SKIP_DATA;        goto SKIP_DATA;
# Line 465  while (!done) Line 1642  while (!done)
1642    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1643    local character tables. */    local character tables. */
1644    
1645    #if !defined NOPOSIX
1646    if (posix || do_posix)    if (posix || do_posix)
1647      {      {
1648      int rc;      int rc;
1649      int cflags = 0;      int cflags = 0;
1650    
1651      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1652      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1653        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1654        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1655        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1656        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1657        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1658    
1659      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1660    
1661      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 478  while (!done) Line 1663  while (!done)
1663    
1664      if (rc != 0)      if (rc != 0)
1665        {        {
1666        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1667        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1668        goto SKIP_DATA;        goto SKIP_DATA;
1669        }        }
# Line 487  while (!done) Line 1672  while (!done)
1672    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1673    
1674    else    else
1675    #endif  /* !defined NOPOSIX */
1676    
1677      {      {
1678      if (timeit)      unsigned long int get_options;
1679    
1680        if (timeit > 0)
1681        {        {
1682        register int i;        register int i;
1683        clock_t time_taken;        clock_t time_taken;
1684        clock_t start_time = clock();        clock_t start_time = clock();
1685        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1686          {          {
1687          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1688          if (re != NULL) free(re);          if (re != NULL) free(re);
1689          }          }
1690        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1691        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1692          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1693              (double)CLOCKS_PER_SEC);
1694        }        }
1695    
1696      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 516  while (!done) Line 1706  while (!done)
1706          {          {
1707          for (;;)          for (;;)
1708            {            {
1709            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1710              {              {
1711              done = 1;              done = 1;
1712              goto CONTINUE;              goto CONTINUE;
# Line 530  while (!done) Line 1720  while (!done)
1720        goto CONTINUE;        goto CONTINUE;
1721        }        }
1722    
1723      /* Compilation succeeded; print data if required */      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1724        within the regex; check for this so that we know how to process the data
1725      if (do_showinfo)      lines. */
1726        {  
1727        int first_char, count;      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1728        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1729    
1730        /* Print information if required. There are now two info-returning
1731        functions. The old one has a limited interface and returns only limited
1732        data. Check that it agrees with the newer one. */
1733    
1734        if (log_store)
1735          fprintf(outfile, "Memory allocation (code space): %d\n",
1736            (int)(gotten_store -
1737                  sizeof(real_pcre) -
1738                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1739    
1740        if (do_debug) print_internals(re, outfile);      /* Extract the size for possible writing before possibly flipping it,
1741        and remember the store that was got. */
1742    
1743        count = pcre_info(re, &options, &first_char);      true_size = ((real_pcre *)re)->size;
1744        if (count < 0) fprintf(outfile,      regex_gotten_store = gotten_store;
         "Error %d while reading info\n", count);  
       else  
         {  
         fprintf(outfile, "Identifying subpattern count = %d\n", count);  
         if (options == 0) fprintf(outfile, "No options\n");  
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
         else  
           {  
           if (isprint(first_char))  
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
         }  
       }  
1745    
1746      /* If /S was present, study the regexp to generate additional info to      /* If /S was present, study the regexp to generate additional info to
1747      help with the matching. */      help with the matching. */
1748    
1749      if (do_study)      if (do_study)
1750        {        {
1751        if (timeit)        if (timeit > 0)
1752          {          {
1753          register int i;          register int i;
1754          clock_t time_taken;          clock_t time_taken;
1755          clock_t start_time = clock();          clock_t start_time = clock();
1756          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1757            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1758          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1759          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1760          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1761            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeit) /
1762                (double)CLOCKS_PER_SEC);
1763          }          }
   
1764        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
1765        if (error != NULL)        if (error != NULL)
1766          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
1767        else if (extra == NULL)        else if (extra != NULL)
1768          fprintf(outfile, "Study returned NULL\n");          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1769          }
1770    
1771        /* This looks at internal information. A bit kludgy to do it this      /* If /K was present, we set up for handling MARK data. */
       way, but it is useful for testing. */  
1772    
1773        else if (do_showinfo)      if (do_mark)
1774          {
1775          if (extra == NULL)
1776            {
1777            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1778            extra->flags = 0;
1779            }
1780          extra->mark = &markptr;
1781          extra->flags |= PCRE_EXTRA_MARK;
1782          }
1783    
1784        /* If the 'F' option was present, we flip the bytes of all the integer
1785        fields in the regex data block and the study block. This is to make it
1786        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1787        compiled on a different architecture. */
1788    
1789        if (do_flip)
1790          {
1791          real_pcre *rre = (real_pcre *)re;
1792          rre->magic_number =
1793            byteflip(rre->magic_number, sizeof(rre->magic_number));
1794          rre->size = byteflip(rre->size, sizeof(rre->size));
1795          rre->options = byteflip(rre->options, sizeof(rre->options));
1796          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1797          rre->top_bracket =
1798            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1799          rre->top_backref =
1800            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1801          rre->first_byte =
1802            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1803          rre->req_byte =
1804            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1805          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1806            sizeof(rre->name_table_offset));
1807          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1808            sizeof(rre->name_entry_size));
1809          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1810            sizeof(rre->name_count));
1811    
1812          if (extra != NULL)
1813            {
1814            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1815            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1816            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1817            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1818            }
1819          }
1820    
1821        /* Extract information from the compiled data if required */
1822    
1823        SHOW_INFO:
1824    
1825        if (do_debug)
1826          {
1827          fprintf(outfile, "------------------------------------------------------------------\n");
1828          pcre_printint(re, outfile, debug_lengths);
1829          }
1830    
1831        /* We already have the options in get_options (see above) */
1832    
1833        if (do_showinfo)
1834          {
1835          unsigned long int all_options;
1836    #if !defined NOINFOCHECK
1837          int old_first_char, old_options, old_count;
1838    #endif
1839          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1840            hascrorlf;
1841          int nameentrysize, namecount;
1842          const uschar *nametable;
1843    
1844          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1845          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1846          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1847          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1848          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1849          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1850          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1851          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1852          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1853          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1854          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1855    
1856    #if !defined NOINFOCHECK
1857          old_count = pcre_info(re, &old_options, &old_first_char);
1858          if (count < 0) fprintf(outfile,
1859            "Error %d from pcre_info()\n", count);
1860          else
1861            {
1862            if (old_count != count) fprintf(outfile,
1863              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1864                old_count);
1865    
1866            if (old_first_char != first_char) fprintf(outfile,
1867              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1868                first_char, old_first_char);
1869    
1870            if (old_options != (int)get_options) fprintf(outfile,
1871              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1872                get_options, old_options);
1873            }
1874    #endif
1875    
1876          if (size != regex_gotten_store) fprintf(outfile,
1877            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1878            (int)size, (int)regex_gotten_store);
1879    
1880          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1881          if (backrefmax > 0)
1882            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1883    
1884          if (namecount > 0)
1885            {
1886            fprintf(outfile, "Named capturing subpatterns:\n");
1887            while (namecount-- > 0)
1888              {
1889              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1890                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1891                GET2(nametable, 0));
1892              nametable += nameentrysize;
1893              }
1894            }
1895    
1896          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1897          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1898    
1899          all_options = ((real_pcre *)re)->options;
1900          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1901    
1902          if (get_options == 0) fprintf(outfile, "No options\n");
1903            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1904              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1905              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1906              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1907              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1908              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1909              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1910              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1911              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1912              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1913              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1914              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1915              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1916              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1917              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1918              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1919              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1920    
1921          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1922    
1923          switch (get_options & PCRE_NEWLINE_BITS)
1924            {
1925            case PCRE_NEWLINE_CR:
1926            fprintf(outfile, "Forced newline sequence: CR\n");
1927            break;
1928    
1929            case PCRE_NEWLINE_LF:
1930            fprintf(outfile, "Forced newline sequence: LF\n");
1931            break;
1932    
1933            case PCRE_NEWLINE_CRLF:
1934            fprintf(outfile, "Forced newline sequence: CRLF\n");
1935            break;
1936    
1937            case PCRE_NEWLINE_ANYCRLF:
1938            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1939            break;
1940    
1941            case PCRE_NEWLINE_ANY:
1942            fprintf(outfile, "Forced newline sequence: ANY\n");
1943            break;
1944    
1945            default:
1946            break;
1947            }
1948    
1949          if (first_char == -1)
1950            {
1951            fprintf(outfile, "First char at start or follows newline\n");
1952            }
1953          else if (first_char < 0)
1954            {
1955            fprintf(outfile, "No first char\n");
1956            }
1957          else
1958            {
1959            int ch = first_char & 255;
1960            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1961              "" : " (caseless)";
1962            if (PRINTHEX(ch))
1963              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1964            else
1965              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1966            }
1967    
1968          if (need_char < 0)
1969            {
1970            fprintf(outfile, "No need char\n");
1971            }
1972          else
1973            {
1974            int ch = need_char & 255;
1975            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1976              "" : " (caseless)";
1977            if (PRINTHEX(ch))
1978              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1979            else
1980              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1981            }
1982    
1983          /* Don't output study size; at present it is in any case a fixed
1984          value, but it varies, depending on the computer architecture, and
1985          so messes up the test suite. (And with the /F option, it might be
1986          flipped.) */
1987    
1988          if (do_study)
1989            {
1990            if (extra == NULL)
1991              fprintf(outfile, "Study returned NULL\n");
1992            else
1993              {
1994              uschar *start_bits = NULL;
1995              int minlength;
1996    
1997              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1998              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1999    
2000              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2001              if (start_bits == NULL)
2002                fprintf(outfile, "No set of starting bytes\n");
2003              else
2004                {
2005                int i;
2006                int c = 24;
2007                fprintf(outfile, "Starting byte set: ");
2008                for (i = 0; i < 256; i++)
2009                  {
2010                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2011                    {
2012                    if (c > 75)
2013                      {
2014                      fprintf(outfile, "\n  ");
2015                      c = 2;
2016                      }
2017                    if (PRINTHEX(i) && i != ' ')
2018                      {
2019                      fprintf(outfile, "%c ", i);
2020                      c += 2;
2021                      }
2022                    else
2023                      {
2024                      fprintf(outfile, "\\x%02x ", i);
2025                      c += 5;
2026                      }
2027                    }
2028                  }
2029                fprintf(outfile, "\n");
2030                }
2031              }
2032            }
2033          }
2034    
2035        /* If the '>' option was present, we write out the regex to a file, and
2036        that is all. The first 8 bytes of the file are the regex length and then
2037        the study length, in big-endian order. */
2038    
2039        if (to_file != NULL)
2040          {
2041          FILE *f = fopen((char *)to_file, "wb");
2042          if (f == NULL)
2043          {          {
2044          real_pcre_extra *xx = (real_pcre_extra *)extra;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2045          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          }
2046            fprintf(outfile, "No starting character set\n");        else
2047            {
2048            uschar sbuf[8];
2049            sbuf[0] = (uschar)((true_size >> 24) & 255);
2050            sbuf[1] = (uschar)((true_size >> 16) & 255);
2051            sbuf[2] = (uschar)((true_size >>  8) & 255);
2052            sbuf[3] = (uschar)((true_size) & 255);
2053    
2054            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2055            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2056            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2057            sbuf[7] = (uschar)((true_study_size) & 255);
2058    
2059            if (fwrite(sbuf, 1, 8, f) < 8 ||
2060                fwrite(re, 1, true_size, f) < true_size)
2061              {
2062              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2063              }
2064          else          else
2065            {            {
2066            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
2067            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
2068              {              {
2069              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2070                    true_study_size)
2071                {                {
2072                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2073                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2074                }                }
2075                else fprintf(outfile, "Study data written to %s\n", to_file);
2076    
2077              }              }
           fprintf(outfile, "\n");  
2078            }            }
2079            fclose(f);
2080            }
2081    
2082          new_free(re);
2083          if (extra != NULL) new_free(extra);
2084          if (locale_set)
2085            {
2086            new_free((void *)tables);
2087            setlocale(LC_CTYPE, "C");
2088            locale_set = 0;
2089          }          }
2090          continue;  /* With next regex */
2091        }        }
2092      }      }        /* End of non-POSIX compile */
2093    
2094    /* Read data lines and test them */    /* Read data lines and test them */
2095    
2096    for (;;)    for (;;)
2097      {      {
2098      unsigned char *q;      uschar *q;
2099        uschar *bptr;
2100        int *use_offsets = offsets;
2101        int use_size_offsets = size_offsets;
2102        int callout_data = 0;
2103        int callout_data_set = 0;
2104      int count, c;      int count, c;
2105      int offsets[45];      int copystrings = 0;
2106      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = default_find_match_limit;
2107        int getstrings = 0;
2108        int getlist = 0;
2109        int gmatched = 0;
2110        int start_offset = 0;
2111        int g_notempty = 0;
2112        int use_dfa = 0;
2113    
2114      options = 0;      options = 0;
2115    
2116      if (infile == stdin) printf("  data> ");      *copynames = 0;
2117      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2118    
2119        copynamesptr = copynames;
2120        getnamesptr = getnames;
2121    
2122        pcre_callout = callout;
2123        first_callout = 1;
2124        callout_extra = 0;
2125        callout_count = 0;
2126        callout_fail_count = 999999;
2127        callout_fail_id = -1;
2128        show_malloc = 0;
2129    
2130        if (extra != NULL) extra->flags &=
2131          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2132    
2133        len = 0;
2134        for (;;)
2135        {        {
2136        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2137        goto CONTINUE;          {
2138            if (len > 0)    /* Reached EOF without hitting a newline */
2139              {
2140              fprintf(outfile, "\n");
2141              break;
2142              }
2143            done = 1;
2144            goto CONTINUE;
2145            }
2146          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2147          len = (int)strlen((char *)buffer);
2148          if (buffer[len-1] == '\n') break;
2149        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2150    
     len = (int)strlen((char *)buffer);  
2151      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2152      buffer[len] = 0;      buffer[len] = 0;
2153      if (len == 0) break;      if (len == 0) break;
# Line 663  while (!done) Line 2155  while (!done)
2155      p = buffer;      p = buffer;
2156      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2157    
2158      q = dbuffer;      bptr = q = dbuffer;
2159      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2160        {        {
2161        int i = 0;        int i = 0;
2162        int n = 0;        int n = 0;
2163    
2164        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2165          {          {
2166          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 684  while (!done) Line 2177  while (!done)
2177          c -= '0';          c -= '0';
2178          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2179            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2180    
2181    #if !defined NOUTF8
2182            if (use_utf8 && c > 255)
2183              {
2184              unsigned char buff8[8];
2185              int ii, utn;
2186              utn = ord2utf8(c, buff8);
2187              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2188              c = buff8[ii];   /* Last byte */
2189              }
2190    #endif
2191          break;          break;
2192    
2193          case 'x':          case 'x':
2194    
2195            /* Handle \x{..} specially - new Perl thing for utf8 */
2196    
2197    #if !defined NOUTF8
2198            if (*p == '{')
2199              {
2200              unsigned char *pt = p;
2201              c = 0;
2202              while (isxdigit(*(++pt)))
2203                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2204              if (*pt == '}')
2205                {
2206                unsigned char buff8[8];
2207                int ii, utn;
2208                if (use_utf8)
2209                  {
2210                  utn = ord2utf8(c, buff8);
2211                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2212                  c = buff8[ii];   /* Last byte */
2213                  }
2214                else
2215                 {
2216                 if (c > 255)
2217                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2218                     "UTF-8 mode is not enabled.\n"
2219                     "** Truncation will probably give the wrong result.\n", c);
2220                 }
2221                p = pt + 1;
2222                break;
2223                }
2224              /* Not correct form; fall through */
2225              }
2226    #endif
2227    
2228            /* Ordinary \x */
2229    
2230          c = 0;          c = 0;
2231          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2232            {            {
# Line 695  while (!done) Line 2235  while (!done)
2235            }            }
2236          break;          break;
2237    
2238          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2239          p--;          p--;
2240          continue;          continue;
2241    
2242            case '>':
2243            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2244            continue;
2245    
2246          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2247          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2248          continue;          continue;
# Line 707  while (!done) Line 2251  while (!done)
2251          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
2252          continue;          continue;
2253    
2254            case 'C':
2255            if (isdigit(*p))    /* Set copy string */
2256              {
2257              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2258              copystrings |= 1 << n;
2259              }
2260            else if (isalnum(*p))
2261              {
2262              uschar *npp = copynamesptr;
2263              while (isalnum(*p)) *npp++ = *p++;
2264              *npp++ = 0;
2265              *npp = 0;
2266              n = pcre_get_stringnumber(re, (char *)copynamesptr);
2267              if (n < 0)
2268                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2269              copynamesptr = npp;
2270              }
2271            else if (*p == '+')
2272              {
2273              callout_extra = 1;
2274              p++;
2275              }
2276            else if (*p == '-')
2277              {
2278              pcre_callout = NULL;
2279              p++;
2280              }
2281            else if (*p == '!')
2282              {
2283              callout_fail_id = 0;
2284              p++;
2285              while(isdigit(*p))
2286                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2287              callout_fail_count = 0;
2288              if (*p == '!')
2289                {
2290                p++;
2291                while(isdigit(*p))
2292                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2293                }
2294              }
2295            else if (*p == '*')
2296              {
2297              int sign = 1;
2298              callout_data = 0;
2299              if (*(++p) == '-') { sign = -1; p++; }
2300              while(isdigit(*p))
2301                callout_data = callout_data * 10 + *p++ - '0';
2302              callout_data *= sign;
2303              callout_data_set = 1;
2304              }
2305            continue;
2306    
2307    #if !defined NODFA
2308            case 'D':
2309    #if !defined NOPOSIX
2310            if (posix || do_posix)
2311              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2312            else
2313    #endif
2314              use_dfa = 1;
2315            continue;
2316    
2317            case 'F':
2318            options |= PCRE_DFA_SHORTEST;
2319            continue;
2320    #endif
2321    
2322            case 'G':
2323            if (isdigit(*p))
2324              {
2325              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2326              getstrings |= 1 << n;
2327              }
2328            else if (isalnum(*p))
2329              {
2330              uschar *npp = getnamesptr;
2331              while (isalnum(*p)) *npp++ = *p++;
2332              *npp++ = 0;
2333              *npp = 0;
2334              n = pcre_get_stringnumber(re, (char *)getnamesptr);
2335              if (n < 0)
2336                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2337              getnamesptr = npp;
2338              }
2339            continue;
2340    
2341            case 'L':
2342            getlist = 1;
2343            continue;
2344    
2345            case 'M':
2346            find_match_limit = 1;
2347            continue;
2348    
2349            case 'N':
2350            if ((options & PCRE_NOTEMPTY) != 0)
2351              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2352            else
2353              options |= PCRE_NOTEMPTY;
2354            continue;
2355    
2356          case 'O':          case 'O':
2357          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2358          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
2359              {
2360              size_offsets_max = n;
2361              free(offsets);
2362              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2363              if (offsets == NULL)
2364                {
2365                printf("** Failed to get %d bytes of memory for offsets vector\n",
2366                  (int)(size_offsets_max * sizeof(int)));
2367                yield = 1;
2368                goto EXIT;
2369                }
2370              }
2371            use_size_offsets = n;
2372            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2373            continue;
2374    
2375            case 'P':
2376            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2377              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2378            continue;
2379    
2380            case 'Q':
2381            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2382            if (extra == NULL)
2383              {
2384              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2385              extra->flags = 0;
2386              }
2387            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2388            extra->match_limit_recursion = n;
2389            continue;
2390    
2391            case 'q':
2392            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2393            if (extra == NULL)
2394              {
2395              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2396              extra->flags = 0;
2397              }
2398            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2399            extra->match_limit = n;
2400            continue;
2401    
2402    #if !defined NODFA
2403            case 'R':
2404            options |= PCRE_DFA_RESTART;
2405            continue;
2406    #endif
2407    
2408            case 'S':
2409            show_malloc = 1;
2410            continue;
2411    
2412            case 'Y':
2413            options |= PCRE_NO_START_OPTIMIZE;
2414          continue;          continue;
2415    
2416          case 'Z':          case 'Z':
2417          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2418          continue;          continue;
2419    
2420            case '?':
2421            options |= PCRE_NO_UTF8_CHECK;
2422            continue;
2423    
2424            case '<':
2425              {
2426              int x = check_newline(p, outfile);
2427              if (x == 0) goto NEXT_DATA;
2428              options |= x;
2429              while (*p++ != '>');
2430              }
2431            continue;
2432          }          }
2433        *q++ = c;        *q++ = c;
2434        }        }
2435      *q = 0;      *q = 0;
2436      len = q - dbuffer;      len = (int)(q - dbuffer);
2437    
2438        /* Move the data to the end of the buffer so that a read over the end of
2439        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2440        we are using the POSIX interface, we must include the terminating zero. */
2441    
2442    #if !defined NOPOSIX
2443        if (posix || do_posix)
2444          {
2445          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2446          bptr += buffer_size - len - 1;
2447          }
2448        else
2449    #endif
2450          {
2451          memmove(bptr + buffer_size - len, bptr, len);
2452          bptr += buffer_size - len;
2453          }
2454    
2455        if ((all_use_dfa || use_dfa) && find_match_limit)
2456          {
2457          printf("**Match limit not relevant for DFA matching: ignored\n");
2458          find_match_limit = 0;
2459          }
2460    
2461      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2462      support timing. */      support timing or playing with the match limit or callout data. */
2463    
2464    #if !defined NOPOSIX
2465      if (posix || do_posix)      if (posix || do_posix)
2466        {        {
2467        int rc;        int rc;
2468        int eflags = 0;        int eflags = 0;
2469        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
2470          if (use_size_offsets > 0)
2471            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2472        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2473        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2474          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2475    
2476        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
2477    
2478        if (rc != 0)        if (rc != 0)
2479          {          {
2480          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2481          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2482          }          }
2483          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2484                  != 0)
2485            {
2486            fprintf(outfile, "Matched with REG_NOSUB\n");
2487            }
2488        else        else
2489          {          {
2490          size_t i;          size_t i;
2491          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2492            {            {
2493            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2494              {              {
2495              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2496              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2497                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2498              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2499                if (i == 0 && do_showrest)
2500                  {
2501                  fprintf(outfile, " 0+ ");
2502                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2503                    outfile);
2504                  fprintf(outfile, "\n");
2505                  }
2506              }              }
2507            }            }
2508          }          }
2509          free(pmatch);
2510        }        }
2511    
2512      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
2513    
2514      else      else
2515    #endif  /* !defined NOPOSIX */
2516    
2517        for (;; gmatched++)    /* Loop for /g or /G */
2518        {        {
2519        if (timeit)        markptr = NULL;
2520    
2521          if (timeitm > 0)
2522          {          {
2523          register int i;          register int i;
2524          clock_t time_taken;          clock_t time_taken;
2525          clock_t start_time = clock();          clock_t start_time = clock();
2526          for (i = 0; i < 4000; i++)  
2527            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
2528              size_offsets);          if (all_use_dfa || use_dfa)
2529              {
2530              int workspace[1000];
2531              for (i = 0; i < timeitm; i++)
2532                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2533                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2534                  sizeof(workspace)/sizeof(int));
2535              }
2536            else
2537    #endif
2538    
2539            for (i = 0; i < timeitm; i++)
2540              count = pcre_exec(re, extra, (char *)bptr, len,
2541                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2542    
2543          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2544          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2545            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2546                (double)CLOCKS_PER_SEC);
2547            }
2548    
2549          /* If find_match_limit is set, we want to do repeated matches with
2550          varying limits in order to find the minimum value for the match limit and
2551          for the recursion limit. */
2552    
2553          if (find_match_limit)
2554            {
2555            if (extra == NULL)
2556              {
2557              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2558              extra->flags = 0;
2559              }
2560    
2561            (void)check_match_limit(re, extra, bptr, len, start_offset,
2562              options|g_notempty, use_offsets, use_size_offsets,
2563              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2564              PCRE_ERROR_MATCHLIMIT, "match()");
2565    
2566            count = check_match_limit(re, extra, bptr, len, start_offset,
2567              options|g_notempty, use_offsets, use_size_offsets,
2568              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2569              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2570            }
2571    
2572          /* If callout_data is set, use the interface with additional data */
2573    
2574          else if (callout_data_set)
2575            {
2576            if (extra == NULL)
2577              {
2578              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2579              extra->flags = 0;
2580              }
2581            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2582            extra->callout_data = &callout_data;
2583            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2584              options | g_notempty, use_offsets, use_size_offsets);
2585            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2586          }          }
2587    
2588        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2589          size_offsets);        value of match_limit. */
2590    
2591    #if !defined NODFA
2592          else if (all_use_dfa || use_dfa)
2593            {
2594            int workspace[1000];
2595            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2596              options | g_notempty, use_offsets, use_size_offsets, workspace,
2597              sizeof(workspace)/sizeof(int));
2598            if (count == 0)
2599              {
2600              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2601              count = use_size_offsets/2;
2602              }
2603            }
2604    #endif
2605    
2606        if (count == 0)        else
2607          {          {
2608          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2609          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2610            if (count == 0)
2611              {
2612              fprintf(outfile, "Matched, but too many substrings\n");
2613              count = use_size_offsets/3;
2614              }
2615          }          }
2616    
2617          /* Matched */
2618    
2619        if (count >= 0)        if (count >= 0)
2620          {          {
2621          int i;          int i, maxcount;
2622          count *= 2;  
2623          for (i = 0; i < count; i += 2)  #if !defined NODFA
2624            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2625    #endif
2626              maxcount = use_size_offsets/3;
2627    
2628            /* This is a check against a lunatic return value. */
2629    
2630            if (count > maxcount)
2631              {
2632              fprintf(outfile,
2633                "** PCRE error: returned count %d is too big for offset size %d\n",
2634                count, use_size_offsets);
2635              count = use_size_offsets/3;
2636              if (do_g || do_G)
2637                {
2638                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2639                do_g = do_G = FALSE;        /* Break g/G loop */
2640                }
2641              }
2642    
2643            for (i = 0; i < count * 2; i += 2)
2644            {            {
2645            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2646              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2647            else            else
2648              {              {
2649              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2650              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2651                  use_offsets[i+1] - use_offsets[i], outfile);
2652              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2653                if (i == 0)
2654                  {
2655                  if (do_showrest)
2656                    {
2657                    fprintf(outfile, " 0+ ");
2658                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2659                      outfile);
2660                    fprintf(outfile, "\n");
2661                    }
2662                  }
2663                }
2664              }
2665    
2666            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2667    
2668            for (i = 0; i < 32; i++)
2669              {
2670              if ((copystrings & (1 << i)) != 0)
2671                {
2672                char copybuffer[256];
2673                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2674                  i, copybuffer, sizeof(copybuffer));
2675                if (rc < 0)
2676                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2677                else
2678                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2679                }
2680              }
2681    
2682            for (copynamesptr = copynames;
2683                 *copynamesptr != 0;
2684                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2685              {
2686              char copybuffer[256];
2687              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2688                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2689              if (rc < 0)
2690                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2691              else
2692                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2693              }
2694    
2695            for (i = 0; i < 32; i++)
2696              {
2697              if ((getstrings & (1 << i)) != 0)
2698                {
2699                const char *substring;
2700                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2701                  i, &substring);
2702                if (rc < 0)
2703                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2704                else
2705                  {
2706                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2707                  pcre_free_substring(substring);
2708                  }
2709                }
2710              }
2711    
2712            for (getnamesptr = getnames;
2713                 *getnamesptr != 0;
2714                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2715              {
2716              const char *substring;
2717              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2718                count, (char *)getnamesptr, &substring);
2719              if (rc < 0)
2720                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2721              else
2722                {
2723                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2724                pcre_free_substring(substring);
2725                }
2726              }
2727    
2728            if (getlist)
2729              {
2730              const char **stringlist;
2731              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2732                &stringlist);
2733              if (rc < 0)
2734                fprintf(outfile, "get substring list failed %d\n", rc);
2735              else
2736                {
2737                for (i = 0; i < count; i++)
2738                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2739                if (stringlist[i] != NULL)
2740                  fprintf(outfile, "string list not terminated by NULL\n");
2741                /* free((void *)stringlist); */
2742                pcre_free_substring_list(stringlist);
2743              }              }
2744            }            }
2745          }          }
2746    
2747          /* There was a partial match */
2748    
2749          else if (count == PCRE_ERROR_PARTIAL)
2750            {
2751            if (markptr == NULL) fprintf(outfile, "Partial match");
2752              else fprintf(outfile, "Partial match, mark=%s", markptr);
2753            if (use_size_offsets > 1)
2754              {
2755              fprintf(outfile, ": ");
2756              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2757                outfile);
2758              }
2759            fprintf(outfile, "\n");
2760            break;  /* Out of the /g loop */
2761            }
2762    
2763          /* Failed to match. If this is a /g or /G loop and we previously set
2764          g_notempty after a null match, this is not necessarily the end. We want
2765          to advance the start offset, and continue. We won't be at the end of the
2766          string - that was checked before setting g_notempty.
2767    
2768          Complication arises in the case when the newline option is "any" or
2769          "anycrlf". If the previous match was at the end of a line terminated by
2770          CRLF, an advance of one character just passes the \r, whereas we should
2771          prefer the longer newline sequence, as does the code in pcre_exec().
2772          Fudge the offset value to achieve this.
2773    
2774          Otherwise, in the case of UTF-8 matching, the advance must be one
2775          character, not one byte. */
2776    
2777        else        else
2778          {          {
2779          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2780              {
2781              int onechar = 1;
2782              unsigned int obits = ((real_pcre *)re)->options;
2783              use_offsets[0] = start_offset;
2784              if ((obits & PCRE_NEWLINE_BITS) == 0)
2785                {
2786                int d;
2787                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2788                /* Note that these values are always the ASCII ones, even in
2789                EBCDIC environments. CR = 13, NL = 10. */
2790                obits = (d == 13)? PCRE_NEWLINE_CR :
2791                        (d == 10)? PCRE_NEWLINE_LF :
2792                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2793                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2794                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2795                }
2796              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2797                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2798                  &&
2799                  start_offset < len - 1 &&
2800                  bptr[start_offset] == '\r' &&
2801                  bptr[start_offset+1] == '\n')
2802                onechar++;
2803              else if (use_utf8)
2804                {
2805                while (start_offset + onechar < len)
2806                  {
2807                  int tb = bptr[start_offset+onechar];
2808                  if (tb <= 127) break;
2809                  tb &= 0xc0;
2810                  if (tb != 0 && tb != 0xc0) onechar++;
2811                  }
2812                }
2813              use_offsets[1] = start_offset + onechar;
2814              }
2815            else
2816              {
2817              if (count == PCRE_ERROR_NOMATCH)
2818                {
2819                if (gmatched == 0)
2820                  {
2821                  if (markptr == NULL) fprintf(outfile, "No match\n");
2822                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2823                  }
2824                }
2825            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2826              break;  /* Out of the /g loop */
2827              }
2828          }          }
2829        }  
2830      }        /* If not /g or /G we are done */
2831    
2832          if (!do_g && !do_G) break;
2833    
2834          /* If we have matched an empty string, first check to see if we are at
2835          the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2836          Perl's /g options does. This turns out to be rather cunning. First we set
2837          PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2838          same point. If this fails (picked up above) we advance to the next
2839          character. */
2840    
2841          g_notempty = 0;
2842    
2843          if (use_offsets[0] == use_offsets[1])
2844            {
2845            if (use_offsets[0] == len) break;
2846            g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2847            }
2848    
2849          /* For /g, update the start offset, leaving the rest alone */
2850    
2851          if (do_g) start_offset = use_offsets[1];
2852    
2853          /* For /G, update the pointer and length */
2854    
2855          else
2856            {
2857            bptr += use_offsets[1];
2858            len -= use_offsets[1];
2859            }
2860          }  /* End of loop for /g and /G */
2861    
2862        NEXT_DATA: continue;
2863        }    /* End of loop for data lines */
2864    
2865    CONTINUE:    CONTINUE:
2866    
2867    #if !defined NOPOSIX
2868    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2869    if (re != NULL) free(re);  #endif
2870    if (extra != NULL) free(extra);  
2871    if (tables != NULL)    if (re != NULL) new_free(re);
2872      if (extra != NULL) new_free(extra);
2873      if (locale_set)
2874      {      {
2875      free((void *)tables);      new_free((void *)tables);
2876      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2877        locale_set = 0;
2878      }      }
2879    }    }
2880    
2881  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2882  return 0;  
2883    EXIT:
2884    
2885    if (infile != NULL && infile != stdin) fclose(infile);
2886    if (outfile != NULL && outfile != stdout) fclose(outfile);
2887    
2888    free(buffer);
2889    free(dbuffer);
2890    free(pbuffer);
2891    free(offsets);
2892    
2893    return yield;
2894  }  }
2895    
2896  /* End */  /* End of pcretest.c */

Legend:
Removed from v.25  
changed lines
  Added in v.545

  ViewVC Help
Powered by ViewVC 1.1.5