/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 65 by nigel, Sat Feb 24 21:40:08 2007 UTC revision 545 by ph10, Wed Jun 16 10:51:15 2010 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 46  been extended and consequently is now ra
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    #else
83    #include <sys/time.h>          /* These two includes are needed */
84    #include <sys/resource.h>      /* for setrlimit(). */
85    #define INPUT_MODE   "rb"
86    #define OUTPUT_MODE  "wb"
87    #endif
88    
89    
90  /* We need the internal info for displaying the results of pcre_study(). Also  /* We have to include pcre_internal.h because we need the internal info for
91  for getting the opcodes for showing compiled code. */  displaying the results of pcre_study() and we also need to know about the
92    internal macros, structures, and other internal data values; pcretest has
93    "inside information" compared to a program that strictly follows the PCRE API.
94    
95    Although pcre_internal.h does itself include pcre.h, we explicitly include it
96    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97    appropriately for an application, not for building PCRE. */
98    
99    #include "pcre.h"
100    #include "pcre_internal.h"
101    
102    /* We need access to some of the data tables that PCRE uses. So as not to have
103    to keep two copies, we include the source file here, changing the names of the
104    external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107    #define _pcre_utf8_table1      utf8_table1
108    #define _pcre_utf8_table1_size utf8_table1_size
109    #define _pcre_utf8_table2      utf8_table2
110    #define _pcre_utf8_table3      utf8_table3
111    #define _pcre_utf8_table4      utf8_table4
112    #define _pcre_utt              utt
113    #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115    #define _pcre_OP_lengths       OP_lengths
116    
117    #include "pcre_tables.c"
118    
119    /* We also need the pcre_printint() function for printing out compiled
120    patterns. This function is in a separate file so that it can be included in
121    pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
123    
124    #define COMPILING_PCRETEST
125    #include "pcre_printint.src"
126    
127    /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
 #include "internal.h"  
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 140  Makefile. */
140  #include "pcreposix.h"  #include "pcreposix.h"
141  #endif  #endif
142    
143    /* It is also possible, for the benefit of the version currently imported into
144    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145    interface to the DFA matcher (NODFA), and without the doublecheck of the old
146    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147    UTF8 support if PCRE is built without it. */
148    
149    #ifndef SUPPORT_UTF8
150    #ifndef NOUTF8
151    #define NOUTF8
152    #endif
153    #endif
154    
155    
156    /* Other parameters */
157    
158  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
159  #ifdef CLK_TCK  #ifdef CLK_TCK
160  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 163  Makefile. */
163  #endif  #endif
164  #endif  #endif
165    
166  #define LOOPREPEAT 50000  /* This is the default loop count for timing. */
167    
168    #define LOOPREPEAT 500000
169    
170    /* Static variables */
171    
172  static FILE *outfile;  static FILE *outfile;
173  static int log_store = 0;  static int log_store = 0;
# Line 44  static int callout_count; Line 175  static int callout_count;
175  static int callout_extra;  static int callout_extra;
176  static int callout_fail_count;  static int callout_fail_count;
177  static int callout_fail_id;  static int callout_fail_id;
178    static int debug_lengths;
179  static int first_callout;  static int first_callout;
180  static int utf8;  static int locale_set = 0;
181    static int show_malloc;
182    static int use_utf8;
183  static size_t gotten_store;  static size_t gotten_store;
184    
185    /* The buffers grow automatically if very long input lines are encountered. */
186    
187    static int buffer_size = 50000;
188    static uschar *buffer = NULL;
189    static uschar *dbuffer = NULL;
190    static uschar *pbuffer = NULL;
191    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
192    
193  static int utf8_table2[] = {  /*************************************************
194    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  *         Alternate character tables             *
195    *************************************************/
196    
197    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198    using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200    the L (locale) option also adjusts the tables. */
201    
202    /* This is the set of tables distributed as default with PCRE. It recognizes
203    only ASCII characters. */
204    
205    static const unsigned char tables0[] = {
206    
207    /* This table is a lower casing table. */
208    
209        0,  1,  2,  3,  4,  5,  6,  7,
210        8,  9, 10, 11, 12, 13, 14, 15,
211       16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341  static int utf8_table3[] = {    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515    };
516    
517    
518    
519  /*************************************************  /*************************************************
520  *         Print compiled regex                   *  *        Read or extend an input line            *
521  *************************************************/  *************************************************/
522    
523  /* The code for doing this is held in a separate file that is also included in  /* Input lines are read into buffer, but both patterns and data lines can be
524  pcre.c when it is compiled with the debug switch. It defines a function called  continued over multiple input lines. In addition, if the buffer fills up, we
525  print_internals(), which uses a table of opcode lengths defined by the macro  want to automatically expand it so as to be able to handle extremely large
526  OP_LENGTHS, whose name must be OP_lengths. */  lines that are needed for certain stress tests. When the input buffer is
527    expanded, the other two buffers must also be expanded likewise, and the
528    contents of pbuffer, which are a copy of the input for callouts, must be
529    preserved (for when expansion happens for a data line). This is not the most
530    efficient way of handling this, but hey, this is just a test program!
531    
532    Arguments:
533      f            the file to read
534      start        where in buffer to start (this *must* be within buffer)
535      prompt       for stdin or readline()
536    
537    Returns:       pointer to the start of new data
538                   could be a copy of start, or could be moved
539                   NULL if no data read and EOF reached
540    */
541    
542    static uschar *
543    extend_inputline(FILE *f, uschar *start, const char *prompt)
544    {
545    uschar *here = start;
546    
547    for (;;)
548      {
549      int rlen = (int)(buffer_size - (here - buffer));
550    
551      if (rlen > 1000)
552        {
553        int dlen;
554    
555        /* If libreadline support is required, use readline() to read a line if the
556        input is a terminal. Note that readline() removes the trailing newline, so
557        we must put it back again, to be compatible with fgets(). */
558    
559    #ifdef SUPPORT_LIBREADLINE
560        if (isatty(fileno(f)))
561          {
562          size_t len;
563          char *s = readline(prompt);
564          if (s == NULL) return (here == start)? NULL : start;
565          len = strlen(s);
566          if (len > 0) add_history(s);
567          if (len > rlen - 1) len = rlen - 1;
568          memcpy(here, s, len);
569          here[len] = '\n';
570          here[len+1] = 0;
571          free(s);
572          }
573        else
574    #endif
575    
576        /* Read the next line by normal means, prompting if the file is stdin. */
577    
578          {
579          if (f == stdin) printf("%s", prompt);
580          if (fgets((char *)here, rlen,  f) == NULL)
581            return (here == start)? NULL : start;
582          }
583    
584        dlen = (int)strlen((char *)here);
585        if (dlen > 0 && here[dlen - 1] == '\n') return start;
586        here += dlen;
587        }
588    
589      else
590        {
591        int new_buffer_size = 2*buffer_size;
592        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
593        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
594        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
595    
596        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
597          {
598          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
599          exit(1);
600          }
601    
602        memcpy(new_buffer, buffer, buffer_size);
603        memcpy(new_pbuffer, pbuffer, buffer_size);
604    
605        buffer_size = new_buffer_size;
606    
607        start = new_buffer + (start - buffer);
608        here = new_buffer + (here - buffer);
609    
610        free(buffer);
611        free(dbuffer);
612        free(pbuffer);
613    
614        buffer = new_buffer;
615        dbuffer = new_dbuffer;
616        pbuffer = new_pbuffer;
617        }
618      }
619    
620    return NULL;  /* Control never gets here */
621    }
622    
623    
624    
 static uschar OP_lengths[] = { OP_LENGTHS };  
625    
 #include "printint.c"  
626    
627    
628    
# Line 82  static uschar OP_lengths[] = { OP_LENGTH Line 632  static uschar OP_lengths[] = { OP_LENGTH
632    
633  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
634  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
635  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
636    
637  Arguments:  Arguments:
638    str           string to be converted    str           string to be converted
# Line 103  return(result); Line 653  return(result);
653    
654    
655    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
656    
657  /*************************************************  /*************************************************
658  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 148  return i + 1; Line 662  return i + 1;
662  and returns the value of the character.  and returns the value of the character.
663    
664  Argument:  Argument:
665    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
666    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
667    
668  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
669             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
670  */  */
671    
672  int  #if !defined NOUTF8
673  utf82ord(unsigned char *buffer, int *vptr)  
674    static int
675    utf82ord(unsigned char *utf8bytes, int *vptr)
676  {  {
677  int c = *buffer++;  int c = *utf8bytes++;
678  int d = c;  int d = c;
679  int i, j, s;  int i, j, s;
680    
# Line 178  d = (c & utf8_table3[i]) << s; Line 694  d = (c & utf8_table3[i]) << s;
694    
695  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
696    {    {
697    c = *buffer++;    c = *utf8bytes++;
698    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
699    s -= 6;    s -= 6;
700    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 186  for (j = 0; j < i; j++) Line 702  for (j = 0; j < i; j++)
702    
703  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
704    
705  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
706    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
707  if (j != i) return -(i+1);  if (j != i) return -(i+1);
708    
# Line 196  if (j != i) return -(i+1); Line 712  if (j != i) return -(i+1);
712  return i+1;  return i+1;
713  }  }
714    
715    #endif
716    
717    
718    
719    /*************************************************
720    *       Convert character value to UTF-8         *
721    *************************************************/
722    
723    /* This function takes an integer value in the range 0 - 0x7fffffff
724    and encodes it as a UTF-8 character in 1 to 6 bytes.
725    
726    Arguments:
727      cvalue     the character value
728      utf8bytes  pointer to buffer for result - at least 6 bytes long
729    
730    Returns:     number of bytes placed in the buffer
731    */
732    
733    #if !defined NOUTF8
734    
735    static int
736    ord2utf8(int cvalue, uschar *utf8bytes)
737    {
738    register int i, j;
739    for (i = 0; i < utf8_table1_size; i++)
740      if (cvalue <= utf8_table1[i]) break;
741    utf8bytes += i;
742    for (j = i; j > 0; j--)
743     {
744     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
745     cvalue >>= 6;
746     }
747    *utf8bytes = utf8_table2[i] | cvalue;
748    return i + 1;
749    }
750    
751    #endif
752    
753    
754    
755  /*************************************************  /*************************************************
# Line 208  chars without printing. */ Line 762  chars without printing. */
762    
763  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
764  {  {
765  int c;  int c = 0;
766  int yield = 0;  int yield = 0;
767    
768  while (length-- > 0)  while (length-- > 0)
769    {    {
770    if (utf8)  #if !defined NOUTF8
771      if (use_utf8)
772      {      {
773      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
774    
# Line 221  while (length-- > 0) Line 776  while (length-- > 0)
776        {        {
777        length -= rc - 1;        length -= rc - 1;
778        p += rc;        p += rc;
779        if (c < 256 && isprint(c))        if (PRINTHEX(c))
780          {          {
781          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
782          yield++;          yield++;
783          }          }
784        else        else
785          {          {
786          int n;          int n = 4;
787          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
788          yield += n;          yield += (n <= 0x000000ff)? 2 :
789                     (n <= 0x00000fff)? 3 :
790                     (n <= 0x0000ffff)? 4 :
791                     (n <= 0x000fffff)? 5 : 6;
792          }          }
793        continue;        continue;
794        }        }
795      }      }
796    #endif
797    
798     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
799    
800    if (isprint(c = *(p++)))    c = *p++;
801      if (PRINTHEX(c))
802      {      {
803      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
804      yield++;      yield++;
# Line 266  data is not zero. */ Line 826  data is not zero. */
826  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
827  {  {
828  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
829  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
830    
831  if (callout_extra)  if (callout_extra)
832    {    {
   int i;  
833    fprintf(f, "Callout %d: last capture = %d\n",    fprintf(f, "Callout %d: last capture = %d\n",
834      cb->callout_number, cb->capture_last);      cb->callout_number, cb->capture_last);
835    
# Line 298  pre_start = pchars((unsigned char *)cb-> Line 857  pre_start = pchars((unsigned char *)cb->
857  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
858    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
859    
860    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
861    
862  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
863    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
864    
865  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
866    
867  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
868  shown */  shown. For automatic callouts, show the pattern offset. */
869    
870  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
871    else fprintf(outfile, "%3d ", cb->callout_number);    {
872      fprintf(outfile, "%+3d ", cb->pattern_position);
873      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
874      }
875    else
876      {
877      if (callout_extra) fprintf(outfile, "    ");
878        else fprintf(outfile, "%3d ", cb->callout_number);
879      }
880    
881  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
882  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 318  if (post_start > 0) Line 887  if (post_start > 0)
887    fprintf(outfile, "^");    fprintf(outfile, "^");
888    }    }
889    
890  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
891      fprintf(outfile, " ");
892    
893    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
894      pbuffer + cb->pattern_position);
895    
896    fprintf(outfile, "\n");
897  first_callout = 0;  first_callout = 0;
898    
899  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
900    {    {
901    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
902    return (int)(cb->callout_data);    if (callout_data != 0)
903        {
904        fprintf(outfile, "Callout data = %d\n", callout_data);
905        return callout_data;
906        }
907    }    }
908    
909  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 334  return (cb->callout_number != callout_fa Line 912  return (cb->callout_number != callout_fa
912    
913    
914  /*************************************************  /*************************************************
915  *            Local malloc function               *  *            Local malloc functions              *
916  *************************************************/  *************************************************/
917    
918  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 342  compiled re. */ Line 920  compiled re. */
920    
921  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
922  {  {
923    void *block = malloc(size);
924  gotten_store = size;  gotten_store = size;
925  return malloc(size);  if (show_malloc)
926      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
927    return block;
928    }
929    
930    static void new_free(void *block)
931    {
932    if (show_malloc)
933      fprintf(outfile, "free             %p\n", block);
934    free(block);
935    }
936    
937    
938    /* For recursion malloc/free, to test stacking calls */
939    
940    static void *stack_malloc(size_t size)
941    {
942    void *block = malloc(size);
943    if (show_malloc)
944      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
945    return block;
946  }  }
947    
948    static void stack_free(void *block)
949    {
950    if (show_malloc)
951      fprintf(outfile, "stack_free       %p\n", block);
952    free(block);
953    }
954    
955    
956  /*************************************************  /*************************************************
# Line 364  if ((rc = pcre_fullinfo(re, study, optio Line 969  if ((rc = pcre_fullinfo(re, study, optio
969    
970    
971  /*************************************************  /*************************************************
972    *         Byte flipping function                 *
973    *************************************************/
974    
975    static unsigned long int
976    byteflip(unsigned long int value, int n)
977    {
978    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
979    return ((value & 0x000000ff) << 24) |
980           ((value & 0x0000ff00) <<  8) |
981           ((value & 0x00ff0000) >>  8) |
982           ((value & 0xff000000) >> 24);
983    }
984    
985    
986    
987    
988    /*************************************************
989    *        Check match or recursion limit          *
990    *************************************************/
991    
992    static int
993    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
994      int start_offset, int options, int *use_offsets, int use_size_offsets,
995      int flag, unsigned long int *limit, int errnumber, const char *msg)
996    {
997    int count;
998    int min = 0;
999    int mid = 64;
1000    int max = -1;
1001    
1002    extra->flags |= flag;
1003    
1004    for (;;)
1005      {
1006      *limit = mid;
1007    
1008      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1009        use_offsets, use_size_offsets);
1010    
1011      if (count == errnumber)
1012        {
1013        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1014        min = mid;
1015        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1016        }
1017    
1018      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1019                             count == PCRE_ERROR_PARTIAL)
1020        {
1021        if (mid == min + 1)
1022          {
1023          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1024          break;
1025          }
1026        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1027        max = mid;
1028        mid = (min + mid)/2;
1029        }
1030      else break;    /* Some other error */
1031      }
1032    
1033    extra->flags &= ~flag;
1034    return count;
1035    }
1036    
1037    
1038    
1039    /*************************************************
1040    *         Case-independent strncmp() function    *
1041    *************************************************/
1042    
1043    /*
1044    Arguments:
1045      s         first string
1046      t         second string
1047      n         number of characters to compare
1048    
1049    Returns:    < 0, = 0, or > 0, according to the comparison
1050    */
1051    
1052    static int
1053    strncmpic(uschar *s, uschar *t, int n)
1054    {
1055    while (n--)
1056      {
1057      int c = tolower(*s++) - tolower(*t++);
1058      if (c) return c;
1059      }
1060    return 0;
1061    }
1062    
1063    
1064    
1065    /*************************************************
1066    *         Check newline indicator                *
1067    *************************************************/
1068    
1069    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1070    a message and return 0 if there is no match.
1071    
1072    Arguments:
1073      p           points after the leading '<'
1074      f           file for error message
1075    
1076    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1077    */
1078    
1079    static int
1080    check_newline(uschar *p, FILE *f)
1081    {
1082    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1083    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1084    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1085    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1086    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1087    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1088    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1089    fprintf(f, "Unknown newline type at: <%s\n", p);
1090    return 0;
1091    }
1092    
1093    
1094    
1095    /*************************************************
1096    *             Usage function                     *
1097    *************************************************/
1098    
1099    static void
1100    usage(void)
1101    {
1102    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1103    printf("Input and output default to stdin and stdout.\n");
1104    #ifdef SUPPORT_LIBREADLINE
1105    printf("If input is a terminal, readline() is used to read from it.\n");
1106    #else
1107    printf("This version of pcretest is not linked with readline().\n");
1108    #endif
1109    printf("\nOptions:\n");
1110    printf("  -b       show compiled code (bytecode)\n");
1111    printf("  -C       show PCRE compile-time options and exit\n");
1112    printf("  -d       debug: show compiled code and information (-b and -i)\n");
1113    #if !defined NODFA
1114    printf("  -dfa     force DFA matching for all subjects\n");
1115    #endif
1116    printf("  -help    show usage information\n");
1117    printf("  -i       show information about compiled patterns\n"
1118           "  -M       find MATCH_LIMIT minimum for each subject\n"
1119           "  -m       output memory used information\n"
1120           "  -o <n>   set size of offsets vector to <n>\n");
1121    #if !defined NOPOSIX
1122    printf("  -p       use POSIX interface\n");
1123    #endif
1124    printf("  -q       quiet: do not output PCRE version number at start\n");
1125    printf("  -S <n>   set stack size to <n> megabytes\n");
1126    printf("  -s       output store (memory) used information\n"
1127           "  -t       time compilation and execution\n");
1128    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1129    printf("  -tm      time execution (matching) only\n");
1130    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
1131    }
1132    
1133    
1134    
1135    /*************************************************
1136  *                Main Program                    *  *                Main Program                    *
1137  *************************************************/  *************************************************/
1138    
# Line 376  int main(int argc, char **argv) Line 1145  int main(int argc, char **argv)
1145  FILE *infile = stdin;  FILE *infile = stdin;
1146  int options = 0;  int options = 0;
1147  int study_options = 0;  int study_options = 0;
1148    int default_find_match_limit = FALSE;
1149  int op = 1;  int op = 1;
1150  int timeit = 0;  int timeit = 0;
1151    int timeitm = 0;
1152  int showinfo = 0;  int showinfo = 0;
1153  int showstore = 0;  int showstore = 0;
1154    int quiet = 0;
1155  int size_offsets = 45;  int size_offsets = 45;
1156  int size_offsets_max;  int size_offsets_max;
1157  int *offsets;  int *offsets = NULL;
1158  #if !defined NOPOSIX  #if !defined NOPOSIX
1159  int posix = 0;  int posix = 0;
1160  #endif  #endif
1161  int debug = 0;  int debug = 0;
1162  int done = 0;  int done = 0;
1163  unsigned char buffer[30000];  int all_use_dfa = 0;
1164  unsigned char dbuffer[1024];  int yield = 0;
1165    int stack_size;
1166    
1167    /* These vectors store, end-to-end, a list of captured substring names. Assume
1168    that 1024 is plenty long enough for the few names we'll be testing. */
1169    
1170    uschar copynames[1024];
1171    uschar getnames[1024];
1172    
1173    uschar *copynamesptr;
1174    uschar *getnamesptr;
1175    
1176    /* Get buffers from malloc() so that Electric Fence will check their misuse
1177    when I am debugging. They grow automatically when very long lines are read. */
1178    
1179  /* Static so that new_malloc can use it. */  buffer = (unsigned char *)malloc(buffer_size);
1180    dbuffer = (unsigned char *)malloc(buffer_size);
1181    pbuffer = (unsigned char *)malloc(buffer_size);
1182    
1183    /* The outfile variable is static so that new_malloc can use it. */
1184    
1185  outfile = stdout;  outfile = stdout;
1186    
1187    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1188    library to translate CRLF into a single LF character. At least, that's what
1189    I've been told: never having used Windows I take this all on trust. Originally
1190    it set 0x8000, but then I was advised that _O_BINARY was better. */
1191    
1192    #if defined(_WIN32) || defined(WIN32)
1193    _setmode( _fileno( stdout ), _O_BINARY );
1194    #endif
1195    
1196  /* Scan options */  /* Scan options */
1197    
1198  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 403  while (argc > 1 && argv[op][0] == '-') Line 1201  while (argc > 1 && argv[op][0] == '-')
1201    
1202    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1203      showstore = 1;      showstore = 1;
1204    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1205      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1206    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1207    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1208      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1209    #if !defined NODFA
1210      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1211    #endif
1212    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1213        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1214          *endptr == 0))          *endptr == 0))
# Line 413  while (argc > 1 && argv[op][0] == '-') Line 1216  while (argc > 1 && argv[op][0] == '-')
1216      op++;      op++;
1217      argc--;      argc--;
1218      }      }
1219      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1220        {
1221        int both = argv[op][2] == 0;
1222        int temp;
1223        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1224                         *endptr == 0))
1225          {
1226          timeitm = temp;
1227          op++;
1228          argc--;
1229          }
1230        else timeitm = LOOPREPEAT;
1231        if (both) timeit = timeitm;
1232        }
1233      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1234          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1235            *endptr == 0))
1236        {
1237    #if defined(_WIN32) || defined(WIN32)
1238        printf("PCRE: -S not supported on this OS\n");
1239        exit(1);
1240    #else
1241        int rc;
1242        struct rlimit rlim;
1243        getrlimit(RLIMIT_STACK, &rlim);
1244        rlim.rlim_cur = stack_size * 1024 * 1024;
1245        rc = setrlimit(RLIMIT_STACK, &rlim);
1246        if (rc != 0)
1247          {
1248        printf("PCRE: setrlimit() failed with error %d\n", rc);
1249        exit(1);
1250          }
1251        op++;
1252        argc--;
1253    #endif
1254        }
1255  #if !defined NOPOSIX  #if !defined NOPOSIX
1256    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1257  #endif  #endif
1258    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1259      {      {
1260      int rc;      int rc;
1261        unsigned long int lrc;
1262      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1263      printf("Compiled with\n");      printf("Compiled with\n");
1264      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1265      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1266        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1267        printf("  %sUnicode properties support\n", rc? "" : "No ");
1268      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1269      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1270        in EBCDIC environments. CR is 13 and NL is 10. */
1271        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1272          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1273          (rc == -2)? "ANYCRLF" :
1274          (rc == -1)? "ANY" : "???");
1275        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1276        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1277                                         "all Unicode newlines");
1278      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1279      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1280      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1281      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1282      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1283      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1284      exit(0);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1285        printf("  Default recursion depth limit = %ld\n", lrc);
1286        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1287        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1288        goto EXIT;
1289        }
1290      else if (strcmp(argv[op], "-help") == 0 ||
1291               strcmp(argv[op], "--help") == 0)
1292        {
1293        usage();
1294        goto EXIT;
1295      }      }
1296    else    else
1297      {      {
1298      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1299      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1300      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
1301      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1302      }      }
1303    op++;    op++;
1304    argc--;    argc--;
# Line 455  while (argc > 1 && argv[op][0] == '-') Line 1307  while (argc > 1 && argv[op][0] == '-')
1307  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1308    
1309  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
1310  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
1311  if (offsets == NULL)  if (offsets == NULL)
1312    {    {
1313    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1314      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1315    return 1;    yield = 1;
1316      goto EXIT;
1317    }    }
1318    
1319  /* Sort out the input and output files */  /* Sort out the input and output files */
1320    
1321  if (argc > 1)  if (argc > 1)
1322    {    {
1323    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1324    if (infile == NULL)    if (infile == NULL)
1325      {      {
1326      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1327      return 1;      yield = 1;
1328        goto EXIT;
1329      }      }
1330    }    }
1331    
1332  if (argc > 2)  if (argc > 2)
1333    {    {
1334    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1335    if (outfile == NULL)    if (outfile == NULL)
1336      {      {
1337      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1338      return 1;      yield = 1;
1339        goto EXIT;
1340      }      }
1341    }    }
1342    
1343  /* Set alternative malloc function */  /* Set alternative malloc function */
1344    
1345  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1346    pcre_free = new_free;
1347    pcre_stack_malloc = stack_malloc;
1348    pcre_stack_free = stack_free;
1349    
1350  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1351    
1352  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1353    
1354  /* Main loop */  /* Main loop */
1355    
# Line 506  while (!done) Line 1364  while (!done)
1364  #endif  #endif
1365    
1366    const char *error;    const char *error;
1367      unsigned char *markptr;
1368    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1369      unsigned char *to_file = NULL;
1370    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1371      unsigned long int true_size, true_study_size = 0;
1372      size_t size, regex_gotten_store;
1373      int do_mark = 0;
1374    int do_study = 0;    int do_study = 0;
1375    int do_debug = debug;    int do_debug = debug;
1376    int do_G = 0;    int do_G = 0;
1377    int do_g = 0;    int do_g = 0;
1378    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1379    int do_showrest = 0;    int do_showrest = 0;
1380    int erroroffset, len, delimiter;    int do_flip = 0;
1381      int erroroffset, len, delimiter, poffset;
1382    
1383    utf8 = 0;    use_utf8 = 0;
1384      debug_lengths = 1;
1385    
1386    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1387    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1388    fflush(outfile);    fflush(outfile);
1389    
# Line 527  while (!done) Line 1391  while (!done)
1391    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1392    if (*p == 0) continue;    if (*p == 0) continue;
1393    
1394    /* Get the delimiter and seek the end of the pattern; if it isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1395    complete, read more. */  
1396      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1397        {
1398        unsigned long int magic, get_options;
1399        uschar sbuf[8];
1400        FILE *f;
1401    
1402        p++;
1403        pp = p + (int)strlen((char *)p);
1404        while (isspace(pp[-1])) pp--;
1405        *pp = 0;
1406    
1407        f = fopen((char *)p, "rb");
1408        if (f == NULL)
1409          {
1410          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1411          continue;
1412          }
1413    
1414        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1415    
1416        true_size =
1417          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1418        true_study_size =
1419          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1420    
1421        re = (real_pcre *)new_malloc(true_size);
1422        regex_gotten_store = gotten_store;
1423    
1424        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1425    
1426        magic = ((real_pcre *)re)->magic_number;
1427        if (magic != MAGIC_NUMBER)
1428          {
1429          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1430            {
1431            do_flip = 1;
1432            }
1433          else
1434            {
1435            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1436            fclose(f);
1437            continue;
1438            }
1439          }
1440    
1441        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1442          do_flip? " (byte-inverted)" : "", p);
1443    
1444        /* Need to know if UTF-8 for printing data strings */
1445    
1446        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1447        use_utf8 = (get_options & PCRE_UTF8) != 0;
1448    
1449        /* Now see if there is any following study data */
1450    
1451        if (true_study_size != 0)
1452          {
1453          pcre_study_data *psd;
1454    
1455          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1456          extra->flags = PCRE_EXTRA_STUDY_DATA;
1457    
1458          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1459          extra->study_data = psd;
1460    
1461          if (fread(psd, 1, true_study_size, f) != true_study_size)
1462            {
1463            FAIL_READ:
1464            fprintf(outfile, "Failed to read data from %s\n", p);
1465            if (extra != NULL) new_free(extra);
1466            if (re != NULL) new_free(re);
1467            fclose(f);
1468            continue;
1469            }
1470          fprintf(outfile, "Study data loaded from %s\n", p);
1471          do_study = 1;     /* To get the data output if requested */
1472          }
1473        else fprintf(outfile, "No study data\n");
1474    
1475        fclose(f);
1476        goto SHOW_INFO;
1477        }
1478    
1479      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1480      the pattern; if is isn't complete, read more. */
1481    
1482    delimiter = *p++;    delimiter = *p++;
1483    
1484    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1485      {      {
1486      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1487      goto SKIP_DATA;      goto SKIP_DATA;
1488      }      }
1489    
1490    pp = p;    pp = p;
1491      poffset = (int)(p - buffer);
1492    
1493    for(;;)    for(;;)
1494      {      {
# Line 549  while (!done) Line 1499  while (!done)
1499        pp++;        pp++;
1500        }        }
1501      if (*pp != 0) break;      if (*pp != 0) break;
1502        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1503        {        {
1504        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1505        done = 1;        done = 1;
# Line 567  while (!done) Line 1508  while (!done)
1508      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1509      }      }
1510    
1511      /* The buffer may have moved while being extended; reset the start of data
1512      pointer to the correct relative point in the buffer. */
1513    
1514      p = buffer + poffset;
1515    
1516    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1517    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1518    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1519    
1520    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1521    
1522    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1523      for callouts. */
1524    
1525    *pp++ = 0;    *pp++ = 0;
1526      strcpy((char *)pbuffer, (char *)p);
1527    
1528    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1529    
# Line 587  while (!done) Line 1535  while (!done)
1535      {      {
1536      switch (*pp++)      switch (*pp++)
1537        {        {
1538          case 'f': options |= PCRE_FIRSTLINE; break;
1539        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1540        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1541        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 595  while (!done) Line 1544  while (!done)
1544    
1545        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1546        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1547          case 'B': do_debug = 1; break;
1548          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1549        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1550        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1551          case 'F': do_flip = 1; break;
1552        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1553        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1554          case 'J': options |= PCRE_DUPNAMES; break;
1555          case 'K': do_mark = 1; break;
1556        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1557        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1558    
# Line 608  while (!done) Line 1562  while (!done)
1562    
1563        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1564        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1565          case 'W': options |= PCRE_UCP; break;
1566        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1567        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Z': debug_lengths = 0; break;
1568          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1569          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1570    
1571          case 'T':
1572          switch (*pp++)
1573            {
1574            case '0': tables = tables0; break;
1575            case '1': tables = tables1; break;
1576    
1577            case '\r':
1578            case '\n':
1579            case ' ':
1580            case 0:
1581            fprintf(outfile, "** Missing table number after /T\n");
1582            goto SKIP_DATA;
1583    
1584            default:
1585            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1586            goto SKIP_DATA;
1587            }
1588          break;
1589    
1590        case 'L':        case 'L':
1591        ppp = pp;        ppp = pp;
1592        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1593          /* The '0' test is just in case this is an unterminated line. */
1594          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1595        *ppp = 0;        *ppp = 0;
1596        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1597          {          {
1598          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1599          goto SKIP_DATA;          goto SKIP_DATA;
1600          }          }
1601          locale_set = 1;
1602        tables = pcre_maketables();        tables = pcre_maketables();
1603        pp = ppp;        pp = ppp;
1604        break;        break;
1605    
1606        case '\n': case ' ': break;        case '>':
1607          to_file = pp;
1608          while (*pp != 0) pp++;
1609          while (isspace(pp[-1])) pp--;
1610          *pp = 0;
1611          break;
1612    
1613          case '<':
1614            {
1615            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1616              {
1617              options |= PCRE_JAVASCRIPT_COMPAT;
1618              pp += 3;
1619              }
1620            else
1621              {
1622              int x = check_newline(pp, outfile);
1623              if (x == 0) goto SKIP_DATA;
1624              options |= x;
1625              while (*pp++ != '>');
1626              }
1627            }
1628          break;
1629    
1630          case '\r':                      /* So that it works in Windows */
1631          case '\n':
1632          case ' ':
1633          break;
1634    
1635        default:        default:
1636        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1637        goto SKIP_DATA;        goto SKIP_DATA;
# Line 640  while (!done) Line 1647  while (!done)
1647      {      {
1648      int rc;      int rc;
1649      int cflags = 0;      int cflags = 0;
1650    
1651      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1652      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1653        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1654        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1655        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1656        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1657        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1658    
1659      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1660    
1661      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 649  while (!done) Line 1663  while (!done)
1663    
1664      if (rc != 0)      if (rc != 0)
1665        {        {
1666        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1667        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1668        goto SKIP_DATA;        goto SKIP_DATA;
1669        }        }
# Line 661  while (!done) Line 1675  while (!done)
1675  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1676    
1677      {      {
1678      if (timeit)      unsigned long int get_options;
1679    
1680        if (timeit > 0)
1681        {        {
1682        register int i;        register int i;
1683        clock_t time_taken;        clock_t time_taken;
1684        clock_t start_time = clock();        clock_t start_time = clock();
1685        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1686          {          {
1687          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1688          if (re != NULL) free(re);          if (re != NULL) free(re);
1689          }          }
1690        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1691        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1692          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1693            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1694        }        }
1695    
# Line 690  while (!done) Line 1706  while (!done)
1706          {          {
1707          for (;;)          for (;;)
1708            {            {
1709            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1710              {              {
1711              done = 1;              done = 1;
1712              goto CONTINUE;              goto CONTINUE;
# Line 704  while (!done) Line 1720  while (!done)
1720        goto CONTINUE;        goto CONTINUE;
1721        }        }
1722    
1723      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1724      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1725      returns only limited data. Check that it agrees with the newer one. */      lines. */
1726    
1727        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1728        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1729    
1730        /* Print information if required. There are now two info-returning
1731        functions. The old one has a limited interface and returns only limited
1732        data. Check that it agrees with the newer one. */
1733    
1734      if (log_store)      if (log_store)
1735        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 714  while (!done) Line 1737  while (!done)
1737                sizeof(real_pcre) -                sizeof(real_pcre) -
1738                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1739    
1740        /* Extract the size for possible writing before possibly flipping it,
1741        and remember the store that was got. */
1742    
1743        true_size = ((real_pcre *)re)->size;
1744        regex_gotten_store = gotten_store;
1745    
1746        /* If /S was present, study the regexp to generate additional info to
1747        help with the matching. */
1748    
1749        if (do_study)
1750          {
1751          if (timeit > 0)
1752            {
1753            register int i;
1754            clock_t time_taken;
1755            clock_t start_time = clock();
1756            for (i = 0; i < timeit; i++)
1757              extra = pcre_study(re, study_options, &error);
1758            time_taken = clock() - start_time;
1759            if (extra != NULL) free(extra);
1760            fprintf(outfile, "  Study time %.4f milliseconds\n",
1761              (((double)time_taken * 1000.0) / (double)timeit) /
1762                (double)CLOCKS_PER_SEC);
1763            }
1764          extra = pcre_study(re, study_options, &error);
1765          if (error != NULL)
1766            fprintf(outfile, "Failed to study: %s\n", error);
1767          else if (extra != NULL)
1768            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1769          }
1770    
1771        /* If /K was present, we set up for handling MARK data. */
1772    
1773        if (do_mark)
1774          {
1775          if (extra == NULL)
1776            {
1777            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1778            extra->flags = 0;
1779            }
1780          extra->mark = &markptr;
1781          extra->flags |= PCRE_EXTRA_MARK;
1782          }
1783    
1784        /* If the 'F' option was present, we flip the bytes of all the integer
1785        fields in the regex data block and the study block. This is to make it
1786        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1787        compiled on a different architecture. */
1788    
1789        if (do_flip)
1790          {
1791          real_pcre *rre = (real_pcre *)re;
1792          rre->magic_number =
1793            byteflip(rre->magic_number, sizeof(rre->magic_number));
1794          rre->size = byteflip(rre->size, sizeof(rre->size));
1795          rre->options = byteflip(rre->options, sizeof(rre->options));
1796          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1797          rre->top_bracket =
1798            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1799          rre->top_backref =
1800            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1801          rre->first_byte =
1802            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1803          rre->req_byte =
1804            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1805          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1806            sizeof(rre->name_table_offset));
1807          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1808            sizeof(rre->name_entry_size));
1809          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1810            sizeof(rre->name_count));
1811    
1812          if (extra != NULL)
1813            {
1814            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1815            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1816            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1817            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1818            }
1819          }
1820    
1821        /* Extract information from the compiled data if required */
1822    
1823        SHOW_INFO:
1824    
1825        if (do_debug)
1826          {
1827          fprintf(outfile, "------------------------------------------------------------------\n");
1828          pcre_printint(re, outfile, debug_lengths);
1829          }
1830    
1831        /* We already have the options in get_options (see above) */
1832    
1833      if (do_showinfo)      if (do_showinfo)
1834        {        {
1835        unsigned long int get_options;        unsigned long int all_options;
1836    #if !defined NOINFOCHECK
1837        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1838        int count, backrefmax, first_char, need_char;  #endif
1839          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1840            hascrorlf;
1841        int nameentrysize, namecount;        int nameentrysize, namecount;
1842        const uschar *nametable;        const uschar *nametable;
       size_t size;  
1843    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
   
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1844        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1845        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1846        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 737  while (!done) Line 1848  while (!done)
1848        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1849        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1850        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1851        new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1852          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1853          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1854          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1855    
1856    #if !defined NOINFOCHECK
1857        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1858        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1859          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 756  while (!done) Line 1871  while (!done)
1871            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1872              get_options, old_options);              get_options, old_options);
1873          }          }
1874    #endif
1875    
1876        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1877          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1878          size, gotten_store);          (int)size, (int)regex_gotten_store);
1879    
1880        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1881        if (backrefmax > 0)        if (backrefmax > 0)
# Line 777  while (!done) Line 1893  while (!done)
1893            }            }
1894          }          }
1895    
1896          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1897          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1898    
1899          all_options = ((real_pcre *)re)->options;
1900          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1901    
1902        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1903          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1904            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1905            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1906            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1907            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1908              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1909            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1910              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1911              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1912            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1913            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1914            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1915            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1916              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1917              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1918              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1919              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1920    
1921          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1922    
1923          switch (get_options & PCRE_NEWLINE_BITS)
1924            {
1925            case PCRE_NEWLINE_CR:
1926            fprintf(outfile, "Forced newline sequence: CR\n");
1927            break;
1928    
1929            case PCRE_NEWLINE_LF:
1930            fprintf(outfile, "Forced newline sequence: LF\n");
1931            break;
1932    
1933            case PCRE_NEWLINE_CRLF:
1934            fprintf(outfile, "Forced newline sequence: CRLF\n");
1935            break;
1936    
1937            case PCRE_NEWLINE_ANYCRLF:
1938            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1939            break;
1940    
1941        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_ANY:
1942          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1943            break;
1944    
1945            default:
1946            break;
1947            }
1948    
1949        if (first_char == -1)        if (first_char == -1)
1950          {          {
1951          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1952          }          }
1953        else if (first_char < 0)        else if (first_char < 0)
1954          {          {
# Line 803  while (!done) Line 1957  while (!done)
1957        else        else
1958          {          {
1959          int ch = first_char & 255;          int ch = first_char & 255;
1960          char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1961            "" : " (caseless)";            "" : " (caseless)";
1962          if (isprint(ch))          if (PRINTHEX(ch))
1963            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1964          else          else
1965            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 818  while (!done) Line 1972  while (!done)
1972        else        else
1973          {          {
1974          int ch = need_char & 255;          int ch = need_char & 255;
1975          char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1976            "" : " (caseless)";            "" : " (caseless)";
1977          if (isprint(ch))          if (PRINTHEX(ch))
1978            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1979          else          else
1980            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1981          }          }
       }  
1982    
1983      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1984      help with the matching. */        value, but it varies, depending on the computer architecture, and
1985          so messes up the test suite. (And with the /F option, it might be
1986          flipped.) */
1987    
1988      if (do_study)        if (do_study)
       {  
       if (timeit)  
1989          {          {
1990          register int i;          if (extra == NULL)
1991          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1992          clock_t start_time = clock();          else
1993          for (i = 0; i < LOOPREPEAT; i++)            {
1994            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1995          time_taken = clock() - start_time;            int minlength;
1996          if (extra != NULL) free(extra);  
1997          fprintf(outfile, "  Study time %.3f milliseconds\n",            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1998            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1999              (double)CLOCKS_PER_SEC);  
2000              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2001              if (start_bits == NULL)
2002                fprintf(outfile, "No set of starting bytes\n");
2003              else
2004                {
2005                int i;
2006                int c = 24;
2007                fprintf(outfile, "Starting byte set: ");
2008                for (i = 0; i < 256; i++)
2009                  {
2010                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2011                    {
2012                    if (c > 75)
2013                      {
2014                      fprintf(outfile, "\n  ");
2015                      c = 2;
2016                      }
2017                    if (PRINTHEX(i) && i != ' ')
2018                      {
2019                      fprintf(outfile, "%c ", i);
2020                      c += 2;
2021                      }
2022                    else
2023                      {
2024                      fprintf(outfile, "\\x%02x ", i);
2025                      c += 5;
2026                      }
2027                    }
2028                  }
2029                fprintf(outfile, "\n");
2030                }
2031              }
2032          }          }
2033          }
2034    
2035        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
2036        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
2037          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
2038    
2039        else if (do_showinfo)      if (to_file != NULL)
2040          {
2041          FILE *f = fopen((char *)to_file, "wb");
2042          if (f == NULL)
2043            {
2044            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2045            }
2046          else
2047          {          {
2048          size_t size;          uschar sbuf[8];
2049          uschar *start_bits = NULL;          sbuf[0] = (uschar)((true_size >> 24) & 255);
2050          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);          sbuf[1] = (uschar)((true_size >> 16) & 255);
2051          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[2] = (uschar)((true_size >>  8) & 255);
2052          fprintf(outfile, "Study size = %d\n", size);          sbuf[3] = (uschar)((true_size) & 255);
2053          if (start_bits == NULL)  
2054            fprintf(outfile, "No starting character set\n");          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2055            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2056            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2057            sbuf[7] = (uschar)((true_study_size) & 255);
2058    
2059            if (fwrite(sbuf, 1, 8, f) < 8 ||
2060                fwrite(re, 1, true_size, f) < true_size)
2061              {
2062              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2063              }
2064          else          else
2065            {            {
2066            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
2067            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
2068              {              {
2069              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2070                    true_study_size)
2071                {                {
2072                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2073                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2074                }                }
2075                else fprintf(outfile, "Study data written to %s\n", to_file);
2076    
2077              }              }
           fprintf(outfile, "\n");  
2078            }            }
2079            fclose(f);
2080            }
2081    
2082          new_free(re);
2083          if (extra != NULL) new_free(extra);
2084          if (locale_set)
2085            {
2086            new_free((void *)tables);
2087            setlocale(LC_CTYPE, "C");
2088            locale_set = 0;
2089          }          }
2090          continue;  /* With next regex */
2091        }        }
2092      }      }        /* End of non-POSIX compile */
2093    
2094    /* Read data lines and test them */    /* Read data lines and test them */
2095    
2096    for (;;)    for (;;)
2097      {      {
2098      unsigned char *q;      uschar *q;
2099      unsigned char *bptr = dbuffer;      uschar *bptr;
2100      int *use_offsets = offsets;      int *use_offsets = offsets;
2101      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2102      int callout_data = 0;      int callout_data = 0;
2103      int callout_data_set = 0;      int callout_data_set = 0;
2104      int count, c;      int count, c;
2105      int copystrings = 0;      int copystrings = 0;
2106      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2107      int getstrings = 0;      int getstrings = 0;
2108      int getlist = 0;      int getlist = 0;
2109      int gmatched = 0;      int gmatched = 0;
2110      int start_offset = 0;      int start_offset = 0;
2111      int g_notempty = 0;      int g_notempty = 0;
2112        int use_dfa = 0;
2113    
2114      options = 0;      options = 0;
2115    
2116        *copynames = 0;
2117        *getnames = 0;
2118    
2119        copynamesptr = copynames;
2120        getnamesptr = getnames;
2121    
2122      pcre_callout = callout;      pcre_callout = callout;
2123      first_callout = 1;      first_callout = 1;
2124      callout_extra = 0;      callout_extra = 0;
2125      callout_count = 0;      callout_count = 0;
2126      callout_fail_count = 999999;      callout_fail_count = 999999;
2127      callout_fail_id = -1;      callout_fail_id = -1;
2128        show_malloc = 0;
2129    
2130        if (extra != NULL) extra->flags &=
2131          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2132    
2133      if (infile == stdin) printf("data> ");      len = 0;
2134      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      for (;;)
2135        {        {
2136        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2137        goto CONTINUE;          {
2138            if (len > 0)    /* Reached EOF without hitting a newline */
2139              {
2140              fprintf(outfile, "\n");
2141              break;
2142              }
2143            done = 1;
2144            goto CONTINUE;
2145            }
2146          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2147          len = (int)strlen((char *)buffer);
2148          if (buffer[len-1] == '\n') break;
2149        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2150    
     len = (int)strlen((char *)buffer);  
2151      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2152      buffer[len] = 0;      buffer[len] = 0;
2153      if (len == 0) break;      if (len == 0) break;
# Line 937  while (!done) Line 2155  while (!done)
2155      p = buffer;      p = buffer;
2156      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2157    
2158      q = dbuffer;      bptr = q = dbuffer;
2159      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2160        {        {
2161        int i = 0;        int i = 0;
# Line 959  while (!done) Line 2177  while (!done)
2177          c -= '0';          c -= '0';
2178          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2179            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2180    
2181    #if !defined NOUTF8
2182            if (use_utf8 && c > 255)
2183              {
2184              unsigned char buff8[8];
2185              int ii, utn;
2186              utn = ord2utf8(c, buff8);
2187              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2188              c = buff8[ii];   /* Last byte */
2189              }
2190    #endif
2191          break;          break;
2192    
2193          case 'x':          case 'x':
2194    
2195          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2196    
2197    #if !defined NOUTF8
2198          if (*p == '{')          if (*p == '{')
2199            {            {
2200            unsigned char *pt = p;            unsigned char *pt = p;
# Line 973  while (!done) Line 2203  while (!done)
2203              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2204            if (*pt == '}')            if (*pt == '}')
2205              {              {
2206              unsigned char buffer[8];              unsigned char buff8[8];
2207              int ii, utn;              int ii, utn;
2208              utn = ord2utf8(c, buffer);              if (use_utf8)
2209              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];                {
2210              c = buffer[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2211                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2212                  c = buff8[ii];   /* Last byte */
2213                  }
2214                else
2215                 {
2216                 if (c > 255)
2217                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2218                     "UTF-8 mode is not enabled.\n"
2219                     "** Truncation will probably give the wrong result.\n", c);
2220                 }
2221              p = pt + 1;              p = pt + 1;
2222              break;              break;
2223              }              }
2224            /* Not correct form; fall through */            /* Not correct form; fall through */
2225            }            }
2226    #endif
2227    
2228          /* Ordinary \x */          /* Ordinary \x */
2229    
# Line 994  while (!done) Line 2235  while (!done)
2235            }            }
2236          break;          break;
2237    
2238          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2239          p--;          p--;
2240          continue;          continue;
2241    
2242            case '>':
2243            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2244            continue;
2245    
2246          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2247          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2248          continue;          continue;
# Line 1014  while (!done) Line 2259  while (!done)
2259            }            }
2260          else if (isalnum(*p))          else if (isalnum(*p))
2261            {            {
2262            uschar name[256];            uschar *npp = copynamesptr;
2263            uschar *pp = name;            while (isalnum(*p)) *npp++ = *p++;
2264            while (isalnum(*p)) *pp++ = *p++;            *npp++ = 0;
2265            *pp = 0;            *npp = 0;
2266            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2267            if (n < 0)            if (n < 0)
2268              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2269            else copystrings |= 1 << n;            copynamesptr = npp;
2270            }            }
2271          else if (*p == '+')          else if (*p == '+')
2272            {            {
# Line 1059  while (!done) Line 2304  while (!done)
2304            }            }
2305          continue;          continue;
2306    
2307    #if !defined NODFA
2308            case 'D':
2309    #if !defined NOPOSIX
2310            if (posix || do_posix)
2311              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2312            else
2313    #endif
2314              use_dfa = 1;
2315            continue;
2316    
2317            case 'F':
2318            options |= PCRE_DFA_SHORTEST;
2319            continue;
2320    #endif
2321    
2322          case 'G':          case 'G':
2323          if (isdigit(*p))          if (isdigit(*p))
2324            {            {
# Line 1067  while (!done) Line 2327  while (!done)
2327            }            }
2328          else if (isalnum(*p))          else if (isalnum(*p))
2329            {            {
2330            uschar name[256];            uschar *npp = getnamesptr;
2331            uschar *pp = name;            while (isalnum(*p)) *npp++ = *p++;
2332            while (isalnum(*p)) *pp++ = *p++;            *npp++ = 0;
2333            *pp = 0;            *npp = 0;
2334            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2335            if (n < 0)            if (n < 0)
2336              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2337            else getstrings |= 1 << n;            getnamesptr = npp;
2338            }            }
2339          continue;          continue;
2340    
# Line 1087  while (!done) Line 2347  while (!done)
2347          continue;          continue;
2348    
2349          case 'N':          case 'N':
2350          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2351              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2352            else
2353              options |= PCRE_NOTEMPTY;
2354          continue;          continue;
2355    
2356          case 'O':          case 'O':
# Line 1096  while (!done) Line 2359  while (!done)
2359            {            {
2360            size_offsets_max = n;            size_offsets_max = n;
2361            free(offsets);            free(offsets);
2362            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2363            if (offsets == NULL)            if (offsets == NULL)
2364              {              {
2365              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2366                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2367              return 1;              yield = 1;
2368                goto EXIT;
2369              }              }
2370            }            }
2371          use_size_offsets = n;          use_size_offsets = n;
2372          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2373          continue;          continue;
2374    
2375            case 'P':
2376            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2377              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2378            continue;
2379    
2380            case 'Q':
2381            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2382            if (extra == NULL)
2383              {
2384              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2385              extra->flags = 0;
2386              }
2387            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2388            extra->match_limit_recursion = n;
2389            continue;
2390    
2391            case 'q':
2392            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2393            if (extra == NULL)
2394              {
2395              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2396              extra->flags = 0;
2397              }
2398            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2399            extra->match_limit = n;
2400            continue;
2401    
2402    #if !defined NODFA
2403            case 'R':
2404            options |= PCRE_DFA_RESTART;
2405            continue;
2406    #endif
2407    
2408            case 'S':
2409            show_malloc = 1;
2410            continue;
2411    
2412            case 'Y':
2413            options |= PCRE_NO_START_OPTIMIZE;
2414            continue;
2415    
2416          case 'Z':          case 'Z':
2417          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2418          continue;          continue;
2419    
2420            case '?':
2421            options |= PCRE_NO_UTF8_CHECK;
2422            continue;
2423    
2424            case '<':
2425              {
2426              int x = check_newline(p, outfile);
2427              if (x == 0) goto NEXT_DATA;
2428              options |= x;
2429              while (*p++ != '>');
2430              }
2431            continue;
2432          }          }
2433        *q++ = c;        *q++ = c;
2434        }        }
2435      *q = 0;      *q = 0;
2436      len = q - dbuffer;      len = (int)(q - dbuffer);
2437    
2438        /* Move the data to the end of the buffer so that a read over the end of
2439        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2440        we are using the POSIX interface, we must include the terminating zero. */
2441    
2442    #if !defined NOPOSIX
2443        if (posix || do_posix)
2444          {
2445          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2446          bptr += buffer_size - len - 1;
2447          }
2448        else
2449    #endif
2450          {
2451          memmove(bptr + buffer_size - len, bptr, len);
2452          bptr += buffer_size - len;
2453          }
2454    
2455        if ((all_use_dfa || use_dfa) && find_match_limit)
2456          {
2457          printf("**Match limit not relevant for DFA matching: ignored\n");
2458          find_match_limit = 0;
2459          }
2460    
2461      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2462      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
# Line 1127  while (!done) Line 2468  while (!done)
2468        int eflags = 0;        int eflags = 0;
2469        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
2470        if (use_size_offsets > 0)        if (use_size_offsets > 0)
2471          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2472        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2473        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2474          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2475    
2476        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2477    
2478        if (rc != 0)        if (rc != 0)
2479          {          {
2480          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2481          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2482          }          }
2483          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2484                  != 0)
2485            {
2486            fprintf(outfile, "Matched with REG_NOSUB\n");
2487            }
2488        else        else
2489          {          {
2490          size_t i;          size_t i;
# Line 1169  while (!done) Line 2516  while (!done)
2516    
2517      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2518        {        {
2519        if (timeit)        markptr = NULL;
2520    
2521          if (timeitm > 0)
2522          {          {
2523          register int i;          register int i;
2524          clock_t time_taken;          clock_t time_taken;
2525          clock_t start_time = clock();          clock_t start_time = clock();
2526          for (i = 0; i < LOOPREPEAT; i++)  
2527    #if !defined NODFA
2528            if (all_use_dfa || use_dfa)
2529              {
2530              int workspace[1000];
2531              for (i = 0; i < timeitm; i++)
2532                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2533                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2534                  sizeof(workspace)/sizeof(int));
2535              }
2536            else
2537    #endif
2538    
2539            for (i = 0; i < timeitm; i++)
2540            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2541              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2542    
2543          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2544          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2545            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2546              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2547          }          }
2548    
2549        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2550        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2551          for the recursion limit. */
2552    
2553        if (find_match_limit)        if (find_match_limit)
2554          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2555          if (extra == NULL)          if (extra == NULL)
2556            {            {
2557            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2558            extra->flags = 0;            extra->flags = 0;
2559            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2560    
2561          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2562              options|g_notempty, use_offsets, use_size_offsets,
2563              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2564              PCRE_ERROR_MATCHLIMIT, "match()");
2565    
2566            count = check_match_limit(re, extra, bptr, len, start_offset,
2567              options|g_notempty, use_offsets, use_size_offsets,
2568              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2569              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2570          }          }
2571    
2572        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1233  while (!done) Line 2575  while (!done)
2575          {          {
2576          if (extra == NULL)          if (extra == NULL)
2577            {            {
2578            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2579            extra->flags = 0;            extra->flags = 0;
2580            }            }
2581          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2582          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
2583          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2584            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
2585          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1246  while (!done) Line 2588  while (!done)
2588        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2589        value of match_limit. */        value of match_limit. */
2590    
2591        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
2592          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
2593            {
2594            int workspace[1000];
2595            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2596              options | g_notempty, use_offsets, use_size_offsets, workspace,
2597              sizeof(workspace)/sizeof(int));
2598            if (count == 0)
2599              {
2600              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2601              count = use_size_offsets/2;
2602              }
2603            }
2604    #endif
2605    
2606        if (count == 0)        else
2607          {          {
2608          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2609          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2610            if (count == 0)
2611              {
2612              fprintf(outfile, "Matched, but too many substrings\n");
2613              count = use_size_offsets/3;
2614              }
2615          }          }
2616    
2617        /* Matched */        /* Matched */
2618    
2619        if (count >= 0)        if (count >= 0)
2620          {          {
2621          int i;          int i, maxcount;
2622    
2623    #if !defined NODFA
2624            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2625    #endif
2626              maxcount = use_size_offsets/3;
2627    
2628            /* This is a check against a lunatic return value. */
2629    
2630            if (count > maxcount)
2631              {
2632              fprintf(outfile,
2633                "** PCRE error: returned count %d is too big for offset size %d\n",
2634                count, use_size_offsets);
2635              count = use_size_offsets/3;
2636              if (do_g || do_G)
2637                {
2638                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2639                do_g = do_G = FALSE;        /* Break g/G loop */
2640                }
2641              }
2642    
2643          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2644            {            {
2645            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1283  while (!done) Line 2663  while (!done)
2663              }              }
2664            }            }
2665    
2666            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2667    
2668          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2669            {            {
2670            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2671              {              {
2672              char copybuffer[16];              char copybuffer[256];
2673              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2674                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2675              if (rc < 0)              if (rc < 0)
# Line 1297  while (!done) Line 2679  while (!done)
2679              }              }
2680            }            }
2681    
2682            for (copynamesptr = copynames;
2683                 *copynamesptr != 0;
2684                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2685              {
2686              char copybuffer[256];
2687              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2688                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2689              if (rc < 0)
2690                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2691              else
2692                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2693              }
2694    
2695          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2696            {            {
2697            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1309  while (!done) Line 2704  while (!done)
2704              else              else
2705                {                {
2706                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2707                pcre_free_substring(substring);                pcre_free_substring(substring);
2708                }                }
2709              }              }
2710            }            }
2711    
2712            for (getnamesptr = getnames;
2713                 *getnamesptr != 0;
2714                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2715              {
2716              const char *substring;
2717              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2718                count, (char *)getnamesptr, &substring);
2719              if (rc < 0)
2720                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2721              else
2722                {
2723                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2724                pcre_free_substring(substring);
2725                }
2726              }
2727    
2728          if (getlist)          if (getlist)
2729            {            {
2730            const char **stringlist;            const char **stringlist;
# Line 1334  while (!done) Line 2744  while (!done)
2744            }            }
2745          }          }
2746    
2747          /* There was a partial match */
2748    
2749          else if (count == PCRE_ERROR_PARTIAL)
2750            {
2751            if (markptr == NULL) fprintf(outfile, "Partial match");
2752              else fprintf(outfile, "Partial match, mark=%s", markptr);
2753            if (use_size_offsets > 1)
2754              {
2755              fprintf(outfile, ": ");
2756              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2757                outfile);
2758              }
2759            fprintf(outfile, "\n");
2760            break;  /* Out of the /g loop */
2761            }
2762    
2763        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2764        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2765        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2766        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2767        was checked before setting g_notempty. */  
2768          Complication arises in the case when the newline option is "any" or
2769          "anycrlf". If the previous match was at the end of a line terminated by
2770          CRLF, an advance of one character just passes the \r, whereas we should
2771          prefer the longer newline sequence, as does the code in pcre_exec().
2772          Fudge the offset value to achieve this.
2773    
2774          Otherwise, in the case of UTF-8 matching, the advance must be one
2775          character, not one byte. */
2776    
2777        else        else
2778          {          {
2779          if (g_notempty != 0)          if (g_notempty != 0)
2780            {            {
2781              int onechar = 1;
2782              unsigned int obits = ((real_pcre *)re)->options;
2783            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2784            use_offsets[1] = start_offset + 1;            if ((obits & PCRE_NEWLINE_BITS) == 0)
2785                {
2786                int d;
2787                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2788                /* Note that these values are always the ASCII ones, even in
2789                EBCDIC environments. CR = 13, NL = 10. */
2790                obits = (d == 13)? PCRE_NEWLINE_CR :
2791                        (d == 10)? PCRE_NEWLINE_LF :
2792                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2793                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2794                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2795                }
2796              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2797                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2798                  &&
2799                  start_offset < len - 1 &&
2800                  bptr[start_offset] == '\r' &&
2801                  bptr[start_offset+1] == '\n')
2802                onechar++;
2803              else if (use_utf8)
2804                {
2805                while (start_offset + onechar < len)
2806                  {
2807                  int tb = bptr[start_offset+onechar];
2808                  if (tb <= 127) break;
2809                  tb &= 0xc0;
2810                  if (tb != 0 && tb != 0xc0) onechar++;
2811                  }
2812                }
2813              use_offsets[1] = start_offset + onechar;
2814            }            }
2815          else          else
2816            {            {
2817            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2818              {              {
2819              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0)
2820                else fprintf(outfile, "Error %d\n", count);                {
2821                  if (markptr == NULL) fprintf(outfile, "No match\n");
2822                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2823                  }
2824              }              }
2825              else fprintf(outfile, "Error %d\n", count);
2826            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2827            }            }
2828          }          }
# Line 1363  while (!done) Line 2832  while (!done)
2832        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2833    
2834        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2835        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2836        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2837        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2838        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2839        character. */        character. */
2840    
2841        g_notempty = 0;        g_notempty = 0;
2842    
2843        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2844          {          {
2845          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2846          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2847          }          }
2848    
2849        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1388  while (!done) Line 2858  while (!done)
2858          len -= use_offsets[1];          len -= use_offsets[1];
2859          }          }
2860        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2861    
2862        NEXT_DATA: continue;
2863      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2864    
2865    CONTINUE:    CONTINUE:
# Line 1396  while (!done) Line 2868  while (!done)
2868    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2869  #endif  #endif
2870    
2871    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2872    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2873    if (tables != NULL)    if (locale_set)
2874      {      {
2875      free((void *)tables);      new_free((void *)tables);
2876      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2877        locale_set = 0;
2878      }      }
2879    }    }
2880    
2881  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2882  return 0;  
2883    EXIT:
2884    
2885    if (infile != NULL && infile != stdin) fclose(infile);
2886    if (outfile != NULL && outfile != stdout) fclose(outfile);
2887    
2888    free(buffer);
2889    free(dbuffer);
2890    free(pbuffer);
2891    free(offsets);
2892    
2893    return yield;
2894  }  }
2895    
2896  /* End */  /* End of pcretest.c */

Legend:
Removed from v.65  
changed lines
  Added in v.545

  ViewVC Help
Powered by ViewVC 1.1.5