/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC revision 553 by ph10, Fri Oct 22 15:57:50 2010 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places.  been extended and consequently is now rather, er, *very* untidy in places.
8    
9  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
10  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  /* We need the internal info for displaying the results of pcre_study(). Also  #ifdef SUPPORT_LIBREADLINE
52  for getting the opcodes for showing compiled code. */  #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    #else
83    #include <sys/time.h>          /* These two includes are needed */
84    #include <sys/resource.h>      /* for setrlimit(). */
85    #define INPUT_MODE   "rb"
86    #define OUTPUT_MODE  "wb"
87    #endif
88    
89    
90    /* We have to include pcre_internal.h because we need the internal info for
91    displaying the results of pcre_study() and we also need to know about the
92    internal macros, structures, and other internal data values; pcretest has
93    "inside information" compared to a program that strictly follows the PCRE API.
94    
95    Although pcre_internal.h does itself include pcre.h, we explicitly include it
96    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97    appropriately for an application, not for building PCRE. */
98    
99    #include "pcre.h"
100    #include "pcre_internal.h"
101    
102    /* We need access to some of the data tables that PCRE uses. So as not to have
103    to keep two copies, we include the source file here, changing the names of the
104    external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107    #define _pcre_utf8_table1      utf8_table1
108    #define _pcre_utf8_table1_size utf8_table1_size
109    #define _pcre_utf8_table2      utf8_table2
110    #define _pcre_utf8_table3      utf8_table3
111    #define _pcre_utf8_table4      utf8_table4
112    #define _pcre_utt              utt
113    #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115    #define _pcre_OP_lengths       OP_lengths
116    
117    #include "pcre_tables.c"
118    
119    /* We also need the pcre_printint() function for printing out compiled
120    patterns. This function is in a separate file so that it can be included in
121    pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
123    
124    #define COMPILING_PCRETEST
125    #include "pcre_printint.src"
126    
127    /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
 #include "internal.h"  
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 58  Makefile. */ Line 140  Makefile. */
140  #include "pcreposix.h"  #include "pcreposix.h"
141  #endif  #endif
142    
143    /* It is also possible, for the benefit of the version currently imported into
144    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145    interface to the DFA matcher (NODFA), and without the doublecheck of the old
146    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147    UTF8 support if PCRE is built without it. */
148    
149    #ifndef SUPPORT_UTF8
150    #ifndef NOUTF8
151    #define NOUTF8
152    #endif
153    #endif
154    
155    
156    /* Other parameters */
157    
158  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
159  #ifdef CLK_TCK  #ifdef CLK_TCK
160  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 66  Makefile. */ Line 163  Makefile. */
163  #endif  #endif
164  #endif  #endif
165    
166  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
167    
168  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
169    
170    /* Static variables */
171    
172  static FILE *outfile;  static FILE *outfile;
173  static int log_store = 0;  static int log_store = 0;
# Line 79  static int callout_count; Line 175  static int callout_count;
175  static int callout_extra;  static int callout_extra;
176  static int callout_fail_count;  static int callout_fail_count;
177  static int callout_fail_id;  static int callout_fail_id;
178    static int debug_lengths;
179  static int first_callout;  static int first_callout;
180    static int locale_set = 0;
181  static int show_malloc;  static int show_malloc;
182  static int use_utf8;  static int use_utf8;
183  static size_t gotten_store;  static size_t gotten_store;
184    
185    /* The buffers grow automatically if very long input lines are encountered. */
186    
187    static int buffer_size = 50000;
188    static uschar *buffer = NULL;
189    static uschar *dbuffer = NULL;
190  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
191    
192    
193  static const int utf8_table1[] = {  /*************************************************
194    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  *         Alternate character tables             *
195    *************************************************/
196    
197  static const int utf8_table2[] = {  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200    the L (locale) option also adjusts the tables. */
201    
202    /* This is the set of tables distributed as default with PCRE. It recognizes
203    only ASCII characters. */
204    
205    static const unsigned char tables0[] = {
       /* Layout of this array, in order: a 256-entry lower-casing table, a
       256-entry case-flipping table, ten 32-byte character-class bit maps, and a
       256-entry table of per-character type bits. */
206    
207    /* This table is a lower casing table. */
208    
209        0,  1,  2,  3,  4,  5,  6,  7,
210        8,  9, 10, 11, 12, 13, 14, 15,
211       16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
       /* space: characters 9-13 and 32 */
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
       /* xdigit: 0-9, A-F, a-f */
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
       /* digit: 0-9 */
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
       /* upper: A-Z */
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
       /* lower: a-z */
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
       /* word: 0-9, A-Z, a-z and '_' */
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
       /* graph: characters 33-126 */
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
       /* print: characters 32-126 */
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
       /* punct: the graph characters that are not letters or digits */
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
       /* cntrl: characters 0-31 and 127 */
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
       /* NOTE(review): on the next two lines the text before the wide gap
       (the utf8_table3 declaration and its values) appears to be old-revision
       residue from the side-by-side diff rather than part of tables0 - confirm
       against the real file. */
341  static const int utf8_table3[] = {    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
       /* The array holds four consecutive sub-tables: 256 lower-casing entries,
       256 case-flipping entries, ten 32-byte character-class bit maps (320
       bytes), and 256 per-character type-bit entries. Unlike an ASCII-only
       table, entries for characters above 127 are populated. */

       /* Lower-casing table: 192-214 and 216-222 map to 224-246 and 248-254. */
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
       /* Case-flipping table. */
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
       /* Character-class bit maps, 32 bytes (four rows) each, least significant
       bit first. Presumably in the same order as the default table: space,
       xdigit, digit, upper, lower, word, graph, print, punct, cntrl - the bit
       patterns below are consistent with that order. */
       /* space */
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
       /* xdigit */
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
       /* digit */
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
       /* upper */
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
       /* lower */
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
       /* word */
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
       /* graph */
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
       /* print */
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
       /* punct */
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
       /* cntrl */
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
       /* Per-character type bits (decimal here): 1 = white space, 2 = letter,
       4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or '_',
       128 = regular expression metacharacter or binary zero. */
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515    };
516    
517    
518    
519  /*************************************************  /*************************************************
520  *         Print compiled regex                   *  *        Read or extend an input line            *
521  *************************************************/  *************************************************/
522    
523  /* The code for doing this is held in a separate file that is also included in  /* Input lines are read into buffer, but both patterns and data lines can be
524  pcre.c when it is compiled with the debug switch. It defines a function called  continued over multiple input lines. In addition, if the buffer fills up, we
525  print_internals(), which uses a table of opcode lengths defined by the macro  want to automatically expand it so as to be able to handle extremely large
526  OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates  lines that are needed for certain stress tests. When the input buffer is
527  Unicode property names to numbers; this is kept in a separate file. */  expanded, the other two buffers must also be expanded likewise, and the
528    contents of pbuffer, which are a copy of the input for callouts, must be
529  static uschar OP_lengths[] = { OP_LENGTHS };  preserved (for when expansion happens for a data line). This is not the most
530    optimal way of handling this, but hey, this is just a test program!
531  #include "ucp.h"  
532  #include "ucptypetable.c"  Arguments:
533  #include "printint.c"    f            the file to read
534      start        where in buffer to start (this *must* be within buffer)
535      prompt       for stdin or readline()
536    
537    Returns:       pointer to the start of new data
538                   could be a copy of start, or could be moved
539                   NULL if no data read and EOF reached
540    */
541    
542    static uschar *
543    extend_inputline(FILE *f, uschar *start, const char *prompt)
544    {
545    uschar *here = start;
546    
547    for (;;)
548      {
549      int rlen = (int)(buffer_size - (here - buffer));
550    
551      if (rlen > 1000)
552        {
553        int dlen;
554    
555        /* If libreadline support is required, use readline() to read a line if the
556        input is a terminal. Note that readline() removes the trailing newline, so
557        we must put it back again, to be compatible with fgets(). */
558    
559    #ifdef SUPPORT_LIBREADLINE
560        if (isatty(fileno(f)))
561          {
562          size_t len;
563          char *s = readline(prompt);
564          if (s == NULL) return (here == start)? NULL : start;
565          len = strlen(s);
566          if (len > 0) add_history(s);
567          if (len > rlen - 1) len = rlen - 1;
568          memcpy(here, s, len);
569          here[len] = '\n';
570          here[len+1] = 0;
571          free(s);
572          }
573        else
574    #endif
575    
576        /* Read the next line by normal means, prompting if the file is stdin. */
577    
578          {
579          if (f == stdin) printf("%s", prompt);
580          if (fgets((char *)here, rlen,  f) == NULL)
581            return (here == start)? NULL : start;
582          }
583    
584        dlen = (int)strlen((char *)here);
585        if (dlen > 0 && here[dlen - 1] == '\n') return start;
586        here += dlen;
587        }
588    
589      else
590        {
591        int new_buffer_size = 2*buffer_size;
592        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
593        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
594        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
595    
596        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
597          {
598          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
599          exit(1);
600          }
601    
602        memcpy(new_buffer, buffer, buffer_size);
603        memcpy(new_pbuffer, pbuffer, buffer_size);
604    
605        buffer_size = new_buffer_size;
606    
607        start = new_buffer + (start - buffer);
608        here = new_buffer + (here - buffer);
609    
610        free(buffer);
611        free(dbuffer);
612        free(pbuffer);
613    
614        buffer = new_buffer;
615        dbuffer = new_dbuffer;
616        pbuffer = new_pbuffer;
617        }
618      }
619    
620    return NULL;  /* Control never gets here */
621    }
622    
623    
624    
625    
626    
627    
628    
# Line 122  static uschar OP_lengths[] = { OP_LENGTH Line 632  static uschar OP_lengths[] = { OP_LENGTH
632    
633  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
634  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
635  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
636    
637  Arguments:  Arguments:
638    str           string to be converted    str           string to be converted
# Line 143  return(result); Line 653  return(result);
653    
654    
655    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 /* i ends up as the number of continuation bytes; j counts them down. */
 register int i, j;  
 /* Find the first utf8_table1 entry that is not smaller than cvalue. A
 negative cvalue satisfies the test immediately, leaving i at 0. */
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 /* No entry was large enough: report "too big" by returning 0. */
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 /* Negative input is rejected here, after the search, with -1. */
 if (cvalue < 0) return -1;  
   
 /* Move to the last byte of the sequence and fill in continuation bytes
 backwards, taking the low six bits of cvalue each time. */
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 /* buffer is back at the first byte; combine the remaining bits of cvalue
 with the utf8_table2 entry for this length. */
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
656    
657  /*************************************************  /*************************************************
658  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 188  return i + 1; Line 662  return i + 1;
662  and returns the value of the character.  and returns the value of the character.
663    
664  Argument:  Argument:
665    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
666    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
667    
668  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
669             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
670  */  */
671    
672    #if !defined NOUTF8
673    
674  static int  static int
675  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
676  {  {
677  int c = *buffer++;  int c = *utf8bytes++;
678  int d = c;  int d = c;
679  int i, j, s;  int i, j, s;
680    
# Line 218  d = (c & utf8_table3[i]) << s; Line 694  d = (c & utf8_table3[i]) << s;
694    
695  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
696    {    {
697    c = *buffer++;    c = *utf8bytes++;
698    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
699    s -= 6;    s -= 6;
700    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 226  for (j = 0; j < i; j++) Line 702  for (j = 0; j < i; j++)
702    
703  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
704    
705  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
706    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
707  if (j != i) return -(i+1);  if (j != i) return -(i+1);
708    
# Line 236  if (j != i) return -(i+1); Line 712  if (j != i) return -(i+1);
712  return i+1;  return i+1;
713  }  }
714    
715    #endif
716    
717    
718    
719    /*************************************************
720    *       Convert character value to UTF-8         *
721    *************************************************/
722    
723    /* This function takes an integer value in the range 0 - 0x7fffffff
724    and encodes it as a UTF-8 character in 1 to 6 bytes.
725    
726    Arguments:
727      cvalue     the character value
728      utf8bytes  pointer to buffer for result - at least 6 bytes long
729    
730    Returns:     number of bytes placed in the buffer
731    */
732    
733    #if !defined NOUTF8
734    
735    static int
736    ord2utf8(int cvalue, uschar *utf8bytes)
737    {
738    register int i, j;
739    for (i = 0; i < utf8_table1_size; i++)
740      if (cvalue <= utf8_table1[i]) break;
741    utf8bytes += i;
742    for (j = i; j > 0; j--)
743     {
744     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
745     cvalue >>= 6;
746     }
747    *utf8bytes = utf8_table2[i] | cvalue;
748    return i + 1;
749    }
750    
751    #endif
752    
753    
754    
755  /*************************************************  /*************************************************
# Line 248  chars without printing. */ Line 762  chars without printing. */
762    
763  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
764  {  {
765  int c;  int c = 0;
766  int yield = 0;  int yield = 0;
767    
768  while (length-- > 0)  while (length-- > 0)
769    {    {
770    #if !defined NOUTF8
771    if (use_utf8)    if (use_utf8)
772      {      {
773      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 261  while (length-- > 0) Line 776  while (length-- > 0)
776        {        {
777        length -= rc - 1;        length -= rc - 1;
778        p += rc;        p += rc;
779        if (c < 256 && isprint(c))        if (PRINTHEX(c))
780          {          {
781          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
782          yield++;          yield++;
783          }          }
784        else        else
785          {          {
786          int n;          int n = 4;
787          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
788          yield += n;          yield += (n <= 0x000000ff)? 2 :
789                     (n <= 0x00000fff)? 3 :
790                     (n <= 0x0000ffff)? 4 :
791                     (n <= 0x000fffff)? 5 : 6;
792          }          }
793        continue;        continue;
794        }        }
795      }      }
796    #endif
797    
798     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
799    
800    if (isprint(c = *(p++)))    c = *p++;
801      if (PRINTHEX(c))
802      {      {
803      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
804      yield++;      yield++;
# Line 403  static void *new_malloc(size_t size) Line 923  static void *new_malloc(size_t size)
923  void *block = malloc(size);  void *block = malloc(size);
924  gotten_store = size;  gotten_store = size;
925  if (show_malloc)  if (show_malloc)
926    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
927  return block;  return block;
928  }  }
929    
# Line 421  static void *stack_malloc(size_t size) Line 941  static void *stack_malloc(size_t size)
941  {  {
942  void *block = malloc(size);  void *block = malloc(size);
943  if (show_malloc)  if (show_malloc)
944    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
945  return block;  return block;
946  }  }
947    
# Line 452  if ((rc = pcre_fullinfo(re, study, optio Line 972  if ((rc = pcre_fullinfo(re, study, optio
972  *         Byte flipping function                 *  *         Byte flipping function                 *
973  *************************************************/  *************************************************/
974    
975  static long int  static unsigned long int
976  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
977  {  {
978  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
979  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 466  return ((value & 0x000000ff) << 24) | Line 986  return ((value & 0x000000ff) << 24) |
986    
987    
988  /*************************************************  /*************************************************
989    *        Check match or recursion limit          *
990    *************************************************/
991    
992    static int
993    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
994      int start_offset, int options, int *use_offsets, int use_size_offsets,
995      int flag, unsigned long int *limit, int errnumber, const char *msg)
996    {
997    int count;
998    int min = 0;
999    int mid = 64;
1000    int max = -1;
1001    
1002    extra->flags |= flag;
1003    
1004    for (;;)
1005      {
1006      *limit = mid;
1007    
1008      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1009        use_offsets, use_size_offsets);
1010    
1011      if (count == errnumber)
1012        {
1013        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1014        min = mid;
1015        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1016        }
1017    
1018      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1019                             count == PCRE_ERROR_PARTIAL)
1020        {
1021        if (mid == min + 1)
1022          {
1023          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1024          break;
1025          }
1026        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1027        max = mid;
1028        mid = (min + mid)/2;
1029        }
1030      else break;    /* Some other error */
1031      }
1032    
1033    extra->flags &= ~flag;
1034    return count;
1035    }
1036    
1037    
1038    
1039    /*************************************************
1040    *         Case-independent strncmp() function    *
1041    *************************************************/
1042    
1043    /*
1044    Arguments:
1045      s         first string
1046      t         second string
1047      n         number of characters to compare
1048    
1049    Returns:    < 0, = 0, or > 0, according to the comparison
1050    */
1051    
1052    static int
1053    strncmpic(uschar *s, uschar *t, int n)
1054    {
1055    while (n--)
1056      {
1057      int c = tolower(*s++) - tolower(*t++);
1058      if (c) return c;
1059      }
1060    return 0;
1061    }
1062    
1063    
1064    
1065    /*************************************************
1066    *         Check newline indicator                *
1067    *************************************************/
1068    
1069    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1070    a message and return 0 if there is no match.
1071    
1072    Arguments:
1073      p           points after the leading '<'
1074      f           file for error message
1075    
1076    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1077    */
1078    
1079    static int
1080    check_newline(uschar *p, FILE *f)
1081    {
1082    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1083    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1084    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1085    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1086    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1087    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1088    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1089    fprintf(f, "Unknown newline type at: <%s\n", p);
1090    return 0;
1091    }
1092    
1093    
1094    
1095    /*************************************************
1096    *             Usage function                     *
1097    *************************************************/
1098    
/* Write the command-line summary to stdout. Lines describing features that
are compiled in or out conditionally (readline, DFA matching, the POSIX
interface) are selected by the same preprocessor tests as the features. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1132    
1133    
1134    
1135    /*************************************************
1136  *                Main Program                    *  *                Main Program                    *
1137  *************************************************/  *************************************************/
1138    
# Line 478  int main(int argc, char **argv) Line 1145  int main(int argc, char **argv)
1145  FILE *infile = stdin;  FILE *infile = stdin;
1146  int options = 0;  int options = 0;
1147  int study_options = 0;  int study_options = 0;
1148    int default_find_match_limit = FALSE;
1149  int op = 1;  int op = 1;
1150  int timeit = 0;  int timeit = 0;
1151    int timeitm = 0;
1152  int showinfo = 0;  int showinfo = 0;
1153  int showstore = 0;  int showstore = 0;
1154    int quiet = 0;
1155  int size_offsets = 45;  int size_offsets = 45;
1156  int size_offsets_max;  int size_offsets_max;
1157  int *offsets;  int *offsets = NULL;
1158  #if !defined NOPOSIX  #if !defined NOPOSIX
1159  int posix = 0;  int posix = 0;
1160  #endif  #endif
1161  int debug = 0;  int debug = 0;
1162  int done = 0;  int done = 0;
1163    int all_use_dfa = 0;
1164    int yield = 0;
1165    int stack_size;
1166    
1167    /* These vectors store, end-to-end, a list of captured substring names. Assume
1168    that 1024 is plenty long enough for the few names we'll be testing. */
1169    
1170    uschar copynames[1024];
1171    uschar getnames[1024];
1172    
1173  unsigned char *buffer;  uschar *copynamesptr;
1174  unsigned char *dbuffer;  uschar *getnamesptr;
1175    
1176  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1177  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
1178    
1179  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
1180  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
1181  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1182    
1183  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
1184    
1185  outfile = stdout;  outfile = stdout;
1186    
1187    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1188    library to translate CRLF into a single LF character. At least, that's what
1189    I've been told: never having used Windows I take this all on trust. Originally
1190    it set 0x8000, but then I was advised that _O_BINARY was better. */
1191    
1192    #if defined(_WIN32) || defined(WIN32)
1193    _setmode( _fileno( stdout ), _O_BINARY );
1194    #endif
1195    
1196  /* Scan options */  /* Scan options */
1197    
1198  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 519  while (argc > 1 && argv[op][0] == '-') Line 1201  while (argc > 1 && argv[op][0] == '-')
1201    
1202    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1203      showstore = 1;      showstore = 1;
1204    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1205      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1206    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1207    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1208      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1209    #if !defined NODFA
1210      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1211    #endif
1212    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1213        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1214          *endptr == 0))          *endptr == 0))
# Line 529  while (argc > 1 && argv[op][0] == '-') Line 1216  while (argc > 1 && argv[op][0] == '-')
1216      op++;      op++;
1217      argc--;      argc--;
1218      }      }
1219      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1220        {
1221        int both = argv[op][2] == 0;
1222        int temp;
1223        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1224                         *endptr == 0))
1225          {
1226          timeitm = temp;
1227          op++;
1228          argc--;
1229          }
1230        else timeitm = LOOPREPEAT;
1231        if (both) timeit = timeitm;
1232        }
1233      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1234          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1235            *endptr == 0))
1236        {
1237    #if defined(_WIN32) || defined(WIN32)
1238        printf("PCRE: -S not supported on this OS\n");
1239        exit(1);
1240    #else
1241        int rc;
1242        struct rlimit rlim;
1243        getrlimit(RLIMIT_STACK, &rlim);
1244        rlim.rlim_cur = stack_size * 1024 * 1024;
1245        rc = setrlimit(RLIMIT_STACK, &rlim);
1246        if (rc != 0)
1247          {
1248        printf("PCRE: setrlimit() failed with error %d\n", rc);
1249        exit(1);
1250          }
1251        op++;
1252        argc--;
1253    #endif
1254        }
1255  #if !defined NOPOSIX  #if !defined NOPOSIX
1256    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1257  #endif  #endif
1258    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1259      {      {
1260      int rc;      int rc;
1261        unsigned long int lrc;
1262      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1263      printf("Compiled with\n");      printf("Compiled with\n");
1264      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 542  while (argc > 1 && argv[op][0] == '-') Line 1266  while (argc > 1 && argv[op][0] == '-')
1266      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1267      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1268      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1269      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1270        in EBCDIC environments. CR is 13 and NL is 10. */
1271        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1272          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1273          (rc == -2)? "ANYCRLF" :
1274          (rc == -1)? "ANY" : "???");
1275        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1276        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1277                                         "all Unicode newlines");
1278      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1279      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1280      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1281      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1282      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1283      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1284        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1285        printf("  Default recursion depth limit = %ld\n", lrc);
1286      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1287      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1288      exit(0);      goto EXIT;
1289        }
1290      else if (strcmp(argv[op], "-help") == 0 ||
1291               strcmp(argv[op], "--help") == 0)
1292        {
1293        usage();
1294        goto EXIT;
1295      }      }
1296    else    else
1297      {      {
1298      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1299      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1300      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
1301      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1302      }      }
1303    op++;    op++;
1304    argc--;    argc--;
# Line 580  offsets = (int *)malloc(size_offsets_max Line 1311  offsets = (int *)malloc(size_offsets_max
1311  if (offsets == NULL)  if (offsets == NULL)
1312    {    {
1313    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1314      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1315    return 1;    yield = 1;
1316      goto EXIT;
1317    }    }
1318    
1319  /* Sort out the input and output files */  /* Sort out the input and output files */
1320    
1321  if (argc > 1)  if (argc > 1)
1322    {    {
1323    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1324    if (infile == NULL)    if (infile == NULL)
1325      {      {
1326      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1327      return 1;      yield = 1;
1328        goto EXIT;
1329      }      }
1330    }    }
1331    
1332  if (argc > 2)  if (argc > 2)
1333    {    {
1334    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1335    if (outfile == NULL)    if (outfile == NULL)
1336      {      {
1337      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1338      return 1;      yield = 1;
1339        goto EXIT;
1340      }      }
1341    }    }
1342    
# Line 613  pcre_free = new_free; Line 1347  pcre_free = new_free;
1347  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1348  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1349    
1350  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1351    
1352  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1353    
1354  /* Main loop */  /* Main loop */
1355    
# Line 630  while (!done) Line 1364  while (!done)
1364  #endif  #endif
1365    
1366    const char *error;    const char *error;
1367      unsigned char *markptr;
1368    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1369    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1370    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1371    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1372    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1373      int do_mark = 0;
1374    int do_study = 0;    int do_study = 0;
1375    int do_debug = debug;    int do_debug = debug;
1376    int do_G = 0;    int do_G = 0;
# Line 642  while (!done) Line 1378  while (!done)
1378    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1379    int do_showrest = 0;    int do_showrest = 0;
1380    int do_flip = 0;    int do_flip = 0;
1381    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1382    
1383    use_utf8 = 0;    use_utf8 = 0;
1384      debug_lengths = 1;
1385    
1386    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1387    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1388    fflush(outfile);    fflush(outfile);
1389    
# Line 659  while (!done) Line 1395  while (!done)
1395    
1396    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1397      {      {
1398      unsigned long int magic;      unsigned long int magic, get_options;
1399      uschar sbuf[8];      uschar sbuf[8];
1400      FILE *f;      FILE *f;
1401    
# Line 707  while (!done) Line 1443  while (!done)
1443    
1444      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1445    
1446      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1447      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1448    
1449      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1450    
# Line 747  while (!done) Line 1483  while (!done)
1483    
1484    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1485      {      {
1486      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1487      goto SKIP_DATA;      goto SKIP_DATA;
1488      }      }
1489    
1490    pp = p;    pp = p;
1491      poffset = (int)(p - buffer);
1492    
1493    for(;;)    for(;;)
1494      {      {
# Line 762  while (!done) Line 1499  while (!done)
1499        pp++;        pp++;
1500        }        }
1501      if (*pp != 0) break;      if (*pp != 0) break;
1502        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1503        {        {
1504        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1505        done = 1;        done = 1;
# Line 780  while (!done) Line 1508  while (!done)
1508      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1509      }      }
1510    
1511      /* The buffer may have moved while being extended; reset the start of data
1512      pointer to the correct relative point in the buffer. */
1513    
1514      p = buffer + poffset;
1515    
1516    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1517    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1518    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 802  while (!done) Line 1535  while (!done)
1535      {      {
1536      switch (*pp++)      switch (*pp++)
1537        {        {
1538          case 'f': options |= PCRE_FIRSTLINE; break;
1539        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1540        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1541        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 810  while (!done) Line 1544  while (!done)
1544    
1545        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1546        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1547          case 'B': do_debug = 1; break;
1548        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1549        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1550        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1551        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1552        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1553        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1554          case 'J': options |= PCRE_DUPNAMES; break;
1555          case 'K': do_mark = 1; break;
1556        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1557        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1558    
# Line 825  while (!done) Line 1562  while (!done)
1562    
1563        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1564        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1565          case 'W': options |= PCRE_UCP; break;
1566        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1567          case 'Z': debug_lengths = 0; break;
1568        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1569        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1570    
1571          case 'T':
1572          switch (*pp++)
1573            {
1574            case '0': tables = tables0; break;
1575            case '1': tables = tables1; break;
1576    
1577            case '\r':
1578            case '\n':
1579            case ' ':
1580            case 0:
1581            fprintf(outfile, "** Missing table number after /T\n");
1582            goto SKIP_DATA;
1583    
1584            default:
1585            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1586            goto SKIP_DATA;
1587            }
1588          break;
1589    
1590        case 'L':        case 'L':
1591        ppp = pp;        ppp = pp;
1592        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1593          /* The '0' test is just in case this is an unterminated line. */
1594          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1595        *ppp = 0;        *ppp = 0;
1596        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1597          {          {
1598          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1599          goto SKIP_DATA;          goto SKIP_DATA;
1600          }          }
1601          locale_set = 1;
1602        tables = pcre_maketables();        tables = pcre_maketables();
1603        pp = ppp;        pp = ppp;
1604        break;        break;
# Line 849  while (!done) Line 1610  while (!done)
1610        *pp = 0;        *pp = 0;
1611        break;        break;
1612    
1613        case '\n': case ' ': break;        case '<':
1614            {
1615            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1616              {
1617              options |= PCRE_JAVASCRIPT_COMPAT;
1618              pp += 3;
1619              }
1620            else
1621              {
1622              int x = check_newline(pp, outfile);
1623              if (x == 0) goto SKIP_DATA;
1624              options |= x;
1625              while (*pp++ != '>');
1626              }
1627            }
1628          break;
1629    
1630          case '\r':                      /* So that it works in Windows */
1631          case '\n':
1632          case ' ':
1633          break;
1634    
1635        default:        default:
1636        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 869  while (!done) Line 1650  while (!done)
1650    
1651      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1652      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1653        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1654        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1655        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1656        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1657        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1658    
1659      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1660    
1661      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 876  while (!done) Line 1663  while (!done)
1663    
1664      if (rc != 0)      if (rc != 0)
1665        {        {
1666        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1667        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1668        goto SKIP_DATA;        goto SKIP_DATA;
1669        }        }
# Line 888  while (!done) Line 1675  while (!done)
1675  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1676    
1677      {      {
1678      if (timeit)      unsigned long int get_options;
1679    
1680        if (timeit > 0)
1681        {        {
1682        register int i;        register int i;
1683        clock_t time_taken;        clock_t time_taken;
1684        clock_t start_time = clock();        clock_t start_time = clock();
1685        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1686          {          {
1687          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1688          if (re != NULL) free(re);          if (re != NULL) free(re);
1689          }          }
1690        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1691        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1692          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1693            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1694        }        }
1695    
# Line 917  while (!done) Line 1706  while (!done)
1706          {          {
1707          for (;;)          for (;;)
1708            {            {
1709            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1710              {              {
1711              done = 1;              done = 1;
1712              goto CONTINUE;              goto CONTINUE;
# Line 931  while (!done) Line 1720  while (!done)
1720        goto CONTINUE;        goto CONTINUE;
1721        }        }
1722    
1723      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1724      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1725      returns only limited data. Check that it agrees with the newer one. */      lines. */
1726    
1727        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1728        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1729    
1730        /* Print information if required. There are now two info-returning
1731        functions. The old one has a limited interface and returns only limited
1732        data. Check that it agrees with the newer one. */
1733    
1734      if (log_store)      if (log_store)
1735        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 952  while (!done) Line 1748  while (!done)
1748    
1749      if (do_study)      if (do_study)
1750        {        {
1751        if (timeit)        if (timeit > 0)
1752          {          {
1753          register int i;          register int i;
1754          clock_t time_taken;          clock_t time_taken;
1755          clock_t start_time = clock();          clock_t start_time = clock();
1756          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1757            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1758          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1759          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1760          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1761            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1762              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1763          }          }
1764        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 972  while (!done) Line 1768  while (!done)
1768          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1769        }        }
1770    
1771        /* If /K was present, we set up for handling MARK data. */
1772    
1773        if (do_mark)
1774          {
1775          if (extra == NULL)
1776            {
1777            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1778            extra->flags = 0;
1779            }
1780          extra->mark = &markptr;
1781          extra->flags |= PCRE_EXTRA_MARK;
1782          }
1783    
1784      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1785      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1786      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 980  while (!done) Line 1789  while (!done)
1789      if (do_flip)      if (do_flip)
1790        {        {
1791        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1792        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1793            byteflip(rre->magic_number, sizeof(rre->magic_number));
1794        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1795        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1796        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1797        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1798        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1799        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1800        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1801          rre->first_byte =
1802            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1803          rre->req_byte =
1804            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1805          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1806          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1807        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1808          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1809        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1810            sizeof(rre->name_count));
1811    
1812        if (extra != NULL)        if (extra != NULL)
1813          {          {
1814          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1815          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1816          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1817            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1818          }          }
1819        }        }
1820    
# Line 1005  while (!done) Line 1822  while (!done)
1822    
1823      SHOW_INFO:      SHOW_INFO:
1824    
1825        if (do_debug)
1826          {
1827          fprintf(outfile, "------------------------------------------------------------------\n");
1828          pcre_printint(re, outfile, debug_lengths);
1829          }
1830    
1831        /* We already have the options in get_options (see above) */
1832    
1833      if (do_showinfo)      if (do_showinfo)
1834        {        {
1835        unsigned long int get_options, all_options;        unsigned long int all_options;
1836    #if !defined NOINFOCHECK
1837        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1838        int count, backrefmax, first_char, need_char;  #endif
1839          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1840            hascrorlf;
1841        int nameentrysize, namecount;        int nameentrysize, namecount;
1842        const uschar *nametable;        const uschar *nametable;
1843    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
   
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1844        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1845        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1846        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1028  while (!done) Line 1849  while (!done)
1849        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1850        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1851        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1852          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1853          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1854          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1855    
1856    #if !defined NOINFOCHECK
1857        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1858        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1859          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1046  while (!done) Line 1871  while (!done)
1871            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1872              get_options, old_options);              get_options, old_options);
1873          }          }
1874    #endif
1875    
1876        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1877          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1878          size, regex_gotten_store);          (int)size, (int)regex_gotten_store);
1879    
1880        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1881        if (backrefmax > 0)        if (backrefmax > 0)
# Line 1067  while (!done) Line 1893  while (!done)
1893            }            }
1894          }          }
1895    
1896        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1897        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1898    
1899        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1900        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1901    
1902        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1903          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1904            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1905            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1906            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1907            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1908              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1909            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1910              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1911              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1912            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1913            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1914            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1915              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1916            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1917            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1918              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1919              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1920    
1921          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1922    
1923          switch (get_options & PCRE_NEWLINE_BITS)
1924            {
1925            case PCRE_NEWLINE_CR:
1926            fprintf(outfile, "Forced newline sequence: CR\n");
1927            break;
1928    
1929            case PCRE_NEWLINE_LF:
1930            fprintf(outfile, "Forced newline sequence: LF\n");
1931            break;
1932    
1933            case PCRE_NEWLINE_CRLF:
1934            fprintf(outfile, "Forced newline sequence: CRLF\n");
1935            break;
1936    
1937            case PCRE_NEWLINE_ANYCRLF:
1938            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1939            break;
1940    
1941            case PCRE_NEWLINE_ANY:
1942            fprintf(outfile, "Forced newline sequence: ANY\n");
1943            break;
1944    
1945        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          default:
1946          fprintf(outfile, "Case state changes\n");          break;
1947            }
1948    
1949        if (first_char == -1)        if (first_char == -1)
1950          {          {
1951          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1952          }          }
1953        else if (first_char < 0)        else if (first_char < 0)
1954          {          {
# Line 1108  while (!done) Line 1959  while (!done)
1959          int ch = first_char & 255;          int ch = first_char & 255;
1960          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1961            "" : " (caseless)";            "" : " (caseless)";
1962          if (isprint(ch))          if (PRINTHEX(ch))
1963            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1964          else          else
1965            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1123  while (!done) Line 1974  while (!done)
1974          int ch = need_char & 255;          int ch = need_char & 255;
1975          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1976            "" : " (caseless)";            "" : " (caseless)";
1977          if (isprint(ch))          if (PRINTHEX(ch))
1978            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1979          else          else
1980            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1141  while (!done) Line 1992  while (!done)
1992          else          else
1993            {            {
1994            uschar *start_bits = NULL;            uschar *start_bits = NULL;
1995            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
1996    
1997              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1998              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1999    
2000              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2001            if (start_bits == NULL)            if (start_bits == NULL)
2002              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2003            else            else
2004              {              {
2005              int i;              int i;
# Line 1159  while (!done) Line 2014  while (!done)
2014                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2015                    c = 2;                    c = 2;
2016                    }                    }
2017                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
2018                    {                    {
2019                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2020                    c += 2;                    c += 2;
# Line 1191  while (!done) Line 2046  while (!done)
2046        else        else
2047          {          {
2048          uschar sbuf[8];          uschar sbuf[8];
2049          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
2050          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
2051          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
2052          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
2053    
2054          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2055          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2056          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2057          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
2058    
2059          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2060              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1218  while (!done) Line 2073  while (!done)
2073                  strerror(errno));                  strerror(errno));
2074                }                }
2075              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
2076    
2077              }              }
2078            }            }
2079          fclose(f);          fclose(f);
2080          }          }
2081    
2082          new_free(re);
2083          if (extra != NULL) new_free(extra);
2084          if (locale_set)
2085            {
2086            new_free((void *)tables);
2087            setlocale(LC_CTYPE, "C");
2088            locale_set = 0;
2089            }
2090        continue;  /* With next regex */        continue;  /* With next regex */
2091        }        }
2092      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1230  while (!done) Line 2095  while (!done)
2095    
2096    for (;;)    for (;;)
2097      {      {
2098      unsigned char *q;      uschar *q;
2099      unsigned char *bptr = dbuffer;      uschar *bptr;
2100      int *use_offsets = offsets;      int *use_offsets = offsets;
2101      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2102      int callout_data = 0;      int callout_data = 0;
2103      int callout_data_set = 0;      int callout_data_set = 0;
2104      int count, c;      int count, c;
2105      int copystrings = 0;      int copystrings = 0;
2106      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2107      int getstrings = 0;      int getstrings = 0;
2108      int getlist = 0;      int getlist = 0;
2109      int gmatched = 0;      int gmatched = 0;
2110      int start_offset = 0;      int start_offset = 0;
2111      int g_notempty = 0;      int g_notempty = 0;
2112        int use_dfa = 0;
2113    
2114      options = 0;      options = 0;
2115    
2116        *copynames = 0;
2117        *getnames = 0;
2118    
2119        copynamesptr = copynames;
2120        getnamesptr = getnames;
2121    
2122      pcre_callout = callout;      pcre_callout = callout;
2123      first_callout = 1;      first_callout = 1;
2124      callout_extra = 0;      callout_extra = 0;
# Line 1255  while (!done) Line 2127  while (!done)
2127      callout_fail_id = -1;      callout_fail_id = -1;
2128      show_malloc = 0;      show_malloc = 0;
2129    
2130      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2131      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2132    
2133        len = 0;
2134        for (;;)
2135        {        {
2136        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2137        goto CONTINUE;          {
2138            if (len > 0)    /* Reached EOF without hitting a newline */
2139              {
2140              fprintf(outfile, "\n");
2141              break;
2142              }
2143            done = 1;
2144            goto CONTINUE;
2145            }
2146          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2147          len = (int)strlen((char *)buffer);
2148          if (buffer[len-1] == '\n') break;
2149        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2150    
     len = (int)strlen((char *)buffer);  
2151      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2152      buffer[len] = 0;      buffer[len] = 0;
2153      if (len == 0) break;      if (len == 0) break;
# Line 1271  while (!done) Line 2155  while (!done)
2155      p = buffer;      p = buffer;
2156      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2157    
2158      q = dbuffer;      bptr = q = dbuffer;
2159      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2160        {        {
2161        int i = 0;        int i = 0;
# Line 1293  while (!done) Line 2177  while (!done)
2177          c -= '0';          c -= '0';
2178          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2179            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2180    
2181    #if !defined NOUTF8
2182            if (use_utf8 && c > 255)
2183              {
2184              unsigned char buff8[8];
2185              int ii, utn;
2186              utn = ord2utf8(c, buff8);
2187              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2188              c = buff8[ii];   /* Last byte */
2189              }
2190    #endif
2191          break;          break;
2192    
2193          case 'x':          case 'x':
2194    
2195          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2196    
2197    #if !defined NOUTF8
2198          if (*p == '{')          if (*p == '{')
2199            {            {
2200            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1309  while (!done) Line 2205  while (!done)
2205              {              {
2206              unsigned char buff8[8];              unsigned char buff8[8];
2207              int ii, utn;              int ii, utn;
2208              utn = ord2utf8(c, buff8);              if (use_utf8)
2209              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2210              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2211                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2212                  c = buff8[ii];   /* Last byte */
2213                  }
2214                else
2215                 {
2216                 if (c > 255)
2217                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2218                     "UTF-8 mode is not enabled.\n"
2219                     "** Truncation will probably give the wrong result.\n", c);
2220                 }
2221              p = pt + 1;              p = pt + 1;
2222              break;              break;
2223              }              }
2224            /* Not correct form; fall through */            /* Not correct form; fall through */
2225            }            }
2226    #endif
2227    
2228          /* Ordinary \x */          /* Ordinary \x */
2229    
# Line 1352  while (!done) Line 2259  while (!done)
2259            }            }
2260          else if (isalnum(*p))          else if (isalnum(*p))
2261            {            {
2262            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
2263            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2264              *npp++ = 0;
2265            *npp = 0;            *npp = 0;
2266            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2267            if (n < 0)            if (n < 0)
2268              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2269            else copystrings |= 1 << n;            copynamesptr = npp;
2270            }            }
2271          else if (*p == '+')          else if (*p == '+')
2272            {            {
# Line 1397  while (!done) Line 2304  while (!done)
2304            }            }
2305          continue;          continue;
2306    
2307    #if !defined NODFA
2308            case 'D':
2309    #if !defined NOPOSIX
2310            if (posix || do_posix)
2311              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2312            else
2313    #endif
2314              use_dfa = 1;
2315            continue;
2316    #endif
2317    
2318    #if !defined NODFA
2319            case 'F':
2320            options |= PCRE_DFA_SHORTEST;
2321            continue;
2322    #endif
2323    
2324          case 'G':          case 'G':
2325          if (isdigit(*p))          if (isdigit(*p))
2326            {            {
# Line 1405  while (!done) Line 2329  while (!done)
2329            }            }
2330          else if (isalnum(*p))          else if (isalnum(*p))
2331            {            {
2332            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
2333            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2334              *npp++ = 0;
2335            *npp = 0;            *npp = 0;
2336            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2337            if (n < 0)            if (n < 0)
2338              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2339            else getstrings |= 1 << n;            getnamesptr = npp;
2340            }            }
2341          continue;          continue;
2342    
# Line 1425  while (!done) Line 2349  while (!done)
2349          continue;          continue;
2350    
2351          case 'N':          case 'N':
2352          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2353              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2354            else
2355              options |= PCRE_NOTEMPTY;
2356          continue;          continue;
2357    
2358          case 'O':          case 'O':
# Line 1438  while (!done) Line 2365  while (!done)
2365            if (offsets == NULL)            if (offsets == NULL)
2366              {              {
2367              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2368                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2369              return 1;              yield = 1;
2370                goto EXIT;
2371              }              }
2372            }            }
2373          use_size_offsets = n;          use_size_offsets = n;
# Line 1447  while (!done) Line 2375  while (!done)
2375          continue;          continue;
2376    
2377          case 'P':          case 'P':
2378          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2379              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2380          continue;          continue;
2381    
2382            case 'Q':
2383            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2384            if (extra == NULL)
2385              {
2386              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2387              extra->flags = 0;
2388              }
2389            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2390            extra->match_limit_recursion = n;
2391            continue;
2392    
2393            case 'q':
2394            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2395            if (extra == NULL)
2396              {
2397              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2398              extra->flags = 0;
2399              }
2400            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2401            extra->match_limit = n;
2402            continue;
2403    
2404    #if !defined NODFA
2405            case 'R':
2406            options |= PCRE_DFA_RESTART;
2407            continue;
2408    #endif
2409    
2410          case 'S':          case 'S':
2411          show_malloc = 1;          show_malloc = 1;
2412          continue;          continue;
2413    
2414            case 'Y':
2415            options |= PCRE_NO_START_OPTIMIZE;
2416            continue;
2417    
2418          case 'Z':          case 'Z':
2419          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2420          continue;          continue;
# Line 1461  while (!done) Line 2422  while (!done)
2422          case '?':          case '?':
2423          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2424          continue;          continue;
2425    
2426            case '<':
2427              {
2428              int x = check_newline(p, outfile);
2429              if (x == 0) goto NEXT_DATA;
2430              options |= x;
2431              while (*p++ != '>');
2432              }
2433            continue;
2434          }          }
2435        *q++ = c;        *q++ = c;
2436        }        }
2437      *q = 0;      *q = 0;
2438      len = q - dbuffer;      len = (int)(q - dbuffer);
2439    
2440        /* Move the data to the end of the buffer so that a read over the end of
2441        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2442        we are using the POSIX interface, we must include the terminating zero. */
2443    
2444    #if !defined NOPOSIX
2445        if (posix || do_posix)
2446          {
2447          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2448          bptr += buffer_size - len - 1;
2449          }
2450        else
2451    #endif
2452          {
2453          memmove(bptr + buffer_size - len, bptr, len);
2454          bptr += buffer_size - len;
2455          }
2456    
2457        if ((all_use_dfa || use_dfa) && find_match_limit)
2458          {
2459          printf("**Match limit not relevant for DFA matching: ignored\n");
2460          find_match_limit = 0;
2461          }
2462    
2463      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2464      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
# Line 1480  while (!done) Line 2473  while (!done)
2473          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2474        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2475        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2476          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2477    
2478        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2479    
2480        if (rc != 0)        if (rc != 0)
2481          {          {
2482          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2483          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2484          }          }
2485          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2486                  != 0)
2487            {
2488            fprintf(outfile, "Matched with REG_NOSUB\n");
2489            }
2490        else        else
2491          {          {
2492          size_t i;          size_t i;
# Line 1519  while (!done) Line 2518  while (!done)
2518    
2519      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2520        {        {
2521        if (timeit)        markptr = NULL;
2522    
2523          if (timeitm > 0)
2524          {          {
2525          register int i;          register int i;
2526          clock_t time_taken;          clock_t time_taken;
2527          clock_t start_time = clock();          clock_t start_time = clock();
2528          for (i = 0; i < LOOPREPEAT; i++)  
2529    #if !defined NODFA
2530            if (all_use_dfa || use_dfa)
2531              {
2532              int workspace[1000];
2533              for (i = 0; i < timeitm; i++)
2534                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2535                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2536                  sizeof(workspace)/sizeof(int));
2537              }
2538            else
2539    #endif
2540    
2541            for (i = 0; i < timeitm; i++)
2542            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2543              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2544    
2545          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2546          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2547            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2548              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2549          }          }
2550    
2551        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2552        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2553          for the recursion limit. */
2554    
2555        if (find_match_limit)        if (find_match_limit)
2556          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2557          if (extra == NULL)          if (extra == NULL)
2558            {            {
2559            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2560            extra->flags = 0;            extra->flags = 0;
2561            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2562    
2563          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2564              options|g_notempty, use_offsets, use_size_offsets,
2565              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2566              PCRE_ERROR_MATCHLIMIT, "match()");
2567    
2568            count = check_match_limit(re, extra, bptr, len, start_offset,
2569              options|g_notempty, use_offsets, use_size_offsets,
2570              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2571              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2572          }          }
2573    
2574        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1597  while (!done) Line 2590  while (!done)
2590        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2591        value of match_limit. */        value of match_limit. */
2592    
2593        else  #if !defined NODFA
2594          else if (all_use_dfa || use_dfa)
2595          {          {
2596          count = pcre_exec(re, extra, (char *)bptr, len,          int workspace[1000];
2597            start_offset, options | g_notempty, use_offsets, use_size_offsets);          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2598              options | g_notempty, use_offsets, use_size_offsets, workspace,
2599              sizeof(workspace)/sizeof(int));
2600            if (count == 0)
2601              {
2602              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2603              count = use_size_offsets/2;
2604              }
2605          }          }
2606    #endif
2607    
2608        if (count == 0)        else
2609          {          {
2610          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2611          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2612            if (count == 0)
2613              {
2614              fprintf(outfile, "Matched, but too many substrings\n");
2615              count = use_size_offsets/3;
2616              }
2617          }          }
2618    
2619        /* Matched */        /* Matched */
2620    
2621        if (count >= 0)        if (count >= 0)
2622          {          {
2623          int i;          int i, maxcount;
2624    
2625    #if !defined NODFA
2626            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2627    #endif
2628              maxcount = use_size_offsets/3;
2629    
2630            /* This is a check against a lunatic return value. */
2631    
2632            if (count > maxcount)
2633              {
2634              fprintf(outfile,
2635                "** PCRE error: returned count %d is too big for offset size %d\n",
2636                count, use_size_offsets);
2637              count = use_size_offsets/3;
2638              if (do_g || do_G)
2639                {
2640                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2641                do_g = do_G = FALSE;        /* Break g/G loop */
2642                }
2643              }
2644    
2645          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2646            {            {
2647            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1637  while (!done) Line 2665  while (!done)
2665              }              }
2666            }            }
2667    
2668            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2669    
2670          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2671            {            {
2672            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2673              {              {
2674              char copybuffer[16];              char copybuffer[256];
2675              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2676                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2677              if (rc < 0)              if (rc < 0)
# Line 1651  while (!done) Line 2681  while (!done)
2681              }              }
2682            }            }
2683    
2684            for (copynamesptr = copynames;
2685                 *copynamesptr != 0;
2686                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2687              {
2688              char copybuffer[256];
2689              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2690                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2691              if (rc < 0)
2692                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2693              else
2694                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2695              }
2696    
2697          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2698            {            {
2699            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1663  while (!done) Line 2706  while (!done)
2706              else              else
2707                {                {
2708                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2709                pcre_free_substring(substring);                pcre_free_substring(substring);
2710                }                }
2711              }              }
2712            }            }
2713    
2714            for (getnamesptr = getnames;
2715                 *getnamesptr != 0;
2716                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2717              {
2718              const char *substring;
2719              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2720                count, (char *)getnamesptr, &substring);
2721              if (rc < 0)
2722                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2723              else
2724                {
2725                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2726                pcre_free_substring(substring);
2727                }
2728              }
2729    
2730          if (getlist)          if (getlist)
2731            {            {
2732            const char **stringlist;            const char **stringlist;
# Line 1692  while (!done) Line 2750  while (!done)
2750    
2751        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2752          {          {
2753          fprintf(outfile, "Partial match\n");          if (markptr == NULL) fprintf(outfile, "Partial match");
2754              else fprintf(outfile, "Partial match, mark=%s", markptr);
2755            if (use_size_offsets > 1)
2756              {
2757              fprintf(outfile, ": ");
2758              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2759                outfile);
2760              }
2761            fprintf(outfile, "\n");
2762          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2763          }          }
2764    
2765        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2766        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2767        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2768        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2769        offset values to achieve this. We won't be at the end of the string -  
2770        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2771          "anycrlf". If the previous match was at the end of a line terminated by
2772          CRLF, an advance of one character just passes the \r, whereas we should
2773          prefer the longer newline sequence, as does the code in pcre_exec().
2774          Fudge the offset value to achieve this.
2775    
2776          Otherwise, in the case of UTF-8 matching, the advance must be one
2777          character, not one byte. */
2778    
2779        else        else
2780          {          {
2781          if (g_notempty != 0)          if (g_notempty != 0)
2782            {            {
2783            int onechar = 1;            int onechar = 1;
2784              unsigned int obits = ((real_pcre *)re)->options;
2785            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2786            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2787                {
2788                int d;
2789                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2790                /* Note that these values are always the ASCII ones, even in
2791                EBCDIC environments. CR = 13, NL = 10. */
2792                obits = (d == 13)? PCRE_NEWLINE_CR :
2793                        (d == 10)? PCRE_NEWLINE_LF :
2794                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2795                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2796                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2797                }
2798              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2799                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2800                  &&
2801                  start_offset < len - 1 &&
2802                  bptr[start_offset] == '\r' &&
2803                  bptr[start_offset+1] == '\n')
2804                onechar++;
2805              else if (use_utf8)
2806              {              {
2807              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2808                {                {
# Line 1725  while (!done) Line 2818  while (!done)
2818            {            {
2819            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2820              {              {
2821              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2822                  {
2823                  if (markptr == NULL) fprintf(outfile, "No match\n");
2824                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2825                  }
2826              }              }
2827            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2828            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 1737  while (!done) Line 2834  while (!done)
2834        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2835    
2836        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2837        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2838        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
2839        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2840        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2841        character. */        character. */
2842    
2843        g_notempty = 0;        g_notempty = 0;
2844    
2845        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2846          {          {
2847          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2848          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2849          }          }
2850    
2851        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1762  while (!done) Line 2860  while (!done)
2860          len -= use_offsets[1];          len -= use_offsets[1];
2861          }          }
2862        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2863    
2864        NEXT_DATA: continue;
2865      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2866    
2867    CONTINUE:    CONTINUE:
# Line 1770  while (!done) Line 2870  while (!done)
2870    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2871  #endif  #endif
2872    
2873    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2874    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2875    if (tables != NULL)    if (locale_set)
2876      {      {
2877      free((void *)tables);      new_free((void *)tables);
2878      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2879        locale_set = 0;
2880      }      }
2881    }    }
2882    
2883  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2884  return 0;  
2885    EXIT:
2886    
2887    if (infile != NULL && infile != stdin) fclose(infile);
2888    if (outfile != NULL && outfile != stdout) fclose(outfile);
2889    
2890    free(buffer);
2891    free(dbuffer);
2892    free(pbuffer);
2893    free(offsets);
2894    
2895    return yield;
2896  }  }
2897    
2898  /* End */  /* End of pcretest.c */

Legend:
Removed from v.75  
changed lines
  Added in v.553

  ViewVC Help
Powered by ViewVC 1.1.5