/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 553 by ph10, Fri Oct 22 15:57:50 2010 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    #else
83    #include <sys/time.h>          /* These two includes are needed */
84    #include <sys/resource.h>      /* for setrlimit(). */
85    #define INPUT_MODE   "rb"
86    #define OUTPUT_MODE  "wb"
87    #endif
88    
 /* We need the internal info for displaying the results of pcre_study() and  
 other internal data; pcretest also uses some of the fixed tables, and generally  
 has "inside information" compared to a program that strictly follows the PCRE  
 API. */  
89    
90    /* We have to include pcre_internal.h because we need the internal info for
91    displaying the results of pcre_study() and we also need to know about the
92    internal macros, structures, and other internal data values; pcretest has
93    "inside information" compared to a program that strictly follows the PCRE API.
94    
95    Although pcre_internal.h does itself include pcre.h, we explicitly include it
96    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97    appropriately for an application, not for building PCRE. */
98    
99    #include "pcre.h"
100  #include "pcre_internal.h"  #include "pcre_internal.h"
101    
102    /* We need access to some of the data tables that PCRE uses. So as not to have
103    to keep two copies, we include the source file here, changing the names of the
104    external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107    #define _pcre_utf8_table1      utf8_table1
108    #define _pcre_utf8_table1_size utf8_table1_size
109    #define _pcre_utf8_table2      utf8_table2
110    #define _pcre_utf8_table3      utf8_table3
111    #define _pcre_utf8_table4      utf8_table4
112    #define _pcre_utt              utt
113    #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115    #define _pcre_OP_lengths       OP_lengths
116    
117    #include "pcre_tables.c"
118    
119    /* We also need the pcre_printint() function for printing out compiled
120    patterns. This function is in a separate file so that it can be included in
121    pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
123    
124    #define COMPILING_PCRETEST
125    #include "pcre_printint.src"
126    
127    /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 140  Makefile. */
140  #include "pcreposix.h"  #include "pcreposix.h"
141  #endif  #endif
142    
143    /* It is also possible, for the benefit of the version currently imported into
144    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145    interface to the DFA matcher (NODFA), and without the doublecheck of the old
146    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147    UTF8 support if PCRE is built without it. */
148    
149    #ifndef SUPPORT_UTF8
150    #ifndef NOUTF8
151    #define NOUTF8
152    #endif
153    #endif
154    
155    
156    /* Other parameters */
157    
158  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
159  #ifdef CLK_TCK  #ifdef CLK_TCK
160  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 70  Makefile. */ Line 163  Makefile. */
163  #endif  #endif
164  #endif  #endif
165    
166  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
167    
168  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
169    
170    /* Static variables */
171    
172  static FILE *outfile;  static FILE *outfile;
173  static int log_store = 0;  static int log_store = 0;
# Line 83  static int callout_count; Line 175  static int callout_count;
175  static int callout_extra;  static int callout_extra;
176  static int callout_fail_count;  static int callout_fail_count;
177  static int callout_fail_id;  static int callout_fail_id;
178    static int debug_lengths;
179  static int first_callout;  static int first_callout;
180    static int locale_set = 0;
181  static int show_malloc;  static int show_malloc;
182  static int use_utf8;  static int use_utf8;
183  static size_t gotten_store;  static size_t gotten_store;
184    
185    /* The buffers grow automatically if very long input lines are encountered. */
186    
187    static int buffer_size = 50000;
188    static uschar *buffer = NULL;
189    static uschar *dbuffer = NULL;
190  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
191    
192    
193    /*************************************************
194    *         Alternate character tables             *
195    *************************************************/
196    
197    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198    using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200    the L (locale) option also adjusts the tables. */
201    
202    /* This is the set of tables distributed as default with PCRE. It recognizes
203    only ASCII characters. */
204    
205    static const unsigned char tables0[] = {
206    
207    /* This table is a lower casing table. */
208    
209        0,  1,  2,  3,  4,  5,  6,  7,
210        8,  9, 10, 11, 12, 13, 14, 15,
211       16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515    };
516    
517    
518    
519    /*************************************************
520    *        Read or extend an input line            *
521    *************************************************/
522    
523    /* Input lines are read into buffer, but both patterns and data lines can be
524    continued over multiple input lines. In addition, if the buffer fills up, we
525    want to automatically expand it so as to be able to handle extremely large
526    lines that are needed for certain stress tests. When the input buffer is
527    expanded, the other two buffers must also be expanded likewise, and the
528    contents of pbuffer, which are a copy of the input for callouts, must be
529    preserved (for when expansion happens for a data line). This is not the most
530    optimal way of handling this, but hey, this is just a test program!
531    
532    Arguments:
533      f            the file to read
534      start        where in buffer to start (this *must* be within buffer)
535      prompt       for stdin or readline()
536    
537    Returns:       pointer to the start of new data
538                   could be a copy of start, or could be moved
539                   NULL if no data read and EOF reached
540    */
541    
542    static uschar *
543    extend_inputline(FILE *f, uschar *start, const char *prompt)
544    {
545    uschar *here = start;
546    
547    for (;;)
548      {
549      int rlen = (int)(buffer_size - (here - buffer));
550    
551      if (rlen > 1000)
552        {
553        int dlen;
554    
555        /* If libreadline support is required, use readline() to read a line if the
556        input is a terminal. Note that readline() removes the trailing newline, so
557        we must put it back again, to be compatible with fgets(). */
558    
559    #ifdef SUPPORT_LIBREADLINE
560        if (isatty(fileno(f)))
561          {
562          size_t len;
563          char *s = readline(prompt);
564          if (s == NULL) return (here == start)? NULL : start;
565          len = strlen(s);
566          if (len > 0) add_history(s);
567          if (len > rlen - 1) len = rlen - 1;
568          memcpy(here, s, len);
569          here[len] = '\n';
570          here[len+1] = 0;
571          free(s);
572          }
573        else
574    #endif
575    
576        /* Read the next line by normal means, prompting if the file is stdin. */
577    
578          {
579          if (f == stdin) printf("%s", prompt);
580          if (fgets((char *)here, rlen,  f) == NULL)
581            return (here == start)? NULL : start;
582          }
583    
584        dlen = (int)strlen((char *)here);
585        if (dlen > 0 && here[dlen - 1] == '\n') return start;
586        here += dlen;
587        }
588    
589      else
590        {
591        int new_buffer_size = 2*buffer_size;
592        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
593        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
594        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
595    
596        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
597          {
598          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
599          exit(1);
600          }
601    
602        memcpy(new_buffer, buffer, buffer_size);
603        memcpy(new_pbuffer, pbuffer, buffer_size);
604    
605        buffer_size = new_buffer_size;
606    
607        start = new_buffer + (start - buffer);
608        here = new_buffer + (here - buffer);
609    
610        free(buffer);
611        free(dbuffer);
612        free(pbuffer);
613    
614        buffer = new_buffer;
615        dbuffer = new_dbuffer;
616        pbuffer = new_pbuffer;
617        }
618      }
619    
620    return NULL;  /* Control never gets here */
621    }
622    
623    
624    
625    
626    
627    
628    
629  /*************************************************  /*************************************************
630  *          Read number from string               *  *          Read number from string               *
# Line 98  static uschar *pbuffer = NULL; Line 632  static uschar *pbuffer = NULL;
632    
633  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
634  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
635  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
636    
637  Arguments:  Arguments:
638    str           string to be converted    str           string to be converted
# Line 128  return(result); Line 662  return(result);
662  and returns the value of the character.  and returns the value of the character.
663    
664  Argument:  Argument:
665    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
666    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
667    
668  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
669             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
670  */  */
671    
672    #if !defined NOUTF8
673    
674  static int  static int
675  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
676  {  {
677  int c = *buffer++;  int c = *utf8bytes++;
678  int d = c;  int d = c;
679  int i, j, s;  int i, j, s;
680    
# Line 154  if (i == 0 || i == 6) return 0;        / Line 690  if (i == 0 || i == 6) return 0;        /
690  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
691    
692  s = 6*i;  s = 6*i;
693  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
694    
695  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
696    {    {
697    c = *buffer++;    c = *utf8bytes++;
698    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
699    s -= 6;    s -= 6;
700    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 166  for (j = 0; j < i; j++) Line 702  for (j = 0; j < i; j++)
702    
703  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
704    
705  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
706    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
707  if (j != i) return -(i+1);  if (j != i) return -(i+1);
708    
709  /* Valid value */  /* Valid value */
# Line 176  if (j != i) return -(i+1); Line 712  if (j != i) return -(i+1);
712  return i+1;  return i+1;
713  }  }
714    
715    #endif
716    
717    
718    
719    /*************************************************
720    *       Convert character value to UTF-8         *
721    *************************************************/
722    
723    /* This function takes an integer value in the range 0 - 0x7fffffff
724    and encodes it as a UTF-8 character in 1 to 6 bytes.
725    
726    Arguments:
727      cvalue     the character value
728      utf8bytes  pointer to buffer for result - at least 6 bytes long
729    
730    Returns:     number of bytes placed in the buffer
731    */
732    
733    #if !defined NOUTF8
734    
735    static int
736    ord2utf8(int cvalue, uschar *utf8bytes)
737    {
738    register int i, j;
739    for (i = 0; i < utf8_table1_size; i++)
740      if (cvalue <= utf8_table1[i]) break;
741    utf8bytes += i;
742    for (j = i; j > 0; j--)
743     {
744     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
745     cvalue >>= 6;
746     }
747    *utf8bytes = utf8_table2[i] | cvalue;
748    return i + 1;
749    }
750    
751    #endif
752    
753    
754    
755  /*************************************************  /*************************************************
# Line 188  chars without printing. */ Line 762  chars without printing. */
762    
763  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
764  {  {
765  int c;  int c = 0;
766  int yield = 0;  int yield = 0;
767    
768  while (length-- > 0)  while (length-- > 0)
769    {    {
770    #if !defined NOUTF8
771    if (use_utf8)    if (use_utf8)
772      {      {
773      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 201  while (length-- > 0) Line 776  while (length-- > 0)
776        {        {
777        length -= rc - 1;        length -= rc - 1;
778        p += rc;        p += rc;
779        if (c < 256 && isprint(c))        if (PRINTHEX(c))
780          {          {
781          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
782          yield++;          yield++;
783          }          }
784        else        else
785          {          {
786          int n;          int n = 4;
787          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
788          yield += n;          yield += (n <= 0x000000ff)? 2 :
789                     (n <= 0x00000fff)? 3 :
790                     (n <= 0x0000ffff)? 4 :
791                     (n <= 0x000fffff)? 5 : 6;
792          }          }
793        continue;        continue;
794        }        }
795      }      }
796    #endif
797    
798     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
799    
800    if (isprint(c = *(p++)))    c = *p++;
801      if (PRINTHEX(c))
802      {      {
803      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
804      yield++;      yield++;
# Line 392  if ((rc = pcre_fullinfo(re, study, optio Line 972  if ((rc = pcre_fullinfo(re, study, optio
972  *         Byte flipping function                 *  *         Byte flipping function                 *
973  *************************************************/  *************************************************/
974    
975  static long int  static unsigned long int
976  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
977  {  {
978  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
979  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 406  return ((value & 0x000000ff) << 24) | Line 986  return ((value & 0x000000ff) << 24) |
986    
987    
988  /*************************************************  /*************************************************
989    *        Check match or recursion limit          *
990    *************************************************/
991    
992    static int
993    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
994      int start_offset, int options, int *use_offsets, int use_size_offsets,
995      int flag, unsigned long int *limit, int errnumber, const char *msg)
996    {
997    int count;
998    int min = 0;
999    int mid = 64;
1000    int max = -1;
1001    
1002    extra->flags |= flag;
1003    
1004    for (;;)
1005      {
1006      *limit = mid;
1007    
1008      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1009        use_offsets, use_size_offsets);
1010    
1011      if (count == errnumber)
1012        {
1013        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1014        min = mid;
1015        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1016        }
1017    
1018      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1019                             count == PCRE_ERROR_PARTIAL)
1020        {
1021        if (mid == min + 1)
1022          {
1023          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1024          break;
1025          }
1026        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1027        max = mid;
1028        mid = (min + mid)/2;
1029        }
1030      else break;    /* Some other error */
1031      }
1032    
1033    extra->flags &= ~flag;
1034    return count;
1035    }
1036    
1037    
1038    
/*************************************************
*         Case-independent strncmp() function    *
*************************************************/

/* Compares up to n characters of two strings, folding case with tolower().
Like strncmp(), the comparison stops early when both strings end, so strings
shorter than n are never read beyond their terminating zero. A count that is
zero or negative compares nothing and yields 0.

Arguments:
  s         first string
  t         second string
  n         number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(uschar *s, uschar *t, int n)
{
for (; n > 0; n--, s++, t++)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;
  if (*s == 0) break;    /* Equal so far and both ended: stop here */
  }
return 0;
}
1062    
1063    
1064    
1065    /*************************************************
1066    *         Check newline indicator                *
1067    *************************************************/
1068    
1069    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1070    a message and return 0 if there is no match.
1071    
1072    Arguments:
1073      p           points after the leading '<'
1074      f           file for error message
1075    
1076    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1077    */
1078    
1079    static int
1080    check_newline(uschar *p, FILE *f)
1081    {
1082    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1083    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1084    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1085    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1086    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1087    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1088    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1089    fprintf(f, "Unknown newline type at: <%s\n", p);
1090    return 0;
1091    }
1092    
1093    
1094    
/*************************************************
*             Usage function                     *
*************************************************/

/* Writes the command-line help text to stdout. The lines describing
readline(), -dfa and -p depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX
respectively; everything else is always output.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
FILE *out = stdout;

fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", out);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", out);
#else
fputs("This version of pcretest is not linked with readline().\n", out);
#endif
fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n", out);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", out);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", out);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", out);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n", out);
}
1132    
1133    
1134    
1135    /*************************************************
1136  *                Main Program                    *  *                Main Program                    *
1137  *************************************************/  *************************************************/
1138    
# Line 418  int main(int argc, char **argv) Line 1145  int main(int argc, char **argv)
1145  FILE *infile = stdin;  FILE *infile = stdin;
1146  int options = 0;  int options = 0;
1147  int study_options = 0;  int study_options = 0;
1148    int default_find_match_limit = FALSE;
1149  int op = 1;  int op = 1;
1150  int timeit = 0;  int timeit = 0;
1151    int timeitm = 0;
1152  int showinfo = 0;  int showinfo = 0;
1153  int showstore = 0;  int showstore = 0;
1154    int quiet = 0;
1155  int size_offsets = 45;  int size_offsets = 45;
1156  int size_offsets_max;  int size_offsets_max;
1157  int *offsets = NULL;  int *offsets = NULL;
# Line 432  int debug = 0; Line 1162  int debug = 0;
1162  int done = 0;  int done = 0;
1163  int all_use_dfa = 0;  int all_use_dfa = 0;
1164  int yield = 0;  int yield = 0;
1165    int stack_size;
1166    
1167    /* These vectors store, end-to-end, a list of captured substring names. Assume
1168    that 1024 is plenty long enough for the few names we'll be testing. */
1169    
1170    uschar copynames[1024];
1171    uschar getnames[1024];
1172    
1173  unsigned char *buffer;  uschar *copynamesptr;
1174  unsigned char *dbuffer;  uschar *getnamesptr;
1175    
1176  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1177  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
1178    
1179  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
1180  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
1181  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1182    
1183  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
1184    
1185  outfile = stdout;  outfile = stdout;
1186    
1187    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1188    library to translate CRLF into a single LF character. At least, that's what
1189    I've been told: never having used Windows I take this all on trust. Originally
1190    it set 0x8000, but then I was advised that _O_BINARY was better. */
1191    
1192    #if defined(_WIN32) || defined(WIN32)
1193    _setmode( _fileno( stdout ), _O_BINARY );
1194    #endif
1195    
1196  /* Scan options */  /* Scan options */
1197    
1198  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 461  while (argc > 1 && argv[op][0] == '-') Line 1201  while (argc > 1 && argv[op][0] == '-')
1201    
1202    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1203      showstore = 1;      showstore = 1;
1204    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1205      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1206    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1207    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1208      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1209    #if !defined NODFA
1210    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1211    #endif
1212    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1213        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1214          *endptr == 0))          *endptr == 0))
# Line 472  while (argc > 1 && argv[op][0] == '-') Line 1216  while (argc > 1 && argv[op][0] == '-')
1216      op++;      op++;
1217      argc--;      argc--;
1218      }      }
1219      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1220        {
1221        int both = argv[op][2] == 0;
1222        int temp;
1223        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1224                         *endptr == 0))
1225          {
1226          timeitm = temp;
1227          op++;
1228          argc--;
1229          }
1230        else timeitm = LOOPREPEAT;
1231        if (both) timeit = timeitm;
1232        }
1233      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1234          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1235            *endptr == 0))
1236        {
1237    #if defined(_WIN32) || defined(WIN32)
1238        printf("PCRE: -S not supported on this OS\n");
1239        exit(1);
1240    #else
1241        int rc;
1242        struct rlimit rlim;
1243        getrlimit(RLIMIT_STACK, &rlim);
1244        rlim.rlim_cur = stack_size * 1024 * 1024;
1245        rc = setrlimit(RLIMIT_STACK, &rlim);
1246        if (rc != 0)
1247          {
1248        printf("PCRE: setrlimit() failed with error %d\n", rc);
1249        exit(1);
1250          }
1251        op++;
1252        argc--;
1253    #endif
1254        }
1255  #if !defined NOPOSIX  #if !defined NOPOSIX
1256    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1257  #endif  #endif
1258    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1259      {      {
1260      int rc;      int rc;
1261        unsigned long int lrc;
1262      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1263      printf("Compiled with\n");      printf("Compiled with\n");
1264      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 485  while (argc > 1 && argv[op][0] == '-') Line 1266  while (argc > 1 && argv[op][0] == '-')
1266      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1267      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1268      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1269      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1270        in EBCDIC environments. CR is 13 and NL is 10. */
1271        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1272          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1273          (rc == -2)? "ANYCRLF" :
1274          (rc == -1)? "ANY" : "???");
1275        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1276        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1277                                         "all Unicode newlines");
1278      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1279      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1280      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1281      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1282      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1283      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1284        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1285        printf("  Default recursion depth limit = %ld\n", lrc);
1286      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1287      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1288      exit(0);      goto EXIT;
1289        }
1290      else if (strcmp(argv[op], "-help") == 0 ||
1291               strcmp(argv[op], "--help") == 0)
1292        {
1293        usage();
1294        goto EXIT;
1295      }      }
1296    else    else
1297      {      {
1298      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1299      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
     printf("  -dfa   force DFA matching for all subjects\n");  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
1300      yield = 1;      yield = 1;
1301      goto EXIT;      goto EXIT;
1302      }      }
# Line 525  offsets = (int *)malloc(size_offsets_max Line 1311  offsets = (int *)malloc(size_offsets_max
1311  if (offsets == NULL)  if (offsets == NULL)
1312    {    {
1313    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1314      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1315    yield = 1;    yield = 1;
1316    goto EXIT;    goto EXIT;
1317    }    }
# Line 534  if (offsets == NULL) Line 1320  if (offsets == NULL)
1320    
1321  if (argc > 1)  if (argc > 1)
1322    {    {
1323    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1324    if (infile == NULL)    if (infile == NULL)
1325      {      {
1326      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 545  if (argc > 1) Line 1331  if (argc > 1)
1331    
1332  if (argc > 2)  if (argc > 2)
1333    {    {
1334    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1335    if (outfile == NULL)    if (outfile == NULL)
1336      {      {
1337      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 561  pcre_free = new_free; Line 1347  pcre_free = new_free;
1347  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1348  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1349    
1350  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1351    
1352  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1353    
1354  /* Main loop */  /* Main loop */
1355    
# Line 578  while (!done) Line 1364  while (!done)
1364  #endif  #endif
1365    
1366    const char *error;    const char *error;
1367      unsigned char *markptr;
1368    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1369    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1370    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1371    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1372    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1373      int do_mark = 0;
1374    int do_study = 0;    int do_study = 0;
1375    int do_debug = debug;    int do_debug = debug;
1376    int do_G = 0;    int do_G = 0;
# Line 590  while (!done) Line 1378  while (!done)
1378    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1379    int do_showrest = 0;    int do_showrest = 0;
1380    int do_flip = 0;    int do_flip = 0;
1381    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1382    
1383    use_utf8 = 0;    use_utf8 = 0;
1384      debug_lengths = 1;
1385    
1386    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1387    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1388    fflush(outfile);    fflush(outfile);
1389    
# Line 607  while (!done) Line 1395  while (!done)
1395    
1396    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1397      {      {
1398      unsigned long int magic;      unsigned long int magic, get_options;
1399      uschar sbuf[8];      uschar sbuf[8];
1400      FILE *f;      FILE *f;
1401    
# Line 655  while (!done) Line 1443  while (!done)
1443    
1444      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1445    
1446      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1447      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1448    
1449      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1450    
# Line 695  while (!done) Line 1483  while (!done)
1483    
1484    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1485      {      {
1486      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1487      goto SKIP_DATA;      goto SKIP_DATA;
1488      }      }
1489    
1490    pp = p;    pp = p;
1491      poffset = (int)(p - buffer);
1492    
1493    for(;;)    for(;;)
1494      {      {
# Line 710  while (!done) Line 1499  while (!done)
1499        pp++;        pp++;
1500        }        }
1501      if (*pp != 0) break;      if (*pp != 0) break;
1502        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1503        {        {
1504        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1505        done = 1;        done = 1;
# Line 728  while (!done) Line 1508  while (!done)
1508      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1509      }      }
1510    
1511      /* The buffer may have moved while being extended; reset the start of data
1512      pointer to the correct relative point in the buffer. */
1513    
1514      p = buffer + poffset;
1515    
1516    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1517    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1518    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 759  while (!done) Line 1544  while (!done)
1544    
1545        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1546        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1547          case 'B': do_debug = 1; break;
1548        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1549        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1550        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1551        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1552        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1553        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1554          case 'J': options |= PCRE_DUPNAMES; break;
1555          case 'K': do_mark = 1; break;
1556        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1557        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1558    
# Line 774  while (!done) Line 1562  while (!done)
1562    
1563        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1564        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1565          case 'W': options |= PCRE_UCP; break;
1566        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1567          case 'Z': debug_lengths = 0; break;
1568        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1569        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1570    
1571          case 'T':
1572          switch (*pp++)
1573            {
1574            case '0': tables = tables0; break;
1575            case '1': tables = tables1; break;
1576    
1577            case '\r':
1578            case '\n':
1579            case ' ':
1580            case 0:
1581            fprintf(outfile, "** Missing table number after /T\n");
1582            goto SKIP_DATA;
1583    
1584            default:
1585            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1586            goto SKIP_DATA;
1587            }
1588          break;
1589    
1590        case 'L':        case 'L':
1591        ppp = pp;        ppp = pp;
1592        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1593        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1594          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1595        *ppp = 0;        *ppp = 0;
1596        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1597          {          {
1598          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1599          goto SKIP_DATA;          goto SKIP_DATA;
1600          }          }
1601          locale_set = 1;
1602        tables = pcre_maketables();        tables = pcre_maketables();
1603        pp = ppp;        pp = ppp;
1604        break;        break;
# Line 799  while (!done) Line 1610  while (!done)
1610        *pp = 0;        *pp = 0;
1611        break;        break;
1612    
1613          case '<':
1614            {
1615            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1616              {
1617              options |= PCRE_JAVASCRIPT_COMPAT;
1618              pp += 3;
1619              }
1620            else
1621              {
1622              int x = check_newline(pp, outfile);
1623              if (x == 0) goto SKIP_DATA;
1624              options |= x;
1625              while (*pp++ != '>');
1626              }
1627            }
1628          break;
1629    
1630        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1631        case '\n':        case '\n':
1632        case ' ':        case ' ':
# Line 823  while (!done) Line 1651  while (!done)
1651      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1652      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1653      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1654        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1655        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1656        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1657        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1658    
1659      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1660    
1661      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 830  while (!done) Line 1663  while (!done)
1663    
1664      if (rc != 0)      if (rc != 0)
1665        {        {
1666        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1667        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1668        goto SKIP_DATA;        goto SKIP_DATA;
1669        }        }
# Line 842  while (!done) Line 1675  while (!done)
1675  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1676    
1677      {      {
1678      if (timeit)      unsigned long int get_options;
1679    
1680        if (timeit > 0)
1681        {        {
1682        register int i;        register int i;
1683        clock_t time_taken;        clock_t time_taken;
1684        clock_t start_time = clock();        clock_t start_time = clock();
1685        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1686          {          {
1687          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1688          if (re != NULL) free(re);          if (re != NULL) free(re);
1689          }          }
1690        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1691        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1692          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1693            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1694        }        }
1695    
# Line 871  while (!done) Line 1706  while (!done)
1706          {          {
1707          for (;;)          for (;;)
1708            {            {
1709            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1710              {              {
1711              done = 1;              done = 1;
1712              goto CONTINUE;              goto CONTINUE;
# Line 885  while (!done) Line 1720  while (!done)
1720        goto CONTINUE;        goto CONTINUE;
1721        }        }
1722    
1723      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1724      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1725      returns only limited data. Check that it agrees with the newer one. */      lines. */
1726    
1727        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1728        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1729    
1730        /* Print information if required. There are now two info-returning
1731        functions. The old one has a limited interface and returns only limited
1732        data. Check that it agrees with the newer one. */
1733    
1734      if (log_store)      if (log_store)
1735        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 906  while (!done) Line 1748  while (!done)
1748    
1749      if (do_study)      if (do_study)
1750        {        {
1751        if (timeit)        if (timeit > 0)
1752          {          {
1753          register int i;          register int i;
1754          clock_t time_taken;          clock_t time_taken;
1755          clock_t start_time = clock();          clock_t start_time = clock();
1756          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1757            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1758          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1759          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1760          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1761            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1762              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1763          }          }
1764        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 926  while (!done) Line 1768  while (!done)
1768          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1769        }        }
1770    
1771        /* If /K was present, we set up for handling MARK data. */
1772    
1773        if (do_mark)
1774          {
1775          if (extra == NULL)
1776            {
1777            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1778            extra->flags = 0;
1779            }
1780          extra->mark = &markptr;
1781          extra->flags |= PCRE_EXTRA_MARK;
1782          }
1783    
1784      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1785      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1786      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 934  while (!done) Line 1789  while (!done)
1789      if (do_flip)      if (do_flip)
1790        {        {
1791        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1792        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1793            byteflip(rre->magic_number, sizeof(rre->magic_number));
1794        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1795        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1796        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1797        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1798        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1799        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1800        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1801          rre->first_byte =
1802            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1803          rre->req_byte =
1804            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1805          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1806          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1807        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1808          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1809        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1810            sizeof(rre->name_count));
1811    
1812        if (extra != NULL)        if (extra != NULL)
1813          {          {
1814          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1815          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1816          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1817            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1818          }          }
1819        }        }
1820    
# Line 959  while (!done) Line 1822  while (!done)
1822    
1823      SHOW_INFO:      SHOW_INFO:
1824    
1825        if (do_debug)
1826          {
1827          fprintf(outfile, "------------------------------------------------------------------\n");
1828          pcre_printint(re, outfile, debug_lengths);
1829          }
1830    
1831        /* We already have the options in get_options (see above) */
1832    
1833      if (do_showinfo)      if (do_showinfo)
1834        {        {
1835        unsigned long int get_options, all_options;        unsigned long int all_options;
1836    #if !defined NOINFOCHECK
1837        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1838        int count, backrefmax, first_char, need_char;  #endif
1839          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1840            hascrorlf;
1841        int nameentrysize, namecount;        int nameentrysize, namecount;
1842        const uschar *nametable;        const uschar *nametable;
1843    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         _pcre_printint(re, outfile);  
         }  
   
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1844        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1845        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1846        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 982  while (!done) Line 1849  while (!done)
1849        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1850        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1851        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1852          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1853          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1854          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1855    
1856    #if !defined NOINFOCHECK
1857        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1858        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1859          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1000  while (!done) Line 1871  while (!done)
1871            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1872              get_options, old_options);              get_options, old_options);
1873          }          }
1874    #endif
1875    
1876        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1877          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1021  while (!done) Line 1893  while (!done)
1893            }            }
1894          }          }
1895    
1896        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1897        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1898    
1899        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1900        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1901    
1902        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1903          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1904            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1905            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1906            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1907            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1908            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1909            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1910              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1911              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1912            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1913            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1914            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1915              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1916            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1917            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1918              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1919              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1920    
1921          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1922    
1923          switch (get_options & PCRE_NEWLINE_BITS)
1924            {
1925            case PCRE_NEWLINE_CR:
1926            fprintf(outfile, "Forced newline sequence: CR\n");
1927            break;
1928    
1929            case PCRE_NEWLINE_LF:
1930            fprintf(outfile, "Forced newline sequence: LF\n");
1931            break;
1932    
1933            case PCRE_NEWLINE_CRLF:
1934            fprintf(outfile, "Forced newline sequence: CRLF\n");
1935            break;
1936    
1937            case PCRE_NEWLINE_ANYCRLF:
1938            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1939            break;
1940    
1941            case PCRE_NEWLINE_ANY:
1942            fprintf(outfile, "Forced newline sequence: ANY\n");
1943            break;
1944    
1945        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          default:
1946          fprintf(outfile, "Case state changes\n");          break;
1947            }
1948    
1949        if (first_char == -1)        if (first_char == -1)
1950          {          {
1951          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1952          }          }
1953        else if (first_char < 0)        else if (first_char < 0)
1954          {          {
# Line 1063  while (!done) Line 1959  while (!done)
1959          int ch = first_char & 255;          int ch = first_char & 255;
1960          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1961            "" : " (caseless)";            "" : " (caseless)";
1962          if (isprint(ch))          if (PRINTHEX(ch))
1963            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1964          else          else
1965            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1078  while (!done) Line 1974  while (!done)
1974          int ch = need_char & 255;          int ch = need_char & 255;
1975          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1976            "" : " (caseless)";            "" : " (caseless)";
1977          if (isprint(ch))          if (PRINTHEX(ch))
1978            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1979          else          else
1980            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1096  while (!done) Line 1992  while (!done)
1992          else          else
1993            {            {
1994            uschar *start_bits = NULL;            uschar *start_bits = NULL;
1995            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
1996    
1997              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1998              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1999    
2000              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2001            if (start_bits == NULL)            if (start_bits == NULL)
2002              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2003            else            else
2004              {              {
2005              int i;              int i;
# Line 1114  while (!done) Line 2014  while (!done)
2014                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2015                    c = 2;                    c = 2;
2016                    }                    }
2017                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
2018                    {                    {
2019                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2020                    c += 2;                    c += 2;
# Line 1146  while (!done) Line 2046  while (!done)
2046        else        else
2047          {          {
2048          uschar sbuf[8];          uschar sbuf[8];
2049          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
2050          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
2051          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
2052          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
2053    
2054          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2055          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2056          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2057          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
2058    
2059          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2060              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1173  while (!done) Line 2073  while (!done)
2073                  strerror(errno));                  strerror(errno));
2074                }                }
2075              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
2076    
2077              }              }
2078            }            }
2079          fclose(f);          fclose(f);
# Line 1180  while (!done) Line 2081  while (!done)
2081    
2082        new_free(re);        new_free(re);
2083        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2084        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2085            {
2086            new_free((void *)tables);
2087            setlocale(LC_CTYPE, "C");
2088            locale_set = 0;
2089            }
2090        continue;  /* With next regex */        continue;  /* With next regex */
2091        }        }
2092      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1189  while (!done) Line 2095  while (!done)
2095    
2096    for (;;)    for (;;)
2097      {      {
2098      unsigned char *q;      uschar *q;
2099      unsigned char *bptr = dbuffer;      uschar *bptr;
2100      int *use_offsets = offsets;      int *use_offsets = offsets;
2101      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2102      int callout_data = 0;      int callout_data = 0;
2103      int callout_data_set = 0;      int callout_data_set = 0;
2104      int count, c;      int count, c;
2105      int copystrings = 0;      int copystrings = 0;
2106      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2107      int getstrings = 0;      int getstrings = 0;
2108      int getlist = 0;      int getlist = 0;
2109      int gmatched = 0;      int gmatched = 0;
# Line 1207  while (!done) Line 2113  while (!done)
2113    
2114      options = 0;      options = 0;
2115    
2116        *copynames = 0;
2117        *getnames = 0;
2118    
2119        copynamesptr = copynames;
2120        getnamesptr = getnames;
2121    
2122      pcre_callout = callout;      pcre_callout = callout;
2123      first_callout = 1;      first_callout = 1;
2124      callout_extra = 0;      callout_extra = 0;
# Line 1215  while (!done) Line 2127  while (!done)
2127      callout_fail_id = -1;      callout_fail_id = -1;
2128      show_malloc = 0;      show_malloc = 0;
2129    
2130      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2131      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2132    
2133        len = 0;
2134        for (;;)
2135        {        {
2136        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2137        goto CONTINUE;          {
2138            if (len > 0)    /* Reached EOF without hitting a newline */
2139              {
2140              fprintf(outfile, "\n");
2141              break;
2142              }
2143            done = 1;
2144            goto CONTINUE;
2145            }
2146          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2147          len = (int)strlen((char *)buffer);
2148          if (buffer[len-1] == '\n') break;
2149        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2150    
     len = (int)strlen((char *)buffer);  
2151      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2152      buffer[len] = 0;      buffer[len] = 0;
2153      if (len == 0) break;      if (len == 0) break;
# Line 1231  while (!done) Line 2155  while (!done)
2155      p = buffer;      p = buffer;
2156      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2157    
2158      q = dbuffer;      bptr = q = dbuffer;
2159      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2160        {        {
2161        int i = 0;        int i = 0;
# Line 1253  while (!done) Line 2177  while (!done)
2177          c -= '0';          c -= '0';
2178          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2179            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2180    
2181    #if !defined NOUTF8
2182            if (use_utf8 && c > 255)
2183              {
2184              unsigned char buff8[8];
2185              int ii, utn;
2186              utn = ord2utf8(c, buff8);
2187              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2188              c = buff8[ii];   /* Last byte */
2189              }
2190    #endif
2191          break;          break;
2192    
2193          case 'x':          case 'x':
2194    
2195          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2196    
2197    #if !defined NOUTF8
2198          if (*p == '{')          if (*p == '{')
2199            {            {
2200            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1269  while (!done) Line 2205  while (!done)
2205              {              {
2206              unsigned char buff8[8];              unsigned char buff8[8];
2207              int ii, utn;              int ii, utn;
2208              utn = _pcre_ord2utf8(c, buff8);              if (use_utf8)
2209              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2210              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2211                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2212                  c = buff8[ii];   /* Last byte */
2213                  }
2214                else
2215                 {
2216                 if (c > 255)
2217                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2218                     "UTF-8 mode is not enabled.\n"
2219                     "** Truncation will probably give the wrong result.\n", c);
2220                 }
2221              p = pt + 1;              p = pt + 1;
2222              break;              break;
2223              }              }
2224            /* Not correct form; fall through */            /* Not correct form; fall through */
2225            }            }
2226    #endif
2227    
2228          /* Ordinary \x */          /* Ordinary \x */
2229    
# Line 1312  while (!done) Line 2259  while (!done)
2259            }            }
2260          else if (isalnum(*p))          else if (isalnum(*p))
2261            {            {
2262            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
2263            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2264              *npp++ = 0;
2265            *npp = 0;            *npp = 0;
2266            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2267            if (n < 0)            if (n < 0)
2268              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2269            else copystrings |= 1 << n;            copynamesptr = npp;
2270            }            }
2271          else if (*p == '+')          else if (*p == '+')
2272            {            {
# Line 1357  while (!done) Line 2304  while (!done)
2304            }            }
2305          continue;          continue;
2306    
2307    #if !defined NODFA
2308          case 'D':          case 'D':
2309    #if !defined NOPOSIX
2310          if (posix || do_posix)          if (posix || do_posix)
2311            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2312          else          else
2313    #endif
2314            use_dfa = 1;            use_dfa = 1;
2315          continue;          continue;
2316    #endif
2317    
2318    #if !defined NODFA
2319          case 'F':          case 'F':
2320          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2321          continue;          continue;
2322    #endif
2323    
2324          case 'G':          case 'G':
2325          if (isdigit(*p))          if (isdigit(*p))
# Line 1376  while (!done) Line 2329  while (!done)
2329            }            }
2330          else if (isalnum(*p))          else if (isalnum(*p))
2331            {            {
2332            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
2333            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2334              *npp++ = 0;
2335            *npp = 0;            *npp = 0;
2336            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2337            if (n < 0)            if (n < 0)
2338              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2339            else getstrings |= 1 << n;            getnamesptr = npp;
2340            }            }
2341          continue;          continue;
2342    
# Line 1396  while (!done) Line 2349  while (!done)
2349          continue;          continue;
2350    
2351          case 'N':          case 'N':
2352          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2353              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2354            else
2355              options |= PCRE_NOTEMPTY;
2356          continue;          continue;
2357    
2358          case 'O':          case 'O':
# Line 1409  while (!done) Line 2365  while (!done)
2365            if (offsets == NULL)            if (offsets == NULL)
2366              {              {
2367              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2368                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2369              yield = 1;              yield = 1;
2370              goto EXIT;              goto EXIT;
2371              }              }
# Line 1419  while (!done) Line 2375  while (!done)
2375          continue;          continue;
2376    
2377          case 'P':          case 'P':
2378          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2379              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2380            continue;
2381    
2382            case 'Q':
2383            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2384            if (extra == NULL)
2385              {
2386              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2387              extra->flags = 0;
2388              }
2389            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2390            extra->match_limit_recursion = n;
2391            continue;
2392    
2393            case 'q':
2394            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2395            if (extra == NULL)
2396              {
2397              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2398              extra->flags = 0;
2399              }
2400            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2401            extra->match_limit = n;
2402          continue;          continue;
2403    
2404    #if !defined NODFA
2405          case 'R':          case 'R':
2406          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
2407          continue;          continue;
2408    #endif
2409    
2410          case 'S':          case 'S':
2411          show_malloc = 1;          show_malloc = 1;
2412          continue;          continue;
2413    
2414            case 'Y':
2415            options |= PCRE_NO_START_OPTIMIZE;
2416            continue;
2417    
2418          case 'Z':          case 'Z':
2419          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2420          continue;          continue;
# Line 1437  while (!done) Line 2422  while (!done)
2422          case '?':          case '?':
2423          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2424          continue;          continue;
2425    
2426            case '<':
2427              {
2428              int x = check_newline(p, outfile);
2429              if (x == 0) goto NEXT_DATA;
2430              options |= x;
2431              while (*p++ != '>');
2432              }
2433            continue;
2434          }          }
2435        *q++ = c;        *q++ = c;
2436        }        }
2437      *q = 0;      *q = 0;
2438      len = q - dbuffer;      len = (int)(q - dbuffer);
2439    
2440        /* Move the data to the end of the buffer so that a read over the end of
2441        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2442        we are using the POSIX interface, we must include the terminating zero. */
2443    
2444    #if !defined NOPOSIX
2445        if (posix || do_posix)
2446          {
2447          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2448          bptr += buffer_size - len - 1;
2449          }
2450        else
2451    #endif
2452          {
2453          memmove(bptr + buffer_size - len, bptr, len);
2454          bptr += buffer_size - len;
2455          }
2456    
2457      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2458        {        {
# Line 1462  while (!done) Line 2473  while (!done)
2473          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2474        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2475        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2476          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2477    
2478        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2479    
2480        if (rc != 0)        if (rc != 0)
2481          {          {
2482          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2483          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2484          }          }
2485          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2486                  != 0)
2487            {
2488            fprintf(outfile, "Matched with REG_NOSUB\n");
2489            }
2490        else        else
2491          {          {
2492          size_t i;          size_t i;
# Line 1501  while (!done) Line 2518  while (!done)
2518    
2519      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2520        {        {
2521        if (timeit)        markptr = NULL;
2522    
2523          if (timeitm > 0)
2524          {          {
2525          register int i;          register int i;
2526          clock_t time_taken;          clock_t time_taken;
2527          clock_t start_time = clock();          clock_t start_time = clock();
2528    
2529    #if !defined NODFA
2530          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2531            {            {
2532            int workspace[1000];            int workspace[1000];
2533            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2534              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2535                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2536                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2537            }            }
2538          else          else
2539    #endif
2540    
2541          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2542            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2543              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2544    
2545          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2546          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2547            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2548              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2549          }          }
2550    
2551        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2552        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2553          for the recursion limit. */
2554    
2555        if (find_match_limit)        if (find_match_limit)
2556          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2557          if (extra == NULL)          if (extra == NULL)
2558            {            {
2559            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2560            extra->flags = 0;            extra->flags = 0;
2561            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2562    
2563          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2564              options|g_notempty, use_offsets, use_size_offsets,
2565              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2566              PCRE_ERROR_MATCHLIMIT, "match()");
2567    
2568            count = check_match_limit(re, extra, bptr, len, start_offset,
2569              options|g_notempty, use_offsets, use_size_offsets,
2570              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2571              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2572          }          }
2573    
2574        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1591  while (!done) Line 2590  while (!done)
2590        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2591        value of match_limit. */        value of match_limit. */
2592    
2593    #if !defined NODFA
2594        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2595          {          {
2596          int workspace[1000];          int workspace[1000];
2597          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2598            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2599            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2600          if (count == 0)          if (count == 0)
# Line 1603  while (!done) Line 2603  while (!done)
2603            count = use_size_offsets/2;            count = use_size_offsets/2;
2604            }            }
2605          }          }
2606    #endif
2607    
2608        else        else
2609          {          {
# Line 1619  while (!done) Line 2620  while (!done)
2620    
2621        if (count >= 0)        if (count >= 0)
2622          {          {
2623          int i;          int i, maxcount;
2624    
2625    #if !defined NODFA
2626            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2627    #endif
2628              maxcount = use_size_offsets/3;
2629    
2630            /* This is a check against a lunatic return value. */
2631    
2632            if (count > maxcount)
2633              {
2634              fprintf(outfile,
2635                "** PCRE error: returned count %d is too big for offset size %d\n",
2636                count, use_size_offsets);
2637              count = use_size_offsets/3;
2638              if (do_g || do_G)
2639                {
2640                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2641                do_g = do_G = FALSE;        /* Break g/G loop */
2642                }
2643              }
2644    
2645          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2646            {            {
2647            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1643  while (!done) Line 2665  while (!done)
2665              }              }
2666            }            }
2667    
2668            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2669    
2670          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2671            {            {
2672            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2673              {              {
2674              char copybuffer[16];              char copybuffer[256];
2675              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2676                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2677              if (rc < 0)              if (rc < 0)
# Line 1657  while (!done) Line 2681  while (!done)
2681              }              }
2682            }            }
2683    
2684            for (copynamesptr = copynames;
2685                 *copynamesptr != 0;
2686                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2687              {
2688              char copybuffer[256];
2689              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2690                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2691              if (rc < 0)
2692                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2693              else
2694                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2695              }
2696    
2697          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2698            {            {
2699            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1669  while (!done) Line 2706  while (!done)
2706              else              else
2707                {                {
2708                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2709                pcre_free_substring(substring);                pcre_free_substring(substring);
2710                }                }
2711              }              }
2712            }            }
2713    
2714            for (getnamesptr = getnames;
2715                 *getnamesptr != 0;
2716                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2717              {
2718              const char *substring;
2719              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2720                count, (char *)getnamesptr, &substring);
2721              if (rc < 0)
2722                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2723              else
2724                {
2725                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2726                pcre_free_substring(substring);
2727                }
2728              }
2729    
2730          if (getlist)          if (getlist)
2731            {            {
2732            const char **stringlist;            const char **stringlist;
# Line 1698  while (!done) Line 2750  while (!done)
2750    
2751        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2752          {          {
2753          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2754          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            else fprintf(outfile, "Partial match, mark=%s", markptr);
2755            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],          if (use_size_offsets > 1)
2756              bptr + use_offsets[0]);            {
2757              fprintf(outfile, ": ");
2758              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2759                outfile);
2760              }
2761          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2762          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2763          }          }
2764    
2765        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2766        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2767        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2768        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2769        offset values to achieve this. We won't be at the end of the string -  
2770        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2771          "anycrlf". If the previous match was at the end of a line terminated by
2772          CRLF, an advance of one character just passes the \r, whereas we should
2773          prefer the longer newline sequence, as does the code in pcre_exec().
2774          Fudge the offset value to achieve this.
2775    
2776          Otherwise, in the case of UTF-8 matching, the advance must be one
2777          character, not one byte. */
2778    
2779        else        else
2780          {          {
2781          if (g_notempty != 0)          if (g_notempty != 0)
2782            {            {
2783            int onechar = 1;            int onechar = 1;
2784              unsigned int obits = ((real_pcre *)re)->options;
2785            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2786            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2787                {
2788                int d;
2789                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2790                /* Note that these values are always the ASCII ones, even in
2791                EBCDIC environments. CR = 13, NL = 10. */
2792                obits = (d == 13)? PCRE_NEWLINE_CR :
2793                        (d == 10)? PCRE_NEWLINE_LF :
2794                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2795                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2796                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2797                }
2798              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2799                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2800                  &&
2801                  start_offset < len - 1 &&
2802                  bptr[start_offset] == '\r' &&
2803                  bptr[start_offset+1] == '\n')
2804                onechar++;
2805              else if (use_utf8)
2806              {              {
2807              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2808                {                {
# Line 1735  while (!done) Line 2818  while (!done)
2818            {            {
2819            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2820              {              {
2821              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2822                  {
2823                  if (markptr == NULL) fprintf(outfile, "No match\n");
2824                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2825                  }
2826              }              }
2827            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2828            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 1747  while (!done) Line 2834  while (!done)
2834        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2835    
2836        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2837        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2838        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2839        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2840        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2841        character. */        character. */
2842    
2843        g_notempty = 0;        g_notempty = 0;
2844    
2845        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2846          {          {
2847          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2848          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2849          }          }
2850    
2851        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1772  while (!done) Line 2860  while (!done)
2860          len -= use_offsets[1];          len -= use_offsets[1];
2861          }          }
2862        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2863    
2864        NEXT_DATA: continue;
2865      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2866    
2867    CONTINUE:    CONTINUE:
# Line 1782  while (!done) Line 2872  while (!done)
2872    
2873    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
2874    if (extra != NULL) new_free(extra);    if (extra != NULL) new_free(extra);
2875    if (tables != NULL)    if (locale_set)
2876      {      {
2877      new_free((void *)tables);      new_free((void *)tables);
2878      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2879        locale_set = 0;
2880      }      }
2881    }    }
2882    

Legend:
Removed from v.77  
changed lines
  Added in v.553

  ViewVC Help
Powered by ViewVC 1.1.5