/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 541 by ph10, Mon Jun 14 15:19:33 2010 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    #else
83    #include <sys/time.h>          /* These two includes are needed */
84    #include <sys/resource.h>      /* for setrlimit(). */
85    #define INPUT_MODE   "rb"
86    #define OUTPUT_MODE  "wb"
87    #endif
88    
89    
90  /* We need the internal info for displaying the results of pcre_study() and  /* We have to include pcre_internal.h because we need the internal info for
91  other internal data; pcretest also uses some of the fixed tables, and generally  displaying the results of pcre_study() and we also need to know about the
92  has "inside information" compared to a program that strictly follows the PCRE  internal macros, structures, and other internal data values; pcretest has
93  API. */  "inside information" compared to a program that strictly follows the PCRE API.
94    
95    Although pcre_internal.h does itself include pcre.h, we explicitly include it
96    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97    appropriately for an application, not for building PCRE. */
98    
99    #include "pcre.h"
100  #include "pcre_internal.h"  #include "pcre_internal.h"
101    
102    /* We need access to some of the data tables that PCRE uses. So as not to have
103    to keep two copies, we include the source file here, changing the names of the
104    external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107    #define _pcre_utf8_table1      utf8_table1
108    #define _pcre_utf8_table1_size utf8_table1_size
109    #define _pcre_utf8_table2      utf8_table2
110    #define _pcre_utf8_table3      utf8_table3
111    #define _pcre_utf8_table4      utf8_table4
112    #define _pcre_utt              utt
113    #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115    #define _pcre_OP_lengths       OP_lengths
116    
117    #include "pcre_tables.c"
118    
119    /* We also need the pcre_printint() function for printing out compiled
120    patterns. This function is in a separate file so that it can be included in
121    pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
123    
124    #define COMPILING_PCRETEST
125    #include "pcre_printint.src"
126    
127    /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 140  Makefile. */
140  #include "pcreposix.h"  #include "pcreposix.h"
141  #endif  #endif
142    
143    /* It is also possible, for the benefit of the version currently imported into
144    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145    interface to the DFA matcher (NODFA), and without the doublecheck of the old
146    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147    UTF8 support if PCRE is built without it. */
148    
149    #ifndef SUPPORT_UTF8
150    #ifndef NOUTF8
151    #define NOUTF8
152    #endif
153    #endif
154    
155    
156    /* Other parameters */
157    
158  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
159  #ifdef CLK_TCK  #ifdef CLK_TCK
160  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 70  Makefile. */ Line 163  Makefile. */
163  #endif  #endif
164  #endif  #endif
165    
166  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
167    
168  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
169    
170    /* Static variables */
171    
172  static FILE *outfile;  static FILE *outfile;
173  static int log_store = 0;  static int log_store = 0;
# Line 83  static int callout_count; Line 175  static int callout_count;
175  static int callout_extra;  static int callout_extra;
176  static int callout_fail_count;  static int callout_fail_count;
177  static int callout_fail_id;  static int callout_fail_id;
178    static int debug_lengths;
179  static int first_callout;  static int first_callout;
180    static int locale_set = 0;
181  static int show_malloc;  static int show_malloc;
182  static int use_utf8;  static int use_utf8;
183  static size_t gotten_store;  static size_t gotten_store;
184    
185    /* The buffers grow automatically if very long input lines are encountered. */
186    
187    static int buffer_size = 50000;
188    static uschar *buffer = NULL;
189    static uschar *dbuffer = NULL;
190  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
191    
192    
193    /*************************************************
194    *         Alternate character tables             *
195    *************************************************/
196    
197    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198    using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200    the L (locale) option also adjusts the tables. */
201    
202    /* This is the set of tables distributed as default with PCRE. It recognizes
203    only ASCII characters. */
204    
205    static const unsigned char tables0[] = {
206    
207    /* This table is a lower casing table. */
208    
209        0,  1,  2,  3,  4,  5,  6,  7,
210        8,  9, 10, 11, 12, 13, 14, 15,
211       16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515    };
516    
517    
518    
519    /*************************************************
520    *        Read or extend an input line            *
521    *************************************************/
522    
523    /* Input lines are read into buffer, but both patterns and data lines can be
524    continued over multiple input lines. In addition, if the buffer fills up, we
525    want to automatically expand it so as to be able to handle extremely large
526    lines that are needed for certain stress tests. When the input buffer is
527    expanded, the other two buffers must also be expanded likewise, and the
528    contents of pbuffer, which are a copy of the input for callouts, must be
529    preserved (for when expansion happens for a data line). This is not the most
530    efficient way of handling this, but hey, this is just a test program!
531    
532    Arguments:
533      f            the file to read
534      start        where in buffer to start (this *must* be within buffer)
535      prompt       for stdin or readline()
536    
537    Returns:       pointer to the start of new data
538                   could be a copy of start, or could be moved
539                   NULL if no data read and EOF reached
540    */
541    
542    static uschar *
543    extend_inputline(FILE *f, uschar *start, const char *prompt)
544    {
545    uschar *here = start;
546    
547    for (;;)
548      {
549      int rlen = (int)(buffer_size - (here - buffer));
550    
551      if (rlen > 1000)
552        {
553        int dlen;
554    
555        /* If libreadline support is required, use readline() to read a line if the
556        input is a terminal. Note that readline() removes the trailing newline, so
557        we must put it back again, to be compatible with fgets(). */
558    
559    #ifdef SUPPORT_LIBREADLINE
560        if (isatty(fileno(f)))
561          {
562          size_t len;
563          char *s = readline(prompt);
564          if (s == NULL) return (here == start)? NULL : start;
565          len = strlen(s);
566          if (len > 0) add_history(s);
567          if (len > rlen - 1) len = rlen - 1;
568          memcpy(here, s, len);
569          here[len] = '\n';
570          here[len+1] = 0;
571          free(s);
572          }
573        else
574    #endif
575    
576        /* Read the next line by normal means, prompting if the file is stdin. */
577    
578          {
579          if (f == stdin) printf("%s", prompt);
580          if (fgets((char *)here, rlen,  f) == NULL)
581            return (here == start)? NULL : start;
582          }
583    
584        dlen = (int)strlen((char *)here);
585        if (dlen > 0 && here[dlen - 1] == '\n') return start;
586        here += dlen;
587        }
588    
589      else
590        {
591        int new_buffer_size = 2*buffer_size;
592        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
593        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
594        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
595    
596        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
597          {
598          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
599          exit(1);
600          }
601    
602        memcpy(new_buffer, buffer, buffer_size);
603        memcpy(new_pbuffer, pbuffer, buffer_size);
604    
605        buffer_size = new_buffer_size;
606    
607        start = new_buffer + (start - buffer);
608        here = new_buffer + (here - buffer);
609    
610        free(buffer);
611        free(dbuffer);
612        free(pbuffer);
613    
614        buffer = new_buffer;
615        dbuffer = new_dbuffer;
616        pbuffer = new_pbuffer;
617        }
618      }
619    
620    return NULL;  /* Control never gets here */
621    }
622    
623    
624    
625    
626    
627    
628    
629  /*************************************************  /*************************************************
630  *          Read number from string               *  *          Read number from string               *
# Line 98  static uschar *pbuffer = NULL; Line 632  static uschar *pbuffer = NULL;
632    
633  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
634  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
635  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
636    
637  Arguments:  Arguments:
638    str           string to be converted    str           string to be converted
# Line 128  return(result); Line 662  return(result);
662  and returns the value of the character.  and returns the value of the character.
663    
664  Argument:  Argument:
665    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
666    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
667    
668  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
669             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
670  */  */
671    
672    #if !defined NOUTF8
673    
674  static int  static int
675  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
676  {  {
677  int c = *buffer++;  int c = *utf8bytes++;
678  int d = c;  int d = c;
679  int i, j, s;  int i, j, s;
680    
# Line 154  if (i == 0 || i == 6) return 0;        / Line 690  if (i == 0 || i == 6) return 0;        /
690  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
691    
692  s = 6*i;  s = 6*i;
693  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
694    
695  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
696    {    {
697    c = *buffer++;    c = *utf8bytes++;
698    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
699    s -= 6;    s -= 6;
700    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 166  for (j = 0; j < i; j++) Line 702  for (j = 0; j < i; j++)
702    
703  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
704    
705  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
706    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
707  if (j != i) return -(i+1);  if (j != i) return -(i+1);
708    
709  /* Valid value */  /* Valid value */
# Line 176  if (j != i) return -(i+1); Line 712  if (j != i) return -(i+1);
712  return i+1;  return i+1;
713  }  }
714    
715    #endif
716    
717    
718    
719    /*************************************************
720    *       Convert character value to UTF-8         *
721    *************************************************/
722    
723    /* This function takes an integer value in the range 0 - 0x7fffffff
724    and encodes it as a UTF-8 character in 1 to 6 bytes.
725    
726    Arguments:
727      cvalue     the character value
728      utf8bytes  pointer to buffer for result - at least 6 bytes long
729    
730    Returns:     number of bytes placed in the buffer
731    */
732    
733    #if !defined NOUTF8
734    
735    static int
736    ord2utf8(int cvalue, uschar *utf8bytes)
737    {
738    register int i, j;
739    for (i = 0; i < utf8_table1_size; i++)
740      if (cvalue <= utf8_table1[i]) break;
741    utf8bytes += i;
742    for (j = i; j > 0; j--)
743     {
744     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
745     cvalue >>= 6;
746     }
747    *utf8bytes = utf8_table2[i] | cvalue;
748    return i + 1;
749    }
750    
751    #endif
752    
753    
754    
755  /*************************************************  /*************************************************
# Line 188  chars without printing. */ Line 762  chars without printing. */
762    
763  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
764  {  {
765  int c;  int c = 0;
766  int yield = 0;  int yield = 0;
767    
768  while (length-- > 0)  while (length-- > 0)
769    {    {
770    #if !defined NOUTF8
771    if (use_utf8)    if (use_utf8)
772      {      {
773      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 201  while (length-- > 0) Line 776  while (length-- > 0)
776        {        {
777        length -= rc - 1;        length -= rc - 1;
778        p += rc;        p += rc;
779        if (c < 256 && isprint(c))        if (PRINTHEX(c))
780          {          {
781          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
782          yield++;          yield++;
783          }          }
784        else        else
785          {          {
786          int n;          int n = 4;
787          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
788          yield += n;          yield += (n <= 0x000000ff)? 2 :
789                     (n <= 0x00000fff)? 3 :
790                     (n <= 0x0000ffff)? 4 :
791                     (n <= 0x000fffff)? 5 : 6;
792          }          }
793        continue;        continue;
794        }        }
795      }      }
796    #endif
797    
798     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
799    
800    if (isprint(c = *(p++)))    c = *p++;
801      if (PRINTHEX(c))
802      {      {
803      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
804      yield++;      yield++;
# Line 392  if ((rc = pcre_fullinfo(re, study, optio Line 972  if ((rc = pcre_fullinfo(re, study, optio
972  *         Byte flipping function                 *  *         Byte flipping function                 *
973  *************************************************/  *************************************************/
974    
975  static long int  static unsigned long int
976  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
977  {  {
978  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
979  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 406  return ((value & 0x000000ff) << 24) | Line 986  return ((value & 0x000000ff) << 24) |
986    
987    
988  /*************************************************  /*************************************************
989    *        Check match or recursion limit          *
990    *************************************************/
991    
992    static int
993    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
994      int start_offset, int options, int *use_offsets, int use_size_offsets,
995      int flag, unsigned long int *limit, int errnumber, const char *msg)
996    {
997    int count;
998    int min = 0;
999    int mid = 64;
1000    int max = -1;
1001    
1002    extra->flags |= flag;
1003    
1004    for (;;)
1005      {
1006      *limit = mid;
1007    
1008      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1009        use_offsets, use_size_offsets);
1010    
1011      if (count == errnumber)
1012        {
1013        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1014        min = mid;
1015        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1016        }
1017    
1018      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1019                             count == PCRE_ERROR_PARTIAL)
1020        {
1021        if (mid == min + 1)
1022          {
1023          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1024          break;
1025          }
1026        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1027        max = mid;
1028        mid = (min + mid)/2;
1029        }
1030      else break;    /* Some other error */
1031      }
1032    
1033    extra->flags &= ~flag;
1034    return count;
1035    }
1036    
1037    
1038    
1039    /*************************************************
1040    *         Case-independent strncmp() function    *
1041    *************************************************/
1042    
1043    /*
1044    Arguments:
1045      s         first string
1046      t         second string
1047      n         number of characters to compare
1048    
1049    Returns:    < 0, = 0, or > 0, according to the comparison
1050    */
1051    
1052    static int
1053    strncmpic(uschar *s, uschar *t, int n)
1054    {
1055    while (n--)
1056      {
1057      int c = tolower(*s++) - tolower(*t++);
1058      if (c) return c;
1059      }
1060    return 0;
1061    }
1062    
1063    
1064    
1065    /*************************************************
1066    *         Check newline indicator                *
1067    *************************************************/
1068    
1069    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1070    a message and return 0 if there is no match.
1071    
1072    Arguments:
1073      p           points after the leading '<'
1074      f           file for error message
1075    
1076    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1077    */
1078    
1079    static int
1080    check_newline(uschar *p, FILE *f)
1081    {
1082    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1083    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1084    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1085    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1086    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1087    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1088    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1089    fprintf(f, "Unknown newline type at: <%s\n", p);
1090    return 0;
1091    }
1092    
1093    
1094    
1095    /*************************************************
1096    *             Usage function                     *
1097    *************************************************/
1098    
/* Write the command-line help text to stdout. The lines describing readline
support, -dfa and -p depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX
respectively. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n", stdout);

fputs("  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1132    
1133    
1134    
1135    /*************************************************
1136  *                Main Program                    *  *                Main Program                    *
1137  *************************************************/  *************************************************/
1138    
# Line 418  int main(int argc, char **argv) Line 1145  int main(int argc, char **argv)
1145  FILE *infile = stdin;  FILE *infile = stdin;
1146  int options = 0;  int options = 0;
1147  int study_options = 0;  int study_options = 0;
1148    int default_find_match_limit = FALSE;
1149  int op = 1;  int op = 1;
1150  int timeit = 0;  int timeit = 0;
1151    int timeitm = 0;
1152  int showinfo = 0;  int showinfo = 0;
1153  int showstore = 0;  int showstore = 0;
1154    int quiet = 0;
1155  int size_offsets = 45;  int size_offsets = 45;
1156  int size_offsets_max;  int size_offsets_max;
1157  int *offsets = NULL;  int *offsets = NULL;
# Line 432  int debug = 0; Line 1162  int debug = 0;
1162  int done = 0;  int done = 0;
1163  int all_use_dfa = 0;  int all_use_dfa = 0;
1164  int yield = 0;  int yield = 0;
1165    int stack_size;
1166    
1167    /* These vectors store, end-to-end, a list of captured substring names. Assume
1168    that 1024 is plenty long enough for the few names we'll be testing. */
1169    
1170    uschar copynames[1024];
1171    uschar getnames[1024];
1172    
1173  unsigned char *buffer;  uschar *copynamesptr;
1174  unsigned char *dbuffer;  uschar *getnamesptr;
1175    
1176  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1177  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
1178    
1179  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
1180  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
1181  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1182    
1183  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
1184    
1185  outfile = stdout;  outfile = stdout;
1186    
1187    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1188    library to translate CRLF into a single LF character. At least, that's what
1189    I've been told: never having used Windows I take this all on trust. Originally
1190    it set 0x8000, but then I was advised that _O_BINARY was better. */
1191    
1192    #if defined(_WIN32) || defined(WIN32)
1193    _setmode( _fileno( stdout ), _O_BINARY );
1194    #endif
1195    
1196  /* Scan options */  /* Scan options */
1197    
1198  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 461  while (argc > 1 && argv[op][0] == '-') Line 1201  while (argc > 1 && argv[op][0] == '-')
1201    
1202    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1203      showstore = 1;      showstore = 1;
1204    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1205      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1206    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1207    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1208      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1209    #if !defined NODFA
1210    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1211    #endif
1212    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1213        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1214          *endptr == 0))          *endptr == 0))
# Line 472  while (argc > 1 && argv[op][0] == '-') Line 1216  while (argc > 1 && argv[op][0] == '-')
1216      op++;      op++;
1217      argc--;      argc--;
1218      }      }
1219      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1220        {
1221        int both = argv[op][2] == 0;
1222        int temp;
1223        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1224                         *endptr == 0))
1225          {
1226          timeitm = temp;
1227          op++;
1228          argc--;
1229          }
1230        else timeitm = LOOPREPEAT;
1231        if (both) timeit = timeitm;
1232        }
1233      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1234          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1235            *endptr == 0))
1236        {
1237    #if defined(_WIN32) || defined(WIN32)
1238        printf("PCRE: -S not supported on this OS\n");
1239        exit(1);
1240    #else
1241        int rc;
1242        struct rlimit rlim;
1243        getrlimit(RLIMIT_STACK, &rlim);
1244        rlim.rlim_cur = stack_size * 1024 * 1024;
1245        rc = setrlimit(RLIMIT_STACK, &rlim);
1246        if (rc != 0)
1247          {
1248        printf("PCRE: setrlimit() failed with error %d\n", rc);
1249        exit(1);
1250          }
1251        op++;
1252        argc--;
1253    #endif
1254        }
1255  #if !defined NOPOSIX  #if !defined NOPOSIX
1256    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1257  #endif  #endif
1258    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1259      {      {
1260      int rc;      int rc;
1261        unsigned long int lrc;
1262      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1263      printf("Compiled with\n");      printf("Compiled with\n");
1264      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 485  while (argc > 1 && argv[op][0] == '-') Line 1266  while (argc > 1 && argv[op][0] == '-')
1266      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1267      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1268      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1269      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1270        in EBCDIC environments. CR is 13 and NL is 10. */
1271        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1272          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1273          (rc == -2)? "ANYCRLF" :
1274          (rc == -1)? "ANY" : "???");
1275        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1276        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1277                                         "all Unicode newlines");
1278      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1279      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1280      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1281      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1282      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1283      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1284        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1285        printf("  Default recursion depth limit = %ld\n", lrc);
1286      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1287      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1288      exit(0);      goto EXIT;
1289        }
1290      else if (strcmp(argv[op], "-help") == 0 ||
1291               strcmp(argv[op], "--help") == 0)
1292        {
1293        usage();
1294        goto EXIT;
1295      }      }
1296    else    else
1297      {      {
1298      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1299      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
     printf("  -dfa   force DFA matching for all subjects\n");  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
1300      yield = 1;      yield = 1;
1301      goto EXIT;      goto EXIT;
1302      }      }
# Line 525  offsets = (int *)malloc(size_offsets_max Line 1311  offsets = (int *)malloc(size_offsets_max
1311  if (offsets == NULL)  if (offsets == NULL)
1312    {    {
1313    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1314      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1315    yield = 1;    yield = 1;
1316    goto EXIT;    goto EXIT;
1317    }    }
# Line 534  if (offsets == NULL) Line 1320  if (offsets == NULL)
1320    
1321  if (argc > 1)  if (argc > 1)
1322    {    {
1323    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1324    if (infile == NULL)    if (infile == NULL)
1325      {      {
1326      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 545  if (argc > 1) Line 1331  if (argc > 1)
1331    
1332  if (argc > 2)  if (argc > 2)
1333    {    {
1334    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1335    if (outfile == NULL)    if (outfile == NULL)
1336      {      {
1337      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 561  pcre_free = new_free; Line 1347  pcre_free = new_free;
1347  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1348  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1349    
1350  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1351    
1352  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1353    
1354  /* Main loop */  /* Main loop */
1355    
# Line 578  while (!done) Line 1364  while (!done)
1364  #endif  #endif
1365    
1366    const char *error;    const char *error;
1367      unsigned char *markptr;
1368    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1369    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1370    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1371    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1372    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1373      int do_mark = 0;
1374    int do_study = 0;    int do_study = 0;
1375    int do_debug = debug;    int do_debug = debug;
1376    int do_G = 0;    int do_G = 0;
# Line 590  while (!done) Line 1378  while (!done)
1378    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1379    int do_showrest = 0;    int do_showrest = 0;
1380    int do_flip = 0;    int do_flip = 0;
1381    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1382    
1383    use_utf8 = 0;    use_utf8 = 0;
1384      debug_lengths = 1;
1385    
1386    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1387    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1388    fflush(outfile);    fflush(outfile);
1389    
# Line 607  while (!done) Line 1395  while (!done)
1395    
1396    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1397      {      {
1398      unsigned long int magic;      unsigned long int magic, get_options;
1399      uschar sbuf[8];      uschar sbuf[8];
1400      FILE *f;      FILE *f;
1401    
# Line 655  while (!done) Line 1443  while (!done)
1443    
1444      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1445    
1446      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1447      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1448    
1449      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1450    
# Line 695  while (!done) Line 1483  while (!done)
1483    
1484    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1485      {      {
1486      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1487      goto SKIP_DATA;      goto SKIP_DATA;
1488      }      }
1489    
1490    pp = p;    pp = p;
1491      poffset = (int)(p - buffer);
1492    
1493    for(;;)    for(;;)
1494      {      {
# Line 710  while (!done) Line 1499  while (!done)
1499        pp++;        pp++;
1500        }        }
1501      if (*pp != 0) break;      if (*pp != 0) break;
1502        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1503        {        {
1504        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1505        done = 1;        done = 1;
# Line 728  while (!done) Line 1508  while (!done)
1508      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1509      }      }
1510    
1511      /* The buffer may have moved while being extended; reset the start of data
1512      pointer to the correct relative point in the buffer. */
1513    
1514      p = buffer + poffset;
1515    
1516    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1517    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1518    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 759  while (!done) Line 1544  while (!done)
1544    
1545        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1546        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1547          case 'B': do_debug = 1; break;
1548        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1549        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1550        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1551        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1552        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1553        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1554          case 'J': options |= PCRE_DUPNAMES; break;
1555          case 'K': do_mark = 1; break;
1556        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1557        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1558    
# Line 774  while (!done) Line 1562  while (!done)
1562    
1563        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1564        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1565          case 'W': options |= PCRE_UCP; break;
1566        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1567          case 'Z': debug_lengths = 0; break;
1568        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1569        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1570    
1571          case 'T':
1572          switch (*pp++)
1573            {
1574            case '0': tables = tables0; break;
1575            case '1': tables = tables1; break;
1576    
1577            case '\r':
1578            case '\n':
1579            case ' ':
1580            case 0:
1581            fprintf(outfile, "** Missing table number after /T\n");
1582            goto SKIP_DATA;
1583    
1584            default:
1585            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1586            goto SKIP_DATA;
1587            }
1588          break;
1589    
1590        case 'L':        case 'L':
1591        ppp = pp;        ppp = pp;
1592        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1593        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1594          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1595        *ppp = 0;        *ppp = 0;
1596        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1597          {          {
1598          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1599          goto SKIP_DATA;          goto SKIP_DATA;
1600          }          }
1601          locale_set = 1;
1602        tables = pcre_maketables();        tables = pcre_maketables();
1603        pp = ppp;        pp = ppp;
1604        break;        break;
# Line 799  while (!done) Line 1610  while (!done)
1610        *pp = 0;        *pp = 0;
1611        break;        break;
1612    
1613          case '<':
1614            {
1615            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1616              {
1617              options |= PCRE_JAVASCRIPT_COMPAT;
1618              pp += 3;
1619              }
1620            else
1621              {
1622              int x = check_newline(pp, outfile);
1623              if (x == 0) goto SKIP_DATA;
1624              options |= x;
1625              while (*pp++ != '>');
1626              }
1627            }
1628          break;
1629    
1630        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1631        case '\n':        case '\n':
1632        case ' ':        case ' ':
# Line 823  while (!done) Line 1651  while (!done)
1651      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1652      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1653      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1654        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1655        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1656        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1657        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1658    
1659      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1660    
1661      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 830  while (!done) Line 1663  while (!done)
1663    
1664      if (rc != 0)      if (rc != 0)
1665        {        {
1666        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1667        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1668        goto SKIP_DATA;        goto SKIP_DATA;
1669        }        }
# Line 842  while (!done) Line 1675  while (!done)
1675  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1676    
1677      {      {
1678      if (timeit)      unsigned long int get_options;
1679    
1680        if (timeit > 0)
1681        {        {
1682        register int i;        register int i;
1683        clock_t time_taken;        clock_t time_taken;
1684        clock_t start_time = clock();        clock_t start_time = clock();
1685        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1686          {          {
1687          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1688          if (re != NULL) free(re);          if (re != NULL) free(re);
1689          }          }
1690        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1691        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1692          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1693            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1694        }        }
1695    
# Line 871  while (!done) Line 1706  while (!done)
1706          {          {
1707          for (;;)          for (;;)
1708            {            {
1709            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1710              {              {
1711              done = 1;              done = 1;
1712              goto CONTINUE;              goto CONTINUE;
# Line 885  while (!done) Line 1720  while (!done)
1720        goto CONTINUE;        goto CONTINUE;
1721        }        }
1722    
1723      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1724      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1725      returns only limited data. Check that it agrees with the newer one. */      lines. */
1726    
1727        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1728        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1729    
1730        /* Print information if required. There are now two info-returning
1731        functions. The old one has a limited interface and returns only limited
1732        data. Check that it agrees with the newer one. */
1733    
1734      if (log_store)      if (log_store)
1735        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 906  while (!done) Line 1748  while (!done)
1748    
1749      if (do_study)      if (do_study)
1750        {        {
1751        if (timeit)        if (timeit > 0)
1752          {          {
1753          register int i;          register int i;
1754          clock_t time_taken;          clock_t time_taken;
1755          clock_t start_time = clock();          clock_t start_time = clock();
1756          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1757            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1758          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1759          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1760          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1761            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1762              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1763          }          }
1764        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 926  while (!done) Line 1768  while (!done)
1768          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1769        }        }
1770    
1771        /* If /K was present, we set up for handling MARK data. */
1772    
1773        if (do_mark)
1774          {
1775          if (extra == NULL)
1776            {
1777            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1778            extra->flags = 0;
1779            }
1780          extra->mark = &markptr;
1781          extra->flags |= PCRE_EXTRA_MARK;
1782          }
1783    
1784      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1785      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1786      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 934  while (!done) Line 1789  while (!done)
1789      if (do_flip)      if (do_flip)
1790        {        {
1791        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1792        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1793            byteflip(rre->magic_number, sizeof(rre->magic_number));
1794        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1795        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1796        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1797        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1798        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1799        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1800        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1801          rre->first_byte =
1802            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1803          rre->req_byte =
1804            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1805          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1806          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1807        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1808          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1809        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1810            sizeof(rre->name_count));
1811    
1812        if (extra != NULL)        if (extra != NULL)
1813          {          {
1814          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1815          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1816          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1817            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1818          }          }
1819        }        }
1820    
# Line 959  while (!done) Line 1822  while (!done)
1822    
1823      SHOW_INFO:      SHOW_INFO:
1824    
1825        if (do_debug)
1826          {
1827          fprintf(outfile, "------------------------------------------------------------------\n");
1828          pcre_printint(re, outfile, debug_lengths);
1829          }
1830    
1831        /* We already have the options in get_options (see above) */
1832    
1833      if (do_showinfo)      if (do_showinfo)
1834        {        {
1835        unsigned long int get_options, all_options;        unsigned long int all_options;
1836    #if !defined NOINFOCHECK
1837        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1838        int count, backrefmax, first_char, need_char;  #endif
1839          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1840            hascrorlf;
1841        int nameentrysize, namecount;        int nameentrysize, namecount;
1842        const uschar *nametable;        const uschar *nametable;
1843    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         _pcre_printint(re, outfile);  
         }  
   
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1844        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1845        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1846        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 982  while (!done) Line 1849  while (!done)
1849        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1850        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1851        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1852          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1853          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1854          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1855    
1856    #if !defined NOINFOCHECK
1857        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1858        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1859          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1000  while (!done) Line 1871  while (!done)
1871            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1872              get_options, old_options);              get_options, old_options);
1873          }          }
1874    #endif
1875    
1876        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1877          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1021  while (!done) Line 1893  while (!done)
1893            }            }
1894          }          }
1895    
1896        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1897        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1898    
1899        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1900        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1901    
1902        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1903          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1904            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1905            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1906            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1907            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1908            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1909            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1910              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1911              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1912            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1913            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1914            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1915              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1916            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1917            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1918              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1919              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1920    
1921        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1922          fprintf(outfile, "Case state changes\n");  
1923          switch (get_options & PCRE_NEWLINE_BITS)
1924            {
1925            case PCRE_NEWLINE_CR:
1926            fprintf(outfile, "Forced newline sequence: CR\n");
1927            break;
1928    
1929            case PCRE_NEWLINE_LF:
1930            fprintf(outfile, "Forced newline sequence: LF\n");
1931            break;
1932    
1933            case PCRE_NEWLINE_CRLF:
1934            fprintf(outfile, "Forced newline sequence: CRLF\n");
1935            break;
1936    
1937            case PCRE_NEWLINE_ANYCRLF:
1938            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1939            break;
1940    
1941            case PCRE_NEWLINE_ANY:
1942            fprintf(outfile, "Forced newline sequence: ANY\n");
1943            break;
1944    
1945            default:
1946            break;
1947            }
1948    
1949        if (first_char == -1)        if (first_char == -1)
1950          {          {
1951          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1952          }          }
1953        else if (first_char < 0)        else if (first_char < 0)
1954          {          {
# Line 1063  while (!done) Line 1959  while (!done)
1959          int ch = first_char & 255;          int ch = first_char & 255;
1960          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1961            "" : " (caseless)";            "" : " (caseless)";
1962          if (isprint(ch))          if (PRINTHEX(ch))
1963            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1964          else          else
1965            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1078  while (!done) Line 1974  while (!done)
1974          int ch = need_char & 255;          int ch = need_char & 255;
1975          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1976            "" : " (caseless)";            "" : " (caseless)";
1977          if (isprint(ch))          if (PRINTHEX(ch))
1978            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1979          else          else
1980            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1096  while (!done) Line 1992  while (!done)
1992          else          else
1993            {            {
1994            uschar *start_bits = NULL;            uschar *start_bits = NULL;
1995            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
1996    
1997              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1998              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1999    
2000              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2001            if (start_bits == NULL)            if (start_bits == NULL)
2002              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2003            else            else
2004              {              {
2005              int i;              int i;
# Line 1114  while (!done) Line 2014  while (!done)
2014                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2015                    c = 2;                    c = 2;
2016                    }                    }
2017                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
2018                    {                    {
2019                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2020                    c += 2;                    c += 2;
# Line 1146  while (!done) Line 2046  while (!done)
2046        else        else
2047          {          {
2048          uschar sbuf[8];          uschar sbuf[8];
2049          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
2050          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
2051          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
2052          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
2053    
2054          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2055          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2056          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2057          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
2058    
2059          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2060              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1173  while (!done) Line 2073  while (!done)
2073                  strerror(errno));                  strerror(errno));
2074                }                }
2075              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
2076    
2077              }              }
2078            }            }
2079          fclose(f);          fclose(f);
# Line 1180  while (!done) Line 2081  while (!done)
2081    
2082        new_free(re);        new_free(re);
2083        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2084        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2085            {
2086            new_free((void *)tables);
2087            setlocale(LC_CTYPE, "C");
2088            locale_set = 0;
2089            }
2090        continue;  /* With next regex */        continue;  /* With next regex */
2091        }        }
2092      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1189  while (!done) Line 2095  while (!done)
2095    
2096    for (;;)    for (;;)
2097      {      {
2098      unsigned char *q;      uschar *q;
2099      unsigned char *bptr = dbuffer;      uschar *bptr;
2100      int *use_offsets = offsets;      int *use_offsets = offsets;
2101      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2102      int callout_data = 0;      int callout_data = 0;
2103      int callout_data_set = 0;      int callout_data_set = 0;
2104      int count, c;      int count, c;
2105      int copystrings = 0;      int copystrings = 0;
2106      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2107      int getstrings = 0;      int getstrings = 0;
2108      int getlist = 0;      int getlist = 0;
2109      int gmatched = 0;      int gmatched = 0;
# Line 1207  while (!done) Line 2113  while (!done)
2113    
2114      options = 0;      options = 0;
2115    
2116        *copynames = 0;
2117        *getnames = 0;
2118    
2119        copynamesptr = copynames;
2120        getnamesptr = getnames;
2121    
2122      pcre_callout = callout;      pcre_callout = callout;
2123      first_callout = 1;      first_callout = 1;
2124      callout_extra = 0;      callout_extra = 0;
# Line 1215  while (!done) Line 2127  while (!done)
2127      callout_fail_id = -1;      callout_fail_id = -1;
2128      show_malloc = 0;      show_malloc = 0;
2129    
2130      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2131      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2132    
2133        len = 0;
2134        for (;;)
2135        {        {
2136        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2137        goto CONTINUE;          {
2138            if (len > 0)    /* Reached EOF without hitting a newline */
2139              {
2140              fprintf(outfile, "\n");
2141              break;
2142              }
2143            done = 1;
2144            goto CONTINUE;
2145            }
2146          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2147          len = (int)strlen((char *)buffer);
2148          if (buffer[len-1] == '\n') break;
2149        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2150    
     len = (int)strlen((char *)buffer);  
2151      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2152      buffer[len] = 0;      buffer[len] = 0;
2153      if (len == 0) break;      if (len == 0) break;
# Line 1231  while (!done) Line 2155  while (!done)
2155      p = buffer;      p = buffer;
2156      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2157    
2158      q = dbuffer;      bptr = q = dbuffer;
2159      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2160        {        {
2161        int i = 0;        int i = 0;
# Line 1253  while (!done) Line 2177  while (!done)
2177          c -= '0';          c -= '0';
2178          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2179            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2180    
2181    #if !defined NOUTF8
2182            if (use_utf8 && c > 255)
2183              {
2184              unsigned char buff8[8];
2185              int ii, utn;
2186              utn = ord2utf8(c, buff8);
2187              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2188              c = buff8[ii];   /* Last byte */
2189              }
2190    #endif
2191          break;          break;
2192    
2193          case 'x':          case 'x':
2194    
2195          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2196    
2197    #if !defined NOUTF8
2198          if (*p == '{')          if (*p == '{')
2199            {            {
2200            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1269  while (!done) Line 2205  while (!done)
2205              {              {
2206              unsigned char buff8[8];              unsigned char buff8[8];
2207              int ii, utn;              int ii, utn;
2208              utn = _pcre_ord2utf8(c, buff8);              if (use_utf8)
2209              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2210              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2211                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2212                  c = buff8[ii];   /* Last byte */
2213                  }
2214                else
2215                 {
2216                 if (c > 255)
2217                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2218                     "UTF-8 mode is not enabled.\n"
2219                     "** Truncation will probably give the wrong result.\n", c);
2220                 }
2221              p = pt + 1;              p = pt + 1;
2222              break;              break;
2223              }              }
2224            /* Not correct form; fall through */            /* Not correct form; fall through */
2225            }            }
2226    #endif
2227    
2228          /* Ordinary \x */          /* Ordinary \x */
2229    
# Line 1312  while (!done) Line 2259  while (!done)
2259            }            }
2260          else if (isalnum(*p))          else if (isalnum(*p))
2261            {            {
2262            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
2263            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2264              *npp++ = 0;
2265            *npp = 0;            *npp = 0;
2266            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2267            if (n < 0)            if (n < 0)
2268              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2269            else copystrings |= 1 << n;            copynamesptr = npp;
2270            }            }
2271          else if (*p == '+')          else if (*p == '+')
2272            {            {
# Line 1357  while (!done) Line 2304  while (!done)
2304            }            }
2305          continue;          continue;
2306    
2307    #if !defined NODFA
2308          case 'D':          case 'D':
2309    #if !defined NOPOSIX
2310          if (posix || do_posix)          if (posix || do_posix)
2311            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2312          else          else
2313    #endif
2314            use_dfa = 1;            use_dfa = 1;
2315          continue;          continue;
2316    
2317          case 'F':          case 'F':
2318          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2319          continue;          continue;
2320    #endif
2321    
2322          case 'G':          case 'G':
2323          if (isdigit(*p))          if (isdigit(*p))
# Line 1376  while (!done) Line 2327  while (!done)
2327            }            }
2328          else if (isalnum(*p))          else if (isalnum(*p))
2329            {            {
2330            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
2331            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2332              *npp++ = 0;
2333            *npp = 0;            *npp = 0;
2334            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2335            if (n < 0)            if (n < 0)
2336              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2337            else getstrings |= 1 << n;            getnamesptr = npp;
2338            }            }
2339          continue;          continue;
2340    
# Line 1396  while (!done) Line 2347  while (!done)
2347          continue;          continue;
2348    
2349          case 'N':          case 'N':
2350          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2351              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2352            else
2353              options |= PCRE_NOTEMPTY;
2354          continue;          continue;
2355    
2356          case 'O':          case 'O':
# Line 1409  while (!done) Line 2363  while (!done)
2363            if (offsets == NULL)            if (offsets == NULL)
2364              {              {
2365              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2366                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2367              yield = 1;              yield = 1;
2368              goto EXIT;              goto EXIT;
2369              }              }
# Line 1419  while (!done) Line 2373  while (!done)
2373          continue;          continue;
2374    
2375          case 'P':          case 'P':
2376          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2377              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2378            continue;
2379    
2380            case 'Q':
2381            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2382            if (extra == NULL)
2383              {
2384              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2385              extra->flags = 0;
2386              }
2387            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2388            extra->match_limit_recursion = n;
2389          continue;          continue;
2390    
2391            case 'q':
2392            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2393            if (extra == NULL)
2394              {
2395              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2396              extra->flags = 0;
2397              }
2398            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2399            extra->match_limit = n;
2400            continue;
2401    
2402    #if !defined NODFA
2403          case 'R':          case 'R':
2404          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
2405          continue;          continue;
2406    #endif
2407    
2408          case 'S':          case 'S':
2409          show_malloc = 1;          show_malloc = 1;
2410          continue;          continue;
2411    
2412            case 'Y':
2413            options |= PCRE_NO_START_OPTIMIZE;
2414            continue;
2415    
2416          case 'Z':          case 'Z':
2417          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2418          continue;          continue;
# Line 1437  while (!done) Line 2420  while (!done)
2420          case '?':          case '?':
2421          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2422          continue;          continue;
2423    
2424            case '<':
2425              {
2426              int x = check_newline(p, outfile);
2427              if (x == 0) goto NEXT_DATA;
2428              options |= x;
2429              while (*p++ != '>');
2430              }
2431            continue;
2432          }          }
2433        *q++ = c;        *q++ = c;
2434        }        }
2435      *q = 0;      *q = 0;
2436      len = q - dbuffer;      len = (int)(q - dbuffer);
2437    
2438        /* Move the data to the end of the buffer so that a read over the end of
2439        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2440        we are using the POSIX interface, we must include the terminating zero. */
2441    
2442    #if !defined NOPOSIX
2443        if (posix || do_posix)
2444          {
2445          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2446          bptr += buffer_size - len - 1;
2447          }
2448        else
2449    #endif
2450          {
2451          memmove(bptr + buffer_size - len, bptr, len);
2452          bptr += buffer_size - len;
2453          }
2454    
2455      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2456        {        {
# Line 1462  while (!done) Line 2471  while (!done)
2471          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2472        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2473        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2474          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2475    
2476        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2477    
2478        if (rc != 0)        if (rc != 0)
2479          {          {
2480          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2481          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2482          }          }
2483          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2484                  != 0)
2485            {
2486            fprintf(outfile, "Matched with REG_NOSUB\n");
2487            }
2488        else        else
2489          {          {
2490          size_t i;          size_t i;
# Line 1501  while (!done) Line 2516  while (!done)
2516    
2517      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2518        {        {
2519        if (timeit)        markptr = NULL;
2520    
2521          if (timeitm > 0)
2522          {          {
2523          register int i;          register int i;
2524          clock_t time_taken;          clock_t time_taken;
2525          clock_t start_time = clock();          clock_t start_time = clock();
2526    
2527    #if !defined NODFA
2528          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2529            {            {
2530            int workspace[1000];            int workspace[1000];
2531            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2532              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2533                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2534                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2535            }            }
2536          else          else
2537    #endif
2538    
2539          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2540            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2541              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2542    
2543          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2544          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2545            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2546              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2547          }          }
2548    
2549        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2550        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2551          for the recursion limit. */
2552    
2553        if (find_match_limit)        if (find_match_limit)
2554          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2555          if (extra == NULL)          if (extra == NULL)
2556            {            {
2557            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2558            extra->flags = 0;            extra->flags = 0;
2559            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2560    
2561          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2562              options|g_notempty, use_offsets, use_size_offsets,
2563              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2564              PCRE_ERROR_MATCHLIMIT, "match()");
2565    
2566            count = check_match_limit(re, extra, bptr, len, start_offset,
2567              options|g_notempty, use_offsets, use_size_offsets,
2568              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2569              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2570          }          }
2571    
2572        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1591  while (!done) Line 2588  while (!done)
2588        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2589        value of match_limit. */        value of match_limit. */
2590    
2591    #if !defined NODFA
2592        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2593          {          {
2594          int workspace[1000];          int workspace[1000];
2595          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2596            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2597            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2598          if (count == 0)          if (count == 0)
# Line 1603  while (!done) Line 2601  while (!done)
2601            count = use_size_offsets/2;            count = use_size_offsets/2;
2602            }            }
2603          }          }
2604    #endif
2605    
2606        else        else
2607          {          {
# Line 1619  while (!done) Line 2618  while (!done)
2618    
2619        if (count >= 0)        if (count >= 0)
2620          {          {
2621          int i;          int i, maxcount;
2622    
2623    #if !defined NODFA
2624            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2625    #endif
2626              maxcount = use_size_offsets/3;
2627    
2628            /* This is a check against a lunatic return value. */
2629    
2630            if (count > maxcount)
2631              {
2632              fprintf(outfile,
2633                "** PCRE error: returned count %d is too big for offset size %d\n",
2634                count, use_size_offsets);
2635              count = use_size_offsets/3;
2636              if (do_g || do_G)
2637                {
2638                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2639                do_g = do_G = FALSE;        /* Break g/G loop */
2640                }
2641              }
2642    
2643          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2644            {            {
2645            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1643  while (!done) Line 2663  while (!done)
2663              }              }
2664            }            }
2665    
2666            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2667    
2668          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2669            {            {
2670            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2671              {              {
2672              char copybuffer[16];              char copybuffer[256];
2673              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2674                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2675              if (rc < 0)              if (rc < 0)
# Line 1657  while (!done) Line 2679  while (!done)
2679              }              }
2680            }            }
2681    
2682            for (copynamesptr = copynames;
2683                 *copynamesptr != 0;
2684                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2685              {
2686              char copybuffer[256];
2687              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2688                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2689              if (rc < 0)
2690                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2691              else
2692                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2693              }
2694    
2695          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2696            {            {
2697            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1669  while (!done) Line 2704  while (!done)
2704              else              else
2705                {                {
2706                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2707                pcre_free_substring(substring);                pcre_free_substring(substring);
2708                }                }
2709              }              }
2710            }            }
2711    
2712            for (getnamesptr = getnames;
2713                 *getnamesptr != 0;
2714                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2715              {
2716              const char *substring;
2717              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2718                count, (char *)getnamesptr, &substring);
2719              if (rc < 0)
2720                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2721              else
2722                {
2723                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2724                pcre_free_substring(substring);
2725                }
2726              }
2727    
2728          if (getlist)          if (getlist)
2729            {            {
2730            const char **stringlist;            const char **stringlist;
# Line 1698  while (!done) Line 2748  while (!done)
2748    
2749        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2750          {          {
2751          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2752          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            else fprintf(outfile, "Partial match, mark=%s", markptr);
2753            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],          if (use_size_offsets > 1)
2754              bptr + use_offsets[0]);            {
2755              fprintf(outfile, ": ");
2756              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2757                outfile);
2758              }
2759          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2760          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2761          }          }
2762    
2763        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2764        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2765        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2766        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2767        offset values to achieve this. We won't be at the end of the string -  
2768        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2769          "anycrlf". If the previous match was at the end of a line terminated by
2770          CRLF, an advance of one character just passes the \r, whereas we should
2771          prefer the longer newline sequence, as does the code in pcre_exec().
2772          Fudge the offset value to achieve this.
2773    
2774          Otherwise, in the case of UTF-8 matching, the advance must be one
2775          character, not one byte. */
2776    
2777        else        else
2778          {          {
2779          if (g_notempty != 0)          if (g_notempty != 0)
2780            {            {
2781            int onechar = 1;            int onechar = 1;
2782              unsigned int obits = ((real_pcre *)re)->options;
2783            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2784            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2785                {
2786                int d;
2787                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2788                /* Note that these values are always the ASCII ones, even in
2789                EBCDIC environments. CR = 13, NL = 10. */
2790                obits = (d == 13)? PCRE_NEWLINE_CR :
2791                        (d == 10)? PCRE_NEWLINE_LF :
2792                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2793                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2794                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2795                }
2796              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2797                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2798                  &&
2799                  start_offset < len - 1 &&
2800                  bptr[start_offset] == '\r' &&
2801                  bptr[start_offset+1] == '\n')
2802                onechar++;
2803              else if (use_utf8)
2804              {              {
2805              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2806                {                {
# Line 1735  while (!done) Line 2816  while (!done)
2816            {            {
2817            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2818              {              {
2819              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2820                  {
2821                  if (markptr == NULL) fprintf(outfile, "No match\n");
2822                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2823                  }
2824              }              }
2825            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2826            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 1747  while (!done) Line 2832  while (!done)
2832        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2833    
2834        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2835        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2836        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2837        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2838        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2839        character. */        character. */
2840    
2841        g_notempty = 0;        g_notempty = 0;
2842    
2843        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2844          {          {
2845          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2846          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2847          }          }
2848    
2849        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1772  while (!done) Line 2858  while (!done)
2858          len -= use_offsets[1];          len -= use_offsets[1];
2859          }          }
2860        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2861    
2862        NEXT_DATA: continue;
2863      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2864    
2865    CONTINUE:    CONTINUE:
# Line 1782  while (!done) Line 2870  while (!done)
2870    
2871    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
2872    if (extra != NULL) new_free(extra);    if (extra != NULL) new_free(extra);
2873    if (tables != NULL)    if (locale_set)
2874      {      {
2875      new_free((void *)tables);      new_free((void *)tables);
2876      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2877        locale_set = 0;
2878      }      }
2879    }    }
2880    

Legend:
Removed from v.77  
changed lines
  Added in v.541

  ViewVC Help
Powered by ViewVC 1.1.5