/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 512 by ph10, Tue Mar 30 11:11:52 2010 UTC code/branches/pcre16/pcretest.c revision 805 by ph10, Wed Dec 14 16:49:20 2011 UTC
# Line 79  input mode under Windows. */ Line 79  input mode under Windows. */
79  #define fileno _fileno  #define fileno _fileno
80  #endif  #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
92  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 99  appropriately for an application, not fo Line 107  appropriately for an application, not fo
107  #include "pcre.h"  #include "pcre.h"
108  #include "pcre_internal.h"  #include "pcre_internal.h"
109    
110    /* The pcre_printint() function, which prints the internal form of a compiled
111    regex, is held in a separate file so that (a) it can be compiled in either
112    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
113    when that is compiled in debug mode. */
114    
115    #ifdef SUPPORT_PCRE8
116    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
117    #endif
118    #ifdef SUPPORT_PCRE16
119    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
120    #endif
121    
122  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
123  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
124  external symbols to prevent clashes. */  external symbols to prevent clashes. */
125    
126  #define _pcre_ucp_gentype      ucp_gentype  #define _pcre_ucp_gentype      ucp_gentype
127    #define _pcre_ucp_typerange    ucp_typerange
128  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
129  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
130  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 116  external symbols to prevent clashes. */ Line 137  external symbols to prevent clashes. */
137    
138  #include "pcre_tables.c"  #include "pcre_tables.c"
139    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
140  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
141  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
142  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
143  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
144  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
145    
146    #ifdef EBCDIC
147    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
148    #else
149    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
150    #endif
151    
152  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
153    
# Line 181  static int locale_set = 0; Line 200  static int locale_set = 0;
200  static int show_malloc;  static int show_malloc;
201  static int use_utf8;  static int use_utf8;
202  static size_t gotten_store;  static size_t gotten_store;
203    static size_t first_gotten_store = 0;
204    static const unsigned char *last_callout_mark = NULL;
205    
206    static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
207    
208  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
209    
210  static int buffer_size = 50000;  static int buffer_size = 50000;
211  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
212  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
213  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
214    
215    #ifdef SUPPORT_PCRE16
216    static int buffer16_size = 0;
217    static pcre_uint16 *buffer16 = NULL;
218    #endif
219    
220    /* Textual explanations for runtime error codes */
221    
222    static const char *errtexts[] = {
223      NULL,  /* 0 is no error */
224      NULL,  /* NOMATCH is handled specially */
225      "NULL argument passed",
226      "bad option value",
227      "magic number missing",
228      "unknown opcode - pattern overwritten?",
229      "no more memory",
230      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
231      "match limit exceeded",
232      "callout error code",
233      NULL,  /* BADUTF8 is handled specially */
234      "bad UTF-8 offset",
235      NULL,  /* PARTIAL is handled specially */
236      "not used - internal error",
237      "internal error - pattern overwritten?",
238      "bad count value",
239      "item unsupported for DFA matching",
240      "backreference condition or recursion test not supported for DFA matching",
241      "match limit not supported for DFA matching",
242      "workspace size exceeded in DFA matching",
243      "too much recursion for DFA matching",
244      "recursion limit exceeded",
245      "not used - internal error",
246      "invalid combination of newline options",
247      "bad offset value",
248      NULL,  /* SHORTUTF8 is handled specially */
249      "nested recursion at the same subject position",
250      "JIT stack limit reached",
251      "pattern compiled in wrong mode (8-bit/16-bit error)"
252    };
253    
254    
255    /*************************************************
256    *         Alternate character tables             *
257    *************************************************/
258    
259    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
260    using the default tables of the library. However, the T option can be used to
261    select alternate sets of tables, for different kinds of testing. Note that
262    the L (locale) option also adjusts the tables. */
263    
264    /* This is the set of tables distributed as default with PCRE. It recognizes
265    only ASCII characters. */
266    
267    static const unsigned char tables0[] = {
268    
269    /* This table is a lower casing table. */
270    
271        0,  1,  2,  3,  4,  5,  6,  7,
272        8,  9, 10, 11, 12, 13, 14, 15,
273       16, 17, 18, 19, 20, 21, 22, 23,
274       24, 25, 26, 27, 28, 29, 30, 31,
275       32, 33, 34, 35, 36, 37, 38, 39,
276       40, 41, 42, 43, 44, 45, 46, 47,
277       48, 49, 50, 51, 52, 53, 54, 55,
278       56, 57, 58, 59, 60, 61, 62, 63,
279       64, 97, 98, 99,100,101,102,103,
280      104,105,106,107,108,109,110,111,
281      112,113,114,115,116,117,118,119,
282      120,121,122, 91, 92, 93, 94, 95,
283       96, 97, 98, 99,100,101,102,103,
284      104,105,106,107,108,109,110,111,
285      112,113,114,115,116,117,118,119,
286      120,121,122,123,124,125,126,127,
287      128,129,130,131,132,133,134,135,
288      136,137,138,139,140,141,142,143,
289      144,145,146,147,148,149,150,151,
290      152,153,154,155,156,157,158,159,
291      160,161,162,163,164,165,166,167,
292      168,169,170,171,172,173,174,175,
293      176,177,178,179,180,181,182,183,
294      184,185,186,187,188,189,190,191,
295      192,193,194,195,196,197,198,199,
296      200,201,202,203,204,205,206,207,
297      208,209,210,211,212,213,214,215,
298      216,217,218,219,220,221,222,223,
299      224,225,226,227,228,229,230,231,
300      232,233,234,235,236,237,238,239,
301      240,241,242,243,244,245,246,247,
302      248,249,250,251,252,253,254,255,
303    
304    /* This table is a case flipping table. */
305    
306        0,  1,  2,  3,  4,  5,  6,  7,
307        8,  9, 10, 11, 12, 13, 14, 15,
308       16, 17, 18, 19, 20, 21, 22, 23,
309       24, 25, 26, 27, 28, 29, 30, 31,
310       32, 33, 34, 35, 36, 37, 38, 39,
311       40, 41, 42, 43, 44, 45, 46, 47,
312       48, 49, 50, 51, 52, 53, 54, 55,
313       56, 57, 58, 59, 60, 61, 62, 63,
314       64, 97, 98, 99,100,101,102,103,
315      104,105,106,107,108,109,110,111,
316      112,113,114,115,116,117,118,119,
317      120,121,122, 91, 92, 93, 94, 95,
318       96, 65, 66, 67, 68, 69, 70, 71,
319       72, 73, 74, 75, 76, 77, 78, 79,
320       80, 81, 82, 83, 84, 85, 86, 87,
321       88, 89, 90,123,124,125,126,127,
322      128,129,130,131,132,133,134,135,
323      136,137,138,139,140,141,142,143,
324      144,145,146,147,148,149,150,151,
325      152,153,154,155,156,157,158,159,
326      160,161,162,163,164,165,166,167,
327      168,169,170,171,172,173,174,175,
328      176,177,178,179,180,181,182,183,
329      184,185,186,187,188,189,190,191,
330      192,193,194,195,196,197,198,199,
331      200,201,202,203,204,205,206,207,
332      208,209,210,211,212,213,214,215,
333      216,217,218,219,220,221,222,223,
334      224,225,226,227,228,229,230,231,
335      232,233,234,235,236,237,238,239,
336      240,241,242,243,244,245,246,247,
337      248,249,250,251,252,253,254,255,
338    
339    /* This table contains bit maps for various character classes. Each map is 32
340    bytes long and the bits run from the least significant end of each byte. The
341    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
342    graph, print, punct, and cntrl. Other classes are built from combinations. */
343    
344      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
345      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348    
349      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
350      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353    
354      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
355      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358    
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363    
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368    
369      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
370      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373    
374      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
375      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
378    
379      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
380      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
381      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
382      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
383    
384      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
385      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
386      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
387      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
388    
389      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
390      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
391      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
392      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
393    
394    /* This table identifies various classes of character by individual bits:
395      0x01   white space character
396      0x02   letter
397      0x04   decimal digit
398      0x08   hexadecimal digit
399      0x10   alphanumeric or '_'
400      0x80   regular expression metacharacter or binary zero
401    */
402    
403      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
404      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
407      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
408      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
409      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
410      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
411      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
412      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
413      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
414      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
415      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
416      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
417      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
418      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
419      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
420      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
421      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
422      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
423      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
424      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
425      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
426      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
427      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
428      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
429      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
430      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
431      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
432      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
433      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
434      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
435    
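436    /* This is a set of tables that came originally from a Windows user. It seems to
437    be at least an approximation of ISO 8859. In particular, there are characters
438    greater than 128 that are marked as spaces, letters, etc. The four sections
       (lower casing, case flipping, character class bit maps, character types)
       follow one another in the same order and with the same sizes as in tables0,
       but without separating comments. */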
439    
440    static const unsigned char tables1[] = {
441    0,1,2,3,4,5,6,7,
442    8,9,10,11,12,13,14,15,
443    16,17,18,19,20,21,22,23,
444    24,25,26,27,28,29,30,31,
445    32,33,34,35,36,37,38,39,
446    40,41,42,43,44,45,46,47,
447    48,49,50,51,52,53,54,55,
448    56,57,58,59,60,61,62,63,
449    64,97,98,99,100,101,102,103,
450    104,105,106,107,108,109,110,111,
451    112,113,114,115,116,117,118,119,
452    120,121,122,91,92,93,94,95,
453    96,97,98,99,100,101,102,103,
454    104,105,106,107,108,109,110,111,
455    112,113,114,115,116,117,118,119,
456    120,121,122,123,124,125,126,127,
457    128,129,130,131,132,133,134,135,
458    136,137,138,139,140,141,142,143,
459    144,145,146,147,148,149,150,151,
460    152,153,154,155,156,157,158,159,
461    160,161,162,163,164,165,166,167,
462    168,169,170,171,172,173,174,175,
463    176,177,178,179,180,181,182,183,
464    184,185,186,187,188,189,190,191,
465    224,225,226,227,228,229,230,231,
466    232,233,234,235,236,237,238,239,
467    240,241,242,243,244,245,246,215,
468    248,249,250,251,252,253,254,223,
469    224,225,226,227,228,229,230,231,
470    232,233,234,235,236,237,238,239,
471    240,241,242,243,244,245,246,247,
472    248,249,250,251,252,253,254,255,
473    0,1,2,3,4,5,6,7,
474    8,9,10,11,12,13,14,15,
475    16,17,18,19,20,21,22,23,
476    24,25,26,27,28,29,30,31,
477    32,33,34,35,36,37,38,39,
478    40,41,42,43,44,45,46,47,
479    48,49,50,51,52,53,54,55,
480    56,57,58,59,60,61,62,63,
481    64,97,98,99,100,101,102,103,
482    104,105,106,107,108,109,110,111,
483    112,113,114,115,116,117,118,119,
484    120,121,122,91,92,93,94,95,
485    96,65,66,67,68,69,70,71,
486    72,73,74,75,76,77,78,79,
487    80,81,82,83,84,85,86,87,
488    88,89,90,123,124,125,126,127,
489    128,129,130,131,132,133,134,135,
490    136,137,138,139,140,141,142,143,
491    144,145,146,147,148,149,150,151,
492    152,153,154,155,156,157,158,159,
493    160,161,162,163,164,165,166,167,
494    168,169,170,171,172,173,174,175,
495    176,177,178,179,180,181,182,183,
496    184,185,186,187,188,189,190,191,
497    224,225,226,227,228,229,230,231,
498    232,233,234,235,236,237,238,239,
499    240,241,242,243,244,245,246,215,
500    248,249,250,251,252,253,254,223,
501    192,193,194,195,196,197,198,199,
502    200,201,202,203,204,205,206,207,
503    208,209,210,211,212,213,214,247,
504    216,217,218,219,220,221,222,255,
505    0,62,0,0,1,0,0,0,
506    0,0,0,0,0,0,0,0,
507    32,0,0,0,1,0,0,0,
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,255,3,
510    126,0,0,0,126,0,0,0,
511    0,0,0,0,0,0,0,0,
512    0,0,0,0,0,0,0,0,
513    0,0,0,0,0,0,255,3,
514    0,0,0,0,0,0,0,0,
515    0,0,0,0,0,0,12,2,
516    0,0,0,0,0,0,0,0,
517    0,0,0,0,0,0,0,0,
518    254,255,255,7,0,0,0,0,
519    0,0,0,0,0,0,0,0,
520    255,255,127,127,0,0,0,0,
521    0,0,0,0,0,0,0,0,
522    0,0,0,0,254,255,255,7,
523    0,0,0,0,0,4,32,4,
524    0,0,0,128,255,255,127,255,
525    0,0,0,0,0,0,255,3,
526    254,255,255,135,254,255,255,7,
527    0,0,0,0,0,4,44,6,
528    255,255,127,255,255,255,127,255,
529    0,0,0,0,254,255,255,255,
530    255,255,255,255,255,255,255,127,
531    0,0,0,0,254,255,255,255,
532    255,255,255,255,255,255,255,255,
533    0,2,0,0,255,255,255,255,
534    255,255,255,255,255,255,255,127,
535    0,0,0,0,255,255,255,255,
536    255,255,255,255,255,255,255,255,
537    0,0,0,0,254,255,0,252,
538    1,0,0,248,1,0,0,120,
539    0,0,0,0,254,255,255,255,
540    0,0,128,0,0,0,128,0,
541    255,255,255,255,0,0,0,0,
542    0,0,0,0,0,0,0,128,
543    255,255,255,255,0,0,0,0,
544    0,0,0,0,0,0,0,0,
545    128,0,0,0,0,0,0,0,
546    0,1,1,0,1,1,0,0,
547    0,0,0,0,0,0,0,0,
548    0,0,0,0,0,0,0,0,
549    1,0,0,0,128,0,0,0,
550    128,128,128,128,0,0,128,0,
551    28,28,28,28,28,28,28,28,
552    28,28,0,0,0,0,0,128,
553    0,26,26,26,26,26,26,18,
554    18,18,18,18,18,18,18,18,
555    18,18,18,18,18,18,18,18,
556    18,18,18,128,128,0,128,16,
557    0,26,26,26,26,26,26,18,
558    18,18,18,18,18,18,18,18,
559    18,18,18,18,18,18,18,18,
560    18,18,18,128,128,0,0,0,
561    0,0,0,0,0,1,0,0,
562    0,0,0,0,0,0,0,0,
563    0,0,0,0,0,0,0,0,
564    0,0,0,0,0,0,0,0,
565    1,0,0,0,0,0,0,0,
566    0,0,18,0,0,0,0,0,
567    0,0,20,20,0,18,0,0,
568    0,20,18,0,0,0,0,0,
569    18,18,18,18,18,18,18,18,
570    18,18,18,18,18,18,18,18,
571    18,18,18,18,18,18,18,0,
572    18,18,18,18,18,18,18,18,
573    18,18,18,18,18,18,18,18,
574    18,18,18,18,18,18,18,18,
575    18,18,18,18,18,18,18,0,
576    18,18,18,18,18,18,18,18
577    };
578    
579    
580    
581    
582    #ifndef HAVE_STRERROR
583    /*************************************************
584    *     Provide strerror() for non-ANSI libraries  *
585    *************************************************/
586    
587    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
588    in their libraries, but can provide the same facility by this simple
589    alternative function. */
590    
591    extern int   sys_nerr;
592    extern char *sys_errlist[];
593    
594    char *
595    strerror(int n)   /* n is the error number whose message text is wanted */
596    {
       /* Numbers outside 0 .. sys_nerr-1 have no entry in sys_errlist, so a fixed
       fallback text is returned instead of indexing outside the table. */
597    if (n < 0 || n >= sys_nerr) return "unknown error number";
       /* Otherwise hand back the system's own message for this number. */
598    return sys_errlist[n];
599    }
600    #endif /* HAVE_STRERROR */
601    
602    
603    /*************************************************
604    *         JIT memory callback                    *
605    *************************************************/
606    
607    static pcre_jit_stack* jit_callback(void *arg)
608    {
       /* Nothing is allocated or looked up here: the opaque argument is handed back
       unchanged, reinterpreted as a JIT stack pointer. NOTE(review): presumably the
       caller passes the JIT stack itself as the callback argument - confirm at the
       place where this callback is registered, which is not visible here. */
609    return (pcre_jit_stack *)arg;
610    }
611    
612    
613    #ifdef SUPPORT_PCRE16
614    /*************************************************
615    *         Convert a string to 16-bit             *
616    *************************************************/
617    
618    /* The result is always left in buffer16. */
619    
620    static int
621    to16(unsigned char *p, int utf)   /* p: zero-terminated 8-bit string; utf: non-zero requests UTF handling */
622    {
623    pcre_uint16 *pp;
624    int len = (int)strlen((char *)p) + 1;   /* input bytes plus the terminating zero */
625    
       /* buffer16_size is a byte count, and two bytes are reserved for every input
       byte (terminator included). When the current buffer is too small it is thrown
       away and a new one obtained; the old contents are not preserved. */
626    if (buffer16_size < 2*len)
627      {
628      if (buffer16 != NULL) free(buffer16);
629      buffer16_size = 2*len;
630      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
631      if (buffer16 == NULL)
632        {
         /* Running out of memory is fatal: report the requested size and stop. */
633        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
634        exit(1);
635        }
636      }
637    
638    pp = buffer16;
639    
       /* Non-UTF case: each input byte is widened to one 16-bit unit, and a zero
       unit is then appended. */
640    if (!utf)
641      {
642      while (*p != 0) *pp++ = *p++;
643      *pp++ = 0;
644      }
645    
       /* UTF conversion is not implemented at this revision; asking for it prints a
       message on stderr and ends the program with status 1. */
646    else
647      {
648    fprintf(stderr, "pcretest: no support yet for UTF-16\n");
649    exit(1);
650      }
651    
       /* pp has been advanced past the appended zero as well, so the value returned
       is the number of 16-bit units stored, counting that terminating zero. */
652    return pp - buffer16;
653    }
654    #endif
655    
656    
657  /*************************************************  /*************************************************
# Line 214  Returns:       pointer to the start of n Line 677  Returns:       pointer to the start of n
677                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
678  */  */
679    
680  static uschar *  static pcre_uint8 *
681  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
682  {  {
683  uschar *here = start;  pcre_uint8 *here = start;
684    
685  for (;;)  for (;;)
686    {    {
687    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
688    
689    if (rlen > 1000)    if (rlen > 1000)
690      {      {
# Line 251  for (;;) Line 714  for (;;)
714      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
715    
716        {        {
717        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
718        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
719          return (here == start)? NULL : start;          return (here == start)? NULL : start;
720        }        }
# Line 264  for (;;) Line 727  for (;;)
727    else    else
728      {      {
729      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
730      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
731      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
732      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
733    
734      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
735        {        {
# Line 408  Returns:     number of characters placed Line 871  Returns:     number of characters placed
871  #if !defined NOUTF8  #if !defined NOUTF8
872    
873  static int  static int
874  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
875  {  {
876  register int i, j;  register int i, j;
877  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 571  fprintf(outfile, "%.*s", (cb->next_item_ Line 1034  fprintf(outfile, "%.*s", (cb->next_item_
1034  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1035  first_callout = 0;  first_callout = 0;
1036    
1037    if (cb->mark != last_callout_mark)
1038      {
1039      fprintf(outfile, "Latest Mark: %s\n",
1040        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1041      last_callout_mark = cb->mark;
1042      }
1043    
1044  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1045    {    {
1046    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 590  return (cb->callout_number != callout_fa Line 1060  return (cb->callout_number != callout_fa
1060  *            Local malloc functions              *  *            Local malloc functions              *
1061  *************************************************/  *************************************************/
1062    
1063  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1064  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1065    show_malloc variable is set only during matching. */
1066    
1067  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1068  {  {
1069  void *block = malloc(size);  void *block = malloc(size);
1070  gotten_store = size;  gotten_store = size;
1071    if (first_gotten_store == 0) first_gotten_store = size;
1072  if (show_malloc)  if (show_malloc)
1073    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1074  return block;  return block;
# Line 609  if (show_malloc) Line 1081  if (show_malloc)
1081  free(block);  free(block);
1082  }  }
1083    
   
1084  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1085    
1086  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 637  free(block); Line 1108  free(block);
1108  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1109  {  {
1110  int rc;  int rc;
1111  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
1112    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1113  }  }
1114    
# Line 665  return ((value & 0x000000ff) << 24) | Line 1136  return ((value & 0x000000ff) << 24) |
1136  *************************************************/  *************************************************/
1137    
1138  static int  static int
1139  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1140    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1141    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1142  {  {
# Line 725  Returns:    < 0, = 0, or > 0, according Line 1196  Returns:    < 0, = 0, or > 0, according
1196  */  */
1197    
1198  static int  static int
1199  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1200  {  {
1201  while (n--)  while (n--)
1202    {    {
# Line 741  return 0; Line 1212  return 0;
1212  *         Check newline indicator                *  *         Check newline indicator                *
1213  *************************************************/  *************************************************/
1214    
1215  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1216  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1217    
1218  Arguments:  Arguments:
1219    p           points after the leading '<'    p           points after the leading '<'
# Line 753  Returns:      appropriate PCRE_NEWLINE_x Line 1223  Returns:      appropriate PCRE_NEWLINE_x
1223  */  */
1224    
1225  static int  static int
1226  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1227  {  {
1228  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1229  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1230  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1231  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1232  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1233  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1234  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1235  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1236  return 0;  return 0;
1237  }  }
# Line 783  printf("If input is a terminal, readline Line 1253  printf("If input is a terminal, readline
1253  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
1254  #endif  #endif
1255  printf("\nOptions:\n");  printf("\nOptions:\n");
1256    #ifdef SUPPORT_PCRE16
1257    printf("  -16      use 16-bit interface\n");
1258    #endif
1259  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1260  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1261  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 799  printf("  -p       use POSIX interface\n Line 1272  printf("  -p       use POSIX interface\n
1272  #endif  #endif
1273  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1274  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1275  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1276           "  -s+      force each pattern to be studied, using JIT if available\n"
1277         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1278  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1279  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 827  int timeit = 0; Line 1301  int timeit = 0;
1301  int timeitm = 0;  int timeitm = 0;
1302  int showinfo = 0;  int showinfo = 0;
1303  int showstore = 0;  int showstore = 0;
1304    int force_study = -1;
1305    int force_study_options = 0;
1306  int quiet = 0;  int quiet = 0;
1307  int size_offsets = 45;  int size_offsets = 45;
1308  int size_offsets_max;  int size_offsets_max;
# Line 837  int posix = 0; Line 1313  int posix = 0;
1313  int debug = 0;  int debug = 0;
1314  int done = 0;  int done = 0;
1315  int all_use_dfa = 0;  int all_use_dfa = 0;
1316    int use_pcre16 = 0;
1317  int yield = 0;  int yield = 0;
1318  int stack_size;  int stack_size;
1319    
1320    pcre_jit_stack *jit_stack = NULL;
1321    
1322  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1323  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1324    
1325  uschar copynames[1024];  pcre_uchar copynames[1024];
1326  uschar getnames[1024];  pcre_uchar getnames[1024];
1327    
1328  uschar *copynamesptr;  pcre_uchar *copynamesptr;
1329  uschar *getnamesptr;  pcre_uchar *getnamesptr;
1330    
1331  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that valgrind will check their misuse when
1332  when I am debugging. They grow automatically when very long lines are read. */  debugging. They grow automatically when very long lines are read. The 16-bit
1333    buffer (buffer16) is obtained only if needed. */
1334  buffer = (unsigned char *)malloc(buffer_size);  
1335  dbuffer = (unsigned char *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
1336  pbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (pcre_uint8 *)malloc(buffer_size);
1337    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1338    
1339  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1340    
# Line 875  while (argc > 1 && argv[op][0] == '-') Line 1355  while (argc > 1 && argv[op][0] == '-')
1355    {    {
1356    unsigned char *endptr;    unsigned char *endptr;
1357    
1358    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1359      showstore = 1;    else if (strcmp(argv[op], "-m") == 0) showstore = 1;
1360      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1361      else if (strcmp(argv[op], "-s+") == 0)
1362        {
1363        force_study = 1;
1364        force_study_options = PCRE_STUDY_JIT_COMPILE;
1365        }
1366    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1367    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1368    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 910  while (argc > 1 && argv[op][0] == '-') Line 1396  while (argc > 1 && argv[op][0] == '-')
1396        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1397          *endptr == 0))          *endptr == 0))
1398      {      {
1399  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1400      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1401      exit(1);      exit(1);
1402  #else  #else
# Line 937  while (argc > 1 && argv[op][0] == '-') Line 1423  while (argc > 1 && argv[op][0] == '-')
1423      unsigned long int lrc;      unsigned long int lrc;
1424      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1425      printf("Compiled with\n");      printf("Compiled with\n");
1426    
1427    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */
1428    
1429    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1430        printf("  8-bit and 16-bit support\n");
1431      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1432      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1433        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1434        printf("  %sUTF-16 support\n", rc? "" : "No ");
1435    #elif defined SUPPORT_PCRE8
1436        printf("  8-bit support only\n");
1437        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1438        printf("  %sUTF-8 support\n", rc? "" : "No ");
1439    #else
1440        printf("  16-bit support only\n");
1441        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1442        printf("  %sUTF-16 support\n", rc? "" : "No ");
1443    #endif
1444    
1445      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1446      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1447        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1448        if (rc)
1449          printf("  Just-in-time compiler support\n");
1450        else
1451          printf("  No just-in-time compiler support\n");
1452      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1453      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
1454      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
# Line 980  while (argc > 1 && argv[op][0] == '-') Line 1488  while (argc > 1 && argv[op][0] == '-')
1488    argc--;    argc--;
1489    }    }
1490    
1491    /* Select which fullinfo function to use. */
1492    
1493    fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
1494    
1495  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1496    
1497  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
# Line 1018  if (argc > 2) Line 1530  if (argc > 2)
1530    
1531  /* Set alternative malloc function */  /* Set alternative malloc function */
1532    
1533    #ifdef SUPPORT_PCRE8
1534  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1535  pcre_free = new_free;  pcre_free = new_free;
1536  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1537  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1538    #endif
1539    
1540    #ifdef SUPPORT_PCRE16
1541    pcre16_malloc = new_malloc;
1542    pcre16_free = new_free;
1543    pcre16_stack_malloc = stack_malloc;
1544    pcre16_stack_free = stack_free;
1545    #endif
1546    
1547  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1548    
# Line 1046  while (!done) Line 1567  while (!done)
1567    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1568    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1569    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1570      int do_allcaps = 0;
1571    int do_mark = 0;    int do_mark = 0;
1572    int do_study = 0;    int do_study = 0;
1573      int no_force_study = 0;
1574    int do_debug = debug;    int do_debug = debug;
1575    int do_G = 0;    int do_G = 0;
1576    int do_g = 0;    int do_g = 0;
1577    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1578    int do_showrest = 0;    int do_showrest = 0;
1579      int do_showcaprest = 0;
1580    int do_flip = 0;    int do_flip = 0;
1581    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1582    
# Line 1072  while (!done) Line 1596  while (!done)
1596    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1597      {      {
1598      unsigned long int magic, get_options;      unsigned long int magic, get_options;
1599      uschar sbuf[8];      pcre_uint8 sbuf[8];
1600      FILE *f;      FILE *f;
1601    
1602      p++;      p++;
# Line 1095  while (!done) Line 1619  while (!done)
1619        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1620    
1621      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1622      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1623    
1624      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1625    
# Line 1114  while (!done) Line 1638  while (!done)
1638          }          }
1639        }        }
1640    
1641      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1642        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1643    
1644      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
# Line 1122  while (!done) Line 1646  while (!done)
1646      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1647      use_utf8 = (get_options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1648    
1649      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1650    
1651      if (true_study_size != 0)      if (true_study_size != 0)
1652        {        {
# Line 1138  while (!done) Line 1662  while (!done)
1662          {          {
1663          FAIL_READ:          FAIL_READ:
1664          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1665          if (extra != NULL) new_free(extra);          if (extra != NULL) pcre_free_study(extra);
1666          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1667          fclose(f);          fclose(f);
1668          continue;          continue;
# Line 1164  while (!done) Line 1688  while (!done)
1688      }      }
1689    
1690    pp = p;    pp = p;
1691    poffset = p - buffer;    poffset = (int)(p - buffer);
1692    
1693    for(;;)    for(;;)
1694      {      {
# Line 1218  while (!done) Line 1742  while (!done)
1742        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1743        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1744    
1745        case '+': do_showrest = 1; break;        case '+':
1746          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1747          break;
1748    
1749          case '=': do_allcaps = 1; break;
1750        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1751        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
1752        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1236  while (!done) Line 1764  while (!done)
1764        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1765  #endif  #endif
1766    
1767        case 'S': do_study = 1; break;        case 'S':
1768          if (do_study == 0)
1769            {
1770            do_study = 1;
1771            if (*pp == '+')
1772              {
1773              study_options |= PCRE_STUDY_JIT_COMPILE;
1774              pp++;
1775              }
1776            }
1777          else
1778            {
1779            do_study = 0;
1780            no_force_study = 1;
1781            }
1782          break;
1783    
1784        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1785          case 'W': options |= PCRE_UCP; break;
1786        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1787          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1788        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1789        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1790        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1791    
1792          case 'T':
1793          switch (*pp++)
1794            {
1795            case '0': tables = tables0; break;
1796            case '1': tables = tables1; break;
1797    
1798            case '\r':
1799            case '\n':
1800            case ' ':
1801            case 0:
1802            fprintf(outfile, "** Missing table number after /T\n");
1803            goto SKIP_DATA;
1804    
1805            default:
1806            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1807            goto SKIP_DATA;
1808            }
1809          break;
1810    
1811        case 'L':        case 'L':
1812        ppp = pp;        ppp = pp;
1813        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1268  while (!done) Line 1833  while (!done)
1833    
1834        case '<':        case '<':
1835          {          {
1836          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1837            {            {
1838            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
1839            pp += 3;            pp += 3;
# Line 1296  while (!done) Line 1861  while (!done)
1861    
1862    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1863    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1864    local character tables. */    local character tables. Neither does it have 16-bit support. */
1865    
1866  #if !defined NOPOSIX  #if !defined NOPOSIX
1867    if (posix || do_posix)    if (posix || do_posix)
# Line 1309  while (!done) Line 1874  while (!done)
1874      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1875      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1876      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1877        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1878      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1879    
1880        first_gotten_store = 0;
1881      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1882    
1883      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1331  while (!done) Line 1898  while (!done)
1898    
1899      {      {
1900      unsigned long int get_options;      unsigned long int get_options;
1901    
1902        /* In 16-bit mode, convert the input. The space needed for a non-UTF string
1903        is exactly double the 8-bit size. For a UTF-8 string, the size needed for
1904        UTF-16 is no more than double: code points up to 0xffff use 1 to 3 bytes
1905        in UTF-8 and exactly 2 bytes in UTF-16, while higher code points use 4
1906        bytes in UTF-8 and 4 bytes (a surrogate pair) in UTF-16. */
1907    
1908    #ifdef SUPPORT_PCRE16
1909        if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
1910    #endif
1911    
1912        /* Compile many times when timing */
1913    
1914      if (timeit > 0)      if (timeit > 0)
1915        {        {
# Line 1339  while (!done) Line 1918  while (!done)
1918        clock_t start_time = clock();        clock_t start_time = clock();
1919        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
1920          {          {
1921          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  #ifdef SUPPORT_PCRE16
1922            if (use_pcre16)
1923              re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1924            else
1925    #endif
1926              re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1927          if (re != NULL) free(re);          if (re != NULL) free(re);
1928          }          }
1929        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1348  while (!done) Line 1932  while (!done)
1932            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1933        }        }
1934    
1935      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
1936    
1937    #ifdef SUPPORT_PCRE16
1938        if (use_pcre16)
1939          re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1940        else
1941    #endif
1942          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1943    
1944      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1945      if non-interactive. */      if non-interactive. */
# Line 1382  while (!done) Line 1973  while (!done)
1973      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1974      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1975    
1976      /* Print information if required. There are now two info-returning      /* Extract the size for possible writing before possibly flipping it,
1977      functions. The old one has a limited interface and returns only limited      and remember the store that was got. */
1978      data. Check that it agrees with the newer one. */  
1979        true_size = ((real_pcre *)re)->size;
1980        regex_gotten_store = first_gotten_store;
1981    
1982        /* Output code size information if requested */
1983    
1984      if (log_store)      if (log_store)
1985        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
1986          (int)(gotten_store -          (int)(first_gotten_store -
1987                sizeof(real_pcre) -                sizeof(real_pcre) -
1988                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1989    
1990      /* Extract the size for possible writing before possibly flipping it,      /* If -s or /S was present, study the regex to generate additional info to
1991      and remember the store that was got. */      help with the matching, unless the pattern has the SS option, which
1992        suppresses both /S and a command-line -s (used for a few test patterns where studying is
1993      true_size = ((real_pcre *)re)->size;      never sensible). */
     regex_gotten_store = gotten_store;  
   
     /* If /S was present, study the regexp to generate additional info to  
     help with the matching. */  
1994    
1995      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
1996        {        {
1997        if (timeit > 0)        if (timeit > 0)
1998          {          {
# Line 1409  while (!done) Line 2000  while (!done)
2000          clock_t time_taken;          clock_t time_taken;
2001          clock_t start_time = clock();          clock_t start_time = clock();
2002          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2003            extra = pcre_study(re, study_options, &error);            {
2004              if (use_pcre16)
2005                extra = pcre16_study(re, study_options | force_study_options, &error);
2006              else
2007                extra = pcre_study(re, study_options | force_study_options, &error);
2008              }
2009          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2010          if (extra != NULL) free(extra);          if (extra != NULL) pcre_free_study(extra);
2011          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2012            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2013              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2014          }          }
2015        extra = pcre_study(re, study_options, &error);        if (use_pcre16)
2016            extra = pcre16_study(re, study_options | force_study_options, &error);
2017          else
2018            extra = pcre_study(re, study_options | force_study_options, &error);
2019        if (error != NULL)        if (error != NULL)
2020          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2021        else if (extra != NULL)        else if (extra != NULL)
2022            {
2023          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2024            if (log_store)
2025              {
2026              size_t jitsize;
2027              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2028              if (jitsize != 0)
2029                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2030              }
2031            }
2032        }        }
2033    
2034      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1453  while (!done) Line 2061  while (!done)
2061          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2062        rre->top_backref =        rre->top_backref =
2063          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2064        rre->first_byte =        rre->first_char =
2065          (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2066        rre->req_byte =        rre->req_char =
2067          (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));          (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2068        rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,        rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2069          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
2070        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
# Line 1473  while (!done) Line 2081  while (!done)
2081          }          }
2082        }        }
2083    
2084      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
2085    
2086      SHOW_INFO:      SHOW_INFO:
2087    
2088      if (do_debug)      if (do_debug)
2089        {        {
2090        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2091        pcre_printint(re, outfile, debug_lengths);        if (use_pcre16)
2092            pcre16_printint(re, outfile, debug_lengths);
2093          else
2094            pcre_printint(re, outfile, debug_lengths);
2095        }        }
2096    
2097      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1494  while (!done) Line 2105  while (!done)
2105        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2106          hascrorlf;          hascrorlf;
2107        int nameentrysize, namecount;        int nameentrysize, namecount;
2108        const uschar *nametable;        const pcre_uchar *nametable;
2109    
2110        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2111        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1508  while (!done) Line 2119  while (!done)
2119        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2120        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2121    
2122          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2123          that it gives the same results as the new function. */
2124    
2125  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2126        old_count = pcre_info(re, &old_options, &old_first_char);        if (!use_pcre16)
2127        if (count < 0) fprintf(outfile,          {
2128          "Error %d from pcre_info()\n", count);          old_count = pcre_info(re, &old_options, &old_first_char);
2129        else          if (count < 0) fprintf(outfile,
2130          {            "Error %d from pcre_info()\n", count);
2131          if (old_count != count) fprintf(outfile,          else
2132            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,            {
2133              old_count);            if (old_count != count) fprintf(outfile,
2134                "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2135          if (old_first_char != first_char) fprintf(outfile,                old_count);
2136            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
2137              first_char, old_first_char);            if (old_first_char != first_char) fprintf(outfile,
2138                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2139          if (old_options != (int)get_options) fprintf(outfile,                first_char, old_first_char);
2140            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
2141              get_options, old_options);            if (old_options != (int)get_options) fprintf(outfile,
2142          }              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2143                  get_options, old_options);
2144              }
2145            }
2146  #endif  #endif
2147    
2148        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
# Line 1555  while (!done) Line 2172  while (!done)
2172        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2173    
2174        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2175          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2176            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2177            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2178            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1569  while (!done) Line 2186  while (!done)
2186            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2187            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2188            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2189              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2190            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2191              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2192            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2193    
2194        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1610  while (!done) Line 2229  while (!done)
2229          }          }
2230        else        else
2231          {          {
2232          int ch = first_char & 255;          const char *caseless =
2233          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2234            "" : " (caseless)";            "" : " (caseless)";
2235          if (PRINTHEX(ch))  
2236            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(first_char))
2237              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2238          else          else
2239            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2240          }          }
2241    
2242        if (need_char < 0)        if (need_char < 0)
# Line 1625  while (!done) Line 2245  while (!done)
2245          }          }
2246        else        else
2247          {          {
2248          int ch = need_char & 255;          const char *caseless =
2249          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2250            "" : " (caseless)";            "" : " (caseless)";
2251          if (PRINTHEX(ch))  
2252            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(need_char))
2253              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2254          else          else
2255            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2256          }          }
2257    
2258        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2259        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2260        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2261        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2262          information unless -i or -d was also present. This means that, except
2263          when auto-callouts are involved, the output from runs with and without
2264          -s should be identical. */
2265    
2266        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2267          {          {
2268          if (extra == NULL)          if (extra == NULL)
2269            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2270          else          else
2271            {            {
2272            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2273            int minlength;            int minlength;
2274    
2275            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
# Line 1683  while (!done) Line 2307  while (!done)
2307              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2308              }              }
2309            }            }
2310    
2311            /* Show this only if the JIT was set by /S, not by -s. */
2312    
2313            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2314              {
2315              int jit;
2316              new_info(re, extra, PCRE_INFO_JIT, &jit);
2317              if (jit)
2318                fprintf(outfile, "JIT study was successful\n");
2319              else
2320    #ifdef SUPPORT_JIT
2321                fprintf(outfile, "JIT study was not successful\n");
2322    #else
2323                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2324    #endif
2325              }
2326          }          }
2327        }        }
2328    
# Line 1699  while (!done) Line 2339  while (!done)
2339          }          }
2340        else        else
2341          {          {
2342          uschar sbuf[8];          pcre_uint8 sbuf[8];
2343          sbuf[0] = (uschar)((true_size >> 24) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2344          sbuf[1] = (uschar)((true_size >> 16) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2345          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2346          sbuf[3] = (uschar)((true_size) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
2347    
2348          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2349          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2350          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2351          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2352    
2353          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2354              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1717  while (!done) Line 2357  while (!done)
2357            }            }
2358          else          else
2359            {            {
2360            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2361    
2362              /* If there is study data, write it. */
2363    
2364            if (extra != NULL)            if (extra != NULL)
2365              {              {
2366              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1727  while (!done) Line 2370  while (!done)
2370                  strerror(errno));                  strerror(errno));
2371                }                }
2372              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2373              }              }
2374            }            }
2375          fclose(f);          fclose(f);
2376          }          }
2377    
2378        new_free(re);        new_free(re);
2379        if (extra != NULL) new_free(extra);        if (extra != NULL) pcre_free_study(extra);
2380        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2381            {
2382            new_free((void *)tables);
2383            setlocale(LC_CTYPE, "C");
2384            locale_set = 0;
2385            }
2386        continue;  /* With next regex */        continue;  /* With next regex */
2387        }        }
2388      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1744  while (!done) Line 2391  while (!done)
2391    
2392    for (;;)    for (;;)
2393      {      {
2394      uschar *q;      pcre_uint8 *q;
2395      uschar *bptr;      pcre_uint8 *bptr;
2396      int *use_offsets = offsets;      int *use_offsets = offsets;
2397      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2398      int callout_data = 0;      int callout_data = 0;
# Line 1757  while (!done) Line 2404  while (!done)
2404      int getlist = 0;      int getlist = 0;
2405      int gmatched = 0;      int gmatched = 0;
2406      int start_offset = 0;      int start_offset = 0;
2407        int start_offset_sign = 1;
2408      int g_notempty = 0;      int g_notempty = 0;
2409      int use_dfa = 0;      int use_dfa = 0;
2410    
# Line 1770  while (!done) Line 2418  while (!done)
2418    
2419      pcre_callout = callout;      pcre_callout = callout;
2420      first_callout = 1;      first_callout = 1;
2421        last_callout_mark = NULL;
2422      callout_extra = 0;      callout_extra = 0;
2423      callout_count = 0;      callout_count = 0;
2424      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1784  while (!done) Line 2433  while (!done)
2433        {        {
2434        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2435          {          {
2436          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2437              {
2438              fprintf(outfile, "\n");
2439              break;
2440              }
2441          done = 1;          done = 1;
2442          goto CONTINUE;          goto CONTINUE;
2443          }          }
# Line 1844  while (!done) Line 2497  while (!done)
2497            {            {
2498            unsigned char *pt = p;            unsigned char *pt = p;
2499            c = 0;            c = 0;
2500            while (isxdigit(*(++pt)))  
2501              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2502              when isxdigit() is a macro that refers to its argument more than
2503              once. This is banned by the C Standard, but apparently happens in at
2504              least one MacOS environment. */
2505    
2506              for (pt++; isxdigit(*pt); pt++)
2507                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2508            if (*pt == '}')            if (*pt == '}')
2509              {              {
2510              unsigned char buff8[8];              unsigned char buff8[8];
# Line 1875  while (!done) Line 2534  while (!done)
2534          c = 0;          c = 0;
2535          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2536            {            {
2537            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2538            p++;            p++;
2539            }            }
2540          break;          break;
# Line 1885  while (!done) Line 2544  while (!done)
2544          continue;          continue;
2545    
2546          case '>':          case '>':
2547            if (*p == '-')
2548              {
2549              start_offset_sign = -1;
2550              p++;
2551              }
2552          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2553            start_offset *= start_offset_sign;
2554          continue;          continue;
2555    
2556          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1904  while (!done) Line 2569  while (!done)
2569            }            }
2570          else if (isalnum(*p))          else if (isalnum(*p))
2571            {            {
2572            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
2573            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2574            *npp++ = 0;            *npp++ = 0;
2575            *npp = 0;            *npp = 0;
# Line 1958  while (!done) Line 2623  while (!done)
2623  #endif  #endif
2624            use_dfa = 1;            use_dfa = 1;
2625          continue;          continue;
2626    #endif
2627    
2628    #if !defined NODFA
2629          case 'F':          case 'F':
2630          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2631          continue;          continue;
# Line 1972  while (!done) Line 2639  while (!done)
2639            }            }
2640          else if (isalnum(*p))          else if (isalnum(*p))
2641            {            {
2642            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
2643            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2644            *npp++ = 0;            *npp++ = 0;
2645            *npp = 0;            *npp = 0;
# Line 1983  while (!done) Line 2650  while (!done)
2650            }            }
2651          continue;          continue;
2652    
2653            case 'J':
2654            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2655            if (extra != NULL
2656                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2657                && extra->executable_jit != NULL)
2658              {
2659              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2660              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2661              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2662              }
2663            continue;
2664    
2665          case 'L':          case 'L':
2666          getlist = 1;          getlist = 1;
2667          continue;          continue;
# Line 2078  while (!done) Line 2757  while (!done)
2757        *q++ = c;        *q++ = c;
2758        }        }
2759      *q = 0;      *q = 0;
2760      len = q - dbuffer;      len = (int)(q - dbuffer);
2761    
2762      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
2763      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
# Line 2141  while (!done) Line 2820  while (!done)
2820              (void)pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2821                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2822              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2823              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2824                {                {
2825                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2826                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2827                  outfile);                  outfile);
2828                fprintf(outfile, "\n");                fprintf(outfile, "\n");
# Line 2168  while (!done) Line 2847  while (!done)
2847          register int i;          register int i;
2848          clock_t time_taken;          clock_t time_taken;
2849          clock_t start_time = clock();          clock_t start_time = clock();
2850    
2851    #ifdef SUPPORT_PCRE16
2852            if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
2853    #endif
2854    
2855    
2856  #if !defined NODFA  #if !defined NODFA
2857          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
# Line 2193  while (!done) Line 2877  while (!done)
2877    
2878        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2879        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
2880        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
2881          running of pcre_exec(), so disable the JIT optimization. This makes it
2882          possible to run the same set of tests with and without JIT externally
2883          requested. */
2884    
2885        if (find_match_limit)        if (find_match_limit)
2886          {          {
# Line 2202  while (!done) Line 2889  while (!done)
2889            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2890            extra->flags = 0;            extra->flags = 0;
2891            }            }
2892            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2893    
2894          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
2895            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2250  while (!done) Line 2938  while (!done)
2938    
2939        else        else
2940          {          {
2941          count = pcre_exec(re, extra, (char *)bptr, len,          if (use_pcre16)
2942            start_offset, options | g_notempty, use_offsets, use_size_offsets);            count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
2943                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2944            else
2945              count = pcre_exec(re, extra, (char *)bptr, len,
2946                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2947          if (count == 0)          if (count == 0)
2948            {            {
2949            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2285  while (!done) Line 2977  while (!done)
2977              }              }
2978            }            }
2979    
2980            /* do_allcaps requests showing of all captures in the pattern, to check
2981            unset ones at the end. */
2982    
2983            if (do_allcaps)
2984              {
2985              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2986              count++;   /* Allow for full match */
2987              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2988              }
2989    
2990            /* Output the captured substrings */
2991    
2992          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2993            {            {
2994            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2995                {
2996                if (use_offsets[i] != -1)
2997                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2998                    use_offsets[i], i);
2999                if (use_offsets[i+1] != -1)
3000                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3001                    use_offsets[i+1], i+1);
3002              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3003                }
3004            else            else
3005              {              {
3006              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3007              (void)pchars(bptr + use_offsets[i],              (void)pchars(bptr + use_offsets[i],
3008                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3009              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3010              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3011                {                {
3012                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3013                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
3014                  fprintf(outfile, " 0+ ");                  outfile);
3015                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3016                }                }
3017              }              }
3018            }            }
# Line 2383  while (!done) Line 3092  while (!done)
3092                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3093              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3094                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3095              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3096              }              }
3097            }            }
# Line 2410  while (!done) Line 3118  while (!done)
3118        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
3119        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
3120    
3121        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
3122        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
3123        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
3124        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
3125        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
3126          newline setting in the pattern; if none was set, use pcre_config() to
3127          find the default.
3128    
3129        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
3130        character, not one byte. */        character, not one byte. */
# Line 2439  while (!done) Line 3149  while (!done)
3149                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
3150              }              }
3151            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3152                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3153                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3154                &&                &&
3155                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2449  while (!done) Line 3160  while (!done)
3160              {              {
3161              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3162                {                {
3163                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3164                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3165                }                }
3166              }              }
3167            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3168            }            }
3169          else          else
3170            {            {
3171            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3172              {              {
3173                case PCRE_ERROR_NOMATCH:
3174              if (gmatched == 0)              if (gmatched == 0)
3175                {                {
3176                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL) fprintf(outfile, "No match\n");
3177                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  else fprintf(outfile, "No match, mark = %s\n", markptr);
3178                }                }
3179                break;
3180    
3181                case PCRE_ERROR_BADUTF8:
3182                case PCRE_ERROR_SHORTUTF8:
3183                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3184                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3185                if (use_size_offsets >= 2)
3186                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3187                    use_offsets[1]);
3188                fprintf(outfile, "\n");
3189                break;
3190    
3191                default:
3192                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3193                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3194                else
3195                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3196                break;
3197              }              }
3198            else fprintf(outfile, "Error %d\n", count);  
3199            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3200            }            }
3201          }          }
# Line 2514  while (!done) Line 3242  while (!done)
3242  #endif  #endif
3243    
3244    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3245    if (extra != NULL) new_free(extra);    if (extra != NULL) pcre_free_study(extra);
3246    if (tables != NULL)    if (locale_set)
3247      {      {
3248      new_free((void *)tables);      new_free((void *)tables);
3249      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3250      locale_set = 0;      locale_set = 0;
3251      }      }
3252      if (jit_stack != NULL)
3253        {
3254        pcre_jit_stack_free(jit_stack);
3255        jit_stack = NULL;
3256        }
3257    }    }
3258    
3259  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 2535  free(dbuffer); Line 3268  free(dbuffer);
3268  free(pbuffer);  free(pbuffer);
3269  free(offsets);  free(offsets);
3270    
3271    #ifdef SUPPORT_PCRE16
3272    if (buffer16 != NULL) free(buffer16);
3273    #endif
3274    
3275  return yield;  return yield;
3276  }  }
3277    

Legend:
Removed from v.512  
changed lines
  Added in v.805

  ViewVC Help
Powered by ViewVC 1.1.5