/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log | View Patch

code/trunk/pcretest.c revision 389 by ph10, Sun Mar 15 18:24:05 2009 UTC code/branches/pcre16/pcretest.c revision 801 by ph10, Mon Dec 12 16:23:37 2011 UTC
# Line 71  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74  #define isatty _isatty         /* This is what Windows calls them, I'm told */  #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79  #define fileno _fileno  #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
# Line 99  to keep two copies, we include the sourc Line 112  to keep two copies, we include the sourc
112  external symbols to prevent clashes. */  external symbols to prevent clashes. */
113    
114  #define _pcre_ucp_gentype      ucp_gentype  #define _pcre_ucp_gentype      ucp_gentype
115    #define _pcre_ucp_typerange    ucp_typerange
116  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
117  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
118  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 113  external symbols to prevent clashes. */ Line 127  external symbols to prevent clashes. */
127    
128  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
129  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
130  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
131    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We use it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
132    
133    #define COMPILING_PCRETEST
134  #include "pcre_printint.src"  #include "pcre_printint.src"
135    
136  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
137    output character as-is or as a hex value when showing compiled patterns, is
138    contained in the printint.src file. We use it here also, in cases when the
139    locale has not been explicitly changed, so as to get consistent output from
140    systems that differ in their output from isprint() even in the "C" locale. */
141    
142    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
143    
144  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
145  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 175  static int locale_set = 0; Line 190  static int locale_set = 0;
190  static int show_malloc;  static int show_malloc;
191  static int use_utf8;  static int use_utf8;
192  static size_t gotten_store;  static size_t gotten_store;
193    static size_t first_gotten_store = 0;
194    static const unsigned char *last_callout_mark = NULL;
195    
196  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
197    
198  static int buffer_size = 50000;  static int buffer_size = 50000;
199  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
200  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
201  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
202    
203    /* Textual explanations for runtime error codes */
204    
205    static const char *errtexts[] = {
206      NULL,  /* 0 is no error */
207      NULL,  /* NOMATCH is handled specially */
208      "NULL argument passed",
209      "bad option value",
210      "magic number missing",
211      "unknown opcode - pattern overwritten?",
212      "no more memory",
213      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
214      "match limit exceeded",
215      "callout error code",
216      NULL,  /* BADUTF8 is handled specially */
217      "bad UTF-8 offset",
218      NULL,  /* PARTIAL is handled specially */
219      "not used - internal error",
220      "internal error - pattern overwritten?",
221      "bad count value",
222      "item unsupported for DFA matching",
223      "backreference condition or recursion test not supported for DFA matching",
224      "match limit not supported for DFA matching",
225      "workspace size exceeded in DFA matching",
226      "too much recursion for DFA matching",
227      "recursion limit exceeded",
228      "not used - internal error",
229      "invalid combination of newline options",
230      "bad offset value",
231      NULL,  /* SHORTUTF8 is handled specially */
232      "nested recursion at the same subject position",
233      "JIT stack limit reached"
234    };
235    
236    
237    /*************************************************
238    *         Alternate character tables             *
239    *************************************************/
240    
241    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242    using the default tables of the library. However, the T option can be used to
243    select alternate sets of tables, for different kinds of testing. Note also that
244    the L (locale) option also adjusts the tables. */
245    
246    /* This is the set of tables distributed as default with PCRE. It recognizes
247    only ASCII characters. */
248    
249    static const unsigned char tables0[] = {
250    
251    /* This table is a lower casing table. */
252    
253        0,  1,  2,  3,  4,  5,  6,  7,
254        8,  9, 10, 11, 12, 13, 14, 15,
255       16, 17, 18, 19, 20, 21, 22, 23,
256       24, 25, 26, 27, 28, 29, 30, 31,
257       32, 33, 34, 35, 36, 37, 38, 39,
258       40, 41, 42, 43, 44, 45, 46, 47,
259       48, 49, 50, 51, 52, 53, 54, 55,
260       56, 57, 58, 59, 60, 61, 62, 63,
261       64, 97, 98, 99,100,101,102,103,
262      104,105,106,107,108,109,110,111,
263      112,113,114,115,116,117,118,119,
264      120,121,122, 91, 92, 93, 94, 95,
265       96, 97, 98, 99,100,101,102,103,
266      104,105,106,107,108,109,110,111,
267      112,113,114,115,116,117,118,119,
268      120,121,122,123,124,125,126,127,
269      128,129,130,131,132,133,134,135,
270      136,137,138,139,140,141,142,143,
271      144,145,146,147,148,149,150,151,
272      152,153,154,155,156,157,158,159,
273      160,161,162,163,164,165,166,167,
274      168,169,170,171,172,173,174,175,
275      176,177,178,179,180,181,182,183,
276      184,185,186,187,188,189,190,191,
277      192,193,194,195,196,197,198,199,
278      200,201,202,203,204,205,206,207,
279      208,209,210,211,212,213,214,215,
280      216,217,218,219,220,221,222,223,
281      224,225,226,227,228,229,230,231,
282      232,233,234,235,236,237,238,239,
283      240,241,242,243,244,245,246,247,
284      248,249,250,251,252,253,254,255,
285    
286    /* This table is a case flipping table. */
287    
288        0,  1,  2,  3,  4,  5,  6,  7,
289        8,  9, 10, 11, 12, 13, 14, 15,
290       16, 17, 18, 19, 20, 21, 22, 23,
291       24, 25, 26, 27, 28, 29, 30, 31,
292       32, 33, 34, 35, 36, 37, 38, 39,
293       40, 41, 42, 43, 44, 45, 46, 47,
294       48, 49, 50, 51, 52, 53, 54, 55,
295       56, 57, 58, 59, 60, 61, 62, 63,
296       64, 97, 98, 99,100,101,102,103,
297      104,105,106,107,108,109,110,111,
298      112,113,114,115,116,117,118,119,
299      120,121,122, 91, 92, 93, 94, 95,
300       96, 65, 66, 67, 68, 69, 70, 71,
301       72, 73, 74, 75, 76, 77, 78, 79,
302       80, 81, 82, 83, 84, 85, 86, 87,
303       88, 89, 90,123,124,125,126,127,
304      128,129,130,131,132,133,134,135,
305      136,137,138,139,140,141,142,143,
306      144,145,146,147,148,149,150,151,
307      152,153,154,155,156,157,158,159,
308      160,161,162,163,164,165,166,167,
309      168,169,170,171,172,173,174,175,
310      176,177,178,179,180,181,182,183,
311      184,185,186,187,188,189,190,191,
312      192,193,194,195,196,197,198,199,
313      200,201,202,203,204,205,206,207,
314      208,209,210,211,212,213,214,215,
315      216,217,218,219,220,221,222,223,
316      224,225,226,227,228,229,230,231,
317      232,233,234,235,236,237,238,239,
318      240,241,242,243,244,245,246,247,
319      248,249,250,251,252,253,254,255,
320    
321    /* This table contains bit maps for various character classes. Each map is 32
322    bytes long and the bits run from the least significant end of each byte. The
323    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
324    graph, print, punct, and cntrl. Other classes are built from combinations. */
325    
326      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
327      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
331      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
333      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
336      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
337      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
341      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
346      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
348      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
351      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
352      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
353      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
356      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
357      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
361      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
362      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
366      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
367      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375    
376    /* This table identifies various classes of character by individual bits:
377      0x01   white space character
378      0x02   letter
379      0x04   decimal digit
380      0x08   hexadecimal digit
381      0x10   alphanumeric or '_'
382      0x80   regular expression metacharacter or binary zero
383    */
384    
385      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
386      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
387      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
388      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
389      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
390      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
391      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
392      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
393      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
394      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
395      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
396      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
397      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
398      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
399      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
400      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
401      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
402      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
403      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
404      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
407      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
408      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
409      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
410      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
411      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
412      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
413      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
414      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
415      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
416      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
417    
418    /* This is a set of tables that came originally from a Windows user. It seems to
419    be at least an approximation of ISO 8859. In particular, there are characters
420    greater than 128 that are marked as spaces, letters, etc. */
421    
422    static const unsigned char tables1[] = {
423    0,1,2,3,4,5,6,7,
424    8,9,10,11,12,13,14,15,
425    16,17,18,19,20,21,22,23,
426    24,25,26,27,28,29,30,31,
427    32,33,34,35,36,37,38,39,
428    40,41,42,43,44,45,46,47,
429    48,49,50,51,52,53,54,55,
430    56,57,58,59,60,61,62,63,
431    64,97,98,99,100,101,102,103,
432    104,105,106,107,108,109,110,111,
433    112,113,114,115,116,117,118,119,
434    120,121,122,91,92,93,94,95,
435    96,97,98,99,100,101,102,103,
436    104,105,106,107,108,109,110,111,
437    112,113,114,115,116,117,118,119,
438    120,121,122,123,124,125,126,127,
439    128,129,130,131,132,133,134,135,
440    136,137,138,139,140,141,142,143,
441    144,145,146,147,148,149,150,151,
442    152,153,154,155,156,157,158,159,
443    160,161,162,163,164,165,166,167,
444    168,169,170,171,172,173,174,175,
445    176,177,178,179,180,181,182,183,
446    184,185,186,187,188,189,190,191,
447    224,225,226,227,228,229,230,231,
448    232,233,234,235,236,237,238,239,
449    240,241,242,243,244,245,246,215,
450    248,249,250,251,252,253,254,223,
451    224,225,226,227,228,229,230,231,
452    232,233,234,235,236,237,238,239,
453    240,241,242,243,244,245,246,247,
454    248,249,250,251,252,253,254,255,
455    0,1,2,3,4,5,6,7,
456    8,9,10,11,12,13,14,15,
457    16,17,18,19,20,21,22,23,
458    24,25,26,27,28,29,30,31,
459    32,33,34,35,36,37,38,39,
460    40,41,42,43,44,45,46,47,
461    48,49,50,51,52,53,54,55,
462    56,57,58,59,60,61,62,63,
463    64,97,98,99,100,101,102,103,
464    104,105,106,107,108,109,110,111,
465    112,113,114,115,116,117,118,119,
466    120,121,122,91,92,93,94,95,
467    96,65,66,67,68,69,70,71,
468    72,73,74,75,76,77,78,79,
469    80,81,82,83,84,85,86,87,
470    88,89,90,123,124,125,126,127,
471    128,129,130,131,132,133,134,135,
472    136,137,138,139,140,141,142,143,
473    144,145,146,147,148,149,150,151,
474    152,153,154,155,156,157,158,159,
475    160,161,162,163,164,165,166,167,
476    168,169,170,171,172,173,174,175,
477    176,177,178,179,180,181,182,183,
478    184,185,186,187,188,189,190,191,
479    224,225,226,227,228,229,230,231,
480    232,233,234,235,236,237,238,239,
481    240,241,242,243,244,245,246,215,
482    248,249,250,251,252,253,254,223,
483    192,193,194,195,196,197,198,199,
484    200,201,202,203,204,205,206,207,
485    208,209,210,211,212,213,214,247,
486    216,217,218,219,220,221,222,255,
487    0,62,0,0,1,0,0,0,
488    0,0,0,0,0,0,0,0,
489    32,0,0,0,1,0,0,0,
490    0,0,0,0,0,0,0,0,
491    0,0,0,0,0,0,255,3,
492    126,0,0,0,126,0,0,0,
493    0,0,0,0,0,0,0,0,
494    0,0,0,0,0,0,0,0,
495    0,0,0,0,0,0,255,3,
496    0,0,0,0,0,0,0,0,
497    0,0,0,0,0,0,12,2,
498    0,0,0,0,0,0,0,0,
499    0,0,0,0,0,0,0,0,
500    254,255,255,7,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    255,255,127,127,0,0,0,0,
503    0,0,0,0,0,0,0,0,
504    0,0,0,0,254,255,255,7,
505    0,0,0,0,0,4,32,4,
506    0,0,0,128,255,255,127,255,
507    0,0,0,0,0,0,255,3,
508    254,255,255,135,254,255,255,7,
509    0,0,0,0,0,4,44,6,
510    255,255,127,255,255,255,127,255,
511    0,0,0,0,254,255,255,255,
512    255,255,255,255,255,255,255,127,
513    0,0,0,0,254,255,255,255,
514    255,255,255,255,255,255,255,255,
515    0,2,0,0,255,255,255,255,
516    255,255,255,255,255,255,255,127,
517    0,0,0,0,255,255,255,255,
518    255,255,255,255,255,255,255,255,
519    0,0,0,0,254,255,0,252,
520    1,0,0,248,1,0,0,120,
521    0,0,0,0,254,255,255,255,
522    0,0,128,0,0,0,128,0,
523    255,255,255,255,0,0,0,0,
524    0,0,0,0,0,0,0,128,
525    255,255,255,255,0,0,0,0,
526    0,0,0,0,0,0,0,0,
527    128,0,0,0,0,0,0,0,
528    0,1,1,0,1,1,0,0,
529    0,0,0,0,0,0,0,0,
530    0,0,0,0,0,0,0,0,
531    1,0,0,0,128,0,0,0,
532    128,128,128,128,0,0,128,0,
533    28,28,28,28,28,28,28,28,
534    28,28,0,0,0,0,0,128,
535    0,26,26,26,26,26,26,18,
536    18,18,18,18,18,18,18,18,
537    18,18,18,18,18,18,18,18,
538    18,18,18,128,128,0,128,16,
539    0,26,26,26,26,26,26,18,
540    18,18,18,18,18,18,18,18,
541    18,18,18,18,18,18,18,18,
542    18,18,18,128,128,0,0,0,
543    0,0,0,0,0,1,0,0,
544    0,0,0,0,0,0,0,0,
545    0,0,0,0,0,0,0,0,
546    0,0,0,0,0,0,0,0,
547    1,0,0,0,0,0,0,0,
548    0,0,18,0,0,0,0,0,
549    0,0,20,20,0,18,0,0,
550    0,20,18,0,0,0,0,0,
551    18,18,18,18,18,18,18,18,
552    18,18,18,18,18,18,18,18,
553    18,18,18,18,18,18,18,0,
554    18,18,18,18,18,18,18,18,
555    18,18,18,18,18,18,18,18,
556    18,18,18,18,18,18,18,18,
557    18,18,18,18,18,18,18,0,
558    18,18,18,18,18,18,18,18
559    };
560    
561    
562    
563    
564    #ifndef HAVE_STRERROR
565    /*************************************************
566    *     Provide strerror() for non-ANSI libraries  *
567    *************************************************/
568    
569    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
570    in their libraries, but can provide the same facility by this simple
571    alternative function. */
572    
573    extern int   sys_nerr;
574    extern char *sys_errlist[];
575    
576    char *
577    strerror(int n)
578    {
579    if (n < 0 || n >= sys_nerr) return "unknown error number";
580    return sys_errlist[n];
581    }
582    #endif /* HAVE_STRERROR */
583    
584    
585    /*************************************************
586    *         JIT memory callback                    *
587    *************************************************/
588    
589    static pcre_jit_stack* jit_callback(void *arg)
590    {
591    return (pcre_jit_stack *)arg;
592    }
593    
594    
595  /*************************************************  /*************************************************
596  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 208  Returns:       pointer to the start of n Line 615  Returns:       pointer to the start of n
615                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
616  */  */
617    
618  static uschar *  static pcre_uint8 *
619  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
620  {  {
621  uschar *here = start;  pcre_uint8 *here = start;
622    
623  for (;;)  for (;;)
624    {    {
625    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
626    
627    if (rlen > 1000)    if (rlen > 1000)
628      {      {
# Line 245  for (;;) Line 652  for (;;)
652      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
653    
654        {        {
655        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
656        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
657          return (here == start)? NULL : start;          return (here == start)? NULL : start;
658        }        }
# Line 258  for (;;) Line 665  for (;;)
665    else    else
666      {      {
667      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
668      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
669      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671    
672      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673        {        {
# Line 402  Returns:     number of characters placed Line 809  Returns:     number of characters placed
809  #if !defined NOUTF8  #if !defined NOUTF8
810    
811  static int  static int
812  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
813  {  {
814  register int i, j;  register int i, j;
815  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 565  fprintf(outfile, "%.*s", (cb->next_item_ Line 972  fprintf(outfile, "%.*s", (cb->next_item_
972  fprintf(outfile, "\n");  fprintf(outfile, "\n");
973  first_callout = 0;  first_callout = 0;
974    
975    if (cb->mark != last_callout_mark)
976      {
977      fprintf(outfile, "Latest Mark: %s\n",
978        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979      last_callout_mark = cb->mark;
980      }
981    
982  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
983    {    {
984    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 584  return (cb->callout_number != callout_fa Line 998  return (cb->callout_number != callout_fa
998  *            Local malloc functions              *  *            Local malloc functions              *
999  *************************************************/  *************************************************/
1000    
1001  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1002  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1003    show_malloc variable is set only during matching. */
1004    
1005  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1006  {  {
1007  void *block = malloc(size);  void *block = malloc(size);
1008  gotten_store = size;  gotten_store = size;
1009    if (first_gotten_store == 0) first_gotten_store = size;
1010  if (show_malloc)  if (show_malloc)
1011    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1012  return block;  return block;
# Line 603  if (show_malloc) Line 1019  if (show_malloc)
1019  free(block);  free(block);
1020  }  }
1021    
   
1022  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1023    
1024  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 659  return ((value & 0x000000ff) << 24) | Line 1074  return ((value & 0x000000ff) << 24) |
1074  *************************************************/  *************************************************/
1075    
1076  static int  static int
1077  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1078    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1079    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1080  {  {
# Line 719  Returns:    < 0, = 0, or > 0, according Line 1134  Returns:    < 0, = 0, or > 0, according
1134  */  */
1135    
1136  static int  static int
1137  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1138  {  {
1139  while (n--)  while (n--)
1140    {    {
# Line 735  return 0; Line 1150  return 0;
1150  *         Check newline indicator                *  *         Check newline indicator                *
1151  *************************************************/  *************************************************/
1152    
1153  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1154  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1155    
1156  Arguments:  Arguments:
1157    p           points after the leading '<'    p           points after the leading '<'
# Line 747  Returns:      appropriate PCRE_NEWLINE_x Line 1161  Returns:      appropriate PCRE_NEWLINE_x
1161  */  */
1162    
1163  static int  static int
1164  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1165  {  {
1166  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1167  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1168  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1169  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1170  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1171  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1172  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1173  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1174  return 0;  return 0;
1175  }  }
# Line 793  printf("  -p       use POSIX interface\n Line 1207  printf("  -p       use POSIX interface\n
1207  #endif  #endif
1208  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1209  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1210  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1211           "  -s+      force each pattern to be studied, using JIT if available\n"
1212         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1213  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1214  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 821  int timeit = 0; Line 1236  int timeit = 0;
1236  int timeitm = 0;  int timeitm = 0;
1237  int showinfo = 0;  int showinfo = 0;
1238  int showstore = 0;  int showstore = 0;
1239    int force_study = -1;
1240    int force_study_options = 0;
1241  int quiet = 0;  int quiet = 0;
1242  int size_offsets = 45;  int size_offsets = 45;
1243  int size_offsets_max;  int size_offsets_max;
# Line 834  int all_use_dfa = 0; Line 1251  int all_use_dfa = 0;
1251  int yield = 0;  int yield = 0;
1252  int stack_size;  int stack_size;
1253    
1254    pcre_jit_stack *jit_stack = NULL;
1255    
1256    
1257  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1258  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1259    
1260  uschar copynames[1024];  pcre_uchar copynames[1024];
1261  uschar getnames[1024];  pcre_uchar getnames[1024];
1262    
1263  uschar *copynamesptr;  pcre_uchar *copynamesptr;
1264  uschar *getnamesptr;  pcre_uchar *getnamesptr;
1265    
1266  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1267  when I am debugging. They grow automatically when very long lines are read. */  when I am debugging. They grow automatically when very long lines are read. */
1268    
1269  buffer = (unsigned char *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
1270  dbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (pcre_uint8 *)malloc(buffer_size);
1271  pbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
1272    
1273  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1274    
# Line 869  while (argc > 1 && argv[op][0] == '-') Line 1289  while (argc > 1 && argv[op][0] == '-')
1289    {    {
1290    unsigned char *endptr;    unsigned char *endptr;
1291    
1292    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1293      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1294      else if (strcmp(argv[op], "-s+") == 0)
1295        {
1296        force_study = 1;
1297        force_study_options = PCRE_STUDY_JIT_COMPILE;
1298        }
1299    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1300    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1301    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1302    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1303    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1304  #if !defined NODFA  #if !defined NODFA
1305    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1306  #endif  #endif
# Line 904  while (argc > 1 && argv[op][0] == '-') Line 1329  while (argc > 1 && argv[op][0] == '-')
1329        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1330          *endptr == 0))          *endptr == 0))
1331      {      {
1332  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1333      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1334      exit(1);      exit(1);
1335  #else  #else
# Line 928  while (argc > 1 && argv[op][0] == '-') Line 1353  while (argc > 1 && argv[op][0] == '-')
1353    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1354      {      {
1355      int rc;      int rc;
1356      unsigned long int lrc;      unsigned long int lrc;
1357      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1358      printf("Compiled with\n");      printf("Compiled with\n");
1359      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1360      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1361      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1362      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1363        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1364        if (rc)
1365          printf("  Just-in-time compiler support\n");
1366        else
1367          printf("  No just-in-time compiler support\n");
1368      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1369      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1370        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1371        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1372          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1373        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
1374        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1375      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)pcre_config(PCRE_CONFIG_BSR, &rc);
# Line 1032  while (!done) Line 1464  while (!done)
1464  #endif  #endif
1465    
1466    const char *error;    const char *error;
1467      unsigned char *markptr;
1468    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1469    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1470    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1471    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1472    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1473      int do_allcaps = 0;
1474      int do_mark = 0;
1475    int do_study = 0;    int do_study = 0;
1476      int no_force_study = 0;
1477    int do_debug = debug;    int do_debug = debug;
1478    int do_G = 0;    int do_G = 0;
1479    int do_g = 0;    int do_g = 0;
1480    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1481    int do_showrest = 0;    int do_showrest = 0;
1482      int do_showcaprest = 0;
1483    int do_flip = 0;    int do_flip = 0;
1484    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1485    
# Line 1062  while (!done) Line 1499  while (!done)
1499    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1500      {      {
1501      unsigned long int magic, get_options;      unsigned long int magic, get_options;
1502      uschar sbuf[8];      pcre_uint8 sbuf[8];
1503      FILE *f;      FILE *f;
1504    
1505      p++;      p++;
# Line 1085  while (!done) Line 1522  while (!done)
1522        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1523    
1524      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1525      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1526    
1527      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1528    
# Line 1104  while (!done) Line 1541  while (!done)
1541          }          }
1542        }        }
1543    
1544      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1545        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1546    
1547      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
# Line 1112  while (!done) Line 1549  while (!done)
1549      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1550      use_utf8 = (get_options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1551    
1552      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1553    
1554      if (true_study_size != 0)      if (true_study_size != 0)
1555        {        {
# Line 1128  while (!done) Line 1565  while (!done)
1565          {          {
1566          FAIL_READ:          FAIL_READ:
1567          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1568          if (extra != NULL) new_free(extra);          if (extra != NULL) pcre_free_study(extra);
1569          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1570          fclose(f);          fclose(f);
1571          continue;          continue;
# Line 1154  while (!done) Line 1591  while (!done)
1591      }      }
1592    
1593    pp = p;    pp = p;
1594    poffset = p - buffer;    poffset = (int)(p - buffer);
1595    
1596    for(;;)    for(;;)
1597      {      {
# Line 1208  while (!done) Line 1645  while (!done)
1645        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1646        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1647    
1648        case '+': do_showrest = 1; break;        case '+':
1649          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1650          break;
1651    
1652          case '=': do_allcaps = 1; break;
1653        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1654        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
1655        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1218  while (!done) Line 1659  while (!done)
1659        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1660        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1661        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1662          case 'K': do_mark = 1; break;
1663        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1664        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1665    
# Line 1225  while (!done) Line 1667  while (!done)
1667        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1668  #endif  #endif
1669    
1670        case 'S': do_study = 1; break;        case 'S':
1671          if (do_study == 0)
1672            {
1673            do_study = 1;
1674            if (*pp == '+')
1675              {
1676              study_options |= PCRE_STUDY_JIT_COMPILE;
1677              pp++;
1678              }
1679            }
1680          else
1681            {
1682            do_study = 0;
1683            no_force_study = 1;
1684            }
1685          break;
1686    
1687        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1688          case 'W': options |= PCRE_UCP; break;
1689        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1690          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1691        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1692        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1693        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1694    
1695          case 'T':
1696          switch (*pp++)
1697            {
1698            case '0': tables = tables0; break;
1699            case '1': tables = tables1; break;
1700    
1701            case '\r':
1702            case '\n':
1703            case ' ':
1704            case 0:
1705            fprintf(outfile, "** Missing table number after /T\n");
1706            goto SKIP_DATA;
1707    
1708            default:
1709            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1710            goto SKIP_DATA;
1711            }
1712          break;
1713    
1714        case 'L':        case 'L':
1715        ppp = pp;        ppp = pp;
1716        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1257  while (!done) Line 1736  while (!done)
1736    
1737        case '<':        case '<':
1738          {          {
1739          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1740            {            {
1741            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
1742            pp += 3;            pp += 3;
# Line 1298  while (!done) Line 1777  while (!done)
1777      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1778      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1779      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1780        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1781        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1782    
1783        first_gotten_store = 0;
1784      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1785    
1786      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1318  while (!done) Line 1800  while (!done)
1800  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1801    
1802      {      {
1803        unsigned long int get_options;
1804    
1805      if (timeit > 0)      if (timeit > 0)
1806        {        {
1807        register int i;        register int i;
# Line 1334  while (!done) Line 1818  while (!done)
1818            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1819        }        }
1820    
1821        first_gotten_store = 0;
1822      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1823    
1824      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1361  while (!done) Line 1846  while (!done)
1846        goto CONTINUE;        goto CONTINUE;
1847        }        }
1848    
1849      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1850      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1851      returns only limited data. Check that it agrees with the newer one. */      lines. */
1852    
1853      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1854        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
1855    
1856      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
1857      and remember the store that was got. */      and remember the store that was got. */
1858    
1859      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
1860      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1861    
1862        /* Output code size information if requested */
1863    
1864        if (log_store)
1865          fprintf(outfile, "Memory allocation (code space): %d\n",
1866            (int)(first_gotten_store -
1867                  sizeof(real_pcre) -
1868                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1869    
1870      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
1871      help with the matching. */      help with the matching, unless the pattern has the SS option, which
1872        suppresses the effect of /S (used for a few test patterns where studying is
1873        never sensible). */
1874    
1875      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
1876        {        {
1877        if (timeit > 0)        if (timeit > 0)
1878          {          {
# Line 1388  while (!done) Line 1880  while (!done)
1880          clock_t time_taken;          clock_t time_taken;
1881          clock_t start_time = clock();          clock_t start_time = clock();
1882          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
1883            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options | force_study_options, &error);
1884          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1885          if (extra != NULL) free(extra);          if (extra != NULL) pcre_free_study(extra);
1886          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1887            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
1888              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1889          }          }
1890        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options | force_study_options, &error);
1891        if (error != NULL)        if (error != NULL)
1892          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
1893        else if (extra != NULL)        else if (extra != NULL)
1894            {
1895          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1896            if (log_store)
1897              {
1898              size_t jitsize;
1899              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
1900              if (jitsize != 0)
1901                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
1902              }
1903            }
1904          }
1905    
1906        /* If /K was present, we set up for handling MARK data. */
1907    
1908        if (do_mark)
1909          {
1910          if (extra == NULL)
1911            {
1912            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1913            extra->flags = 0;
1914            }
1915          extra->mark = &markptr;
1916          extra->flags |= PCRE_EXTRA_MARK;
1917        }        }
1918    
1919      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
# Line 1419  while (!done) Line 1933  while (!done)
1933          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1934        rre->top_backref =        rre->top_backref =
1935          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1936        rre->first_byte =        rre->first_char =
1937          (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
1938        rre->req_byte =        rre->req_char =
1939          (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));          (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
1940        rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,        rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1941          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1942        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
# Line 1434  while (!done) Line 1948  while (!done)
1948          {          {
1949          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1950          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1951          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1952            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1953          }          }
1954        }        }
1955    
1956      /* Extract information from the compiled data if required */      /* Extract information from the compiled data if required. There are now
1957        two info-returning functions. The old one has a limited interface and
1958        returns only limited data. Check that it agrees with the newer one. */
1959    
1960      SHOW_INFO:      SHOW_INFO:
1961    
# Line 1448  while (!done) Line 1965  while (!done)
1965        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1966        }        }
1967    
1968        /* We already have the options in get_options (see above) */
1969    
1970      if (do_showinfo)      if (do_showinfo)
1971        {        {
1972        unsigned long int get_options, all_options;        unsigned long int all_options;
1973  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1974        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1975  #endif  #endif
1976        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1977          hascrorlf;          hascrorlf;
1978        int nameentrysize, namecount;        int nameentrysize, namecount;
1979        const uschar *nametable;        const pcre_uchar *nametable;
1980    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1981        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1982        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1983        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1519  while (!done) Line 2037  while (!done)
2037        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2038    
2039        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2040          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2041            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2042            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2043            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1533  while (!done) Line 2051  while (!done)
2051            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2052            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2053            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2054              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2055            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2056              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2057            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2058    
2059        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1574  while (!done) Line 2094  while (!done)
2094          }          }
2095        else        else
2096          {          {
2097          int ch = first_char & 255;          const char *caseless =
2098          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2099            "" : " (caseless)";            "" : " (caseless)";
2100          if (PRINTHEX(ch))  
2101            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(first_char))
2102              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2103          else          else
2104            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2105          }          }
2106    
2107        if (need_char < 0)        if (need_char < 0)
# Line 1589  while (!done) Line 2110  while (!done)
2110          }          }
2111        else        else
2112          {          {
2113          int ch = need_char & 255;          const char *caseless =
2114          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2115            "" : " (caseless)";            "" : " (caseless)";
2116          if (PRINTHEX(ch))  
2117            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(need_char))
2118              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2119          else          else
2120            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2121          }          }
2122    
2123        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2124        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2125        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2126        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2127          information unless -i or -d was also present. This means that, except
2128          when auto-callouts are involved, the output from runs with and without
2129          -s should be identical. */
2130    
2131        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2132          {          {
2133          if (extra == NULL)          if (extra == NULL)
2134            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2135          else          else
2136            {            {
2137            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2138            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2139    
2140              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2141              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2142    
2143              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2144            if (start_bits == NULL)            if (start_bits == NULL)
2145              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2146            else            else
2147              {              {
2148              int i;              int i;
# Line 1643  while (!done) Line 2172  while (!done)
2172              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2173              }              }
2174            }            }
2175    
2176            /* Show this only if the JIT was set by /S, not by -s. */
2177    
2178            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2179              {
2180              int jit;
2181              new_info(re, extra, PCRE_INFO_JIT, &jit);
2182              if (jit)
2183                fprintf(outfile, "JIT study was successful\n");
2184              else
2185    #ifdef SUPPORT_JIT
2186                fprintf(outfile, "JIT study was not successful\n");
2187    #else
2188                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2189    #endif
2190              }
2191          }          }
2192        }        }
2193    
# Line 1659  while (!done) Line 2204  while (!done)
2204          }          }
2205        else        else
2206          {          {
2207          uschar sbuf[8];          pcre_uint8 sbuf[8];
2208          sbuf[0] = (uschar)((true_size >> 24) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2209          sbuf[1] = (uschar)((true_size >> 16) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2210          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2211          sbuf[3] = (uschar)((true_size) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
2212    
2213          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2214          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2215          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2216          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2217    
2218          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2219              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1677  while (!done) Line 2222  while (!done)
2222            }            }
2223          else          else
2224            {            {
2225            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2226    
2227              /* If there is study data, write it. */
2228    
2229            if (extra != NULL)            if (extra != NULL)
2230              {              {
2231              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1687  while (!done) Line 2235  while (!done)
2235                  strerror(errno));                  strerror(errno));
2236                }                }
2237              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2238              }              }
2239            }            }
2240          fclose(f);          fclose(f);
2241          }          }
2242    
2243        new_free(re);        new_free(re);
2244        if (extra != NULL) new_free(extra);        if (extra != NULL) pcre_free_study(extra);
2245        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2246            {
2247            new_free((void *)tables);
2248            setlocale(LC_CTYPE, "C");
2249            locale_set = 0;
2250            }
2251        continue;  /* With next regex */        continue;  /* With next regex */
2252        }        }
2253      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1704  while (!done) Line 2256  while (!done)
2256    
2257    for (;;)    for (;;)
2258      {      {
2259      uschar *q;      pcre_uint8 *q;
2260      uschar *bptr;      pcre_uint8 *bptr;
2261      int *use_offsets = offsets;      int *use_offsets = offsets;
2262      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2263      int callout_data = 0;      int callout_data = 0;
# Line 1717  while (!done) Line 2269  while (!done)
2269      int getlist = 0;      int getlist = 0;
2270      int gmatched = 0;      int gmatched = 0;
2271      int start_offset = 0;      int start_offset = 0;
2272        int start_offset_sign = 1;
2273      int g_notempty = 0;      int g_notempty = 0;
2274      int use_dfa = 0;      int use_dfa = 0;
2275    
# Line 1730  while (!done) Line 2283  while (!done)
2283    
2284      pcre_callout = callout;      pcre_callout = callout;
2285      first_callout = 1;      first_callout = 1;
2286        last_callout_mark = NULL;
2287      callout_extra = 0;      callout_extra = 0;
2288      callout_count = 0;      callout_count = 0;
2289      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1744  while (!done) Line 2298  while (!done)
2298        {        {
2299        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2300          {          {
2301          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2302              {
2303              fprintf(outfile, "\n");
2304              break;
2305              }
2306          done = 1;          done = 1;
2307          goto CONTINUE;          goto CONTINUE;
2308          }          }
# Line 1804  while (!done) Line 2362  while (!done)
2362            {            {
2363            unsigned char *pt = p;            unsigned char *pt = p;
2364            c = 0;            c = 0;
2365            while (isxdigit(*(++pt)))  
2366              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2367              when isxdigit() is a macro that refers to its argument more than
2368              once. This is banned by the C Standard, but apparently happens in at
2369              least one MacOS environment. */
2370    
2371              for (pt++; isxdigit(*pt); pt++)
2372                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2373            if (*pt == '}')            if (*pt == '}')
2374              {              {
2375              unsigned char buff8[8];              unsigned char buff8[8];
# Line 1835  while (!done) Line 2399  while (!done)
2399          c = 0;          c = 0;
2400          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2401            {            {
2402            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2403            p++;            p++;
2404            }            }
2405          break;          break;
# Line 1845  while (!done) Line 2409  while (!done)
2409          continue;          continue;
2410    
2411          case '>':          case '>':
2412            if (*p == '-')
2413              {
2414              start_offset_sign = -1;
2415              p++;
2416              }
2417          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2418            start_offset *= start_offset_sign;
2419          continue;          continue;
2420    
2421          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1864  while (!done) Line 2434  while (!done)
2434            }            }
2435          else if (isalnum(*p))          else if (isalnum(*p))
2436            {            {
2437            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
2438            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2439            *npp++ = 0;            *npp++ = 0;
2440            *npp = 0;            *npp = 0;
# Line 1918  while (!done) Line 2488  while (!done)
2488  #endif  #endif
2489            use_dfa = 1;            use_dfa = 1;
2490          continue;          continue;
2491    #endif
2492    
2493    #if !defined NODFA
2494          case 'F':          case 'F':
2495          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2496          continue;          continue;
# Line 1932  while (!done) Line 2504  while (!done)
2504            }            }
2505          else if (isalnum(*p))          else if (isalnum(*p))
2506            {            {
2507            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
2508            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2509            *npp++ = 0;            *npp++ = 0;
2510            *npp = 0;            *npp = 0;
# Line 1943  while (!done) Line 2515  while (!done)
2515            }            }
2516          continue;          continue;
2517    
2518            case 'J':
2519            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2520            if (extra != NULL
2521                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2522                && extra->executable_jit != NULL)
2523              {
2524              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2525              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2526              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2527              }
2528            continue;
2529    
2530          case 'L':          case 'L':
2531          getlist = 1;          getlist = 1;
2532          continue;          continue;
# Line 1952  while (!done) Line 2536  while (!done)
2536          continue;          continue;
2537    
2538          case 'N':          case 'N':
2539          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2540              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2541            else
2542              options |= PCRE_NOTEMPTY;
2543          continue;          continue;
2544    
2545          case 'O':          case 'O':
# Line 1975  while (!done) Line 2562  while (!done)
2562          continue;          continue;
2563    
2564          case 'P':          case 'P':
2565          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2566              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2567          continue;          continue;
2568    
2569          case 'Q':          case 'Q':
# Line 2009  while (!done) Line 2597  while (!done)
2597          case 'S':          case 'S':
2598          show_malloc = 1;          show_malloc = 1;
2599          continue;          continue;
2600    
2601          case 'Y':          case 'Y':
2602          options |= PCRE_NO_START_OPTIMIZE;          options |= PCRE_NO_START_OPTIMIZE;
2603          continue;          continue;
2604    
2605          case 'Z':          case 'Z':
2606          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
# Line 2034  while (!done) Line 2622  while (!done)
2622        *q++ = c;        *q++ = c;
2623        }        }
2624      *q = 0;      *q = 0;
2625      len = q - dbuffer;      len = (int)(q - dbuffer);
2626    
2627      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
2628      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
# Line 2072  while (!done) Line 2660  while (!done)
2660          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2661        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2662        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2663        if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;        if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2664    
2665        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2666    
# Line 2097  while (!done) Line 2685  while (!done)
2685              (void)pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2686                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2687              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2688              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2689                {                {
2690                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2691                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2692                  outfile);                  outfile);
2693                fprintf(outfile, "\n");                fprintf(outfile, "\n");
# Line 2117  while (!done) Line 2705  while (!done)
2705    
2706      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2707        {        {
2708          markptr = NULL;
2709    
2710        if (timeitm > 0)        if (timeitm > 0)
2711          {          {
2712          register int i;          register int i;
# Line 2128  while (!done) Line 2718  while (!done)
2718            {            {
2719            int workspace[1000];            int workspace[1000];
2720            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2721              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2722                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2723                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2724            }            }
# Line 2147  while (!done) Line 2737  while (!done)
2737    
2738        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2739        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
2740        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
2741          running of pcre_exec(), so disable the JIT optimization. This makes it
2742          possible to run the same set of tests with and without JIT externally
2743          requested. */
2744    
2745        if (find_match_limit)        if (find_match_limit)
2746          {          {
# Line 2156  while (!done) Line 2749  while (!done)
2749            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2750            extra->flags = 0;            extra->flags = 0;
2751            }            }
2752            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2753    
2754          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
2755            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2191  while (!done) Line 2785  while (!done)
2785        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2786          {          {
2787          int workspace[1000];          int workspace[1000];
2788          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2789            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2790            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2791          if (count == 0)          if (count == 0)
# Line 2239  while (!done) Line 2833  while (!done)
2833              }              }
2834            }            }
2835    
2836            /* do_allcaps requests showing of all captures in the pattern, to check
2837            unset ones at the end. */
2838    
2839            if (do_allcaps)
2840              {
2841              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2842              count++;   /* Allow for full match */
2843              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2844              }
2845    
2846            /* Output the captured substrings */
2847    
2848          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2849            {            {
2850            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2851                {
2852                if (use_offsets[i] != -1)
2853                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2854                    use_offsets[i], i);
2855                if (use_offsets[i+1] != -1)
2856                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2857                    use_offsets[i+1], i+1);
2858              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2859                }
2860            else            else
2861              {              {
2862              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2863              (void)pchars(bptr + use_offsets[i],              (void)pchars(bptr + use_offsets[i],
2864                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
2865              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2866              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
2867                {                {
2868                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
2869                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2870                  fprintf(outfile, " 0+ ");                  outfile);
2871                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
2872                }                }
2873              }              }
2874            }            }
2875    
2876            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2877    
2878          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2879            {            {
2880            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2335  while (!done) Line 2948  while (!done)
2948                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2949              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2950                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
2951              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
2952              }              }
2953            }            }
# Line 2345  while (!done) Line 2957  while (!done)
2957    
2958        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2959          {          {
2960          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2961  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
2962          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
2963            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
2964              bptr + use_offsets[0]);            fprintf(outfile, ": ");
2965  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2966                outfile);
2967              }
2968          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2969          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2970          }          }
# Line 2360  while (!done) Line 2974  while (!done)
2974        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2975        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2976    
2977        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
2978        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
2979        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
2980        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
2981        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
2982          newline setting in the pattern; if none was set, use pcre_config() to
2983          find the default.
2984    
2985        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2986        character, not one byte. */        character, not one byte. */
# Line 2380  while (!done) Line 2996  while (!done)
2996              {              {
2997              int d;              int d;
2998              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2999              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
3000                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
3001                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
3002                        (d == 10)? PCRE_NEWLINE_LF :
3003                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3004                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
3005                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
3006              }              }
3007            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3008                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3009                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3010                &&                &&
3011                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2397  while (!done) Line 3016  while (!done)
3016              {              {
3017              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3018                {                {
3019                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3020                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3021                }                }
3022              }              }
3023            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3024            }            }
3025          else          else
3026            {            {
3027            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3028              {              {
3029              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3030                if (gmatched == 0)
3031                  {
3032                  if (markptr == NULL) fprintf(outfile, "No match\n");
3033                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3034                  }
3035                break;
3036    
3037                case PCRE_ERROR_BADUTF8:
3038                case PCRE_ERROR_SHORTUTF8:
3039                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3040                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3041                if (use_size_offsets >= 2)
3042                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3043                    use_offsets[1]);
3044                fprintf(outfile, "\n");
3045                break;
3046    
3047                default:
3048                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3049                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3050                else
3051                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3052                break;
3053              }              }
3054            else fprintf(outfile, "Error %d\n", count);  
3055            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3056            }            }
3057          }          }
# Line 2421  while (!done) Line 3061  while (!done)
3061        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3062    
3063        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3064        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3065        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
3066        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3067        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3068        character. */        character. */
3069    
# Line 2432  while (!done) Line 3072  while (!done)
3072        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3073          {          {
3074          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3075          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3076          }          }
3077    
3078        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2458  while (!done) Line 3098  while (!done)
3098  #endif  #endif
3099    
3100    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3101    if (extra != NULL) new_free(extra);    if (extra != NULL) pcre_free_study(extra);
3102    if (tables != NULL)    if (locale_set)
3103      {      {
3104      new_free((void *)tables);      new_free((void *)tables);
3105      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3106      locale_set = 0;      locale_set = 0;
3107      }      }
3108      if (jit_stack != NULL)
3109        {
3110        pcre_jit_stack_free(jit_stack);
3111        jit_stack = NULL;
3112        }
3113    }    }
3114    
3115  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");

Legend:
Removed from v.389  
changed lines
  Added in v.801

  ViewVC Help
Powered by ViewVC 1.1.5