/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC revision 775 by ph10, Thu Dec 1 10:35:30 2011 UTC
# Line 49  POSSIBILITY OF SUCH DAMAGE. Line 49  POSSIBILITY OF SUCH DAMAGE.
49  #include <errno.h>  #include <errno.h>
50    
51  #ifdef SUPPORT_LIBREADLINE  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53  #include <unistd.h>  #include <unistd.h>
54    #endif
55  #include <readline/readline.h>  #include <readline/readline.h>
56  #include <readline/history.h>  #include <readline/history.h>
57  #endif  #endif
# Line 69  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
92  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 89  appropriately for an application, not fo Line 107  appropriately for an application, not fo
107  #include "pcre.h"  #include "pcre.h"
108  #include "pcre_internal.h"  #include "pcre_internal.h"
109    
110  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
111  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
112  symbols to prevent clashes. */  external symbols to prevent clashes. */
113    
114    #define _pcre_ucp_gentype      ucp_gentype
115    #define _pcre_ucp_typerange    ucp_typerange
116  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
117  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
118  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
119  #define _pcre_utf8_table3      utf8_table3  #define _pcre_utf8_table3      utf8_table3
120  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
121    #define _pcre_utf8_char_sizes  utf8_char_sizes
122  #define _pcre_utt              utt  #define _pcre_utt              utt
123  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
124  #define _pcre_utt_names        utt_names  #define _pcre_utt_names        utt_names
# Line 107  symbols to prevent clashes. */ Line 128  symbols to prevent clashes. */
128    
129  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
130  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
131  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
132    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We use it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
133    
134    #define COMPILING_PCRETEST
135  #include "pcre_printint.src"  #include "pcre_printint.src"
136    
137  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
138    output character as-is or as a hex value when showing compiled patterns, is
139    contained in the pcre_printint.src file. We use it here also, in cases when the
140    locale has not been explicitly changed, so as to get consistent output from
141    systems that differ in their output from isprint() even in the "C" locale. */
142    
143    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144    
145  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
146  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 169  static int locale_set = 0; Line 191  static int locale_set = 0;
191  static int show_malloc;  static int show_malloc;
192  static int use_utf8;  static int use_utf8;
193  static size_t gotten_store;  static size_t gotten_store;
194    static size_t first_gotten_store = 0;
195    static const unsigned char *last_callout_mark = NULL;
196    
197  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
198    
# Line 177  static uschar *buffer = NULL; Line 201  static uschar *buffer = NULL;
201  static uschar *dbuffer = NULL;  static uschar *dbuffer = NULL;
202  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
203    
204    /* Textual explanations for runtime error codes */
205    
206    static const char *errtexts[] = {
207      NULL,  /* 0 is no error */
208      NULL,  /* NOMATCH is handled specially */
209      "NULL argument passed",
210      "bad option value",
211      "magic number missing",
212      "unknown opcode - pattern overwritten?",
213      "no more memory",
214      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
215      "match limit exceeded",
216      "callout error code",
217      NULL,  /* BADUTF8 is handled specially */
218      "bad UTF-8 offset",
219      NULL,  /* PARTIAL is handled specially */
220      "not used - internal error",
221      "internal error - pattern overwritten?",
222      "bad count value",
223      "item unsupported for DFA matching",
224      "backreference condition or recursion test not supported for DFA matching",
225      "match limit not supported for DFA matching",
226      "workspace size exceeded in DFA matching",
227      "too much recursion for DFA matching",
228      "recursion limit exceeded",
229      "not used - internal error",
230      "invalid combination of newline options",
231      "bad offset value",
232      NULL,  /* SHORTUTF8 is handled specially */
233      "nested recursion at the same subject position",
234      "JIT stack limit reached"
235    };
236    
237    
238    /*************************************************
239    *         Alternate character tables             *
240    *************************************************/
241    
242    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
243    using the default tables of the library. However, the T option can be used to
244    select alternate sets of tables, for different kinds of testing. Note also that
245    the L (locale) option also adjusts the tables. */
246    
247    /* This is the set of tables distributed as default with PCRE. It recognizes
248    only ASCII characters. */
249    
250    static const unsigned char tables0[] = {
251    
252    /* This table is a lower casing table. */
253    
254        0,  1,  2,  3,  4,  5,  6,  7,
255        8,  9, 10, 11, 12, 13, 14, 15,
256       16, 17, 18, 19, 20, 21, 22, 23,
257       24, 25, 26, 27, 28, 29, 30, 31,
258       32, 33, 34, 35, 36, 37, 38, 39,
259       40, 41, 42, 43, 44, 45, 46, 47,
260       48, 49, 50, 51, 52, 53, 54, 55,
261       56, 57, 58, 59, 60, 61, 62, 63,
262       64, 97, 98, 99,100,101,102,103,
263      104,105,106,107,108,109,110,111,
264      112,113,114,115,116,117,118,119,
265      120,121,122, 91, 92, 93, 94, 95,
266       96, 97, 98, 99,100,101,102,103,
267      104,105,106,107,108,109,110,111,
268      112,113,114,115,116,117,118,119,
269      120,121,122,123,124,125,126,127,
270      128,129,130,131,132,133,134,135,
271      136,137,138,139,140,141,142,143,
272      144,145,146,147,148,149,150,151,
273      152,153,154,155,156,157,158,159,
274      160,161,162,163,164,165,166,167,
275      168,169,170,171,172,173,174,175,
276      176,177,178,179,180,181,182,183,
277      184,185,186,187,188,189,190,191,
278      192,193,194,195,196,197,198,199,
279      200,201,202,203,204,205,206,207,
280      208,209,210,211,212,213,214,215,
281      216,217,218,219,220,221,222,223,
282      224,225,226,227,228,229,230,231,
283      232,233,234,235,236,237,238,239,
284      240,241,242,243,244,245,246,247,
285      248,249,250,251,252,253,254,255,
286    
287    /* This table is a case flipping table. */
288    
289        0,  1,  2,  3,  4,  5,  6,  7,
290        8,  9, 10, 11, 12, 13, 14, 15,
291       16, 17, 18, 19, 20, 21, 22, 23,
292       24, 25, 26, 27, 28, 29, 30, 31,
293       32, 33, 34, 35, 36, 37, 38, 39,
294       40, 41, 42, 43, 44, 45, 46, 47,
295       48, 49, 50, 51, 52, 53, 54, 55,
296       56, 57, 58, 59, 60, 61, 62, 63,
297       64, 97, 98, 99,100,101,102,103,
298      104,105,106,107,108,109,110,111,
299      112,113,114,115,116,117,118,119,
300      120,121,122, 91, 92, 93, 94, 95,
301       96, 65, 66, 67, 68, 69, 70, 71,
302       72, 73, 74, 75, 76, 77, 78, 79,
303       80, 81, 82, 83, 84, 85, 86, 87,
304       88, 89, 90,123,124,125,126,127,
305      128,129,130,131,132,133,134,135,
306      136,137,138,139,140,141,142,143,
307      144,145,146,147,148,149,150,151,
308      152,153,154,155,156,157,158,159,
309      160,161,162,163,164,165,166,167,
310      168,169,170,171,172,173,174,175,
311      176,177,178,179,180,181,182,183,
312      184,185,186,187,188,189,190,191,
313      192,193,194,195,196,197,198,199,
314      200,201,202,203,204,205,206,207,
315      208,209,210,211,212,213,214,215,
316      216,217,218,219,220,221,222,223,
317      224,225,226,227,228,229,230,231,
318      232,233,234,235,236,237,238,239,
319      240,241,242,243,244,245,246,247,
320      248,249,250,251,252,253,254,255,
321    
322    /* This table contains bit maps for various character classes. Each map is 32
323    bytes long and the bits run from the least significant end of each byte. The
324    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
325    graph, print, punct, and cntrl. Other classes are built from combinations. */
326    
327      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
333      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
334      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336    
337      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341    
342      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346    
347      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
349      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351    
352      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
353      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
354      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356    
357      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
358      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361    
362      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
363      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366    
367      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
368      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371    
372      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
376    
377    /* This table identifies various classes of character by individual bits:
378      0x01   white space character
379      0x02   letter
380      0x04   decimal digit
381      0x08   hexadecimal digit
382      0x10   alphanumeric or '_'
383      0x80   regular expression metacharacter or binary zero
384    */
385    
386      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
387      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
388      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
389      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
390      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
391      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
392      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
393      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
394      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
395      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
396      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
397      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
398      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
399      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
400      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
401      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
402      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
403      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
404      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
407      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
408      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
409      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
410      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
411      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
412      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
413      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
414      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
415      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
416      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
417      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
418    
419    /* This is a set of tables that came originally from a Windows user. It seems to
420    be at least an approximation of ISO 8859. In particular, there are characters
421    greater than 128 that are marked as spaces, letters, etc. */
422    
423    static const unsigned char tables1[] = {
424    0,1,2,3,4,5,6,7,
425    8,9,10,11,12,13,14,15,
426    16,17,18,19,20,21,22,23,
427    24,25,26,27,28,29,30,31,
428    32,33,34,35,36,37,38,39,
429    40,41,42,43,44,45,46,47,
430    48,49,50,51,52,53,54,55,
431    56,57,58,59,60,61,62,63,
432    64,97,98,99,100,101,102,103,
433    104,105,106,107,108,109,110,111,
434    112,113,114,115,116,117,118,119,
435    120,121,122,91,92,93,94,95,
436    96,97,98,99,100,101,102,103,
437    104,105,106,107,108,109,110,111,
438    112,113,114,115,116,117,118,119,
439    120,121,122,123,124,125,126,127,
440    128,129,130,131,132,133,134,135,
441    136,137,138,139,140,141,142,143,
442    144,145,146,147,148,149,150,151,
443    152,153,154,155,156,157,158,159,
444    160,161,162,163,164,165,166,167,
445    168,169,170,171,172,173,174,175,
446    176,177,178,179,180,181,182,183,
447    184,185,186,187,188,189,190,191,
448    224,225,226,227,228,229,230,231,
449    232,233,234,235,236,237,238,239,
450    240,241,242,243,244,245,246,215,
451    248,249,250,251,252,253,254,223,
452    224,225,226,227,228,229,230,231,
453    232,233,234,235,236,237,238,239,
454    240,241,242,243,244,245,246,247,
455    248,249,250,251,252,253,254,255,
456    0,1,2,3,4,5,6,7,
457    8,9,10,11,12,13,14,15,
458    16,17,18,19,20,21,22,23,
459    24,25,26,27,28,29,30,31,
460    32,33,34,35,36,37,38,39,
461    40,41,42,43,44,45,46,47,
462    48,49,50,51,52,53,54,55,
463    56,57,58,59,60,61,62,63,
464    64,97,98,99,100,101,102,103,
465    104,105,106,107,108,109,110,111,
466    112,113,114,115,116,117,118,119,
467    120,121,122,91,92,93,94,95,
468    96,65,66,67,68,69,70,71,
469    72,73,74,75,76,77,78,79,
470    80,81,82,83,84,85,86,87,
471    88,89,90,123,124,125,126,127,
472    128,129,130,131,132,133,134,135,
473    136,137,138,139,140,141,142,143,
474    144,145,146,147,148,149,150,151,
475    152,153,154,155,156,157,158,159,
476    160,161,162,163,164,165,166,167,
477    168,169,170,171,172,173,174,175,
478    176,177,178,179,180,181,182,183,
479    184,185,186,187,188,189,190,191,
480    224,225,226,227,228,229,230,231,
481    232,233,234,235,236,237,238,239,
482    240,241,242,243,244,245,246,215,
483    248,249,250,251,252,253,254,223,
484    192,193,194,195,196,197,198,199,
485    200,201,202,203,204,205,206,207,
486    208,209,210,211,212,213,214,247,
487    216,217,218,219,220,221,222,255,
488    0,62,0,0,1,0,0,0,
489    0,0,0,0,0,0,0,0,
490    32,0,0,0,1,0,0,0,
491    0,0,0,0,0,0,0,0,
492    0,0,0,0,0,0,255,3,
493    126,0,0,0,126,0,0,0,
494    0,0,0,0,0,0,0,0,
495    0,0,0,0,0,0,0,0,
496    0,0,0,0,0,0,255,3,
497    0,0,0,0,0,0,0,0,
498    0,0,0,0,0,0,12,2,
499    0,0,0,0,0,0,0,0,
500    0,0,0,0,0,0,0,0,
501    254,255,255,7,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    255,255,127,127,0,0,0,0,
504    0,0,0,0,0,0,0,0,
505    0,0,0,0,254,255,255,7,
506    0,0,0,0,0,4,32,4,
507    0,0,0,128,255,255,127,255,
508    0,0,0,0,0,0,255,3,
509    254,255,255,135,254,255,255,7,
510    0,0,0,0,0,4,44,6,
511    255,255,127,255,255,255,127,255,
512    0,0,0,0,254,255,255,255,
513    255,255,255,255,255,255,255,127,
514    0,0,0,0,254,255,255,255,
515    255,255,255,255,255,255,255,255,
516    0,2,0,0,255,255,255,255,
517    255,255,255,255,255,255,255,127,
518    0,0,0,0,255,255,255,255,
519    255,255,255,255,255,255,255,255,
520    0,0,0,0,254,255,0,252,
521    1,0,0,248,1,0,0,120,
522    0,0,0,0,254,255,255,255,
523    0,0,128,0,0,0,128,0,
524    255,255,255,255,0,0,0,0,
525    0,0,0,0,0,0,0,128,
526    255,255,255,255,0,0,0,0,
527    0,0,0,0,0,0,0,0,
528    128,0,0,0,0,0,0,0,
529    0,1,1,0,1,1,0,0,
530    0,0,0,0,0,0,0,0,
531    0,0,0,0,0,0,0,0,
532    1,0,0,0,128,0,0,0,
533    128,128,128,128,0,0,128,0,
534    28,28,28,28,28,28,28,28,
535    28,28,0,0,0,0,0,128,
536    0,26,26,26,26,26,26,18,
537    18,18,18,18,18,18,18,18,
538    18,18,18,18,18,18,18,18,
539    18,18,18,128,128,0,128,16,
540    0,26,26,26,26,26,26,18,
541    18,18,18,18,18,18,18,18,
542    18,18,18,18,18,18,18,18,
543    18,18,18,128,128,0,0,0,
544    0,0,0,0,0,1,0,0,
545    0,0,0,0,0,0,0,0,
546    0,0,0,0,0,0,0,0,
547    0,0,0,0,0,0,0,0,
548    1,0,0,0,0,0,0,0,
549    0,0,18,0,0,0,0,0,
550    0,0,20,20,0,18,0,0,
551    0,20,18,0,0,0,0,0,
552    18,18,18,18,18,18,18,18,
553    18,18,18,18,18,18,18,18,
554    18,18,18,18,18,18,18,0,
555    18,18,18,18,18,18,18,18,
556    18,18,18,18,18,18,18,18,
557    18,18,18,18,18,18,18,18,
558    18,18,18,18,18,18,18,0,
559    18,18,18,18,18,18,18,18
560    };
561    
562    
563    
564    
565    #ifndef HAVE_STRERROR
566    /*************************************************
567    *     Provide strerror() for non-ANSI libraries  *
568    *************************************************/
569    
570    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
571    in their libraries, but can provide the same facility by this simple
572    alternative function. */
573    
574    extern int   sys_nerr;
575    extern char *sys_errlist[];
576    
577    char *
578    strerror(int n)
579    {
580    if (n < 0 || n >= sys_nerr) return "unknown error number";
581    return sys_errlist[n];
582    }
583    #endif /* HAVE_STRERROR */
584    
585    
586    /*************************************************
587    *         JIT memory callback                    *
588    *************************************************/
589    
590    static pcre_jit_stack* jit_callback(void *arg)
591    {
592    return (pcre_jit_stack *)arg;
593    }
594    
595    
596  /*************************************************  /*************************************************
# Line 209  uschar *here = start; Line 623  uschar *here = start;
623    
624  for (;;)  for (;;)
625    {    {
626    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
627    
628    if (rlen > 1000)    if (rlen > 1000)
629      {      {
# Line 239  for (;;) Line 653  for (;;)
653      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
654    
655        {        {
656        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
657        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
658          return (here == start)? NULL : start;          return (here == start)? NULL : start;
659        }        }
# Line 559  fprintf(outfile, "%.*s", (cb->next_item_ Line 973  fprintf(outfile, "%.*s", (cb->next_item_
973  fprintf(outfile, "\n");  fprintf(outfile, "\n");
974  first_callout = 0;  first_callout = 0;
975    
976    if (cb->mark != last_callout_mark)
977      {
978      fprintf(outfile, "Latest Mark: %s\n",
979        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
980      last_callout_mark = cb->mark;
981      }
982    
983  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
984    {    {
985    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 578  return (cb->callout_number != callout_fa Line 999  return (cb->callout_number != callout_fa
999  *            Local malloc functions              *  *            Local malloc functions              *
1000  *************************************************/  *************************************************/
1001    
1002  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1003  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1004    show_malloc variable is set only during matching. */
1005    
1006  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1007  {  {
1008  void *block = malloc(size);  void *block = malloc(size);
1009  gotten_store = size;  gotten_store = size;
1010    if (first_gotten_store == 0) first_gotten_store = size;
1011  if (show_malloc)  if (show_malloc)
1012    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1013  return block;  return block;
# Line 597  if (show_malloc) Line 1020  if (show_malloc)
1020  free(block);  free(block);
1021  }  }
1022    
   
1023  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1024    
1025  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 729  return 0; Line 1151  return 0;
1151  *         Check newline indicator                *  *         Check newline indicator                *
1152  *************************************************/  *************************************************/
1153    
1154  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1155  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1156    
1157  Arguments:  Arguments:
1158    p           points after the leading '<'    p           points after the leading '<'
# Line 779  printf("  -dfa     force DFA matching fo Line 1200  printf("  -dfa     force DFA matching fo
1200  #endif  #endif
1201  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
1202  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
1203           "  -M       find MATCH_LIMIT minimum for each subject\n"
1204         "  -m       output memory used information\n"         "  -m       output memory used information\n"
1205         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
1206  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 786  printf("  -p       use POSIX interface\n Line 1208  printf("  -p       use POSIX interface\n
1208  #endif  #endif
1209  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1210  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1211  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1212           "  -s+      force each pattern to be studied, using JIT if available\n"
1213         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1214  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1215  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 808  int main(int argc, char **argv) Line 1231  int main(int argc, char **argv)
1231  FILE *infile = stdin;  FILE *infile = stdin;
1232  int options = 0;  int options = 0;
1233  int study_options = 0;  int study_options = 0;
1234    int default_find_match_limit = FALSE;
1235  int op = 1;  int op = 1;
1236  int timeit = 0;  int timeit = 0;
1237  int timeitm = 0;  int timeitm = 0;
1238  int showinfo = 0;  int showinfo = 0;
1239  int showstore = 0;  int showstore = 0;
1240    int force_study = -1;
1241    int force_study_options = 0;
1242  int quiet = 0;  int quiet = 0;
1243  int size_offsets = 45;  int size_offsets = 45;
1244  int size_offsets_max;  int size_offsets_max;
# Line 826  int all_use_dfa = 0; Line 1252  int all_use_dfa = 0;
1252  int yield = 0;  int yield = 0;
1253  int stack_size;  int stack_size;
1254    
1255    pcre_jit_stack *jit_stack = NULL;
1256    
1257    
1258  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1259  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1260    
# Line 861  while (argc > 1 && argv[op][0] == '-') Line 1290  while (argc > 1 && argv[op][0] == '-')
1290    {    {
1291    unsigned char *endptr;    unsigned char *endptr;
1292    
1293    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1294      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1295      else if (strcmp(argv[op], "-s+") == 0)
1296        {
1297        force_study = 1;
1298        force_study_options = PCRE_STUDY_JIT_COMPILE;
1299        }
1300    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1301    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1302    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1303    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1304      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1305  #if !defined NODFA  #if !defined NODFA
1306    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1307  #endif  #endif
# Line 895  while (argc > 1 && argv[op][0] == '-') Line 1330  while (argc > 1 && argv[op][0] == '-')
1330        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1331          *endptr == 0))          *endptr == 0))
1332      {      {
1333  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1334      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1335      exit(1);      exit(1);
1336  #else  #else
# Line 919  while (argc > 1 && argv[op][0] == '-') Line 1354  while (argc > 1 && argv[op][0] == '-')
1354    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1355      {      {
1356      int rc;      int rc;
1357        unsigned long int lrc;
1358      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1359      printf("Compiled with\n");      printf("Compiled with\n");
1360      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1361      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1362      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1363      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1364        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1365        if (rc)
1366          printf("  Just-in-time compiler support\n");
1367        else
1368          printf("  No just-in-time compiler support\n");
1369      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1370      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1371        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1372        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1373          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1374        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
1375        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1376      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)pcre_config(PCRE_CONFIG_BSR, &rc);
# Line 937  while (argc > 1 && argv[op][0] == '-') Line 1380  while (argc > 1 && argv[op][0] == '-')
1380      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1381      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1382      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1383      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1384      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1385      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1386      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1387      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1388      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1389      goto EXIT;      goto EXIT;
# Line 1022  while (!done) Line 1465  while (!done)
1465  #endif  #endif
1466    
1467    const char *error;    const char *error;
1468      unsigned char *markptr;
1469    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1470    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1471    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1472    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1473    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1474      int do_allcaps = 0;
1475      int do_mark = 0;
1476    int do_study = 0;    int do_study = 0;
1477      int no_force_study = 0;
1478    int do_debug = debug;    int do_debug = debug;
1479    int do_G = 0;    int do_G = 0;
1480    int do_g = 0;    int do_g = 0;
1481    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1482    int do_showrest = 0;    int do_showrest = 0;
1483      int do_showcaprest = 0;
1484    int do_flip = 0;    int do_flip = 0;
1485    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1486    
# Line 1075  while (!done) Line 1523  while (!done)
1523        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1524    
1525      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1526      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1527    
1528      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1529    
# Line 1094  while (!done) Line 1542  while (!done)
1542          }          }
1543        }        }
1544    
1545      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1546        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1547    
1548      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
# Line 1102  while (!done) Line 1550  while (!done)
1550      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1551      use_utf8 = (get_options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1552    
1553      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1554    
1555      if (true_study_size != 0)      if (true_study_size != 0)
1556        {        {
# Line 1118  while (!done) Line 1566  while (!done)
1566          {          {
1567          FAIL_READ:          FAIL_READ:
1568          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1569          if (extra != NULL) new_free(extra);          if (extra != NULL) pcre_free_study(extra);
1570          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1571          fclose(f);          fclose(f);
1572          continue;          continue;
# Line 1144  while (!done) Line 1592  while (!done)
1592      }      }
1593    
1594    pp = p;    pp = p;
1595    poffset = p - buffer;    poffset = (int)(p - buffer);
1596    
1597    for(;;)    for(;;)
1598      {      {
# Line 1184  while (!done) Line 1632  while (!done)
1632    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1633    
1634    options = 0;    options = 0;
1635    study_options = 0;    study_options = 0;
1636    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
1637    
1638    while (*pp != 0)    while (*pp != 0)
# Line 1198  while (!done) Line 1646  while (!done)
1646        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1647        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1648    
1649        case '+': do_showrest = 1; break;        case '+':
1650          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1651          break;
1652    
1653          case '=': do_allcaps = 1; break;
1654        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1655        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
1656        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1208  while (!done) Line 1660  while (!done)
1660        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1661        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1662        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1663          case 'K': do_mark = 1; break;
1664        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1665        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1666    
# Line 1215  while (!done) Line 1668  while (!done)
1668        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1669  #endif  #endif
1670    
1671        case 'S': do_study = 1; break;        case 'S':
1672          if (do_study == 0)
1673            {
1674            do_study = 1;
1675            if (*pp == '+')
1676              {
1677              study_options |= PCRE_STUDY_JIT_COMPILE;
1678              pp++;
1679              }
1680            }
1681          else
1682            {
1683            do_study = 0;
1684            no_force_study = 1;
1685            }
1686          break;
1687    
1688        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1689          case 'W': options |= PCRE_UCP; break;
1690        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1691          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1692        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1693        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1694        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1695    
1696          case 'T':
1697          switch (*pp++)
1698            {
1699            case '0': tables = tables0; break;
1700            case '1': tables = tables1; break;
1701    
1702            case '\r':
1703            case '\n':
1704            case ' ':
1705            case 0:
1706            fprintf(outfile, "** Missing table number after /T\n");
1707            goto SKIP_DATA;
1708    
1709            default:
1710            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1711            goto SKIP_DATA;
1712            }
1713          break;
1714    
1715        case 'L':        case 'L':
1716        ppp = pp;        ppp = pp;
1717        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1247  while (!done) Line 1737  while (!done)
1737    
1738        case '<':        case '<':
1739          {          {
1740          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1741            {            {
1742            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
1743            pp += 3;            pp += 3;
1744            }            }
1745          else          else
1746            {            {
1747            int x = check_newline(pp, outfile);            int x = check_newline(pp, outfile);
1748            if (x == 0) goto SKIP_DATA;            if (x == 0) goto SKIP_DATA;
1749            options |= x;            options |= x;
1750            while (*pp++ != '>');            while (*pp++ != '>');
1751            }            }
1752          }          }
1753        break;        break;
1754    
# Line 1288  while (!done) Line 1778  while (!done)
1778      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1779      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1780      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1781        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1782        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1783    
1784        first_gotten_store = 0;
1785      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1786    
1787      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1308  while (!done) Line 1801  while (!done)
1801  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1802    
1803      {      {
1804        unsigned long int get_options;
1805    
1806      if (timeit > 0)      if (timeit > 0)
1807        {        {
1808        register int i;        register int i;
# Line 1324  while (!done) Line 1819  while (!done)
1819            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1820        }        }
1821    
1822        first_gotten_store = 0;
1823      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1824    
1825      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1351  while (!done) Line 1847  while (!done)
1847        goto CONTINUE;        goto CONTINUE;
1848        }        }
1849    
1850      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1851      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1852      returns only limited data. Check that it agrees with the newer one. */      lines. */
1853    
1854        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1855        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1856    
1857        /* Print information if required. There are now two info-returning
1858        functions. The old one has a limited interface and returns only limited
1859        data. Check that it agrees with the newer one. */
1860    
1861      if (log_store)      if (log_store)
1862        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
1863          (int)(gotten_store -          (int)(first_gotten_store -
1864                sizeof(real_pcre) -                sizeof(real_pcre) -
1865                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1866    
# Line 1365  while (!done) Line 1868  while (!done)
1868      and remember the store that was got. */      and remember the store that was got. */
1869    
1870      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
1871      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1872    
1873      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
1874      help with the matching. */      help with the matching, unless the pattern has the SS option, which
1875        suppresses the effect of /S (used for a few test patterns where studying is
1876        never sensible). */
1877    
1878      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
1879        {        {
1880        if (timeit > 0)        if (timeit > 0)
1881          {          {
# Line 1378  while (!done) Line 1883  while (!done)
1883          clock_t time_taken;          clock_t time_taken;
1884          clock_t start_time = clock();          clock_t start_time = clock();
1885          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
1886            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options | force_study_options, &error);
1887          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1888          if (extra != NULL) free(extra);          if (extra != NULL) pcre_free_study(extra);
1889          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1890            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
1891              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1892          }          }
1893        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options | force_study_options, &error);
1894        if (error != NULL)        if (error != NULL)
1895          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
1896        else if (extra != NULL)        else if (extra != NULL)
1897          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1898        }        }
1899    
1900        /* If /K was present, we set up for handling MARK data. */
1901    
1902        if (do_mark)
1903          {
1904          if (extra == NULL)
1905            {
1906            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1907            extra->flags = 0;
1908            }
1909          extra->mark = &markptr;
1910          extra->flags |= PCRE_EXTRA_MARK;
1911          }
1912    
1913      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1914      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1915      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 1424  while (!done) Line 1942  while (!done)
1942          {          {
1943          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1944          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1945          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1946            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1947          }          }
1948        }        }
1949    
# Line 1438  while (!done) Line 1957  while (!done)
1957        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1958        }        }
1959    
1960        /* We already have the options in get_options (see above) */
1961    
1962      if (do_showinfo)      if (do_showinfo)
1963        {        {
1964        unsigned long int get_options, all_options;        unsigned long int all_options;
1965  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1966        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1967  #endif  #endif
# Line 1449  while (!done) Line 1970  while (!done)
1970        int nameentrysize, namecount;        int nameentrysize, namecount;
1971        const uschar *nametable;        const uschar *nametable;
1972    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1973        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1974        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1975        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1509  while (!done) Line 2029  while (!done)
2029        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2030    
2031        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2032          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2033            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2034            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2035            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1523  while (!done) Line 2043  while (!done)
2043            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2044            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2045            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2046              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2047            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2048              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2049            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2050    
2051        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1591  while (!done) Line 2113  while (!done)
2113        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2114        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2115        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2116        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2117          information unless -i or -d was also present. This means that, except
2118          when auto-callouts are involved, the output from runs with and without
2119          -s should be identical. */
2120    
2121        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2122          {          {
2123          if (extra == NULL)          if (extra == NULL)
2124            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2125          else          else
2126            {            {
2127            uschar *start_bits = NULL;            uschar *start_bits = NULL;
2128            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2129    
2130              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2131              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2132    
2133              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2134            if (start_bits == NULL)            if (start_bits == NULL)
2135              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2136            else            else
2137              {              {
2138              int i;              int i;
# Line 1633  while (!done) Line 2162  while (!done)
2162              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2163              }              }
2164            }            }
2165    
2166            /* Show this only if the JIT was set by /S, not by -s. */
2167    
2168            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2169              {
2170              int jit;
2171              new_info(re, extra, PCRE_INFO_JIT, &jit);
2172              if (jit)
2173                fprintf(outfile, "JIT study was successful\n");
2174              else
2175    #ifdef SUPPORT_JIT
2176                fprintf(outfile, "JIT study was not successful\n");
2177    #else
2178                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2179    #endif
2180              }
2181          }          }
2182        }        }
2183    
# Line 1667  while (!done) Line 2212  while (!done)
2212            }            }
2213          else          else
2214            {            {
2215            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2216    
2217              /* If there is study data, write it. */
2218    
2219            if (extra != NULL)            if (extra != NULL)
2220              {              {
2221              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1677  while (!done) Line 2225  while (!done)
2225                  strerror(errno));                  strerror(errno));
2226                }                }
2227              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2228              }              }
2229            }            }
2230          fclose(f);          fclose(f);
2231          }          }
2232    
2233        new_free(re);        new_free(re);
2234        if (extra != NULL) new_free(extra);        if (extra != NULL) pcre_free_study(extra);
2235        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2236            {
2237            new_free((void *)tables);
2238            setlocale(LC_CTYPE, "C");
2239            locale_set = 0;
2240            }
2241        continue;  /* With next regex */        continue;  /* With next regex */
2242        }        }
2243      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1702  while (!done) Line 2254  while (!done)
2254      int callout_data_set = 0;      int callout_data_set = 0;
2255      int count, c;      int count, c;
2256      int copystrings = 0;      int copystrings = 0;
2257      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2258      int getstrings = 0;      int getstrings = 0;
2259      int getlist = 0;      int getlist = 0;
2260      int gmatched = 0;      int gmatched = 0;
2261      int start_offset = 0;      int start_offset = 0;
2262        int start_offset_sign = 1;
2263      int g_notempty = 0;      int g_notempty = 0;
2264      int use_dfa = 0;      int use_dfa = 0;
2265    
# Line 1720  while (!done) Line 2273  while (!done)
2273    
2274      pcre_callout = callout;      pcre_callout = callout;
2275      first_callout = 1;      first_callout = 1;
2276        last_callout_mark = NULL;
2277      callout_extra = 0;      callout_extra = 0;
2278      callout_count = 0;      callout_count = 0;
2279      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1734  while (!done) Line 2288  while (!done)
2288        {        {
2289        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2290          {          {
2291          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2292              {
2293              fprintf(outfile, "\n");
2294              break;
2295              }
2296          done = 1;          done = 1;
2297          goto CONTINUE;          goto CONTINUE;
2298          }          }
# Line 1794  while (!done) Line 2352  while (!done)
2352            {            {
2353            unsigned char *pt = p;            unsigned char *pt = p;
2354            c = 0;            c = 0;
2355            while (isxdigit(*(++pt)))  
2356              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2357              when isxdigit() is a macro that refers to its argument more than
2358              once. This is banned by the C Standard, but apparently happens in at
2359              least one MacOS environment. */
2360    
2361              for (pt++; isxdigit(*pt); pt++)
2362                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2363            if (*pt == '}')            if (*pt == '}')
2364              {              {
2365              unsigned char buff8[8];              unsigned char buff8[8];
2366              int ii, utn;              int ii, utn;
2367              utn = ord2utf8(c, buff8);              if (use_utf8)
2368              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2369              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2370                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2371                  c = buff8[ii];   /* Last byte */
2372                  }
2373                else
2374                 {
2375                 if (c > 255)
2376                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2377                     "UTF-8 mode is not enabled.\n"
2378                     "** Truncation will probably give the wrong result.\n", c);
2379                 }
2380              p = pt + 1;              p = pt + 1;
2381              break;              break;
2382              }              }
# Line 1815  while (!done) Line 2389  while (!done)
2389          c = 0;          c = 0;
2390          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2391            {            {
2392            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2393            p++;            p++;
2394            }            }
2395          break;          break;
# Line 1825  while (!done) Line 2399  while (!done)
2399          continue;          continue;
2400    
2401          case '>':          case '>':
2402            if (*p == '-')
2403              {
2404              start_offset_sign = -1;
2405              p++;
2406              }
2407          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2408            start_offset *= start_offset_sign;
2409          continue;          continue;
2410    
2411          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1898  while (!done) Line 2478  while (!done)
2478  #endif  #endif
2479            use_dfa = 1;            use_dfa = 1;
2480          continue;          continue;
2481    #endif
2482    
2483    #if !defined NODFA
2484          case 'F':          case 'F':
2485          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2486          continue;          continue;
# Line 1923  while (!done) Line 2505  while (!done)
2505            }            }
2506          continue;          continue;
2507    
2508            case 'J':
2509            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2510            if (extra != NULL
2511                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2512                && extra->executable_jit != NULL)
2513              {
2514              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2515              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2516              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2517              }
2518            continue;
2519    
2520          case 'L':          case 'L':
2521          getlist = 1;          getlist = 1;
2522          continue;          continue;
# Line 1932  while (!done) Line 2526  while (!done)
2526          continue;          continue;
2527    
2528          case 'N':          case 'N':
2529          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2530              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2531            else
2532              options |= PCRE_NOTEMPTY;
2533          continue;          continue;
2534    
2535          case 'O':          case 'O':
# Line 1955  while (!done) Line 2552  while (!done)
2552          continue;          continue;
2553    
2554          case 'P':          case 'P':
2555          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2556              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2557          continue;          continue;
2558    
2559          case 'Q':          case 'Q':
# Line 1990  while (!done) Line 2588  while (!done)
2588          show_malloc = 1;          show_malloc = 1;
2589          continue;          continue;
2590    
2591            case 'Y':
2592            options |= PCRE_NO_START_OPTIMIZE;
2593            continue;
2594    
2595          case 'Z':          case 'Z':
2596          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2597          continue;          continue;
# Line 2010  while (!done) Line 2612  while (!done)
2612        *q++ = c;        *q++ = c;
2613        }        }
2614      *q = 0;      *q = 0;
2615      len = q - dbuffer;      len = (int)(q - dbuffer);
2616    
2617        /* Move the data to the end of the buffer so that a read over the end of
2618        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2619        we are using the POSIX interface, we must include the terminating zero. */
2620    
2621    #if !defined NOPOSIX
2622        if (posix || do_posix)
2623          {
2624          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2625          bptr += buffer_size - len - 1;
2626          }
2627        else
2628    #endif
2629          {
2630          memmove(bptr + buffer_size - len, bptr, len);
2631          bptr += buffer_size - len;
2632          }
2633    
2634      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2635        {        {
# Line 2031  while (!done) Line 2650  while (!done)
2650          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2651        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2652        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2653          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2654    
2655        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2656    
# Line 2055  while (!done) Line 2675  while (!done)
2675              (void)pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2676                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2677              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2678              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2679                {                {
2680                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2681                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2682                  outfile);                  outfile);
2683                fprintf(outfile, "\n");                fprintf(outfile, "\n");
# Line 2075  while (!done) Line 2695  while (!done)
2695    
2696      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2697        {        {
2698          markptr = NULL;
2699    
2700        if (timeitm > 0)        if (timeitm > 0)
2701          {          {
2702          register int i;          register int i;
# Line 2086  while (!done) Line 2708  while (!done)
2708            {            {
2709            int workspace[1000];            int workspace[1000];
2710            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2711              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2712                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2713                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2714            }            }
# Line 2105  while (!done) Line 2727  while (!done)
2727    
2728        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2729        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
2730        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
2731          running of pcre_exec(), so disable the JIT optimization. This makes it
2732          possible to run the same set of tests with and without JIT externally
2733          requested. */
2734    
2735        if (find_match_limit)        if (find_match_limit)
2736          {          {
# Line 2114  while (!done) Line 2739  while (!done)
2739            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2740            extra->flags = 0;            extra->flags = 0;
2741            }            }
2742            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2743    
2744          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
2745            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2149  while (!done) Line 2775  while (!done)
2775        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2776          {          {
2777          int workspace[1000];          int workspace[1000];
2778          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2779            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2780            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2781          if (count == 0)          if (count == 0)
# Line 2197  while (!done) Line 2823  while (!done)
2823              }              }
2824            }            }
2825    
2826            /* do_allcaps requests showing of all captures in the pattern, to check
2827            unset ones at the end. */
2828    
2829            if (do_allcaps)
2830              {
2831              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2832              count++;   /* Allow for full match */
2833              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2834              }
2835    
2836            /* Output the captured substrings */
2837    
2838          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2839            {            {
2840            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2841                {
2842                if (use_offsets[i] != -1)
2843                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2844                    use_offsets[i], i);
2845                if (use_offsets[i+1] != -1)
2846                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2847                    use_offsets[i+1], i+1);
2848              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2849                }
2850            else            else
2851              {              {
2852              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2853              (void)pchars(bptr + use_offsets[i],              (void)pchars(bptr + use_offsets[i],
2854                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
2855              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2856              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
2857                {                {
2858                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
2859                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2860                  fprintf(outfile, " 0+ ");                  outfile);
2861                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
2862                }                }
2863              }              }
2864            }            }
2865    
2866            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2867    
2868          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2869            {            {
2870            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2293  while (!done) Line 2938  while (!done)
2938                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2939              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2940                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
2941              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
2942              }              }
2943            }            }
# Line 2303  while (!done) Line 2947  while (!done)
2947    
2948        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2949          {          {
2950          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2951  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
2952          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
2953            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
2954              bptr + use_offsets[0]);            fprintf(outfile, ": ");
2955  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2956                outfile);
2957              }
2958          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2959          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2960          }          }
# Line 2318  while (!done) Line 2964  while (!done)
2964        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2965        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2966    
2967        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
2968        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
2969        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
2970        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
2971        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
2972          newline setting in the pattern; if none was set, use pcre_config() to
2973          find the default.
2974    
2975        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2976        character, not one byte. */        character, not one byte. */
# Line 2338  while (!done) Line 2986  while (!done)
2986              {              {
2987              int d;              int d;
2988              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2989              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
2990                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
2991                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
2992                        (d == 10)? PCRE_NEWLINE_LF :
2993                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2994                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
2995                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2996              }              }
2997            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2998                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2999                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3000                &&                &&
3001                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2355  while (!done) Line 3006  while (!done)
3006              {              {
3007              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3008                {                {
3009                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3010                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3011                }                }
3012              }              }
3013            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3014            }            }
3015          else          else
3016            {            {
3017            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3018              {              {
3019              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3020                if (gmatched == 0)
3021                  {
3022                  if (markptr == NULL) fprintf(outfile, "No match\n");
3023                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3024                  }
3025                break;
3026    
3027                case PCRE_ERROR_BADUTF8:
3028                case PCRE_ERROR_SHORTUTF8:
3029                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3030                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3031                if (use_size_offsets >= 2)
3032                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3033                    use_offsets[1]);
3034                fprintf(outfile, "\n");
3035                break;
3036    
3037                default:
3038                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3039                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3040                else
3041                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3042                break;
3043              }              }
3044            else fprintf(outfile, "Error %d\n", count);  
3045            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3046            }            }
3047          }          }
# Line 2379  while (!done) Line 3051  while (!done)
3051        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3052    
3053        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3054        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3055        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
3056        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3057        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3058        character. */        character. */
3059    
# Line 2390  while (!done) Line 3062  while (!done)
3062        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3063          {          {
3064          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3065          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3066          }          }
3067    
3068        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2416  while (!done) Line 3088  while (!done)
3088  #endif  #endif
3089    
3090    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3091    if (extra != NULL) new_free(extra);    if (extra != NULL) pcre_free_study(extra);
3092    if (tables != NULL)    if (locale_set)
3093      {      {
3094      new_free((void *)tables);      new_free((void *)tables);
3095      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3096      locale_set = 0;      locale_set = 0;
3097      }      }
3098      if (jit_stack != NULL)
3099        {
3100        pcre_jit_stack_free(jit_stack);
3101        jit_stack = NULL;
3102        }
3103    }    }
3104    
3105  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");

Legend:
Removed from v.336  
changed lines
  Added in v.775

  ViewVC Help
Powered by ViewVC 1.1.5