/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log | View Patch

code/trunk/pcretest.c revision 146 by ph10, Thu Apr 5 09:17:28 2007 UTC code/branches/pcre16/pcretest.c revision 805 by ph10, Wed Dec 14 16:49:20 2011 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
61  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 59  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
92  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 79  appropriately for an application, not fo Line 107  appropriately for an application, not fo
107  #include "pcre.h"  #include "pcre.h"
108  #include "pcre_internal.h"  #include "pcre_internal.h"
109    
110  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
111  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
112  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
113    when that is compiled in debug mode. */
114    
115    #ifdef SUPPORT_PCRE8
116    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
117    #endif
118    #ifdef SUPPORT_PCRE16
119    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
120    #endif
121    
122    /* We need access to some of the data tables that PCRE uses. So as not to have
123    to keep two copies, we include the source file here, changing the names of the
124    external symbols to prevent clashes. */
125    
126    #define _pcre_ucp_gentype      ucp_gentype
127    #define _pcre_ucp_typerange    ucp_typerange
128  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
129  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
130  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 90  symbols to prevent clashes. */ Line 132  symbols to prevent clashes. */
132  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
133  #define _pcre_utt              utt  #define _pcre_utt              utt
134  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
135    #define _pcre_utt_names        utt_names
136  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
137    
138  #include "pcre_tables.c"  #include "pcre_tables.c"
139    
140  /* We also need the pcre_printint() function for printing out compiled  /* The definition of the macro PRINTABLE, which determines whether to print an
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled.  
   
 The definition of the macro PRINTABLE, which determines whether to print an  
141  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
142  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
143  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
144  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
145    
146  #include "pcre_printint.src"  #ifdef EBCDIC
147    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
148    #else
149    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
150    #endif
151    
152  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
153    
   
154  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
155  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
156  Makefile. */  Makefile. */
# Line 152  static int callout_count; Line 194  static int callout_count;
194  static int callout_extra;  static int callout_extra;
195  static int callout_fail_count;  static int callout_fail_count;
196  static int callout_fail_id;  static int callout_fail_id;
197    static int debug_lengths;
198  static int first_callout;  static int first_callout;
199  static int locale_set = 0;  static int locale_set = 0;
200  static int show_malloc;  static int show_malloc;
201  static int use_utf8;  static int use_utf8;
202  static size_t gotten_store;  static size_t gotten_store;
203    static size_t first_gotten_store = 0;
204    static const unsigned char *last_callout_mark = NULL;
205    
206    static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
207    
208  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
209    
210  static int buffer_size = 50000;  static int buffer_size = 50000;
211  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
212  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
213  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
214    
215    #ifdef SUPPORT_PCRE16
216    static int buffer16_size = 0;
217    static pcre_uint16 *buffer16 = NULL;
218    #endif
219    
220    /* Textual explanations for runtime error codes */
221    
222    static const char *errtexts[] = {
223      NULL,  /* 0 is no error */
224      NULL,  /* NOMATCH is handled specially */
225      "NULL argument passed",
226      "bad option value",
227      "magic number missing",
228      "unknown opcode - pattern overwritten?",
229      "no more memory",
230      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
231      "match limit exceeded",
232      "callout error code",
233      NULL,  /* BADUTF8 is handled specially */
234      "bad UTF-8 offset",
235      NULL,  /* PARTIAL is handled specially */
236      "not used - internal error",
237      "internal error - pattern overwritten?",
238      "bad count value",
239      "item unsupported for DFA matching",
240      "backreference condition or recursion test not supported for DFA matching",
241      "match limit not supported for DFA matching",
242      "workspace size exceeded in DFA matching",
243      "too much recursion for DFA matching",
244      "recursion limit exceeded",
245      "not used - internal error",
246      "invalid combination of newline options",
247      "bad offset value",
248      NULL,  /* SHORTUTF8 is handled specially */
249      "nested recursion at the same subject position",
250      "JIT stack limit reached",
251      "pattern compiled in wrong mode (8-bit/16-bit error)"
252    };
253    
254    
255    /*************************************************
256    *         Alternate character tables             *
257    *************************************************/
258    
259    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
260    using the default tables of the library. However, the T option can be used to
261    select alternate sets of tables, for different kinds of testing. Note also that
262    the L (locale) option also adjusts the tables. */
263    
264    /* This is the set of tables distributed as default with PCRE. It recognizes
265    only ASCII characters. */
266    
267    static const unsigned char tables0[] = {
268    
269    /* This table is a lower casing table. */
270    
271        0,  1,  2,  3,  4,  5,  6,  7,
272        8,  9, 10, 11, 12, 13, 14, 15,
273       16, 17, 18, 19, 20, 21, 22, 23,
274       24, 25, 26, 27, 28, 29, 30, 31,
275       32, 33, 34, 35, 36, 37, 38, 39,
276       40, 41, 42, 43, 44, 45, 46, 47,
277       48, 49, 50, 51, 52, 53, 54, 55,
278       56, 57, 58, 59, 60, 61, 62, 63,
279       64, 97, 98, 99,100,101,102,103,
280      104,105,106,107,108,109,110,111,
281      112,113,114,115,116,117,118,119,
282      120,121,122, 91, 92, 93, 94, 95,
283       96, 97, 98, 99,100,101,102,103,
284      104,105,106,107,108,109,110,111,
285      112,113,114,115,116,117,118,119,
286      120,121,122,123,124,125,126,127,
287      128,129,130,131,132,133,134,135,
288      136,137,138,139,140,141,142,143,
289      144,145,146,147,148,149,150,151,
290      152,153,154,155,156,157,158,159,
291      160,161,162,163,164,165,166,167,
292      168,169,170,171,172,173,174,175,
293      176,177,178,179,180,181,182,183,
294      184,185,186,187,188,189,190,191,
295      192,193,194,195,196,197,198,199,
296      200,201,202,203,204,205,206,207,
297      208,209,210,211,212,213,214,215,
298      216,217,218,219,220,221,222,223,
299      224,225,226,227,228,229,230,231,
300      232,233,234,235,236,237,238,239,
301      240,241,242,243,244,245,246,247,
302      248,249,250,251,252,253,254,255,
303    
304    /* This table is a case flipping table. */
305    
306        0,  1,  2,  3,  4,  5,  6,  7,
307        8,  9, 10, 11, 12, 13, 14, 15,
308       16, 17, 18, 19, 20, 21, 22, 23,
309       24, 25, 26, 27, 28, 29, 30, 31,
310       32, 33, 34, 35, 36, 37, 38, 39,
311       40, 41, 42, 43, 44, 45, 46, 47,
312       48, 49, 50, 51, 52, 53, 54, 55,
313       56, 57, 58, 59, 60, 61, 62, 63,
314       64, 97, 98, 99,100,101,102,103,
315      104,105,106,107,108,109,110,111,
316      112,113,114,115,116,117,118,119,
317      120,121,122, 91, 92, 93, 94, 95,
318       96, 65, 66, 67, 68, 69, 70, 71,
319       72, 73, 74, 75, 76, 77, 78, 79,
320       80, 81, 82, 83, 84, 85, 86, 87,
321       88, 89, 90,123,124,125,126,127,
322      128,129,130,131,132,133,134,135,
323      136,137,138,139,140,141,142,143,
324      144,145,146,147,148,149,150,151,
325      152,153,154,155,156,157,158,159,
326      160,161,162,163,164,165,166,167,
327      168,169,170,171,172,173,174,175,
328      176,177,178,179,180,181,182,183,
329      184,185,186,187,188,189,190,191,
330      192,193,194,195,196,197,198,199,
331      200,201,202,203,204,205,206,207,
332      208,209,210,211,212,213,214,215,
333      216,217,218,219,220,221,222,223,
334      224,225,226,227,228,229,230,231,
335      232,233,234,235,236,237,238,239,
336      240,241,242,243,244,245,246,247,
337      248,249,250,251,252,253,254,255,
338    
339    /* This table contains bit maps for various character classes. Each map is 32
340    bytes long and the bits run from the least significant end of each byte. The
341    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
342    graph, print, punct, and cntrl. Other classes are built from combinations. */
343    
344      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
345      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348    
349      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
350      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353    
354      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
355      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358    
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363    
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368    
369      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
370      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373    
374      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
375      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
378    
379      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
380      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
381      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
382      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
383    
384      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
385      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
386      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
387      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
388    
389      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
390      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
391      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
392      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
393    
394    /* This table identifies various classes of character by individual bits:
395      0x01   white space character
396      0x02   letter
397      0x04   decimal digit
398      0x08   hexadecimal digit
399      0x10   alphanumeric or '_'
400      0x80   regular expression metacharacter or binary zero
401    */
402    
403      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
404      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
407      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
408      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
409      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
410      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
411      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
412      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
413      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
414      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
415      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
416      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
417      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
418      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
419      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
420      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
421      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
422      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
423      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
424      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
425      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
426      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
427      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
428      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
429      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
430      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
431      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
432      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
433      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
434      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
435    
436    /* This is a set of tables that came originally from a Windows user. It seems to
437    be at least an approximation of ISO 8859. In particular, there are characters
438    greater than 128 that are marked as spaces, letters, etc. */
439    
440    static const unsigned char tables1[] = {
441    0,1,2,3,4,5,6,7,
442    8,9,10,11,12,13,14,15,
443    16,17,18,19,20,21,22,23,
444    24,25,26,27,28,29,30,31,
445    32,33,34,35,36,37,38,39,
446    40,41,42,43,44,45,46,47,
447    48,49,50,51,52,53,54,55,
448    56,57,58,59,60,61,62,63,
449    64,97,98,99,100,101,102,103,
450    104,105,106,107,108,109,110,111,
451    112,113,114,115,116,117,118,119,
452    120,121,122,91,92,93,94,95,
453    96,97,98,99,100,101,102,103,
454    104,105,106,107,108,109,110,111,
455    112,113,114,115,116,117,118,119,
456    120,121,122,123,124,125,126,127,
457    128,129,130,131,132,133,134,135,
458    136,137,138,139,140,141,142,143,
459    144,145,146,147,148,149,150,151,
460    152,153,154,155,156,157,158,159,
461    160,161,162,163,164,165,166,167,
462    168,169,170,171,172,173,174,175,
463    176,177,178,179,180,181,182,183,
464    184,185,186,187,188,189,190,191,
465    224,225,226,227,228,229,230,231,
466    232,233,234,235,236,237,238,239,
467    240,241,242,243,244,245,246,215,
468    248,249,250,251,252,253,254,223,
469    224,225,226,227,228,229,230,231,
470    232,233,234,235,236,237,238,239,
471    240,241,242,243,244,245,246,247,
472    248,249,250,251,252,253,254,255,
473    0,1,2,3,4,5,6,7,
474    8,9,10,11,12,13,14,15,
475    16,17,18,19,20,21,22,23,
476    24,25,26,27,28,29,30,31,
477    32,33,34,35,36,37,38,39,
478    40,41,42,43,44,45,46,47,
479    48,49,50,51,52,53,54,55,
480    56,57,58,59,60,61,62,63,
481    64,97,98,99,100,101,102,103,
482    104,105,106,107,108,109,110,111,
483    112,113,114,115,116,117,118,119,
484    120,121,122,91,92,93,94,95,
485    96,65,66,67,68,69,70,71,
486    72,73,74,75,76,77,78,79,
487    80,81,82,83,84,85,86,87,
488    88,89,90,123,124,125,126,127,
489    128,129,130,131,132,133,134,135,
490    136,137,138,139,140,141,142,143,
491    144,145,146,147,148,149,150,151,
492    152,153,154,155,156,157,158,159,
493    160,161,162,163,164,165,166,167,
494    168,169,170,171,172,173,174,175,
495    176,177,178,179,180,181,182,183,
496    184,185,186,187,188,189,190,191,
497    224,225,226,227,228,229,230,231,
498    232,233,234,235,236,237,238,239,
499    240,241,242,243,244,245,246,215,
500    248,249,250,251,252,253,254,223,
501    192,193,194,195,196,197,198,199,
502    200,201,202,203,204,205,206,207,
503    208,209,210,211,212,213,214,247,
504    216,217,218,219,220,221,222,255,
505    0,62,0,0,1,0,0,0,
506    0,0,0,0,0,0,0,0,
507    32,0,0,0,1,0,0,0,
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,255,3,
510    126,0,0,0,126,0,0,0,
511    0,0,0,0,0,0,0,0,
512    0,0,0,0,0,0,0,0,
513    0,0,0,0,0,0,255,3,
514    0,0,0,0,0,0,0,0,
515    0,0,0,0,0,0,12,2,
516    0,0,0,0,0,0,0,0,
517    0,0,0,0,0,0,0,0,
518    254,255,255,7,0,0,0,0,
519    0,0,0,0,0,0,0,0,
520    255,255,127,127,0,0,0,0,
521    0,0,0,0,0,0,0,0,
522    0,0,0,0,254,255,255,7,
523    0,0,0,0,0,4,32,4,
524    0,0,0,128,255,255,127,255,
525    0,0,0,0,0,0,255,3,
526    254,255,255,135,254,255,255,7,
527    0,0,0,0,0,4,44,6,
528    255,255,127,255,255,255,127,255,
529    0,0,0,0,254,255,255,255,
530    255,255,255,255,255,255,255,127,
531    0,0,0,0,254,255,255,255,
532    255,255,255,255,255,255,255,255,
533    0,2,0,0,255,255,255,255,
534    255,255,255,255,255,255,255,127,
535    0,0,0,0,255,255,255,255,
536    255,255,255,255,255,255,255,255,
537    0,0,0,0,254,255,0,252,
538    1,0,0,248,1,0,0,120,
539    0,0,0,0,254,255,255,255,
540    0,0,128,0,0,0,128,0,
541    255,255,255,255,0,0,0,0,
542    0,0,0,0,0,0,0,128,
543    255,255,255,255,0,0,0,0,
544    0,0,0,0,0,0,0,0,
545    128,0,0,0,0,0,0,0,
546    0,1,1,0,1,1,0,0,
547    0,0,0,0,0,0,0,0,
548    0,0,0,0,0,0,0,0,
549    1,0,0,0,128,0,0,0,
550    128,128,128,128,0,0,128,0,
551    28,28,28,28,28,28,28,28,
552    28,28,0,0,0,0,0,128,
553    0,26,26,26,26,26,26,18,
554    18,18,18,18,18,18,18,18,
555    18,18,18,18,18,18,18,18,
556    18,18,18,128,128,0,128,16,
557    0,26,26,26,26,26,26,18,
558    18,18,18,18,18,18,18,18,
559    18,18,18,18,18,18,18,18,
560    18,18,18,128,128,0,0,0,
561    0,0,0,0,0,1,0,0,
562    0,0,0,0,0,0,0,0,
563    0,0,0,0,0,0,0,0,
564    0,0,0,0,0,0,0,0,
565    1,0,0,0,0,0,0,0,
566    0,0,18,0,0,0,0,0,
567    0,0,20,20,0,18,0,0,
568    0,20,18,0,0,0,0,0,
569    18,18,18,18,18,18,18,18,
570    18,18,18,18,18,18,18,18,
571    18,18,18,18,18,18,18,0,
572    18,18,18,18,18,18,18,18,
573    18,18,18,18,18,18,18,18,
574    18,18,18,18,18,18,18,18,
575    18,18,18,18,18,18,18,0,
576    18,18,18,18,18,18,18,18
577    };
578    
579    
580    
581    
582    #ifndef HAVE_STRERROR
583    /*************************************************
584    *     Provide strerror() for non-ANSI libraries  *
585    *************************************************/
586    
587    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
588    in their libraries, but can provide the same facility by this simple
589    alternative function. */
590    
591    extern int   sys_nerr;
592    extern char *sys_errlist[];
593    
594    char *
595    strerror(int n)
596    {
597    if (n < 0 || n >= sys_nerr) return "unknown error number";
598    return sys_errlist[n];
599    }
600    #endif /* HAVE_STRERROR */
601    
602    
603    /*************************************************
604    *         JIT memory callback                    *
605    *************************************************/
606    
607    static pcre_jit_stack* jit_callback(void *arg)
608    {
609    return (pcre_jit_stack *)arg;
610    }
611    
612    
613    #ifdef SUPPORT_PCRE16
614    /*************************************************
615    *         Convert a string to 16-bit             *
616    *************************************************/
617    
618    /* The result is always left in buffer16. */
619    
620    static int
621    to16(unsigned char *p, int utf)
622    {
623    pcre_uint16 *pp;
624    int len = (int)strlen((char *)p) + 1;
625    
626    if (buffer16_size < 2*len)
627      {
628      if (buffer16 != NULL) free(buffer16);
629      buffer16_size = 2*len;
630      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
631      if (buffer16 == NULL)
632        {
633        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
634        exit(1);
635        }
636      }
637    
638    pp = buffer16;
639    
640    if (!utf)
641      {
642      while (*p != 0) *pp++ = *p++;
643      *pp++ = 0;
644      }
645    
646    else
647      {
648    fprintf(stderr, "pcretest: no support yet for UTF-16\n");
649    exit(1);
650      }
651    
652    return pp - buffer16;
653    }
654    #endif
655    
656    
657  /*************************************************  /*************************************************
# Line 183  optimal way of handling this, but hey, t Line 670  optimal way of handling this, but hey, t
670  Arguments:  Arguments:
671    f            the file to read    f            the file to read
672    start        where in buffer to start (this *must* be within buffer)    start        where in buffer to start (this *must* be within buffer)
673      prompt       for stdin or readline()
674    
675  Returns:       pointer to the start of new data  Returns:       pointer to the start of new data
676                 could be a copy of start, or could be moved                 could be a copy of start, or could be moved
677                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
678  */  */
679    
680  static uschar *  static pcre_uint8 *
681  extend_inputline(FILE *f, uschar *start)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
682  {  {
683  uschar *here = start;  pcre_uint8 *here = start;
684    
685  for (;;)  for (;;)
686    {    {
687    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
688    
689    if (rlen > 1000)    if (rlen > 1000)
690      {      {
691      int dlen;      int dlen;
692      if (fgets((char *)here, rlen,  f) == NULL)  
693        return (here == start)? NULL : start;      /* If libreadline support is required, use readline() to read a line if the
694        input is a terminal. Note that readline() removes the trailing newline, so
695        we must put it back again, to be compatible with fgets(). */
696    
697    #ifdef SUPPORT_LIBREADLINE
698        if (isatty(fileno(f)))
699          {
700          size_t len;
701          char *s = readline(prompt);
702          if (s == NULL) return (here == start)? NULL : start;
703          len = strlen(s);
704          if (len > 0) add_history(s);
705          if (len > rlen - 1) len = rlen - 1;
706          memcpy(here, s, len);
707          here[len] = '\n';
708          here[len+1] = 0;
709          free(s);
710          }
711        else
712    #endif
713    
714        /* Read the next line by normal means, prompting if the file is stdin. */
715    
716          {
717          if (f == stdin) printf("%s", prompt);
718          if (fgets((char *)here, rlen,  f) == NULL)
719            return (here == start)? NULL : start;
720          }
721    
722      dlen = (int)strlen((char *)here);      dlen = (int)strlen((char *)here);
723      if (dlen > 0 && here[dlen - 1] == '\n') return start;      if (dlen > 0 && here[dlen - 1] == '\n') return start;
724      here += dlen;      here += dlen;
# Line 211  for (;;) Line 727  for (;;)
727    else    else
728      {      {
729      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
730      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
731      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
732      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
733    
734      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
735        {        {
# Line 355  Returns:     number of characters placed Line 871  Returns:     number of characters placed
871  #if !defined NOUTF8  #if !defined NOUTF8
872    
873  static int  static int
874  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
875  {  {
876  register int i, j;  register int i, j;
877  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 518  fprintf(outfile, "%.*s", (cb->next_item_ Line 1034  fprintf(outfile, "%.*s", (cb->next_item_
1034  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1035  first_callout = 0;  first_callout = 0;
1036    
1037    if (cb->mark != last_callout_mark)
1038      {
1039      fprintf(outfile, "Latest Mark: %s\n",
1040        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1041      last_callout_mark = cb->mark;
1042      }
1043    
1044  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1045    {    {
1046    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 537  return (cb->callout_number != callout_fa Line 1060  return (cb->callout_number != callout_fa
1060  *            Local malloc functions              *  *            Local malloc functions              *
1061  *************************************************/  *************************************************/
1062    
1063  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1064  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1065    show_malloc variable is set only during matching. */
1066    
1067  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1068  {  {
1069  void *block = malloc(size);  void *block = malloc(size);
1070  gotten_store = size;  gotten_store = size;
1071    if (first_gotten_store == 0) first_gotten_store = size;
1072  if (show_malloc)  if (show_malloc)
1073    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1074  return block;  return block;
# Line 556  if (show_malloc) Line 1081  if (show_malloc)
1081  free(block);  free(block);
1082  }  }
1083    
   
1084  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1085    
1086  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 584  free(block); Line 1108  free(block);
1108  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1109  {  {
1110  int rc;  int rc;
1111  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
1112    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1113  }  }
1114    
# Line 612  return ((value & 0x000000ff) << 24) | Line 1136  return ((value & 0x000000ff) << 24) |
1136  *************************************************/  *************************************************/
1137    
1138  static int  static int
1139  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1140    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1141    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1142  {  {
# Line 659  return count; Line 1183  return count;
1183    
1184    
1185  /*************************************************  /*************************************************
1186    *         Case-independent strncmp() function    *
1187    *************************************************/
1188    
1189    /*
1190    Arguments:
1191      s         first string
1192      t         second string
1193      n         number of characters to compare
1194    
1195    Returns:    < 0, = 0, or > 0, according to the comparison
1196    */
1197    
/* Compare at most n characters of s and t, ignoring case. Like strncmp(), the
comparison stops as soon as both strings have reached their terminating zero,
so nothing beyond the end of either string is ever read. A zero or negative
value of n compares nothing and yields 0. */

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;     /* Both strings ended together: they are equal */
  }
return 0;
}
1208    
1209    
1210    
1211    /*************************************************
1212  *         Check newline indicator                *  *         Check newline indicator                *
1213  *************************************************/  *************************************************/
1214    
1215  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1216  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  a message and return 0 if there is no match.
1217    
1218  Arguments:  Arguments:
1219    p           points after the leading '<'    p           points after the leading '<'
# Line 673  Returns:      appropriate PCRE_NEWLINE_x Line 1223  Returns:      appropriate PCRE_NEWLINE_x
1223  */  */
1224    
1225  static int  static int
1226  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1227  {  {
1228  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1229  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1230  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1231  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1232    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1233    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1234    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1235  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1236  return 0;  return 0;
1237  }  }
# Line 692  return 0; Line 1245  return 0;
1245  static void  static void
1246  usage(void)  usage(void)
1247  {  {
1248  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1249    printf("Input and output default to stdin and stdout.\n");
1250    #ifdef SUPPORT_LIBREADLINE
1251    printf("If input is a terminal, readline() is used to read from it.\n");
1252    #else
1253    printf("This version of pcretest is not linked with readline().\n");
1254    #endif
1255    printf("\nOptions:\n");
1256    #ifdef SUPPORT_PCRE16
1257    printf("  -16      use 16-bit interface\n");
1258    #endif
1259  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1260  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1261  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 701  printf("  -dfa     force DFA matching fo Line 1264  printf("  -dfa     force DFA matching fo
1264  #endif  #endif
1265  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
1266  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
1267           "  -M       find MATCH_LIMIT minimum for each subject\n"
1268         "  -m       output memory used information\n"         "  -m       output memory used information\n"
1269         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
1270  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 708  printf("  -p       use POSIX interface\n Line 1272  printf("  -p       use POSIX interface\n
1272  #endif  #endif
1273  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1274  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1275  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1276           "  -s+      force each pattern to be studied, using JIT if available\n"
1277         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1278  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1279  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 730  int main(int argc, char **argv) Line 1295  int main(int argc, char **argv)
1295  FILE *infile = stdin;  FILE *infile = stdin;
1296  int options = 0;  int options = 0;
1297  int study_options = 0;  int study_options = 0;
1298    int default_find_match_limit = FALSE;
1299  int op = 1;  int op = 1;
1300  int timeit = 0;  int timeit = 0;
1301  int timeitm = 0;  int timeitm = 0;
1302  int showinfo = 0;  int showinfo = 0;
1303  int showstore = 0;  int showstore = 0;
1304    int force_study = -1;
1305    int force_study_options = 0;
1306  int quiet = 0;  int quiet = 0;
1307  int size_offsets = 45;  int size_offsets = 45;
1308  int size_offsets_max;  int size_offsets_max;
# Line 745  int posix = 0; Line 1313  int posix = 0;
1313  int debug = 0;  int debug = 0;
1314  int done = 0;  int done = 0;
1315  int all_use_dfa = 0;  int all_use_dfa = 0;
1316    int use_pcre16 = 0;
1317  int yield = 0;  int yield = 0;
1318  int stack_size;  int stack_size;
1319    
1320    pcre_jit_stack *jit_stack = NULL;
1321    
1322  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1323  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1324    
1325  uschar copynames[1024];  pcre_uchar copynames[1024];
1326  uschar getnames[1024];  pcre_uchar getnames[1024];
   
 uschar *copynamesptr;  
 uschar *getnamesptr;  
1327    
1328  /* Get buffers from malloc() so that Electric Fence will check their misuse  pcre_uchar *copynamesptr;
1329  when I am debugging. They grow automatically when very long lines are read. */  pcre_uchar *getnamesptr;
1330    
1331  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
1332  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
1333  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
1334    
1335    buffer = (pcre_uint8 *)malloc(buffer_size);
1336    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1337    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1338    
1339  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1340    
# Line 783  while (argc > 1 && argv[op][0] == '-') Line 1355  while (argc > 1 && argv[op][0] == '-')
1355    {    {
1356    unsigned char *endptr;    unsigned char *endptr;
1357    
1358    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1359      showstore = 1;    else if (strcmp(argv[op], "-m") == 0) showstore = 1;
1360      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1361      else if (strcmp(argv[op], "-s+") == 0)
1362        {
1363        force_study = 1;
1364        force_study_options = PCRE_STUDY_JIT_COMPILE;
1365        }
1366    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1367    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1368    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1369    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1370      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1371  #if !defined NODFA  #if !defined NODFA
1372    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1373  #endif  #endif
# Line 817  while (argc > 1 && argv[op][0] == '-') Line 1396  while (argc > 1 && argv[op][0] == '-')
1396        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1397          *endptr == 0))          *endptr == 0))
1398      {      {
1399  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1400      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1401      exit(1);      exit(1);
1402  #else  #else
# Line 841  while (argc > 1 && argv[op][0] == '-') Line 1420  while (argc > 1 && argv[op][0] == '-')
1420    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1421      {      {
1422      int rc;      int rc;
1423        unsigned long int lrc;
1424      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1425      printf("Compiled with\n");      printf("Compiled with\n");
1426    
1427    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */
1428    
1429    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1430        printf("  8-bit and 16-bit support\n");
1431        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1432        printf("  %sUTF-8 support\n", rc? "" : "No ");
1433        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1434        printf("  %sUTF-16 support\n", rc? "" : "No ");
1435    #elif defined SUPPORT_PCRE8
1436        printf("  8-bit support only\n");
1437      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1438      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1439    #else
1440        printf("  16-bit support only\n");
1441        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1442        printf("  %sUTF-16 support\n", rc? "" : "No ");
1443    #endif
1444    
1445      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1446      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1447        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1448        if (rc)
1449          printf("  Just-in-time compiler support\n");
1450        else
1451          printf("  No just-in-time compiler support\n");
1452      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1453      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1454        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1455        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1456          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1457          (rc == -2)? "ANYCRLF" :
1458        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1459        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1460        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1461                                         "all Unicode newlines");
1462      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1463      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1464      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1465      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1466      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1467      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1468      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1469      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1470      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1471      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1472      goto EXIT;      goto EXIT;
# Line 880  while (argc > 1 && argv[op][0] == '-') Line 1488  while (argc > 1 && argv[op][0] == '-')
1488    argc--;    argc--;
1489    }    }
1490    
1491    /* Select which fullinfo function to use. */
1492    
1493    fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
1494    
1495  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1496    
1497  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
# Line 887  offsets = (int *)malloc(size_offsets_max Line 1499  offsets = (int *)malloc(size_offsets_max
1499  if (offsets == NULL)  if (offsets == NULL)
1500    {    {
1501    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1502      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1503    yield = 1;    yield = 1;
1504    goto EXIT;    goto EXIT;
1505    }    }
# Line 918  if (argc > 2) Line 1530  if (argc > 2)
1530    
1531  /* Set alternative malloc function */  /* Set alternative malloc function */
1532    
1533    #ifdef SUPPORT_PCRE8
1534  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1535  pcre_free = new_free;  pcre_free = new_free;
1536  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1537  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1538    #endif
1539    
1540    #ifdef SUPPORT_PCRE16
1541    pcre16_malloc = new_malloc;
1542    pcre16_free = new_free;
1543    pcre16_stack_malloc = stack_malloc;
1544    pcre16_stack_free = stack_free;
1545    #endif
1546    
1547  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1548    
# Line 940  while (!done) Line 1561  while (!done)
1561  #endif  #endif
1562    
1563    const char *error;    const char *error;
1564      unsigned char *markptr;
1565    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1566    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1567    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1568    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1569    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1570      int do_allcaps = 0;
1571      int do_mark = 0;
1572    int do_study = 0;    int do_study = 0;
1573      int no_force_study = 0;
1574    int do_debug = debug;    int do_debug = debug;
   int debug_lengths = 1;  
1575    int do_G = 0;    int do_G = 0;
1576    int do_g = 0;    int do_g = 0;
1577    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1578    int do_showrest = 0;    int do_showrest = 0;
1579      int do_showcaprest = 0;
1580    int do_flip = 0;    int do_flip = 0;
1581    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1582    
1583    use_utf8 = 0;    use_utf8 = 0;
1584      debug_lengths = 1;
1585    
1586    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1587    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1588    fflush(outfile);    fflush(outfile);
1589    
# Line 971  while (!done) Line 1596  while (!done)
1596    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1597      {      {
1598      unsigned long int magic, get_options;      unsigned long int magic, get_options;
1599      uschar sbuf[8];      pcre_uint8 sbuf[8];
1600      FILE *f;      FILE *f;
1601    
1602      p++;      p++;
# Line 994  while (!done) Line 1619  while (!done)
1619        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1620    
1621      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1622      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1623    
1624      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1625    
# Line 1013  while (!done) Line 1638  while (!done)
1638          }          }
1639        }        }
1640    
1641      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1642        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1643    
1644      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
# Line 1021  while (!done) Line 1646  while (!done)
1646      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1647      use_utf8 = (get_options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1648    
1649      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1650    
1651      if (true_study_size != 0)      if (true_study_size != 0)
1652        {        {
# Line 1037  while (!done) Line 1662  while (!done)
1662          {          {
1663          FAIL_READ:          FAIL_READ:
1664          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1665          if (extra != NULL) new_free(extra);          if (extra != NULL) pcre_free_study(extra);
1666          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1667          fclose(f);          fclose(f);
1668          continue;          continue;
# Line 1058  while (!done) Line 1683  while (!done)
1683    
1684    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1685      {      {
1686      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1687      goto SKIP_DATA;      goto SKIP_DATA;
1688      }      }
1689    
1690    pp = p;    pp = p;
1691    poffset = p - buffer;    poffset = (int)(p - buffer);
1692    
1693    for(;;)    for(;;)
1694      {      {
# Line 1074  while (!done) Line 1699  while (!done)
1699        pp++;        pp++;
1700        }        }
1701      if (*pp != 0) break;      if (*pp != 0) break;
1702      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1703        {        {
1704        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1705        done = 1;        done = 1;
# Line 1118  while (!done) Line 1742  while (!done)
1742        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1743        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1744    
1745        case '+': do_showrest = 1; break;        case '+':
1746          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1747          break;
1748    
1749          case '=': do_allcaps = 1; break;
1750        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1751        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
1752        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1128  while (!done) Line 1756  while (!done)
1756        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1757        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1758        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1759          case 'K': do_mark = 1; break;
1760        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1761        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1762    
# Line 1135  while (!done) Line 1764  while (!done)
1764        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1765  #endif  #endif
1766    
1767        case 'S': do_study = 1; break;        case 'S':
1768          if (do_study == 0)
1769            {
1770            do_study = 1;
1771            if (*pp == '+')
1772              {
1773              study_options |= PCRE_STUDY_JIT_COMPILE;
1774              pp++;
1775              }
1776            }
1777          else
1778            {
1779            do_study = 0;
1780            no_force_study = 1;
1781            }
1782          break;
1783    
1784        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1785          case 'W': options |= PCRE_UCP; break;
1786        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1787          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1788        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1789        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1790        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1791    
1792          case 'T':
1793          switch (*pp++)
1794            {
1795            case '0': tables = tables0; break;
1796            case '1': tables = tables1; break;
1797    
1798            case '\r':
1799            case '\n':
1800            case ' ':
1801            case 0:
1802            fprintf(outfile, "** Missing table number after /T\n");
1803            goto SKIP_DATA;
1804    
1805            default:
1806            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1807            goto SKIP_DATA;
1808            }
1809          break;
1810    
1811        case 'L':        case 'L':
1812        ppp = pp;        ppp = pp;
1813        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1167  while (!done) Line 1833  while (!done)
1833    
1834        case '<':        case '<':
1835          {          {
1836          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1837          if (x == 0) goto SKIP_DATA;            {
1838          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
1839          while (*pp++ != '>');            pp += 3;
1840              }
1841            else
1842              {
1843              int x = check_newline(pp, outfile);
1844              if (x == 0) goto SKIP_DATA;
1845              options |= x;
1846              while (*pp++ != '>');
1847              }
1848          }          }
1849        break;        break;
1850    
# Line 1187  while (!done) Line 1861  while (!done)
1861    
1862    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1863    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1864    local character tables. */    local character tables. Neither does it have 16-bit support. */
1865    
1866  #if !defined NOPOSIX  #if !defined NOPOSIX
1867    if (posix || do_posix)    if (posix || do_posix)
# Line 1200  while (!done) Line 1874  while (!done)
1874      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1875      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1876      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1877        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1878        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1879    
1880        first_gotten_store = 0;
1881      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1882    
1883      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1220  while (!done) Line 1897  while (!done)
1897  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1898    
1899      {      {
1900        unsigned long int get_options;
1901    
1902        /* In 16-bit mode, convert the input. The space needed for a non-UTF string
1903        is exactly double the 8-bit size. For a UTF-8 string, the size needed for
1904        UTF-16 is no more than double, because up to 0xffff uses no more than 3
1905        bytes in UTF-8 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8
1906        and up to 4 bytes in UTF-16. */
1907    
1908    #ifdef SUPPORT_PCRE16
1909        if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
1910    #endif
1911    
1912        /* Compile many times when timing */
1913    
1914      if (timeit > 0)      if (timeit > 0)
1915        {        {
1916        register int i;        register int i;
# Line 1227  while (!done) Line 1918  while (!done)
1918        clock_t start_time = clock();        clock_t start_time = clock();
1919        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
1920          {          {
1921          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  #ifdef SUPPORT_PCRE16
1922            if (use_pcre16)
1923              re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1924            else
1925    #endif
1926              re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1927          if (re != NULL) free(re);          if (re != NULL) free(re);
1928          }          }
1929        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1236  while (!done) Line 1932  while (!done)
1932            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1933        }        }
1934    
1935      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
1936    
1937    #ifdef SUPPORT_PCRE16
1938        if (use_pcre16)
1939          re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1940        else
1941    #endif
1942          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1943    
1944      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1945      if non-interactive. */      if non-interactive. */
# Line 1249  while (!done) Line 1952  while (!done)
1952          {          {
1953          for (;;)          for (;;)
1954            {            {
1955            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1956              {              {
1957              done = 1;              done = 1;
1958              goto CONTINUE;              goto CONTINUE;
# Line 1263  while (!done) Line 1966  while (!done)
1966        goto CONTINUE;        goto CONTINUE;
1967        }        }
1968    
1969      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1970      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1971      returns only limited data. Check that it agrees with the newer one. */      lines. */
1972    
1973      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1974        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
1975    
1976      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
1977      and remember the store that was got. */      and remember the store that was got. */
1978    
1979      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
1980      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1981    
1982        /* Output code size information if requested */
1983    
1984        if (log_store)
1985          fprintf(outfile, "Memory allocation (code space): %d\n",
1986            (int)(first_gotten_store -
1987                  sizeof(real_pcre) -
1988                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1989    
1990      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
1991      help with the matching. */      help with the matching, unless the pattern has the SS option, which
1992        suppresses the effect of /S (used for a few test patterns where studying is
1993        never sensible). */
1994    
1995      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
1996        {        {
1997        if (timeit > 0)        if (timeit > 0)
1998          {          {
# Line 1290  while (!done) Line 2000  while (!done)
2000          clock_t time_taken;          clock_t time_taken;
2001          clock_t start_time = clock();          clock_t start_time = clock();
2002          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2003            extra = pcre_study(re, study_options, &error);            {
2004              if (use_pcre16)
2005                extra = pcre16_study(re, study_options | force_study_options, &error);
2006              else
2007                extra = pcre_study(re, study_options | force_study_options, &error);
2008              }
2009          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2010          if (extra != NULL) free(extra);          if (extra != NULL) pcre_free_study(extra);
2011          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2012            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2013              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2014          }          }
2015        extra = pcre_study(re, study_options, &error);        if (use_pcre16)
2016            extra = pcre16_study(re, study_options | force_study_options, &error);
2017          else
2018            extra = pcre_study(re, study_options | force_study_options, &error);
2019        if (error != NULL)        if (error != NULL)
2020          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2021        else if (extra != NULL)        else if (extra != NULL)
2022            {
2023          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2024            if (log_store)
2025              {
2026              size_t jitsize;
2027              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2028              if (jitsize != 0)
2029                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2030              }
2031            }
2032          }
2033    
2034        /* If /K was present, we set up for handling MARK data. */
2035    
2036        if (do_mark)
2037          {
2038          if (extra == NULL)
2039            {
2040            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2041            extra->flags = 0;
2042            }
2043          extra->mark = &markptr;
2044          extra->flags |= PCRE_EXTRA_MARK;
2045        }        }
2046    
2047      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
# Line 1312  while (!done) Line 2052  while (!done)
2052      if (do_flip)      if (do_flip)
2053        {        {
2054        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
2055        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
2056            byteflip(rre->magic_number, sizeof(rre->magic_number));
2057        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
2058        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
2059        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2060        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
2061        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2062        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
2063        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2064          rre->first_char =
2065            (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2066          rre->req_char =
2067            (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2068          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2069          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
2070        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2071          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
2072        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2073            sizeof(rre->name_count));
2074    
2075        if (extra != NULL)        if (extra != NULL)
2076          {          {
2077          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2078          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2079          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2080            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2081          }          }
2082        }        }
2083    
2084      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
2085    
2086      SHOW_INFO:      SHOW_INFO:
2087    
2088      if (do_debug)      if (do_debug)
2089        {        {
2090        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2091        pcre_printint(re, outfile, debug_lengths);        if (use_pcre16)
2092            pcre16_printint(re, outfile, debug_lengths);
2093          else
2094            pcre_printint(re, outfile, debug_lengths);
2095        }        }
2096    
2097        /* We already have the options in get_options (see above) */
2098    
2099      if (do_showinfo)      if (do_showinfo)
2100        {        {
2101        unsigned long int get_options, all_options;        unsigned long int all_options;
2102  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2103        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2104  #endif  #endif
2105        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2106            hascrorlf;
2107        int nameentrysize, namecount;        int nameentrysize, namecount;
2108        const uschar *nametable;        const pcre_uchar *nametable;
2109    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2110        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2111        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2112        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1362  while (!done) Line 2115  while (!done)
2115        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2116        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2117        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2118          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2119          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2120          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2121    
2122          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2123          that it gives the same results as the new function. */
2124    
2125  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2126        old_count = pcre_info(re, &old_options, &old_first_char);        if (!use_pcre16)
2127        if (count < 0) fprintf(outfile,          {
2128          "Error %d from pcre_info()\n", count);          old_count = pcre_info(re, &old_options, &old_first_char);
2129        else          if (count < 0) fprintf(outfile,
2130          {            "Error %d from pcre_info()\n", count);
2131          if (old_count != count) fprintf(outfile,          else
2132            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,            {
2133              old_count);            if (old_count != count) fprintf(outfile,
2134                "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2135          if (old_first_char != first_char) fprintf(outfile,                old_count);
2136            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
2137              first_char, old_first_char);            if (old_first_char != first_char) fprintf(outfile,
2138                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2139          if (old_options != (int)get_options) fprintf(outfile,                first_char, old_first_char);
2140            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
2141              get_options, old_options);            if (old_options != (int)get_options) fprintf(outfile,
2142          }              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2143                  get_options, old_options);
2144              }
2145            }
2146  #endif  #endif
2147    
2148        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
# Line 1403  while (!done) Line 2165  while (!done)
2165            }            }
2166          }          }
2167    
2168        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2169        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2170    
2171        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2172        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
2173    
2174        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2175          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2176            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2177            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2178            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2179            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2180            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2181            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2182              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2183              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2184            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2185            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2186            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2187            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2188            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2189              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2190            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2191              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2192            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2193    
2194          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2195    
2196        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
2197          {          {
2198          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1445  while (!done) Line 2207  while (!done)
2207          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
2208          break;          break;
2209    
2210            case PCRE_NEWLINE_ANYCRLF:
2211            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2212            break;
2213    
2214          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
2215          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
2216          break;          break;
# Line 1463  while (!done) Line 2229  while (!done)
2229          }          }
2230        else        else
2231          {          {
2232          int ch = first_char & 255;          const char *caseless =
2233          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2234            "" : " (caseless)";            "" : " (caseless)";
2235          if (PRINTHEX(ch))  
2236            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(first_char))
2237              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2238          else          else
2239            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2240          }          }
2241    
2242        if (need_char < 0)        if (need_char < 0)
# Line 1478  while (!done) Line 2245  while (!done)
2245          }          }
2246        else        else
2247          {          {
2248          int ch = need_char & 255;          const char *caseless =
2249          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2250            "" : " (caseless)";            "" : " (caseless)";
2251          if (PRINTHEX(ch))  
2252            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(need_char))
2253              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2254          else          else
2255            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2256          }          }
2257    
2258        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2259        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2260        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2261        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2262          information unless -i or -d was also present. This means that, except
2263          when auto-callouts are involved, the output from runs with and without
2264          -s should be identical. */
2265    
2266        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2267          {          {
2268          if (extra == NULL)          if (extra == NULL)
2269            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2270          else          else
2271            {            {
2272            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2273            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2274    
2275              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2276              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2277    
2278              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2279            if (start_bits == NULL)            if (start_bits == NULL)
2280              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2281            else            else
2282              {              {
2283              int i;              int i;
# Line 1532  while (!done) Line 2307  while (!done)
2307              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2308              }              }
2309            }            }
2310    
2311            /* Show this only if the JIT was set by /S, not by -s. */
2312    
2313            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2314              {
2315              int jit;
2316              new_info(re, extra, PCRE_INFO_JIT, &jit);
2317              if (jit)
2318                fprintf(outfile, "JIT study was successful\n");
2319              else
2320    #ifdef SUPPORT_JIT
2321                fprintf(outfile, "JIT study was not successful\n");
2322    #else
2323                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2324    #endif
2325              }
2326          }          }
2327        }        }
2328    
# Line 1548  while (!done) Line 2339  while (!done)
2339          }          }
2340        else        else
2341          {          {
2342          uschar sbuf[8];          pcre_uint8 sbuf[8];
2343          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2344          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2345          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2346          sbuf[3] = (true_size)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
2347    
2348          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2349          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2350          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2351          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2352    
2353          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2354              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1566  while (!done) Line 2357  while (!done)
2357            }            }
2358          else          else
2359            {            {
2360            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2361    
2362              /* If there is study data, write it. */
2363    
2364            if (extra != NULL)            if (extra != NULL)
2365              {              {
2366              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1576  while (!done) Line 2370  while (!done)
2370                  strerror(errno));                  strerror(errno));
2371                }                }
2372              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2373              }              }
2374            }            }
2375          fclose(f);          fclose(f);
2376          }          }
2377    
2378        new_free(re);        new_free(re);
2379        if (extra != NULL) new_free(extra);        if (extra != NULL) pcre_free_study(extra);
2380        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2381            {
2382            new_free((void *)tables);
2383            setlocale(LC_CTYPE, "C");
2384            locale_set = 0;
2385            }
2386        continue;  /* With next regex */        continue;  /* With next regex */
2387        }        }
2388      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1593  while (!done) Line 2391  while (!done)
2391    
2392    for (;;)    for (;;)
2393      {      {
2394      uschar *q;      pcre_uint8 *q;
2395      uschar *bptr = dbuffer;      pcre_uint8 *bptr;
2396      int *use_offsets = offsets;      int *use_offsets = offsets;
2397      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2398      int callout_data = 0;      int callout_data = 0;
2399      int callout_data_set = 0;      int callout_data_set = 0;
2400      int count, c;      int count, c;
2401      int copystrings = 0;      int copystrings = 0;
2402      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2403      int getstrings = 0;      int getstrings = 0;
2404      int getlist = 0;      int getlist = 0;
2405      int gmatched = 0;      int gmatched = 0;
2406      int start_offset = 0;      int start_offset = 0;
2407        int start_offset_sign = 1;
2408      int g_notempty = 0;      int g_notempty = 0;
2409      int use_dfa = 0;      int use_dfa = 0;
2410    
# Line 1619  while (!done) Line 2418  while (!done)
2418    
2419      pcre_callout = callout;      pcre_callout = callout;
2420      first_callout = 1;      first_callout = 1;
2421        last_callout_mark = NULL;
2422      callout_extra = 0;      callout_extra = 0;
2423      callout_count = 0;      callout_count = 0;
2424      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1631  while (!done) Line 2431  while (!done)
2431      len = 0;      len = 0;
2432      for (;;)      for (;;)
2433        {        {
2434        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
2435          {          {
2436          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2437              {
2438              fprintf(outfile, "\n");
2439              break;
2440              }
2441          done = 1;          done = 1;
2442          goto CONTINUE;          goto CONTINUE;
2443          }          }
# Line 1650  while (!done) Line 2453  while (!done)
2453      p = buffer;      p = buffer;
2454      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2455    
2456      q = dbuffer;      bptr = q = dbuffer;
2457      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2458        {        {
2459        int i = 0;        int i = 0;
# Line 1694  while (!done) Line 2497  while (!done)
2497            {            {
2498            unsigned char *pt = p;            unsigned char *pt = p;
2499            c = 0;            c = 0;
2500            while (isxdigit(*(++pt)))  
2501              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2502              when isxdigit() is a macro that refers to its argument more than
2503              once. This is banned by the C Standard, but apparently happens in at
2504              least one MacOS environment. */
2505    
2506              for (pt++; isxdigit(*pt); pt++)
2507                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2508            if (*pt == '}')            if (*pt == '}')
2509              {              {
2510              unsigned char buff8[8];              unsigned char buff8[8];
2511              int ii, utn;              int ii, utn;
2512              utn = ord2utf8(c, buff8);              if (use_utf8)
2513              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2514              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2515                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2516                  c = buff8[ii];   /* Last byte */
2517                  }
2518                else
2519                 {
2520                 if (c > 255)
2521                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2522                     "UTF-8 mode is not enabled.\n"
2523                     "** Truncation will probably give the wrong result.\n", c);
2524                 }
2525              p = pt + 1;              p = pt + 1;
2526              break;              break;
2527              }              }
# Line 1715  while (!done) Line 2534  while (!done)
2534          c = 0;          c = 0;
2535          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2536            {            {
2537            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2538            p++;            p++;
2539            }            }
2540          break;          break;
# Line 1725  while (!done) Line 2544  while (!done)
2544          continue;          continue;
2545    
2546          case '>':          case '>':
2547            if (*p == '-')
2548              {
2549              start_offset_sign = -1;
2550              p++;
2551              }
2552          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2553            start_offset *= start_offset_sign;
2554          continue;          continue;
2555    
2556          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1744  while (!done) Line 2569  while (!done)
2569            }            }
2570          else if (isalnum(*p))          else if (isalnum(*p))
2571            {            {
2572            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
2573            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2574            *npp++ = 0;            *npp++ = 0;
2575            *npp = 0;            *npp = 0;
# Line 1798  while (!done) Line 2623  while (!done)
2623  #endif  #endif
2624            use_dfa = 1;            use_dfa = 1;
2625          continue;          continue;
2626    #endif
2627    
2628    #if !defined NODFA
2629          case 'F':          case 'F':
2630          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2631          continue;          continue;
# Line 1812  while (!done) Line 2639  while (!done)
2639            }            }
2640          else if (isalnum(*p))          else if (isalnum(*p))
2641            {            {
2642            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
2643            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2644            *npp++ = 0;            *npp++ = 0;
2645            *npp = 0;            *npp = 0;
# Line 1823  while (!done) Line 2650  while (!done)
2650            }            }
2651          continue;          continue;
2652    
2653            case 'J':
2654            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2655            if (extra != NULL
2656                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2657                && extra->executable_jit != NULL)
2658              {
2659              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2660              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2661              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2662              }
2663            continue;
2664    
2665          case 'L':          case 'L':
2666          getlist = 1;          getlist = 1;
2667          continue;          continue;
# Line 1832  while (!done) Line 2671  while (!done)
2671          continue;          continue;
2672    
2673          case 'N':          case 'N':
2674          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2675              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2676            else
2677              options |= PCRE_NOTEMPTY;
2678          continue;          continue;
2679    
2680          case 'O':          case 'O':
# Line 1845  while (!done) Line 2687  while (!done)
2687            if (offsets == NULL)            if (offsets == NULL)
2688              {              {
2689              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2690                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2691              yield = 1;              yield = 1;
2692              goto EXIT;              goto EXIT;
2693              }              }
# Line 1855  while (!done) Line 2697  while (!done)
2697          continue;          continue;
2698    
2699          case 'P':          case 'P':
2700          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2701              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2702          continue;          continue;
2703    
2704          case 'Q':          case 'Q':
# Line 1890  while (!done) Line 2733  while (!done)
2733          show_malloc = 1;          show_malloc = 1;
2734          continue;          continue;
2735    
2736            case 'Y':
2737            options |= PCRE_NO_START_OPTIMIZE;
2738            continue;
2739    
2740          case 'Z':          case 'Z':
2741          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2742          continue;          continue;
# Line 1910  while (!done) Line 2757  while (!done)
2757        *q++ = c;        *q++ = c;
2758        }        }
2759      *q = 0;      *q = 0;
2760      len = q - dbuffer;      len = (int)(q - dbuffer);
2761    
2762        /* Move the data to the end of the buffer so that a read over the end of
2763        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2764        we are using the POSIX interface, we must include the terminating zero. */
2765    
2766    #if !defined NOPOSIX
2767        if (posix || do_posix)
2768          {
2769          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2770          bptr += buffer_size - len - 1;
2771          }
2772        else
2773    #endif
2774          {
2775          memmove(bptr + buffer_size - len, bptr, len);
2776          bptr += buffer_size - len;
2777          }
2778    
2779      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2780        {        {
# Line 1931  while (!done) Line 2795  while (!done)
2795          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2796        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2797        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2798          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2799    
2800        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2801    
# Line 1955  while (!done) Line 2820  while (!done)
2820              (void)pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2821                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2822              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2823              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2824                {                {
2825                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2826                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2827                  outfile);                  outfile);
2828                fprintf(outfile, "\n");                fprintf(outfile, "\n");
# Line 1975  while (!done) Line 2840  while (!done)
2840    
2841      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2842        {        {
2843          markptr = NULL;
2844    
2845        if (timeitm > 0)        if (timeitm > 0)
2846          {          {
2847          register int i;          register int i;
2848          clock_t time_taken;          clock_t time_taken;
2849          clock_t start_time = clock();          clock_t start_time = clock();
2850    
2851    #ifdef SUPPORT_PCRE16
2852            if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
2853    #endif
2854    
2855    
2856  #if !defined NODFA  #if !defined NODFA
2857          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2858            {            {
2859            int workspace[1000];            int workspace[1000];
2860            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2861              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2862                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2863                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2864            }            }
# Line 2005  while (!done) Line 2877  while (!done)
2877    
2878        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2879        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
2880        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
2881          running of pcre_exec(), so disable the JIT optimization. This makes it
2882          possible to run the same set of tests with and without JIT externally
2883          requested. */
2884    
2885        if (find_match_limit)        if (find_match_limit)
2886          {          {
# Line 2014  while (!done) Line 2889  while (!done)
2889            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2890            extra->flags = 0;            extra->flags = 0;
2891            }            }
2892            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2893    
2894          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
2895            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2049  while (!done) Line 2925  while (!done)
2925        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2926          {          {
2927          int workspace[1000];          int workspace[1000];
2928          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2929            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2930            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2931          if (count == 0)          if (count == 0)
# Line 2062  while (!done) Line 2938  while (!done)
2938    
2939        else        else
2940          {          {
2941          count = pcre_exec(re, extra, (char *)bptr, len,          if (use_pcre16)
2942            start_offset, options | g_notempty, use_offsets, use_size_offsets);            count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
2943                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2944            else
2945              count = pcre_exec(re, extra, (char *)bptr, len,
2946                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2947          if (count == 0)          if (count == 0)
2948            {            {
2949            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2097  while (!done) Line 2977  while (!done)
2977              }              }
2978            }            }
2979    
2980            /* do_allcaps requests showing of all captures in the pattern, to check
2981            unset ones at the end. */
2982    
2983            if (do_allcaps)
2984              {
2985              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2986              count++;   /* Allow for full match */
2987              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2988              }
2989    
2990            /* Output the captured substrings */
2991    
2992          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2993            {            {
2994            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2995                {
2996                if (use_offsets[i] != -1)
2997                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2998                    use_offsets[i], i);
2999                if (use_offsets[i+1] != -1)
3000                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3001                    use_offsets[i+1], i+1);
3002              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3003                }
3004            else            else
3005              {              {
3006              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3007              (void)pchars(bptr + use_offsets[i],              (void)pchars(bptr + use_offsets[i],
3008                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3009              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3010              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3011                {                {
3012                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3013                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
3014                  fprintf(outfile, " 0+ ");                  outfile);
3015                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3016                }                }
3017              }              }
3018            }            }
3019    
3020            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3021    
3022          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3023            {            {
3024            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2193  while (!done) Line 3092  while (!done)
3092                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3093              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3094                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3095              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3096              }              }
3097            }            }
# Line 2203  while (!done) Line 3101  while (!done)
3101    
3102        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
3103          {          {
3104          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
3105  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
3106          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
3107            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
3108              bptr + use_offsets[0]);            fprintf(outfile, ": ");
3109  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3110                outfile);
3111              }
3112          fprintf(outfile, "\n");          fprintf(outfile, "\n");
3113          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
3114          }          }
# Line 2218  while (!done) Line 3118  while (!done)
3118        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
3119        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
3120    
3121        Complication arises in the case when the newline option is "any".        Complication arises in the case when the newline convention is "any",
3122        If the previous match was at the end of a line terminated by CRLF, an        "crlf", or "anycrlf". If the previous match was at the end of a line
3123        advance of one character just passes the \r, whereas we should prefer the        terminated by CRLF, an advance of one character just passes the \r,
3124        longer newline sequence, as does the code in pcre_exec(). Fudge the        whereas we should prefer the longer newline sequence, as does the code in
3125        offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
3126          newline setting in the pattern; if none was set, use pcre_config() to
3127          find the default.
3128    
3129        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
3130        character, not one byte. */        character, not one byte. */
# Line 2238  while (!done) Line 3140  while (!done)
3140              {              {
3141              int d;              int d;
3142              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3143              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
3144                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
3145                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
3146                        (d == 10)? PCRE_NEWLINE_LF :
3147                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3148                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
3149                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
3150              }              }
3151            if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3152                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3153                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3154                  &&
3155                start_offset < len - 1 &&                start_offset < len - 1 &&
3156                bptr[start_offset] == '\r' &&                bptr[start_offset] == '\r' &&
3157                bptr[start_offset+1] == '\n')                bptr[start_offset+1] == '\n')
# Line 2252  while (!done) Line 3160  while (!done)
3160              {              {
3161              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3162                {                {
3163                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3164                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3165                }                }
3166              }              }
3167            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3168            }            }
3169          else          else
3170            {            {
3171            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3172              {              {
3173              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3174                if (gmatched == 0)
3175                  {
3176                  if (markptr == NULL) fprintf(outfile, "No match\n");
3177                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3178                  }
3179                break;
3180    
3181                case PCRE_ERROR_BADUTF8:
3182                case PCRE_ERROR_SHORTUTF8:
3183                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3184                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3185                if (use_size_offsets >= 2)
3186                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3187                    use_offsets[1]);
3188                fprintf(outfile, "\n");
3189                break;
3190    
3191                default:
3192                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3193                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3194                else
3195                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3196                break;
3197              }              }
3198            else fprintf(outfile, "Error %d\n", count);  
3199            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3200            }            }
3201          }          }
# Line 2276  while (!done) Line 3205  while (!done)
3205        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3206    
3207        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3208        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3209        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
3210        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3211        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3212        character. */        character. */
3213    
# Line 2287  while (!done) Line 3216  while (!done)
3216        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3217          {          {
3218          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3219          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3220          }          }
3221    
3222        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2313  while (!done) Line 3242  while (!done)
3242  #endif  #endif
3243    
3244    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3245    if (extra != NULL) new_free(extra);    if (extra != NULL) pcre_free_study(extra);
3246    if (tables != NULL)    if (locale_set)
3247      {      {
3248      new_free((void *)tables);      new_free((void *)tables);
3249      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3250      locale_set = 0;      locale_set = 0;
3251      }      }
3252      if (jit_stack != NULL)
3253        {
3254        pcre_jit_stack_free(jit_stack);
3255        jit_stack = NULL;
3256        }
3257    }    }
3258    
3259  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 2334  free(dbuffer); Line 3268  free(dbuffer);
3268  free(pbuffer);  free(pbuffer);
3269  free(offsets);  free(offsets);
3270    
3271    #ifdef SUPPORT_PCRE16
3272    if (buffer16 != NULL) free(buffer16);
3273    #endif
3274    
3275  return yield;  return yield;
3276  }  }
3277    

Legend:
Removed from v.146  
changed lines
  Added in v.805

  ViewVC Help
Powered by ViewVC 1.1.5