/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC code/branches/pcre16/pcretest.c revision 811 by zherczeg, Mon Dec 19 14:05:44 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  */  */
38    
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <stdio.h>  #include <stdio.h>
46  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 49  POSSIBILITY OF SUCH DAMAGE.
49  #include <locale.h>  #include <locale.h>
50  #include <errno.h>  #include <errno.h>
51    
52  #ifndef _WIN32  #ifdef SUPPORT_LIBREADLINE
53  #include <sys/resource.h>  #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
60    
61    /* A number of things vary for Windows builds. Originally, pcretest opened its
62    input and output without "b"; then I was told that "b" was needed in some
63    environments, so it was added for release 5.0 to both the input and output. (It
64    makes no difference on Unix-like systems.) Later I was told that it is wrong
65    for the input on Windows. I've now abstracted the modes into two macros that
66    are set here, to make it easier to fiddle with them, and removed "b" from the
67    input mode under Windows. */
68    
69    #if defined(_WIN32) || defined(WIN32)
70    #include <io.h>                /* For _setmode() */
71    #include <fcntl.h>             /* For _O_BINARY */
72    #define INPUT_MODE   "r"
73    #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87  #endif  #endif
88    
89  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* Not Windows */
90    
91    #else
92    #include <sys/time.h>          /* These two includes are needed */
93    #include <sys/resource.h>      /* for setrlimit(). */
94    #define INPUT_MODE   "rb"
95    #define OUTPUT_MODE  "wb"
96    #endif
97    
 /* We include pcre_internal.h because we need the internal info for displaying  
 the results of pcre_study() and we also need to know about the internal  
 macros, structures, and other internal data values; pcretest has "inside  
 information" compared to a program that strictly follows the PCRE API. */  
98    
99    /* We have to include pcre_internal.h because we need the internal info for
100    displaying the results of pcre_study() and we also need to know about the
101    internal macros, structures, and other internal data values; pcretest has
102    "inside information" compared to a program that strictly follows the PCRE API.
103    
104    Although pcre_internal.h does itself include pcre.h, we explicitly include it
105    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106    appropriately for an application, not for building PCRE. */
107    
108    #include "pcre.h"
109  #include "pcre_internal.h"  #include "pcre_internal.h"
110    
111  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
112  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
113  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123    /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
130  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
131  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 68  symbols to prevent clashes. */ Line 133  symbols to prevent clashes. */
133  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
134  #define _pcre_utt              utt  #define _pcre_utt              utt
135  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
138    
139  #include "pcre_tables.c"  #include "pcre_tables.c"
140    
141  /* We also need the pcre_printint() function for printing out compiled  /* The definition of the macro PRINTABLE, which determines whether to print an
142  patterns. This function is in a separate file so that it can be included in  output character as-is or as a hex value when showing compiled patterns, is
143  pcre_compile.c when that module is compiled with debugging enabled. */  the same as in the printint.src file. We use it here in cases when the locale
144    has not been explicitly changed, so as to get consistent output from systems
145    that differ in their output from isprint() even in the "C" locale. */
146    
147  #include "pcre_printint.src"  #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149    #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 87  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, originally for the benefit of a version that was
164  build pcretest without support for UTF8 (define NOUTF8), without the interface  imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  without the interface to the DFA matcher (NODFA), and without the doublecheck
166  function (define NOINFOCHECK). */  of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167    out the UTF8 support if PCRE is built without it. */
168    
169    #ifndef SUPPORT_UTF8
170    #ifndef NOUTF8
171    #define NOUTF8
172    #endif
173    #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    #define PCHARS8(lv, p, len, f) \
185      lv = pchars((pcre_uint8 *)p, len, f)
186    
187    #define PCHARSV8(p, len, f) \
188      (void)pchars((pcre_uint8 *)p, len, f)
189    
190    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191      re = pcre_compile((char *)pat, options, error, erroffset, tables)
192    
193    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194        offsets, size_offsets) \
195      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196        offsets, size_offsets)
197    
198    #define PCRE_STUDY8(extra, re, options, error) \
199      extra = pcre_study(re, options, error)
200    
201    #define PCRE_FREE_STUDY8(extra) \
202      pcre_free_study(extra)
203    
204    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
205      pcre_pattern_to_host_byte_order(re, extra, tables)
206    
207    #endif /* SUPPORT_PCRE8 */
208    
209    
210    #ifdef SUPPORT_PCRE16
211    #define PCHARS16(lv, p, len, f) \
212      lv = pchars16((PCRE_SPTR16)p, len, f)
213    
214    #define PCHARSV16(p, len, f) \
215      (void)pchars16((PCRE_SPTR16)p, len, f)
216    
217    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
218      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
219    
220    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
221        offsets, size_offsets) \
222      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
223        options, offsets, size_offsets)
224    
225    #define PCRE_FREE_STUDY16(extra) \
226      pcre16_free_study(extra)
227    
228    #define PCRE_STUDY16(extra, re, options, error) \
229      extra = pcre16_study(re, options, error)
230    
231    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
232      pcre16_pattern_to_host_byte_order(re, extra, tables)
233    
234    #endif /* SUPPORT_PCRE16 */
235    
236    
237    /* ----- Both modes are supported; a runtime test is needed ----- */
238    
239    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
240    
241    #define PCHARS(lv, p, len, f) \
242      if (use_pcre16) \
243        PCHARS16(lv, p, len, f); \
244      else \
245        PCHARS8(lv, p, len, f)
246    
247    #define PCHARSV(p, len, f) \
248      if (use_pcre16) \
249        PCHARSV16(p, len, f); \
250      else \
251        PCHARSV8(p, len, f)
252    
253    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
254      if (use_pcre16) \
255        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
256      else \
257        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
258    
259    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
260        offsets, size_offsets) \
261      if (use_pcre16) \
262        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
263          offsets, size_offsets); \
264      else \
265        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
266          offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY(extra) \
269      if (use_pcre16) \
270        PCRE_FREE_STUDY16(extra); \
271      else \
272        PCRE_FREE_STUDY8(extra)
273    
274    #define PCRE_STUDY(extra, re, options, error) \
275      if (use_pcre16) \
276        PCRE_STUDY16(extra, re, options, error); \
277      else \
278        PCRE_STUDY8(extra, re, options, error)
279    
280    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
281      if (use_pcre16) \
282        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
283      else \
284        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
285    
286    /* ----- Only 8-bit mode is supported ----- */
287    
288    #elif defined SUPPORT_PCRE8
289    #define PCHARS           PCHARS8
290    #define PCHARSV          PCHARSV8
291    #define PCRE_COMPILE     PCRE_COMPILE8
292    #define PCRE_EXEC        PCRE_EXEC8
293    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
294    #define PCRE_STUDY       PCRE_STUDY8
295    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
296    
297    /* ----- Only 16-bit mode is supported ----- */
298    
299    #else
300    #define PCHARS           PCHARS16
301    #define PCHARSV          PCHARSV16
302    #define PCRE_COMPILE     PCRE_COMPILE16
303    #define PCRE_EXEC        PCRE_EXEC16
304    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
305    #define PCRE_STUDY       PCRE_STUDY16
306    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
307    #endif
308    
309    /* ----- End of mode-specific function call macros ----- */
310    
311    
312  /* Other parameters */  /* Other parameters */
# Line 103  function (define NOINFOCHECK). */ Line 319  function (define NOINFOCHECK). */
319  #endif  #endif
320  #endif  #endif
321    
322    /* This is the default loop count for timing. */
323    
324  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
325    
326  /* Static variables */  /* Static variables */
# Line 113  static int callout_count; Line 331  static int callout_count;
331  static int callout_extra;  static int callout_extra;
332  static int callout_fail_count;  static int callout_fail_count;
333  static int callout_fail_id;  static int callout_fail_id;
334    static int debug_lengths;
335  static int first_callout;  static int first_callout;
336    static int locale_set = 0;
337  static int show_malloc;  static int show_malloc;
338  static int use_utf8;  static int use_utf;
339  static size_t gotten_store;  static size_t gotten_store;
340    static size_t first_gotten_store = 0;
341    static const unsigned char *last_callout_mark = NULL;
342    
343  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
344    
345  static int buffer_size = 50000;  static int buffer_size = 50000;
346  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
347  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
348  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
349    
350    #ifdef SUPPORT_PCRE16
351    static int buffer16_size = 0;
352    static pcre_uint16 *buffer16 = NULL;
353    #endif
354    
355    /* If we have 8-bit support, default use_pcre16 to false; if there is also
356    16-bit support, it can be changed by an option. If there is no 8-bit support,
357    there must be 16-bit support, so default it to 1. */
358    
359  /*************************************************  #ifdef SUPPORT_PCRE8
360  *        Read or extend an input line            *  static int use_pcre16 = 0;
361  *************************************************/  #else
362    static int use_pcre16 = 1;
363    #endif
364    
365  /* Input lines are read into buffer, but both patterns and data lines can be  /* Textual explanations for runtime error codes */
 continued over multiple input lines. In addition, if the buffer fills up, we  
 want to automatically expand it so as to be able to handle extremely large  
 lines that are needed for certain stress tests. When the input buffer is  
 expanded, the other two buffers must also be expanded likewise, and the  
 contents of pbuffer, which are a copy of the input for callouts, must be  
 preserved (for when expansion happens for a data line). This is not the most  
 optimal way of handling this, but hey, this is just a test program!  
366    
367  Arguments:  static const char *errtexts[] = {
368    f            the file to read    NULL,  /* 0 is no error */
369    start        where in buffer to start (this *must* be within buffer)    NULL,  /* NOMATCH is handled specially */
370      "NULL argument passed",
371      "bad option value",
372      "magic number missing",
373      "unknown opcode - pattern overwritten?",
374      "no more memory",
375      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
376      "match limit exceeded",
377      "callout error code",
378      NULL,  /* BADUTF8 is handled specially */
379      "bad UTF-8 offset",
380      NULL,  /* PARTIAL is handled specially */
381      "not used - internal error",
382      "internal error - pattern overwritten?",
383      "bad count value",
384      "item unsupported for DFA matching",
385      "backreference condition or recursion test not supported for DFA matching",
386      "match limit not supported for DFA matching",
387      "workspace size exceeded in DFA matching",
388      "too much recursion for DFA matching",
389      "recursion limit exceeded",
390      "not used - internal error",
391      "invalid combination of newline options",
392      "bad offset value",
393      NULL,  /* SHORTUTF8 is handled specially */
394      "nested recursion at the same subject position",
395      "JIT stack limit reached",
396      "pattern compiled in wrong mode (8-bit/16-bit error)"
397    };
398    
 Returns:       pointer to the start of new data  
                could be a copy of start, or could be moved  
                NULL if no data read and EOF reached  
 */  
399    
400  static uschar *  /*************************************************
401  extend_inputline(FILE *f, uschar *start)  *         Alternate character tables             *
402  {  *************************************************/
 uschar *here = start;  
403    
404  for (;;)  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
405    {  using the default tables of the library. However, the T option can be used to
406    int rlen = buffer_size - (here - buffer);  select alternate sets of tables, for different kinds of testing. Note also that
407    if (rlen > 1000)  the L (locale) option also adjusts the tables. */
408      {  
409      int dlen;  /* This is the set of tables distributed as default with PCRE. It recognizes
410      if (fgets((char *)here, rlen,  f) == NULL)  only ASCII characters. */
411        return (here == start)? NULL : start;  
412      dlen = (int)strlen((char *)here);  static const pcre_uint8 tables0[] = {
413      if (dlen > 0 && here[dlen - 1] == '\n') return start;  
414      here += dlen;  /* This table is a lower casing table. */
415      }  
416        0,  1,  2,  3,  4,  5,  6,  7,
417        8,  9, 10, 11, 12, 13, 14, 15,
418       16, 17, 18, 19, 20, 21, 22, 23,
419       24, 25, 26, 27, 28, 29, 30, 31,
420       32, 33, 34, 35, 36, 37, 38, 39,
421       40, 41, 42, 43, 44, 45, 46, 47,
422       48, 49, 50, 51, 52, 53, 54, 55,
423       56, 57, 58, 59, 60, 61, 62, 63,
424       64, 97, 98, 99,100,101,102,103,
425      104,105,106,107,108,109,110,111,
426      112,113,114,115,116,117,118,119,
427      120,121,122, 91, 92, 93, 94, 95,
428       96, 97, 98, 99,100,101,102,103,
429      104,105,106,107,108,109,110,111,
430      112,113,114,115,116,117,118,119,
431      120,121,122,123,124,125,126,127,
432      128,129,130,131,132,133,134,135,
433      136,137,138,139,140,141,142,143,
434      144,145,146,147,148,149,150,151,
435      152,153,154,155,156,157,158,159,
436      160,161,162,163,164,165,166,167,
437      168,169,170,171,172,173,174,175,
438      176,177,178,179,180,181,182,183,
439      184,185,186,187,188,189,190,191,
440      192,193,194,195,196,197,198,199,
441      200,201,202,203,204,205,206,207,
442      208,209,210,211,212,213,214,215,
443      216,217,218,219,220,221,222,223,
444      224,225,226,227,228,229,230,231,
445      232,233,234,235,236,237,238,239,
446      240,241,242,243,244,245,246,247,
447      248,249,250,251,252,253,254,255,
448    
449    /* This table is a case flipping table. */
450    
451        0,  1,  2,  3,  4,  5,  6,  7,
452        8,  9, 10, 11, 12, 13, 14, 15,
453       16, 17, 18, 19, 20, 21, 22, 23,
454       24, 25, 26, 27, 28, 29, 30, 31,
455       32, 33, 34, 35, 36, 37, 38, 39,
456       40, 41, 42, 43, 44, 45, 46, 47,
457       48, 49, 50, 51, 52, 53, 54, 55,
458       56, 57, 58, 59, 60, 61, 62, 63,
459       64, 97, 98, 99,100,101,102,103,
460      104,105,106,107,108,109,110,111,
461      112,113,114,115,116,117,118,119,
462      120,121,122, 91, 92, 93, 94, 95,
463       96, 65, 66, 67, 68, 69, 70, 71,
464       72, 73, 74, 75, 76, 77, 78, 79,
465       80, 81, 82, 83, 84, 85, 86, 87,
466       88, 89, 90,123,124,125,126,127,
467      128,129,130,131,132,133,134,135,
468      136,137,138,139,140,141,142,143,
469      144,145,146,147,148,149,150,151,
470      152,153,154,155,156,157,158,159,
471      160,161,162,163,164,165,166,167,
472      168,169,170,171,172,173,174,175,
473      176,177,178,179,180,181,182,183,
474      184,185,186,187,188,189,190,191,
475      192,193,194,195,196,197,198,199,
476      200,201,202,203,204,205,206,207,
477      208,209,210,211,212,213,214,215,
478      216,217,218,219,220,221,222,223,
479      224,225,226,227,228,229,230,231,
480      232,233,234,235,236,237,238,239,
481      240,241,242,243,244,245,246,247,
482      248,249,250,251,252,253,254,255,
483    
484    /* This table contains bit maps for various character classes. Each map is 32
485    bytes long and the bits run from the least significant end of each byte. The
486    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
487    graph, print, punct, and cntrl. Other classes are built from combinations. */
488    
489      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
490      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
492      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493    
494      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
495      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
496      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
497      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498    
499      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
500      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
501      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
502      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503    
504      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
505      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
506      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
507      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508    
509      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
510      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
511      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
512      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513    
514      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
515      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
516      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
517      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518    
519      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
520      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
521      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
522      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523    
524      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
525      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
526      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
527      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
528    
529      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
530      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
531      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533    
534      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
535      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
536      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
538    
539    /* This table identifies various classes of character by individual bits:
540      0x01   white space character
541      0x02   letter
542      0x04   decimal digit
543      0x08   hexadecimal digit
544      0x10   alphanumeric or '_'
545      0x80   regular expression metacharacter or binary zero
546    */
547    
548    else    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
549      {    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
550      int new_buffer_size = 2*buffer_size;    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
551      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
552      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);    0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
553      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);    0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
554      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
555      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
556      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
557      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
558      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
559      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
560      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
561      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
562      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
563      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
564      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
566      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
567      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
568      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
569      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
570      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
571      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
572      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
573      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
574      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
575      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
576      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
577      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
578      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
579      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
580    
581    /* This is a set of tables that came originally from a Windows user. It seems to
582    be at least an approximation of ISO 8859. In particular, there are characters
583    greater than 128 that are marked as spaces, letters, etc. */
584    
585    static const pcre_uint8 tables1[] = {
586    0,1,2,3,4,5,6,7,
587    8,9,10,11,12,13,14,15,
588    16,17,18,19,20,21,22,23,
589    24,25,26,27,28,29,30,31,
590    32,33,34,35,36,37,38,39,
591    40,41,42,43,44,45,46,47,
592    48,49,50,51,52,53,54,55,
593    56,57,58,59,60,61,62,63,
594    64,97,98,99,100,101,102,103,
595    104,105,106,107,108,109,110,111,
596    112,113,114,115,116,117,118,119,
597    120,121,122,91,92,93,94,95,
598    96,97,98,99,100,101,102,103,
599    104,105,106,107,108,109,110,111,
600    112,113,114,115,116,117,118,119,
601    120,121,122,123,124,125,126,127,
602    128,129,130,131,132,133,134,135,
603    136,137,138,139,140,141,142,143,
604    144,145,146,147,148,149,150,151,
605    152,153,154,155,156,157,158,159,
606    160,161,162,163,164,165,166,167,
607    168,169,170,171,172,173,174,175,
608    176,177,178,179,180,181,182,183,
609    184,185,186,187,188,189,190,191,
610    224,225,226,227,228,229,230,231,
611    232,233,234,235,236,237,238,239,
612    240,241,242,243,244,245,246,215,
613    248,249,250,251,252,253,254,223,
614    224,225,226,227,228,229,230,231,
615    232,233,234,235,236,237,238,239,
616    240,241,242,243,244,245,246,247,
617    248,249,250,251,252,253,254,255,
618    0,1,2,3,4,5,6,7,
619    8,9,10,11,12,13,14,15,
620    16,17,18,19,20,21,22,23,
621    24,25,26,27,28,29,30,31,
622    32,33,34,35,36,37,38,39,
623    40,41,42,43,44,45,46,47,
624    48,49,50,51,52,53,54,55,
625    56,57,58,59,60,61,62,63,
626    64,97,98,99,100,101,102,103,
627    104,105,106,107,108,109,110,111,
628    112,113,114,115,116,117,118,119,
629    120,121,122,91,92,93,94,95,
630    96,65,66,67,68,69,70,71,
631    72,73,74,75,76,77,78,79,
632    80,81,82,83,84,85,86,87,
633    88,89,90,123,124,125,126,127,
634    128,129,130,131,132,133,134,135,
635    136,137,138,139,140,141,142,143,
636    144,145,146,147,148,149,150,151,
637    152,153,154,155,156,157,158,159,
638    160,161,162,163,164,165,166,167,
639    168,169,170,171,172,173,174,175,
640    176,177,178,179,180,181,182,183,
641    184,185,186,187,188,189,190,191,
642    224,225,226,227,228,229,230,231,
643    232,233,234,235,236,237,238,239,
644    240,241,242,243,244,245,246,215,
645    248,249,250,251,252,253,254,223,
646    192,193,194,195,196,197,198,199,
647    200,201,202,203,204,205,206,207,
648    208,209,210,211,212,213,214,247,
649    216,217,218,219,220,221,222,255,
650    0,62,0,0,1,0,0,0,
651    0,0,0,0,0,0,0,0,
652    32,0,0,0,1,0,0,0,
653    0,0,0,0,0,0,0,0,
654    0,0,0,0,0,0,255,3,
655    126,0,0,0,126,0,0,0,
656    0,0,0,0,0,0,0,0,
657    0,0,0,0,0,0,0,0,
658    0,0,0,0,0,0,255,3,
659    0,0,0,0,0,0,0,0,
660    0,0,0,0,0,0,12,2,
661    0,0,0,0,0,0,0,0,
662    0,0,0,0,0,0,0,0,
663    254,255,255,7,0,0,0,0,
664    0,0,0,0,0,0,0,0,
665    255,255,127,127,0,0,0,0,
666    0,0,0,0,0,0,0,0,
667    0,0,0,0,254,255,255,7,
668    0,0,0,0,0,4,32,4,
669    0,0,0,128,255,255,127,255,
670    0,0,0,0,0,0,255,3,
671    254,255,255,135,254,255,255,7,
672    0,0,0,0,0,4,44,6,
673    255,255,127,255,255,255,127,255,
674    0,0,0,0,254,255,255,255,
675    255,255,255,255,255,255,255,127,
676    0,0,0,0,254,255,255,255,
677    255,255,255,255,255,255,255,255,
678    0,2,0,0,255,255,255,255,
679    255,255,255,255,255,255,255,127,
680    0,0,0,0,255,255,255,255,
681    255,255,255,255,255,255,255,255,
682    0,0,0,0,254,255,0,252,
683    1,0,0,248,1,0,0,120,
684    0,0,0,0,254,255,255,255,
685    0,0,128,0,0,0,128,0,
686    255,255,255,255,0,0,0,0,
687    0,0,0,0,0,0,0,128,
688    255,255,255,255,0,0,0,0,
689    0,0,0,0,0,0,0,0,
690    128,0,0,0,0,0,0,0,
691    0,1,1,0,1,1,0,0,
692    0,0,0,0,0,0,0,0,
693    0,0,0,0,0,0,0,0,
694    1,0,0,0,128,0,0,0,
695    128,128,128,128,0,0,128,0,
696    28,28,28,28,28,28,28,28,
697    28,28,0,0,0,0,0,128,
698    0,26,26,26,26,26,26,18,
699    18,18,18,18,18,18,18,18,
700    18,18,18,18,18,18,18,18,
701    18,18,18,128,128,0,128,16,
702    0,26,26,26,26,26,26,18,
703    18,18,18,18,18,18,18,18,
704    18,18,18,18,18,18,18,18,
705    18,18,18,128,128,0,0,0,
706    0,0,0,0,0,1,0,0,
707    0,0,0,0,0,0,0,0,
708    0,0,0,0,0,0,0,0,
709    0,0,0,0,0,0,0,0,
710    1,0,0,0,0,0,0,0,
711    0,0,18,0,0,0,0,0,
712    0,0,20,20,0,18,0,0,
713    0,20,18,0,0,0,0,0,
714    18,18,18,18,18,18,18,18,
715    18,18,18,18,18,18,18,18,
716    18,18,18,18,18,18,18,0,
717    18,18,18,18,18,18,18,18,
718    18,18,18,18,18,18,18,18,
719    18,18,18,18,18,18,18,18,
720    18,18,18,18,18,18,18,0,
721    18,18,18,18,18,18,18,18
722    };
723    
     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  
       {  
       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);  
       exit(1);  
       }  
724    
     memcpy(new_buffer, buffer, buffer_size);  
     memcpy(new_pbuffer, pbuffer, buffer_size);  
725    
     buffer_size = new_buffer_size;  
726    
727      start = new_buffer + (start - buffer);  #ifndef HAVE_STRERROR
728      here = new_buffer + (here - buffer);  /*************************************************
729    *     Provide strerror() for non-ANSI libraries  *
730    *************************************************/
731    
732      free(buffer);  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
733      free(dbuffer);  in their libraries, but can provide the same facility by this simple
734      free(pbuffer);  alternative function. */
735    
736      buffer = new_buffer;  extern int   sys_nerr;
737      dbuffer = new_dbuffer;  extern char *sys_errlist[];
     pbuffer = new_pbuffer;  
     }  
   }  
738    
739  return NULL;  /* Control never gets here */  char *
740    strerror(int n)
741    {
742    if (n < 0 || n >= sys_nerr) return "unknown error number";
743    return sys_errlist[n];
744  }  }
745    #endif /* HAVE_STRERROR */
   
   
   
   
746    
747    
748  /*************************************************  /*************************************************
749  *          Read number from string               *  *         JIT memory callback                    *
750  *************************************************/  *************************************************/
751    
752  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  static pcre_jit_stack* jit_callback(void *arg)
 around with conditional compilation, just do the job by hand. It is only used  
 for unpicking the -o argument, so just keep it simple.  
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
   
 Returns:        the unsigned long  
 */  
   
 static int  
 get_value(unsigned char *str, unsigned char **endptr)  
753  {  {
754  int result = 0;  return (pcre_jit_stack *)arg;
 while(*str != 0 && isspace(*str)) str++;  
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
755  }  }
756    
757    
   
   
758  /*************************************************  /*************************************************
759  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
760  *************************************************/  *************************************************/
# Line 253  Returns:      >  0 => the number of byte Line 773  Returns:      >  0 => the number of byte
773  #if !defined NOUTF8  #if !defined NOUTF8
774    
775  static int  static int
776  utf82ord(unsigned char *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
777  {  {
778  int c = *utf8bytes++;  int c = *utf8bytes++;
779  int d = c;  int d = c;
# Line 311  Arguments: Line 831  Arguments:
831  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
832  */  */
833    
834    #if !defined NOUTF8
835    
836  static int  static int
837  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
838  {  {
839  register int i, j;  register int i, j;
840  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 327  for (j = i; j > 0; j--) Line 849  for (j = i; j > 0; j--)
849  return i + 1;  return i + 1;
850  }  }
851    
852    #endif
853    
854    
855    
856    #ifdef SUPPORT_PCRE16
857  /*************************************************  /*************************************************
858  *             Print character string             *  *         Convert a string to 16-bit             *
859  *************************************************/  *************************************************/
860    
861  /* Character string printing function. Must handle UTF-8 strings in utf8  /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
862  mode. Yields number of characters printed. If handed a NULL file, just counts  8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
863  chars without printing. */  double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
864    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
865    result is always left in buffer16.
866    
867  static int pchars(unsigned char *p, int length, FILE *f)  Arguments:
868      p          points to a byte string
869      utf        true if UTF-8 (to be converted to UTF-16)
870      len        number of bytes in the string (excluding trailing zero)
871    
872    Returns:     number of 16-bit data items used (excluding trailing zero)
873                 OR -1 if a UTF-8 string is malformed
874    */
875    
876    static int
877    to16(pcre_uint8 *p, int utf, int len)
878    {
879    pcre_uint16 *pp;
880    
881    if (buffer16_size < 2*len + 2)
882      {
883      if (buffer16 != NULL) free(buffer16);
884      buffer16_size = 2*len + 2;
885      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
886      if (buffer16 == NULL)
887        {
888        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
889        exit(1);
890        }
891      }
892    
893    pp = buffer16;
894    
895    if (!utf)
896      {
897      while (len-- > 0) *pp++ = *p++;
898      }
899    
900    else
901      {
902      int c;
903      while (len > 0)
904        {
905        int chlen = utf82ord(p, &c);
906        if (chlen <= 0) return -1;
907        p += chlen;
908        len -= chlen;
909        if (c < 0x10000) *pp++ = c; else
910          {
911          c -= 0x10000;
912          *pp++ = 0xD800 | (c >> 10);
913          *pp++ = 0xDC00 | (c & 0x3ff);
914          }
915        }
916      }
917    
918    *pp = 0;
919    return pp - buffer16;
920    }
921    #endif
922    
923    
924    /*************************************************
925    *        Read or extend an input line            *
926    *************************************************/
927    
928    /* Input lines are read into buffer, but both patterns and data lines can be
929    continued over multiple input lines. In addition, if the buffer fills up, we
930    want to automatically expand it so as to be able to handle extremely large
931    lines that are needed for certain stress tests. When the input buffer is
932    expanded, the other two buffers must also be expanded likewise, and the
933    contents of pbuffer, which are a copy of the input for callouts, must be
934    preserved (for when expansion happens for a data line). This is not the most
935    optimal way of handling this, but hey, this is just a test program!
936    
937    Arguments:
938      f            the file to read
939      start        where in buffer to start (this *must* be within buffer)
940      prompt       for stdin or readline()
941    
942    Returns:       pointer to the start of new data
943                   could be a copy of start, or could be moved
944                   NULL if no data read and EOF reached
945    */
946    
947    static pcre_uint8 *
948    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
949    {
950    pcre_uint8 *here = start;
951    
952    for (;;)
953      {
954      int rlen = (int)(buffer_size - (here - buffer));
955    
956      if (rlen > 1000)
957        {
958        int dlen;
959    
960        /* If libreadline support is required, use readline() to read a line if the
961        input is a terminal. Note that readline() removes the trailing newline, so
962        we must put it back again, to be compatible with fgets(). */
963    
964    #ifdef SUPPORT_LIBREADLINE
965        if (isatty(fileno(f)))
966          {
967          size_t len;
968          char *s = readline(prompt);
969          if (s == NULL) return (here == start)? NULL : start;
970          len = strlen(s);
971          if (len > 0) add_history(s);
972          if (len > rlen - 1) len = rlen - 1;
973          memcpy(here, s, len);
974          here[len] = '\n';
975          here[len+1] = 0;
976          free(s);
977          }
978        else
979    #endif
980    
981        /* Read the next line by normal means, prompting if the file is stdin. */
982    
983          {
984          if (f == stdin) printf("%s", prompt);
985          if (fgets((char *)here, rlen,  f) == NULL)
986            return (here == start)? NULL : start;
987          }
988    
989        dlen = (int)strlen((char *)here);
990        if (dlen > 0 && here[dlen - 1] == '\n') return start;
991        here += dlen;
992        }
993    
994      else
995        {
996        int new_buffer_size = 2*buffer_size;
997        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
998        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
999        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1000    
1001        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1002          {
1003          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1004          exit(1);
1005          }
1006    
1007        memcpy(new_buffer, buffer, buffer_size);
1008        memcpy(new_pbuffer, pbuffer, buffer_size);
1009    
1010        buffer_size = new_buffer_size;
1011    
1012        start = new_buffer + (start - buffer);
1013        here = new_buffer + (here - buffer);
1014    
1015        free(buffer);
1016        free(dbuffer);
1017        free(pbuffer);
1018    
1019        buffer = new_buffer;
1020        dbuffer = new_dbuffer;
1021        pbuffer = new_pbuffer;
1022        }
1023      }
1024    
1025    return NULL;  /* Control never gets here */
1026    }
1027    
1028    
1029    
1030    /*************************************************
1031    *          Read number from string               *
1032    *************************************************/
1033    
1034    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1035    around with conditional compilation, just do the job by hand. It is only used
1036    for unpicking arguments, so just keep it simple.
1037    
1038    Arguments:
1039      str           string to be converted
1040      endptr        where to put the end pointer
1041    
1042    Returns:        the unsigned long
1043    */
1044    
1045    static int
1046    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1047    {
1048    int result = 0;
1049    while(*str != 0 && isspace(*str)) str++;
1050    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1051    *endptr = str;
1052    return(result);
1053    }
1054    
1055    
1056    
1057    /*************************************************
1058    *             Print one character                *
1059    *************************************************/
1060    
1061    /* Print a single character either literally, or as a hex escape. */
1062    
1063    static int pchar(int c, FILE *f)
1064    {
1065    if (PRINTOK(c))
1066      {
1067      if (f != NULL) fprintf(f, "%c", c);
1068      return 1;
1069      }
1070    
1071    if (c < 0x100)
1072      {
1073      if (use_utf)
1074        {
1075        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1076        return 6;
1077        }
1078      else
1079        {
1080        if (f != NULL) fprintf(f, "\\x%02x", c);
1081        return 4;
1082        }
1083      }
1084    
1085    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1086    return (c <= 0x000000ff)? 6 :
1087           (c <= 0x00000fff)? 7 :
1088           (c <= 0x0000ffff)? 8 :
1089           (c <= 0x000fffff)? 9 : 10;
1090    }
1091    
1092    
1093    
1094    #ifdef SUPPORT_PCRE8
1095    /*************************************************
1096    *         Print 8-bit character string           *
1097    *************************************************/
1098    
1099    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1100    If handed a NULL file, just counts chars without printing. */
1101    
1102    static int pchars(pcre_uint8 *p, int length, FILE *f)
1103  {  {
1104  int c = 0;  int c = 0;
1105  int yield = 0;  int yield = 0;
# Line 345  int yield = 0; Line 1107  int yield = 0;
1107  while (length-- > 0)  while (length-- > 0)
1108    {    {
1109  #if !defined NOUTF8  #if !defined NOUTF8
1110    if (use_utf8)    if (use_utf)
1111      {      {
1112      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1113      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1114        {        {
1115        length -= rc - 1;        length -= rc - 1;
1116        p += rc;        p += rc;
1117        if (c < 256 && isprint(c))        yield += pchar(c, f);
1118          {        continue;
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n;  
         if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);  
         yield += n;  
         }  
       continue;  
1119        }        }
1120      }      }
1121  #endif  #endif
1122      c = *p++;
1123      yield += pchar(c, f);
1124      }
1125    
1126     /* Not UTF-8, or malformed UTF-8  */  return yield;
1127    }
1128    #endif
1129    
1130    if (isprint(c = *(p++)))  
1131      {  
1132      if (f != NULL) fprintf(f, "%c", c);  #ifdef SUPPORT_PCRE16
1133      yield++;  /*************************************************
1134      }  *           Print 16-bit character string        *
1135    else  *************************************************/
1136    
1137    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1138    If handed a NULL file, just counts chars without printing. */
1139    
1140    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1141    {
1142    int yield = 0;
1143    
1144    while (length-- > 0)
1145      {
1146      int c = *p++ & 0xffff;
1147    #if !defined NOUTF8
1148      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1149      {      {
1150      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1151      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1152      }        {
1153          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1154          length--;
1155          p++;
1156          }
1157        }
1158    #endif
1159      yield += pchar(c, f);
1160    }    }
1161    
1162  return yield;  return yield;
1163  }  }
1164    #endif
1165    
1166    
1167    
# Line 413  if (callout_extra) Line 1190  if (callout_extra)
1190      else      else
1191        {        {
1192        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1193        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject + cb->offset_vector[i],
1194          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1195        fprintf(f, "\n");        fprintf(f, "\n");
1196        }        }
# Line 426  printed lengths of the substrings. */ Line 1203  printed lengths of the substrings. */
1203    
1204  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1205    
1206  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, cb->start_match, f);
1207  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject + cb->start_match,
1208    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1209    
1210  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1211    
1212  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject + cb->current_position,
1213    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1214    
1215  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 469  fprintf(outfile, "%.*s", (cb->next_item_ Line 1246  fprintf(outfile, "%.*s", (cb->next_item_
1246  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1247  first_callout = 0;  first_callout = 0;
1248    
1249    if (cb->mark != last_callout_mark)
1250      {
1251      fprintf(outfile, "Latest Mark: %s\n",
1252        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1253      last_callout_mark = cb->mark;
1254      }
1255    
1256  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1257    {    {
1258    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 488  return (cb->callout_number != callout_fa Line 1272  return (cb->callout_number != callout_fa
1272  *            Local malloc functions              *  *            Local malloc functions              *
1273  *************************************************/  *************************************************/
1274    
1275  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1276  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1277    show_malloc variable is set only during matching. */
1278    
1279  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1280  {  {
1281  void *block = malloc(size);  void *block = malloc(size);
1282  gotten_store = size;  gotten_store = size;
1283    if (first_gotten_store == 0) first_gotten_store = size;
1284  if (show_malloc)  if (show_malloc)
1285    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1286  return block;  return block;
# Line 507  if (show_malloc) Line 1293  if (show_malloc)
1293  free(block);  free(block);
1294  }  }
1295    
   
1296  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1297    
1298  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 530  free(block); Line 1315  free(block);
1315  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1316  *************************************************/  *************************************************/
1317    
1318  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1319    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1320    value, but the code is defensive. */
1321    
1322  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1323  {  {
1324  int rc;  int rc;
1325  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1326    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1327    #ifdef SUPPORT_PCRE16
1328      rc = pcre16_fullinfo(re, study, option, ptr);
1329    #else
1330      rc = PCRE_ERROR_BADMODE;
1331    #endif
1332    else
1333    #ifdef SUPPORT_PCRE8
1334      rc = pcre_fullinfo(re, study, option, ptr);
1335    #else
1336      rc = PCRE_ERROR_BADMODE;
1337    #endif
1338    
1339    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1340      use_pcre16? "16" : "", option);
1341  }  }
1342    
1343    
# Line 563  return ((value & 0x000000ff) << 24) | Line 1364  return ((value & 0x000000ff) << 24) |
1364  *************************************************/  *************************************************/
1365    
1366  static int  static int
1367  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1368    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1369    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1370  {  {
# Line 578  for (;;) Line 1379  for (;;)
1379    {    {
1380    *limit = mid;    *limit = mid;
1381    
1382    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1383      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1384    
1385    if (count == errnumber)    if (count == errnumber)
# Line 603  for (;;) Line 1404  for (;;)
1404    else break;    /* Some other error */    else break;    /* Some other error */
1405    }    }
1406    
1407  extra->flags &= ~flag;  extra->flags &= ~flag;
1408  return count;  return count;
1409    }
1410    
1411    
1412    
1413    /*************************************************
1414    *         Case-independent strncmp() function    *
1415    *************************************************/
1416    
1417    /*
1418    Arguments:
1419      s         first string
1420      t         second string
1421      n         number of characters to compare
1422    
1423    Returns:    < 0, = 0, or > 0, according to the comparison
1424    */
1425    
1426    static int
1427    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1428    {
1429    while (n--)
1430      {
1431      int c = tolower(*s++) - tolower(*t++);
1432      if (c) return c;
1433      }
1434    return 0;
1435  }  }
1436    
1437    
# Line 613  return count; Line 1440  return count;
1440  *         Check newline indicator                *  *         Check newline indicator                *
1441  *************************************************/  *************************************************/
1442    
1443  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1444  xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.  a message and return 0 if there is no match.
1445    
1446  Arguments:  Arguments:
1447    p           points after the leading '<'    p           points after the leading '<'
# Line 624  Returns:      appropriate PCRE_NEWLINE_x Line 1451  Returns:      appropriate PCRE_NEWLINE_x
1451  */  */
1452    
1453  static int  static int
1454  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1455  {  {
1456  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1457  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1458  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1459    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1460    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1461    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1462    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1463  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1464  return 0;  return 0;
1465  }  }
# Line 636  return 0; Line 1467  return 0;
1467    
1468    
1469  /*************************************************  /*************************************************
1470    *             Usage function                     *
1471    *************************************************/
1472    
/* Write the command-line summary to stdout. Lines for features that are not
compiled in (16-bit support, DFA matching, the POSIX interface) are left out,
and the readline() line says whether or not pcretest was built with it. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs("  -16      use 16-bit interface\n", stdout);
#endif
fputs("  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1510    
1511    
1512    
1513    /*************************************************
1514  *                Main Program                    *  *                Main Program                    *
1515  *************************************************/  *************************************************/
1516    
# Line 648  int main(int argc, char **argv) Line 1523  int main(int argc, char **argv)
1523  FILE *infile = stdin;  FILE *infile = stdin;
1524  int options = 0;  int options = 0;
1525  int study_options = 0;  int study_options = 0;
1526    int default_find_match_limit = FALSE;
1527  int op = 1;  int op = 1;
1528  int timeit = 0;  int timeit = 0;
1529    int timeitm = 0;
1530  int showinfo = 0;  int showinfo = 0;
1531  int showstore = 0;  int showstore = 0;
1532    int force_study = -1;
1533    int force_study_options = 0;
1534  int quiet = 0;  int quiet = 0;
1535  int size_offsets = 45;  int size_offsets = 45;
1536  int size_offsets_max;  int size_offsets_max;
# Line 665  int all_use_dfa = 0; Line 1544  int all_use_dfa = 0;
1544  int yield = 0;  int yield = 0;
1545  int stack_size;  int stack_size;
1546    
1547    pcre_jit_stack *jit_stack = NULL;
1548    
1549  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1550  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1551    
1552  uschar copynames[1024];  pcre_uchar copynames[1024];
1553  uschar getnames[1024];  pcre_uchar getnames[1024];
1554    
1555  uschar *copynamesptr;  pcre_uchar *copynamesptr;
1556  uschar *getnamesptr;  pcre_uchar *getnamesptr;
1557    
1558  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that valgrind will check their misuse when
1559  when I am debugging. They grow automatically when very long lines are read. */  debugging. They grow automatically when very long lines are read. The 16-bit
1560    buffer (buffer16) is obtained only if needed. */
1561    
1562  buffer = (unsigned char *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
1563  dbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (pcre_uint8 *)malloc(buffer_size);
1564  pbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1565    
1566  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
1567    
1568  outfile = stdout;  outfile = stdout;
1569    
1570    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1571    library to translate CRLF into a single LF character. At least, that's what
1572    I've been told: never having used Windows I take this all on trust. Originally
1573    it set 0x8000, but then I was advised that _O_BINARY was better. */
1574    
1575    #if defined(_WIN32) || defined(WIN32)
1576    _setmode( _fileno( stdout ), _O_BINARY );
1577    #endif
1578    
1579  /* Scan options */  /* Scan options */
1580    
1581  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1582    {    {
1583    unsigned char *endptr;    pcre_uint8 *endptr;
1584    
1585      if (strcmp(argv[op], "-m") == 0) showstore = 1;
1586      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1587      else if (strcmp(argv[op], "-s+") == 0)
1588        {
1589        force_study = 1;
1590        force_study_options = PCRE_STUDY_JIT_COMPILE;
1591        }
1592    #ifdef SUPPORT_PCRE16
1593      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1594    #endif
1595    
   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)  
     showstore = 1;  
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
1596    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1597      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1598    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1599    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1600      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1601  #if !defined NODFA  #if !defined NODFA
1602    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1603  #endif  #endif
1604    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1605        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1606          *endptr == 0))          *endptr == 0))
1607      {      {
1608      op++;      op++;
1609      argc--;      argc--;
1610      }      }
1611      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1612        {
1613        int both = argv[op][2] == 0;
1614        int temp;
1615        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1616                         *endptr == 0))
1617          {
1618          timeitm = temp;
1619          op++;
1620          argc--;
1621          }
1622        else timeitm = LOOPREPEAT;
1623        if (both) timeit = timeitm;
1624        }
1625    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1626        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1627          *endptr == 0))          *endptr == 0))
1628      {      {
1629  #ifdef _WIN32  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1630      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1631      exit(1);      exit(1);
1632  #else  #else
# Line 741  while (argc > 1 && argv[op][0] == '-') Line 1650  while (argc > 1 && argv[op][0] == '-')
1650    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1651      {      {
1652      int rc;      int rc;
1653        unsigned long int lrc;
1654      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1655      printf("Compiled with\n");      printf("Compiled with\n");
1656    
1657    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1658    are set, either both UTFs are supported or both are not supported. */
1659    
1660    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1661        printf("  8-bit and 16-bit support\n");
1662        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1663        if (rc)
1664          printf("  UTF-8 and UTF-16 support\n");
1665        else
1666          printf("  No UTF-8 or UTF-16 support\n");
1667    #elif defined SUPPORT_PCRE8
1668        printf("  8-bit support only\n");
1669      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1670      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1671    #else
1672        printf("  16-bit support only\n");
1673        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1674        printf("  %sUTF-16 support\n", rc? "" : "No ");
1675    #endif
1676    
1677      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1678      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1679        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1680        if (rc)
1681          printf("  Just-in-time compiler support\n");
1682        else
1683          printf("  No just-in-time compiler support\n");
1684      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1685      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1686        (rc == '\n')? "LF" : "CRLF");      in EBCDIC environments. CR is 13 and NL is 10. */
1687        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1688          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1689          (rc == -2)? "ANYCRLF" :
1690          (rc == -1)? "ANY" : "???");
1691        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1692        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1693                                         "all Unicode newlines");
1694      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1695      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1696      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1697      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1698      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1699      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1700      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1701      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1702      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1703      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1704      exit(0);      goto EXIT;
1705        }
1706      else if (strcmp(argv[op], "-help") == 0 ||
1707               strcmp(argv[op], "--help") == 0)
1708        {
1709        usage();
1710        goto EXIT;
1711      }      }
1712    else    else
1713      {      {
1714      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1715      printf("Usage:   pcretest [options] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -S <n> set stack size to <n> megabytes\n");  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
1716      yield = 1;      yield = 1;
1717      goto EXIT;      goto EXIT;
1718      }      }
# Line 794  offsets = (int *)malloc(size_offsets_max Line 1727  offsets = (int *)malloc(size_offsets_max
1727  if (offsets == NULL)  if (offsets == NULL)
1728    {    {
1729    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1730      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1731    yield = 1;    yield = 1;
1732    goto EXIT;    goto EXIT;
1733    }    }
# Line 803  if (offsets == NULL) Line 1736  if (offsets == NULL)
1736    
1737  if (argc > 1)  if (argc > 1)
1738    {    {
1739    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1740    if (infile == NULL)    if (infile == NULL)
1741      {      {
1742      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 814  if (argc > 1) Line 1747  if (argc > 1)
1747    
1748  if (argc > 2)  if (argc > 2)
1749    {    {
1750    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1751    if (outfile == NULL)    if (outfile == NULL)
1752      {      {
1753      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 825  if (argc > 2) Line 1758  if (argc > 2)
1758    
1759  /* Set alternative malloc function */  /* Set alternative malloc function */
1760    
1761    #ifdef SUPPORT_PCRE8
1762  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1763  pcre_free = new_free;  pcre_free = new_free;
1764  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1765  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1766    #endif
1767    
1768    #ifdef SUPPORT_PCRE16
1769    pcre16_malloc = new_malloc;
1770    pcre16_free = new_free;
1771    pcre16_stack_malloc = stack_malloc;
1772    pcre16_stack_free = stack_free;
1773    #endif
1774    
1775  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1776    
# Line 847  while (!done) Line 1789  while (!done)
1789  #endif  #endif
1790    
1791    const char *error;    const char *error;
1792    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
1793    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
1794    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
1795      const pcre_uint8 *tables = NULL;
1796    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1797    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1798      int do_allcaps = 0;
1799      int do_mark = 0;
1800    int do_study = 0;    int do_study = 0;
1801      int no_force_study = 0;
1802    int do_debug = debug;    int do_debug = debug;
1803    int do_G = 0;    int do_G = 0;
1804    int do_g = 0;    int do_g = 0;
1805    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1806    int do_showrest = 0;    int do_showrest = 0;
1807      int do_showcaprest = 0;
1808    int do_flip = 0;    int do_flip = 0;
1809    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1810    
1811    use_utf8 = 0;    use_utf = 0;
1812      debug_lengths = 1;
1813    
1814    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1815    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1816    fflush(outfile);    fflush(outfile);
1817    
# Line 877  while (!done) Line 1824  while (!done)
1824    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1825      {      {
1826      unsigned long int magic, get_options;      unsigned long int magic, get_options;
1827      uschar sbuf[8];      pcre_uint8 sbuf[8];
1828      FILE *f;      FILE *f;
1829    
1830      p++;      p++;
# Line 900  while (!done) Line 1847  while (!done)
1847        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1848    
1849      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1850      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1851    
1852      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1853    
# Line 919  while (!done) Line 1866  while (!done)
1866          }          }
1867        }        }
1868    
1869      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1870        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1871    
1872      /* Need to know if UTF-8 for printing data strings */      /* Now see if there is any following study data. */
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
1873    
1874      if (true_study_size != 0)      if (true_study_size != 0)
1875        {        {
# Line 943  while (!done) Line 1885  while (!done)
1885          {          {
1886          FAIL_READ:          FAIL_READ:
1887          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1888          if (extra != NULL) new_free(extra);          if (extra != NULL)
1889              {
1890              PCRE_FREE_STUDY(extra);
1891              }
1892          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1893          fclose(f);          fclose(f);
1894          continue;          continue;
# Line 953  while (!done) Line 1898  while (!done)
1898        }        }
1899      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
1900    
1901        /* Flip the necessary bytes. */
1902        if (do_flip != 0)
1903          {
1904          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
1905          }
1906    
1907        /* Need to know if UTF-8 for printing data strings */
1908    
1909        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1910        use_utf = (get_options & PCRE_UTF8) != 0;
1911    
1912      fclose(f);      fclose(f);
1913      goto SHOW_INFO;      goto SHOW_INFO;
1914      }      }
1915    
1916    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
1917    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
1918    
1919    delimiter = *p++;    delimiter = *p++;
1920    
1921    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1922      {      {
1923      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1924      goto SKIP_DATA;      goto SKIP_DATA;
1925      }      }
1926    
1927    pp = p;    pp = p;
1928      poffset = (int)(p - buffer);
1929    
1930    for(;;)    for(;;)
1931      {      {
# Line 979  while (!done) Line 1936  while (!done)
1936        pp++;        pp++;
1937        }        }
1938      if (*pp != 0) break;      if (*pp != 0) break;
1939      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1940        {        {
1941        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1942        done = 1;        done = 1;
# Line 989  while (!done) Line 1945  while (!done)
1945      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1946      }      }
1947    
1948      /* The buffer may have moved while being extended; reset the start of data
1949      pointer to the correct relative point in the buffer. */
1950    
1951      p = buffer + poffset;
1952    
1953    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1954    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1955    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 1018  while (!done) Line 1979  while (!done)
1979        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1980        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1981    
1982        case '+': do_showrest = 1; break;        case '+':
1983          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1984          break;
1985    
1986          case '=': do_allcaps = 1; break;
1987        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1988          case 'B': do_debug = 1; break;
1989        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1990        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1991        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
# Line 1027  while (!done) Line 1993  while (!done)
1993        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1994        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1995        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1996          case 'K': do_mark = 1; break;
1997        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1998        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1999    
# Line 1034  while (!done) Line 2001  while (!done)
2001        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2002  #endif  #endif
2003    
2004        case 'S': do_study = 1; break;        case 'S':
2005          if (do_study == 0)
2006            {
2007            do_study = 1;
2008            if (*pp == '+')
2009              {
2010              study_options |= PCRE_STUDY_JIT_COMPILE;
2011              pp++;
2012              }
2013            }
2014          else
2015            {
2016            do_study = 0;
2017            no_force_study = 1;
2018            }
2019          break;
2020    
2021        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2022          case 'W': options |= PCRE_UCP; break;
2023        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2024        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2025          case 'Z': debug_lengths = 0; break;
2026          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2027        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2028    
2029          case 'T':
2030          switch (*pp++)
2031            {
2032            case '0': tables = tables0; break;
2033            case '1': tables = tables1; break;
2034    
2035            case '\r':
2036            case '\n':
2037            case ' ':
2038            case 0:
2039            fprintf(outfile, "** Missing table number after /T\n");
2040            goto SKIP_DATA;
2041    
2042            default:
2043            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2044            goto SKIP_DATA;
2045            }
2046          break;
2047    
2048        case 'L':        case 'L':
2049        ppp = pp;        ppp = pp;
2050        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
2051        /* The 0 (NUL) test is just in case this is an unterminated line. */
2052          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2053        *ppp = 0;        *ppp = 0;
2054        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2055          {          {
2056          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2057          goto SKIP_DATA;          goto SKIP_DATA;
2058          }          }
2059          locale_set = 1;
2060        tables = pcre_maketables();        tables = pcre_maketables();
2061        pp = ppp;        pp = ppp;
2062        break;        break;
# Line 1063  while (!done) Line 2070  while (!done)
2070    
2071        case '<':        case '<':
2072          {          {
2073          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2074          if (x == 0) goto SKIP_DATA;            {
2075          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2076          while (*pp++ != '>');            pp += 3;
2077              }
2078            else
2079              {
2080              int x = check_newline(pp, outfile);
2081              if (x == 0) goto SKIP_DATA;
2082              options |= x;
2083              while (*pp++ != '>');
2084              }
2085          }          }
2086        break;        break;
2087    
# Line 1083  while (!done) Line 2098  while (!done)
2098    
2099    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2100    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2101    local character tables. */    local character tables. Neither does it have 16-bit support. */
2102    
2103  #if !defined NOPOSIX  #if !defined NOPOSIX
2104    if (posix || do_posix)    if (posix || do_posix)
# Line 1096  while (!done) Line 2111  while (!done)
2111      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2112      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2113      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2114        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2115        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2116    
2117        first_gotten_store = 0;
2118      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2119    
2120      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1116  while (!done) Line 2134  while (!done)
2134  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2135    
2136      {      {
2137      if (timeit)      unsigned long int get_options;
2138    
2139        /* In 16-bit mode, convert the input. */
2140    
2141    #ifdef SUPPORT_PCRE16
2142        if (use_pcre16)
2143          {
2144          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2145            {
2146            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2147              "converted to UTF-16\n");
2148            goto SKIP_DATA;
2149            }
2150          p = (pcre_uint8 *)buffer16;
2151          }
2152    #endif
2153    
2154        /* Compile many times when timing */
2155    
2156        if (timeit > 0)
2157        {        {
2158        register int i;        register int i;
2159        clock_t time_taken;        clock_t time_taken;
2160        clock_t start_time = clock();        clock_t start_time = clock();
2161        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2162          {          {
2163          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2164          if (re != NULL) free(re);          if (re != NULL) free(re);
2165          }          }
2166        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2167        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2168          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
2169            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2170        }        }
2171    
2172      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2173        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2174    
2175      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2176      if non-interactive. */      if non-interactive. */
# Line 1145  while (!done) Line 2183  while (!done)
2183          {          {
2184          for (;;)          for (;;)
2185            {            {
2186            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2187              {              {
2188              done = 1;              done = 1;
2189              goto CONTINUE;              goto CONTINUE;
# Line 1159  while (!done) Line 2197  while (!done)
2197        goto CONTINUE;        goto CONTINUE;
2198        }        }
2199    
2200      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2201      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2202      returns only limited data. Check that it agrees with the newer one. */      lines. */
2203    
2204      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2205        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2206    
2207      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2208      and remember the store that was got. */      and remember the store that was got. */
2209    
2210      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
2211      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2212    
2213        /* Output code size information if requested */
2214    
2215        if (log_store)
2216          fprintf(outfile, "Memory allocation (code space): %d\n",
2217            (int)(first_gotten_store -
2218                  sizeof(real_pcre) -
2219                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2220    
2221      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
2222      help with the matching. */      help with the matching, unless the pattern has the SS option, which
2223        suppresses the effect of /S (used for a few test patterns where studying is
2224        never sensible). */
2225    
2226      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
2227        {        {
2228        if (timeit)        if (timeit > 0)
2229          {          {
2230          register int i;          register int i;
2231          clock_t time_taken;          clock_t time_taken;
2232          clock_t start_time = clock();          clock_t start_time = clock();
2233          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
2234            extra = pcre_study(re, study_options, &error);            {
2235              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2236              }
2237          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2238          if (extra != NULL) free(extra);          if (extra != NULL)
2239          fprintf(outfile, "  Study time %.3f milliseconds\n",            {
2240            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            PCRE_FREE_STUDY(extra);
2241              }
2242            fprintf(outfile, "  Study time %.4f milliseconds\n",
2243              (((double)time_taken * 1000.0) / (double)timeit) /
2244              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2245          }          }
2246        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2247        if (error != NULL)        if (error != NULL)
2248          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2249        else if (extra != NULL)        else if (extra != NULL)
2250            {
2251          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2252            if (log_store)
2253              {
2254              size_t jitsize;
2255              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2256              if (jitsize != 0)
2257                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2258              }
2259            }
2260        }        }
2261    
2262      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
2263      fields in the regex data block and the study block. This is to make it  
2264      possible to test PCRE's handling of byte-flipped patterns, e.g. those      if (do_mark)
2265      compiled on a different architecture. */        {
2266          if (extra == NULL)
2267      if (do_flip)          {
2268        {          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2269        real_pcre *rre = (real_pcre *)re;          extra->flags = 0;
       rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
2270          }          }
2271          extra->mark = &markptr;
2272          extra->flags |= PCRE_EXTRA_MARK;
2273        }        }
2274    
2275      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
2276    
2277      SHOW_INFO:      SHOW_INFO:
2278    
2279        if (do_debug)
2280          {
2281          fprintf(outfile, "------------------------------------------------------------------\n");
2282    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2283          if (use_pcre16)
2284            pcre16_printint(re, outfile, debug_lengths);
2285          else
2286            pcre_printint(re, outfile, debug_lengths);
2287    #elif defined SUPPORT_PCRE8
2288          pcre_printint(re, outfile, debug_lengths);
2289    #else
2290          pcre16_printint(re, outfile, debug_lengths);
2291    #endif
2292          }
2293    
2294        /* We already have the options in get_options (see above) */
2295    
2296      if (do_showinfo)      if (do_showinfo)
2297        {        {
2298        unsigned long int get_options, all_options;        unsigned long int all_options;
2299  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2300        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2301  #endif  #endif
2302        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2303            hascrorlf;
2304        int nameentrysize, namecount;        int nameentrysize, namecount;
2305        const uschar *nametable;        const pcre_uchar *nametable;
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
2306    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2307        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2308        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2309        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1258  while (!done) Line 2312  while (!done)
2312        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2313        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2314        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2315          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2316          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2317          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2318    
2319          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2320          that it gives the same results as the new function. */
2321    
2322  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2323        old_count = pcre_info(re, &old_options, &old_first_char);        if (!use_pcre16)
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
2324          {          {
2325          if (old_count != count) fprintf(outfile,          old_count = pcre_info(re, &old_options, &old_first_char);
2326            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (count < 0) fprintf(outfile,
2327              old_count);            "Error %d from pcre_info()\n", count);
2328            else
2329          if (old_first_char != first_char) fprintf(outfile,            {
2330            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            if (old_count != count) fprintf(outfile,
2331              first_char, old_first_char);              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2332                  old_count);
2333          if (old_options != (int)get_options) fprintf(outfile,  
2334            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            if (old_first_char != first_char) fprintf(outfile,
2335              get_options, old_options);              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2336                  first_char, old_first_char);
2337    
2338              if (old_options != (int)get_options) fprintf(outfile,
2339                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2340                  get_options, old_options);
2341              }
2342          }          }
2343  #endif  #endif
2344    
# Line 1299  while (!done) Line 2362  while (!done)
2362            }            }
2363          }          }
2364    
2365        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2366        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2367    
2368        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2369        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
2370    
2371        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2372          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2373            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2374            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2375            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2376            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2377            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2378            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2379              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2380              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2381            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2382            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2383            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2384            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2385            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2386            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2387              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2388              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2389            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2390    
2391        switch (get_options & PCRE_NEWLINE_CRLF)        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2392    
2393          switch (get_options & PCRE_NEWLINE_BITS)
2394          {          {
2395          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
2396          fprintf(outfile, "Forced newline sequence: CR\n");          fprintf(outfile, "Forced newline sequence: CR\n");
# Line 1341  while (!done) Line 2404  while (!done)
2404          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
2405          break;          break;
2406    
2407            case PCRE_NEWLINE_ANYCRLF:
2408            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2409            break;
2410    
2411            case PCRE_NEWLINE_ANY:
2412            fprintf(outfile, "Forced newline sequence: ANY\n");
2413            break;
2414    
2415          default:          default:
2416          break;          break;
2417          }          }
# Line 1355  while (!done) Line 2426  while (!done)
2426          }          }
2427        else        else
2428          {          {
2429          int ch = first_char & 255;          const char *caseless =
2430          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2431            "" : " (caseless)";            "" : " (caseless)";
2432          if (isprint(ch))  
2433            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
2434              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2435          else          else
2436            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
2437              fprintf(outfile, "First char = ");
2438              pchar(first_char, outfile);
2439              fprintf(outfile, "%s\n", caseless);
2440              }
2441          }          }
2442    
2443        if (need_char < 0)        if (need_char < 0)
# Line 1370  while (!done) Line 2446  while (!done)
2446          }          }
2447        else        else
2448          {          {
2449          int ch = need_char & 255;          const char *caseless =
2450          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2451            "" : " (caseless)";            "" : " (caseless)";
2452          if (isprint(ch))  
2453            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
2454              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2455          else          else
2456            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2457          }          }
2458    
2459        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2460        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2461        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2462        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2463          information unless -i or -d was also present. This means that, except
2464          when auto-callouts are involved, the output from runs with and without
2465          -s should be identical. */
2466    
2467        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2468          {          {
2469          if (extra == NULL)          if (extra == NULL)
2470            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2471          else          else
2472            {            {
2473            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2474            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2475    
2476              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2477              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2478    
2479              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2480            if (start_bits == NULL)            if (start_bits == NULL)
2481              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2482            else            else
2483              {              {
2484              int i;              int i;
# Line 1409  while (!done) Line 2493  while (!done)
2493                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2494                    c = 2;                    c = 2;
2495                    }                    }
2496                  if (isprint(i) && i != ' ')                  if (PRINTOK(i) && i != ' ')
2497                    {                    {
2498                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2499                    c += 2;                    c += 2;
# Line 1424  while (!done) Line 2508  while (!done)
2508              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2509              }              }
2510            }            }
2511    
2512            /* Show this only if the JIT was set by /S, not by -s. */
2513    
2514            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2515              {
2516              int jit;
2517              new_info(re, extra, PCRE_INFO_JIT, &jit);
2518              if (jit)
2519                fprintf(outfile, "JIT study was successful\n");
2520              else
2521    #ifdef SUPPORT_JIT
2522                fprintf(outfile, "JIT study was not successful\n");
2523    #else
2524                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2525    #endif
2526              }
2527          }          }
2528        }        }
2529    
# Line 1433  while (!done) Line 2533  while (!done)
2533    
2534      if (to_file != NULL)      if (to_file != NULL)
2535        {        {
2536          /* If the 'F' option was present, we flip the bytes of all the integer
2537          fields in the regex data block and the study block. This is to make it
2538          possible to test PCRE's handling of byte-flipped patterns, e.g. those
2539          compiled on a different architecture. */
2540    
2541          if (do_flip)
2542            {
2543            real_pcre *rre = (real_pcre *)re;
2544            rre->magic_number =
2545              byteflip(rre->magic_number, sizeof(rre->magic_number));
2546            rre->size = byteflip(rre->size, sizeof(rre->size));
2547            rre->options = byteflip(rre->options, sizeof(rre->options));
2548            rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2549            rre->top_bracket =
2550              (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2551            rre->top_backref =
2552              (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2553            rre->first_char =
2554              (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2555            rre->req_char =
2556              (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2557            rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2558              sizeof(rre->name_table_offset));
2559            rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2560              sizeof(rre->name_entry_size));
2561            rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2562              sizeof(rre->name_count));
2563    
2564            if (extra != NULL)
2565              {
2566              pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2567              rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2568              rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2569              rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2570              }
2571            }
2572    
2573        FILE *f = fopen((char *)to_file, "wb");        FILE *f = fopen((char *)to_file, "wb");
2574        if (f == NULL)        if (f == NULL)
2575          {          {
# Line 1440  while (!done) Line 2577  while (!done)
2577          }          }
2578        else        else
2579          {          {
2580          uschar sbuf[8];          pcre_uint8 sbuf[8];
2581          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2582          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2583          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2584          sbuf[3] = (true_size)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
2585    
2586          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2587          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2588          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2589          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2590    
2591          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2592              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1458  while (!done) Line 2595  while (!done)
2595            }            }
2596          else          else
2597            {            {
2598            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2599    
2600              /* If there is study data, write it. */
2601    
2602            if (extra != NULL)            if (extra != NULL)
2603              {              {
2604              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1474  while (!done) Line 2614  while (!done)
2614          }          }
2615    
2616        new_free(re);        new_free(re);
2617        if (extra != NULL) new_free(extra);        if (extra != NULL)
2618        if (tables != NULL) new_free((void *)tables);          {
2619            PCRE_FREE_STUDY(extra);
2620            }
2621          if (locale_set)
2622            {
2623            new_free((void *)tables);
2624            setlocale(LC_CTYPE, "C");
2625            locale_set = 0;
2626            }
2627        continue;  /* With next regex */        continue;  /* With next regex */
2628        }        }
2629      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1484  while (!done) Line 2632  while (!done)
2632    
2633    for (;;)    for (;;)
2634      {      {
2635      uschar *q;      pcre_uint8 *q;
2636      uschar *bptr = dbuffer;      pcre_uint8 *bptr;
2637      int *use_offsets = offsets;      int *use_offsets = offsets;
2638      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2639      int callout_data = 0;      int callout_data = 0;
2640      int callout_data_set = 0;      int callout_data_set = 0;
2641      int count, c;      int count, c;
2642      int copystrings = 0;      int copystrings = 0;
2643      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2644      int getstrings = 0;      int getstrings = 0;
2645      int getlist = 0;      int getlist = 0;
2646      int gmatched = 0;      int gmatched = 0;
2647      int start_offset = 0;      int start_offset = 0;
2648        int start_offset_sign = 1;
2649      int g_notempty = 0;      int g_notempty = 0;
2650      int use_dfa = 0;      int use_dfa = 0;
2651    
# Line 1510  while (!done) Line 2659  while (!done)
2659    
2660      pcre_callout = callout;      pcre_callout = callout;
2661      first_callout = 1;      first_callout = 1;
2662        last_callout_mark = NULL;
2663      callout_extra = 0;      callout_extra = 0;
2664      callout_count = 0;      callout_count = 0;
2665      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1522  while (!done) Line 2672  while (!done)
2672      len = 0;      len = 0;
2673      for (;;)      for (;;)
2674        {        {
2675        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
2676          {          {
2677          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2678              {
2679              fprintf(outfile, "\n");
2680              break;
2681              }
2682          done = 1;          done = 1;
2683          goto CONTINUE;          goto CONTINUE;
2684          }          }
# Line 1541  while (!done) Line 2694  while (!done)
2694      p = buffer;      p = buffer;
2695      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2696    
2697      q = dbuffer;      bptr = q = dbuffer;
2698      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2699        {        {
2700        int i = 0;        int i = 0;
# Line 1565  while (!done) Line 2718  while (!done)
2718            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2719    
2720  #if !defined NOUTF8  #if !defined NOUTF8
2721          if (use_utf8 && c > 255)          if (use_utf && c > 255)
2722            {            {
2723            unsigned char buff8[8];            pcre_uint8 buff8[8];
2724            int ii, utn;            int ii, utn;
2725            utn = ord2utf8(c, buff8);            utn = ord2utf8(c, buff8);
2726            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
# Line 1583  while (!done) Line 2736  while (!done)
2736  #if !defined NOUTF8  #if !defined NOUTF8
2737          if (*p == '{')          if (*p == '{')
2738            {            {
2739            unsigned char *pt = p;            pcre_uint8 *pt = p;
2740            c = 0;            c = 0;
2741            while (isxdigit(*(++pt)))  
2742              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2743              when isxdigit() is a macro that refers to its argument more than
2744              once. This is banned by the C Standard, but apparently happens in at
2745              least one MacOS environment. */
2746    
2747              for (pt++; isxdigit(*pt); pt++)
2748                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2749            if (*pt == '}')            if (*pt == '}')
2750              {              {
2751              unsigned char buff8[8];              pcre_uint8 buff8[8];
2752              int ii, utn;              int ii, utn;
2753              utn = ord2utf8(c, buff8);              if (use_utf)
2754              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2755              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2756                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2757                  c = buff8[ii];   /* Last byte */
2758                  }
2759                else
2760                 {
2761                 if (c > 255)
2762                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2763                     "UTF-8 mode is not enabled.\n"
2764                     "** Truncation will probably give the wrong result.\n", c);
2765                 }
2766              p = pt + 1;              p = pt + 1;
2767              break;              break;
2768              }              }
# Line 1606  while (!done) Line 2775  while (!done)
2775          c = 0;          c = 0;
2776          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2777            {            {
2778            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2779            p++;            p++;
2780            }            }
2781          break;          break;
# Line 1616  while (!done) Line 2785  while (!done)
2785          continue;          continue;
2786    
2787          case '>':          case '>':
2788            if (*p == '-')
2789              {
2790              start_offset_sign = -1;
2791              p++;
2792              }
2793          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2794            start_offset *= start_offset_sign;
2795          continue;          continue;
2796    
2797          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1635  while (!done) Line 2810  while (!done)
2810            }            }
2811          else if (isalnum(*p))          else if (isalnum(*p))
2812            {            {
2813            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
2814            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2815            *npp++ = 0;            *npp++ = 0;
2816            *npp = 0;            *npp = 0;
# Line 1689  while (!done) Line 2864  while (!done)
2864  #endif  #endif
2865            use_dfa = 1;            use_dfa = 1;
2866          continue;          continue;
2867    #endif
2868    
2869    #if !defined NODFA
2870          case 'F':          case 'F':
2871          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2872          continue;          continue;
# Line 1703  while (!done) Line 2880  while (!done)
2880            }            }
2881          else if (isalnum(*p))          else if (isalnum(*p))
2882            {            {
2883            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
2884            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2885            *npp++ = 0;            *npp++ = 0;
2886            *npp = 0;            *npp = 0;
# Line 1714  while (!done) Line 2891  while (!done)
2891            }            }
2892          continue;          continue;
2893    
2894            case 'J':
2895            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2896            if (extra != NULL
2897                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2898                && extra->executable_jit != NULL)
2899              {
2900              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2901              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2902              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2903              }
2904            continue;
2905    
2906          case 'L':          case 'L':
2907          getlist = 1;          getlist = 1;
2908          continue;          continue;
# Line 1723  while (!done) Line 2912  while (!done)
2912          continue;          continue;
2913    
2914          case 'N':          case 'N':
2915          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2916              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2917            else
2918              options |= PCRE_NOTEMPTY;
2919          continue;          continue;
2920    
2921          case 'O':          case 'O':
# Line 1736  while (!done) Line 2928  while (!done)
2928            if (offsets == NULL)            if (offsets == NULL)
2929              {              {
2930              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2931                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2932              yield = 1;              yield = 1;
2933              goto EXIT;              goto EXIT;
2934              }              }
# Line 1746  while (!done) Line 2938  while (!done)
2938          continue;          continue;
2939    
2940          case 'P':          case 'P':
2941          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2942              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2943          continue;          continue;
2944    
2945          case 'Q':          case 'Q':
# Line 1781  while (!done) Line 2974  while (!done)
2974          show_malloc = 1;          show_malloc = 1;
2975          continue;          continue;
2976    
2977            case 'Y':
2978            options |= PCRE_NO_START_OPTIMIZE;
2979            continue;
2980    
2981          case 'Z':          case 'Z':
2982          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2983          continue;          continue;
# Line 1801  while (!done) Line 2998  while (!done)
2998        *q++ = c;        *q++ = c;
2999        }        }
3000      *q = 0;      *q = 0;
3001      len = q - dbuffer;      len = (int)(q - dbuffer);
3002    
3003        /* Move the data to the end of the buffer so that a read over the end of
3004        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3005        we are using the POSIX interface, we must include the terminating zero. */
3006    
3007    #if !defined NOPOSIX
3008        if (posix || do_posix)
3009          {
3010          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3011          bptr += buffer_size - len - 1;
3012          }
3013        else
3014    #endif
3015          {
3016          memmove(bptr + buffer_size - len, bptr, len);
3017          bptr += buffer_size - len;
3018          }
3019    
3020      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3021        {        {
# Line 1822  while (!done) Line 3036  while (!done)
3036          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3037        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3038        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3039          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3040    
3041        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3042    
# Line 1843  while (!done) Line 3058  while (!done)
3058            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3059              {              {
3060              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3061              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3062                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3063              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3064              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3065                {                {
3066                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3067                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3068                  outfile);                  outfile);
3069                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3070                }                }
# Line 1857  while (!done) Line 3072  while (!done)
3072            }            }
3073          }          }
3074        free(pmatch);        free(pmatch);
3075          goto NEXT_DATA;
3076        }        }
3077    
3078    #endif  /* !defined NOPOSIX */
3079    
3080      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3081    
3082      else  #ifdef SUPPORT_PCRE16
3083  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3084          {
3085          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3086          if (len < 0)
3087            {
3088            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3089              "converted to UTF-16\n");
3090            goto NEXT_DATA;
3091            }
3092          bptr = (pcre_uint8 *)buffer16;
3093          }
3094    #endif
3095    
3096      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3097        {        {
3098        if (timeit)        markptr = NULL;
3099    
3100          if (timeitm > 0)
3101          {          {
3102          register int i;          register int i;
3103          clock_t time_taken;          clock_t time_taken;
# Line 1876  while (!done) Line 3107  while (!done)
3107          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
3108            {            {
3109            int workspace[1000];            int workspace[1000];
3110            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
3111              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3112                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
3113                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
3114            }            }
3115          else          else
3116  #endif  #endif
3117    
3118          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
3119            count = pcre_exec(re, extra, (char *)bptr, len,            {
3120              PCRE_EXEC(count, re, extra, bptr, len,
3121              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
3122              }
3123          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3124          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3125            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
3126              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3127          }          }
3128    
3129        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3130        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3131        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3132          running of pcre_exec(), so disable the JIT optimization. This makes it
3133          possible to run the same set of tests with and without JIT externally
3134          requested. */
3135    
3136        if (find_match_limit)        if (find_match_limit)
3137          {          {
# Line 1905  while (!done) Line 3140  while (!done)
3140            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3141            extra->flags = 0;            extra->flags = 0;
3142            }            }
3143            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3144    
3145          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3146            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 1928  while (!done) Line 3164  while (!done)
3164            }            }
3165          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3166          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3167          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3168            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3169          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3170          }          }
# Line 1940  while (!done) Line 3176  while (!done)
3176        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3177          {          {
3178          int workspace[1000];          int workspace[1000];
3179          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3180            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
3181            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
3182          if (count == 0)          if (count == 0)
# Line 1953  while (!done) Line 3189  while (!done)
3189    
3190        else        else
3191          {          {
3192          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3193            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3194          if (count == 0)          if (count == 0)
3195            {            {
3196            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 1966  while (!done) Line 3202  while (!done)
3202    
3203        if (count >= 0)        if (count >= 0)
3204          {          {
3205          int i;          int i, maxcount;
3206    
3207    #if !defined NODFA
3208            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3209    #endif
3210              maxcount = use_size_offsets/3;
3211    
3212            /* This is a check against a lunatic return value. */
3213    
3214            if (count > maxcount)
3215              {
3216              fprintf(outfile,
3217                "** PCRE error: returned count %d is too big for offset size %d\n",
3218                count, use_size_offsets);
3219              count = use_size_offsets/3;
3220              if (do_g || do_G)
3221                {
3222                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3223                do_g = do_G = FALSE;        /* Break g/G loop */
3224                }
3225              }
3226    
3227            /* do_allcaps requests showing of all captures in the pattern, to check
3228            unset ones at the end. */
3229    
3230            if (do_allcaps)
3231              {
3232              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3233              count++;   /* Allow for full match */
3234              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3235              }
3236    
3237            /* Output the captured substrings */
3238    
3239          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3240            {            {
3241            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
3242                {
3243                if (use_offsets[i] != -1)
3244                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3245                    use_offsets[i], i);
3246                if (use_offsets[i+1] != -1)
3247                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3248                    use_offsets[i+1], i+1);
3249              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3250                }
3251            else            else
3252              {              {
3253              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3254              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr + use_offsets[i],
3255                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3256              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3257              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3258                {                {
3259                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3260                  {                PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3261                  fprintf(outfile, " 0+ ");                  outfile);
3262                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3263                }                }
3264              }              }
3265            }            }
3266    
3267            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3268    
3269          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3270            {            {
3271            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2063  while (!done) Line 3339  while (!done)
3339                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3340              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3341                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3342              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3343              }              }
3344            }            }
# Line 2073  while (!done) Line 3348  while (!done)
3348    
3349        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
3350          {          {
3351          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
3352  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
3353          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
3354            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
3355              bptr + use_offsets[0]);            fprintf(outfile, ": ");
3356  #endif            PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3357                outfile);
3358              }
3359          fprintf(outfile, "\n");          fprintf(outfile, "\n");
3360          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
3361          }          }
3362    
3363        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
3364        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
3365        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
3366        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
3367        offset values to achieve this. We won't be at the end of the string -  
3368        that was checked before setting g_notempty. */        Complication arises in the case when the newline convention is "any",
3369          "crlf", or "anycrlf". If the previous match was at the end of a line
3370          terminated by CRLF, an advance of one character just passes the \r,
3371          whereas we should prefer the longer newline sequence, as does the code in
3372          pcre_exec(). Fudge the offset value to achieve this. We check for a
3373          newline setting in the pattern; if none was set, use pcre_config() to
3374          find the default.
3375    
3376          Otherwise, in the case of UTF-8 matching, the advance must be one
3377          character, not one byte. */
3378    
3379        else        else
3380          {          {
3381          if (g_notempty != 0)          if (g_notempty != 0)
3382            {            {
3383            int onechar = 1;            int onechar = 1;
3384              unsigned int obits = ((real_pcre *)re)->options;
3385            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
3386            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
3387                {
3388                int d;
3389                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3390                /* Note that these values are always the ASCII ones, even in
3391                EBCDIC environments. CR = 13, NL = 10. */
3392                obits = (d == 13)? PCRE_NEWLINE_CR :
3393                        (d == 10)? PCRE_NEWLINE_LF :
3394                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3395                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
3396                        (d == -1)? PCRE_NEWLINE_ANY : 0;
3397                }
3398              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3399                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3400                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3401                  &&
3402                  start_offset < len - 1 &&
3403                  bptr[start_offset] == '\r' &&
3404                  bptr[start_offset+1] == '\n')
3405                onechar++;
3406              else if (use_utf)
3407              {              {
3408              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3409                {                {
3410                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3411                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3412                }                }
3413              }              }
3414            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3415            }            }
3416          else          else
3417            {            {
3418            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3419              {              {
3420              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3421                if (gmatched == 0)
3422                  {
3423                  if (markptr == NULL) fprintf(outfile, "No match\n");
3424                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3425                  }
3426                break;
3427    
3428                case PCRE_ERROR_BADUTF8:
3429                case PCRE_ERROR_SHORTUTF8:
3430                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3431                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3432                if (use_size_offsets >= 2)
3433                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3434                    use_offsets[1]);
3435                fprintf(outfile, "\n");
3436                break;
3437    
3438                default:
3439                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3440                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3441                else
3442                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3443                break;
3444              }              }
3445            else fprintf(outfile, "Error %d\n", count);  
3446            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3447            }            }
3448          }          }
# Line 2124  while (!done) Line 3452  while (!done)
3452        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3453    
3454        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3455        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3456        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
3457        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3458        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3459        character. */        character. */
3460    
3461        g_notempty = 0;        g_notempty = 0;
3462    
3463        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3464          {          {
3465          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3466          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3467          }          }
3468    
3469        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2160  while (!done) Line 3489  while (!done)
3489  #endif  #endif
3490    
3491    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3492    if (extra != NULL) new_free(extra);    if (extra != NULL)
3493    if (tables != NULL)      {
3494        PCRE_FREE_STUDY(extra);
3495        }
3496      if (locale_set)
3497      {      {
3498      new_free((void *)tables);      new_free((void *)tables);
3499      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3500        locale_set = 0;
3501        }
3502      if (jit_stack != NULL)
3503        {
3504        pcre_jit_stack_free(jit_stack);
3505        jit_stack = NULL;
3506      }      }
3507    }    }
3508    
# Line 2180  free(dbuffer); Line 3518  free(dbuffer);
3518  free(pbuffer);  free(pbuffer);
3519  free(offsets);  free(offsets);
3520    
3521    #ifdef SUPPORT_PCRE16
3522    if (buffer16 != NULL) free(buffer16);
3523    #endif
3524    
3525  return yield;  return yield;
3526  }  }
3527    

Legend:
Removed from v.91  
changed lines
  Added in v.811

  ViewVC Help
Powered by ViewVC 1.1.5