/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log | View Patch

code/trunk/pcretest.c revision 146 by ph10, Thu Apr 5 09:17:28 2007 UTC code/branches/pcre16/pcretest.c revision 813 by ph10, Tue Dec 20 14:03:16 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  */  */
38    
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <stdio.h>  #include <stdio.h>
46  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 49  POSSIBILITY OF SUCH DAMAGE.
49  #include <locale.h>  #include <locale.h>
50  #include <errno.h>  #include <errno.h>
51    
52    #ifdef SUPPORT_LIBREADLINE
53    #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
60    
61  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
62  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 59  input mode under Windows. */ Line 72  input mode under Windows. */
72  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
73  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89    /* Not Windows */
90    
91  #else  #else
92  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
93  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 79  appropriately for an application, not fo Line 108  appropriately for an application, not fo
108  #include "pcre.h"  #include "pcre.h"
109  #include "pcre_internal.h"  #include "pcre_internal.h"
110    
111  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
112  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
113  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123    /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
130  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
131  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 90  symbols to prevent clashes. */ Line 133  symbols to prevent clashes. */
133  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
134  #define _pcre_utt              utt  #define _pcre_utt              utt
135  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
138    
139  #include "pcre_tables.c"  #include "pcre_tables.c"
140    
141  /* We also need the pcre_printint() function for printing out compiled  /* The definition of the macro PRINTABLE, which determines whether to print an
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled.  
   
 The definition of the macro PRINTABLE, which determines whether to print an  
142  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
143  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
144  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
145  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
146    
147  #include "pcre_printint.src"  #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 117  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
164  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165  interface to the DFA matcher (NODFA), and without the doublecheck of the old  without the interface to the DFA matcher (NODFA), and without the doublecheck
166  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167  UTF8 support if PCRE is built without it. */  out the UTF8 support if PCRE is built without it. */
168    
169  #ifndef SUPPORT_UTF8  #ifndef SUPPORT_UTF8
170  #ifndef NOUTF8  #ifndef NOUTF8
# Line 129  UTF8 support if PCRE is built without it Line 172  UTF8 support if PCRE is built without it
172  #endif  #endif
173  #endif  #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    
185    #define PCHARS8(lv, p, len, f) \
186      lv = pchars((pcre_uint8 *)p, len, f)
187    
188    #define PCHARSV8(p, len, f) \
189      (void)pchars((pcre_uint8 *)p, len, f)
190    
191    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
192      re = pcre_compile((char *)pat, options, error, erroffset, tables)
193    
194    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
195        offsets, size_offsets) \
196      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
197        offsets, size_offsets)
198    
199    #define PCRE_FREE_STUDY8(extra) \
200      pcre_free_study(extra)
201    
202    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
203      pcre_pattern_to_host_byte_order(re, extra, tables)
204    
205    #define PCRE_STUDY8(extra, re, options, error) \
206      extra = pcre_study(re, options, error)
207    
208    #endif /* SUPPORT_PCRE8 */
209    
210    
211    #ifdef SUPPORT_PCRE16
212    
213    #define PCHARS16(lv, p, len, f) \
214      lv = pchars16((PCRE_SPTR16)p, len, f)
215    
216    #define PCHARSV16(p, len, f) \
217      (void)pchars16((PCRE_SPTR16)p, len, f)
218    
219    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
220      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
221    
222    #define PCRE_FREE_STUDY16(extra) \
223      pcre16_free_study(extra)
224    
225    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
226        offsets, size_offsets) \
227      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
228        options, offsets, size_offsets)
229    
230    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
231      pcre16_pattern_to_host_byte_order(re, extra, tables)
232    
233    #define PCRE_STUDY16(extra, re, options, error) \
234      extra = pcre16_study(re, options, error)
235    
236    #endif /* SUPPORT_PCRE16 */
237    
238    
239    /* ----- Both modes are supported; a runtime test is needed ----- */
240    
241    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
242    
243    #define PCHARS(lv, p, len, f) \
244      if (use_pcre16) \
245        PCHARS16(lv, p, len, f); \
246      else \
247        PCHARS8(lv, p, len, f)
248    
249    #define PCHARSV(p, len, f) \
250      if (use_pcre16) \
251        PCHARSV16(p, len, f); \
252      else \
253        PCHARSV8(p, len, f)
254    
255    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
256      if (use_pcre16) \
257        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
258      else \
259        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
260    
261    #define PCRE_FREE_STUDY(extra) \
262      if (use_pcre16) \
263        PCRE_FREE_STUDY16(extra); \
264      else \
265        PCRE_FREE_STUDY8(extra)
266    
267    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
268        offsets, size_offsets) \
269      if (use_pcre16) \
270        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
271          offsets, size_offsets); \
272      else \
273        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
274          offsets, size_offsets)
275    
276    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
277      if (use_pcre16) \
278        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
279      else \
280        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
281    
282    #define PCRE_STUDY(extra, re, options, error) \
283      if (use_pcre16) \
284        PCRE_STUDY16(extra, re, options, error); \
285      else \
286        PCRE_STUDY8(extra, re, options, error)
287    
288    /* ----- Only 8-bit mode is supported ----- */
289    
290    #elif defined SUPPORT_PCRE8
291    #define PCHARS           PCHARS8
292    #define PCHARSV          PCHARSV8
293    #define PCRE_COMPILE     PCRE_COMPILE8
294    #define PCRE_EXEC        PCRE_EXEC8
295    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
296    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
297    #define PCRE_STUDY       PCRE_STUDY8
298    
299    /* ----- Only 16-bit mode is supported ----- */
300    
301    #else
302    #define PCHARS           PCHARS16
303    #define PCHARSV          PCHARSV16
304    #define PCRE_COMPILE     PCRE_COMPILE16
305    #define PCRE_EXEC        PCRE_EXEC16
306    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
307    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
308    #define PCRE_STUDY       PCRE_STUDY16
309    #endif
310    
311    /* ----- End of mode-specific function call macros ----- */
312    
313    
314  /* Other parameters */  /* Other parameters */
315    
# Line 152  static int callout_count; Line 333  static int callout_count;
333  static int callout_extra;  static int callout_extra;
334  static int callout_fail_count;  static int callout_fail_count;
335  static int callout_fail_id;  static int callout_fail_id;
336    static int debug_lengths;
337  static int first_callout;  static int first_callout;
338  static int locale_set = 0;  static int locale_set = 0;
339  static int show_malloc;  static int show_malloc;
340  static int use_utf8;  static int use_utf;
341  static size_t gotten_store;  static size_t gotten_store;
342    static size_t first_gotten_store = 0;
343    static const unsigned char *last_callout_mark = NULL;
344    
345  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
346    
347  static int buffer_size = 50000;  static int buffer_size = 50000;
348  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
349  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
350  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
351    
352    /* Another buffer is needed for translation to 16-bit character strings. It
353    will be obtained and extended as required. */
354    
355    #ifdef SUPPORT_PCRE16
356    static int buffer16_size = 0;
357    static pcre_uint16 *buffer16 = NULL;
358    
359    /* We need the table of operator lengths that is used for 16-bit compiling, in
360    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
361    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
362    appropriately for the 16-bit world. Just as a safety check, make sure that
363    COMPILE_PCRE16 is *not* set. */
364    
365    #ifdef COMPILE_PCRE16
366    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
367    #endif
368    
369    #if LINK_SIZE == 2
370    #undef LINK_SIZE
371    #define LINK_SIZE 1
372    #elif LINK_SIZE == 3 || LINK_SIZE == 4
373    #undef LINK_SIZE
374    #define LINK_SIZE 2
375    #else
376    #error LINK_SIZE must be either 2, 3, or 4
377    #endif
378    
379    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
380    
381    #endif  /* SUPPORT_PCRE16 */
382    
383  /*************************************************  /* If we have 8-bit support, default use_pcre16 to false; if there is also
384  *        Read or extend an input line            *  16-bit support, it can be changed by an option. If there is no 8-bit support,
385  *************************************************/  there must be 16-bit support, so default it to 1. */
386    
387  /* Input lines are read into buffer, but both patterns and data lines can be  #ifdef SUPPORT_PCRE8
388  continued over multiple input lines. In addition, if the buffer fills up, we  static int use_pcre16 = 0;
389  want to automatically expand it so as to be able to handle extremely large  #else
390  lines that are needed for certain stress tests. When the input buffer is  static int use_pcre16 = 1;
391  expanded, the other two buffers must also be expanded likewise, and the  #endif
 contents of pbuffer, which are a copy of the input for callouts, must be  
 preserved (for when expansion happens for a data line). This is not the most  
 optimal way of handling this, but hey, this is just a test program!  
392    
393  Arguments:  /* Textual explanations for runtime error codes */
   f            the file to read  
   start        where in buffer to start (this *must* be within buffer)  
394    
395  Returns:       pointer to the start of new data  static const char *errtexts[] = {
396                 could be a copy of start, or could be moved    NULL,  /* 0 is no error */
397                 NULL if no data read and EOF reached    NULL,  /* NOMATCH is handled specially */
398  */    "NULL argument passed",
399      "bad option value",
400      "magic number missing",
401      "unknown opcode - pattern overwritten?",
402      "no more memory",
403      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
404      "match limit exceeded",
405      "callout error code",
406      NULL,  /* BADUTF8 is handled specially */
407      "bad UTF-8 offset",
408      NULL,  /* PARTIAL is handled specially */
409      "not used - internal error",
410      "internal error - pattern overwritten?",
411      "bad count value",
412      "item unsupported for DFA matching",
413      "backreference condition or recursion test not supported for DFA matching",
414      "match limit not supported for DFA matching",
415      "workspace size exceeded in DFA matching",
416      "too much recursion for DFA matching",
417      "recursion limit exceeded",
418      "not used - internal error",
419      "invalid combination of newline options",
420      "bad offset value",
421      NULL,  /* SHORTUTF8 is handled specially */
422      "nested recursion at the same subject position",
423      "JIT stack limit reached",
424      "pattern compiled in wrong mode (8-bit/16-bit error)"
425    };
426    
 static uschar *  
 extend_inputline(FILE *f, uschar *start)  
 {  
 uschar *here = start;  
427    
428  for (;;)  /*************************************************
429    {  *         Alternate character tables             *
430    int rlen = buffer_size - (here - buffer);  *************************************************/
431    
432    if (rlen > 1000)  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
433      {  using the default tables of the library. However, the T option can be used to
434      int dlen;  select alternate sets of tables, for different kinds of testing. Note also that
435      if (fgets((char *)here, rlen,  f) == NULL)  the L (locale) option also adjusts the tables. */
436        return (here == start)? NULL : start;  
437      dlen = (int)strlen((char *)here);  /* This is the set of tables distributed as default with PCRE. It recognizes
438      if (dlen > 0 && here[dlen - 1] == '\n') return start;  only ASCII characters. */
439      here += dlen;  
440      }  static const pcre_uint8 tables0[] = {
441    
442    /* This table is a lower casing table. */
443    
444        0,  1,  2,  3,  4,  5,  6,  7,
445        8,  9, 10, 11, 12, 13, 14, 15,
446       16, 17, 18, 19, 20, 21, 22, 23,
447       24, 25, 26, 27, 28, 29, 30, 31,
448       32, 33, 34, 35, 36, 37, 38, 39,
449       40, 41, 42, 43, 44, 45, 46, 47,
450       48, 49, 50, 51, 52, 53, 54, 55,
451       56, 57, 58, 59, 60, 61, 62, 63,
452       64, 97, 98, 99,100,101,102,103,
453      104,105,106,107,108,109,110,111,
454      112,113,114,115,116,117,118,119,
455      120,121,122, 91, 92, 93, 94, 95,
456       96, 97, 98, 99,100,101,102,103,
457      104,105,106,107,108,109,110,111,
458      112,113,114,115,116,117,118,119,
459      120,121,122,123,124,125,126,127,
460      128,129,130,131,132,133,134,135,
461      136,137,138,139,140,141,142,143,
462      144,145,146,147,148,149,150,151,
463      152,153,154,155,156,157,158,159,
464      160,161,162,163,164,165,166,167,
465      168,169,170,171,172,173,174,175,
466      176,177,178,179,180,181,182,183,
467      184,185,186,187,188,189,190,191,
468      192,193,194,195,196,197,198,199,
469      200,201,202,203,204,205,206,207,
470      208,209,210,211,212,213,214,215,
471      216,217,218,219,220,221,222,223,
472      224,225,226,227,228,229,230,231,
473      232,233,234,235,236,237,238,239,
474      240,241,242,243,244,245,246,247,
475      248,249,250,251,252,253,254,255,
476    
477    /* This table is a case flipping table. */
478    
479        0,  1,  2,  3,  4,  5,  6,  7,
480        8,  9, 10, 11, 12, 13, 14, 15,
481       16, 17, 18, 19, 20, 21, 22, 23,
482       24, 25, 26, 27, 28, 29, 30, 31,
483       32, 33, 34, 35, 36, 37, 38, 39,
484       40, 41, 42, 43, 44, 45, 46, 47,
485       48, 49, 50, 51, 52, 53, 54, 55,
486       56, 57, 58, 59, 60, 61, 62, 63,
487       64, 97, 98, 99,100,101,102,103,
488      104,105,106,107,108,109,110,111,
489      112,113,114,115,116,117,118,119,
490      120,121,122, 91, 92, 93, 94, 95,
491       96, 65, 66, 67, 68, 69, 70, 71,
492       72, 73, 74, 75, 76, 77, 78, 79,
493       80, 81, 82, 83, 84, 85, 86, 87,
494       88, 89, 90,123,124,125,126,127,
495      128,129,130,131,132,133,134,135,
496      136,137,138,139,140,141,142,143,
497      144,145,146,147,148,149,150,151,
498      152,153,154,155,156,157,158,159,
499      160,161,162,163,164,165,166,167,
500      168,169,170,171,172,173,174,175,
501      176,177,178,179,180,181,182,183,
502      184,185,186,187,188,189,190,191,
503      192,193,194,195,196,197,198,199,
504      200,201,202,203,204,205,206,207,
505      208,209,210,211,212,213,214,215,
506      216,217,218,219,220,221,222,223,
507      224,225,226,227,228,229,230,231,
508      232,233,234,235,236,237,238,239,
509      240,241,242,243,244,245,246,247,
510      248,249,250,251,252,253,254,255,
511    
512    /* This table contains bit maps for various character classes. Each map is 32
513    bytes long and the bits run from the least significant end of each byte. The
514    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
515    graph, print, punct, and cntrl. Other classes are built from combinations. */
516    
517      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
518      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
521    
522      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
523      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
524      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
525      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
526    
527      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
528      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
529      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
530      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
531    
532      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
534      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
535      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
536    
537      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
538      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
539      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
541    
542      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
543      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
544      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
545      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546    
547      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
548      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
549      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551    
552      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
553      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
554      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
555      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556    
557      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
558      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
559      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
560      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561    
562      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
563      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
564      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566    
567    /* This table identifies various classes of character by individual bits:
568      0x01   white space character
569      0x02   letter
570      0x04   decimal digit
571      0x08   hexadecimal digit
572      0x10   alphanumeric or '_'
573      0x80   regular expression metacharacter or binary zero
574    */
575    
576    else    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
577      {    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
578      int new_buffer_size = 2*buffer_size;    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
579      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
580      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);    0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
581      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);    0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
582      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
583      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
584      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
585      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
586      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
587      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
588      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
589      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
590      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
591      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
592      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
593      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
594      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
595      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
596      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
597      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
598      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
599      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
600      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
601      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
602      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
603      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
604      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
605      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
606      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
607      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
608    
609    /* This is a set of tables that came originally from a Windows user. It seems to
610    be at least an approximation of ISO 8859. In particular, there are characters
611    greater than 128 that are marked as spaces, letters, etc. */
612    
613    static const pcre_uint8 tables1[] = {
614    0,1,2,3,4,5,6,7,
615    8,9,10,11,12,13,14,15,
616    16,17,18,19,20,21,22,23,
617    24,25,26,27,28,29,30,31,
618    32,33,34,35,36,37,38,39,
619    40,41,42,43,44,45,46,47,
620    48,49,50,51,52,53,54,55,
621    56,57,58,59,60,61,62,63,
622    64,97,98,99,100,101,102,103,
623    104,105,106,107,108,109,110,111,
624    112,113,114,115,116,117,118,119,
625    120,121,122,91,92,93,94,95,
626    96,97,98,99,100,101,102,103,
627    104,105,106,107,108,109,110,111,
628    112,113,114,115,116,117,118,119,
629    120,121,122,123,124,125,126,127,
630    128,129,130,131,132,133,134,135,
631    136,137,138,139,140,141,142,143,
632    144,145,146,147,148,149,150,151,
633    152,153,154,155,156,157,158,159,
634    160,161,162,163,164,165,166,167,
635    168,169,170,171,172,173,174,175,
636    176,177,178,179,180,181,182,183,
637    184,185,186,187,188,189,190,191,
638    224,225,226,227,228,229,230,231,
639    232,233,234,235,236,237,238,239,
640    240,241,242,243,244,245,246,215,
641    248,249,250,251,252,253,254,223,
642    224,225,226,227,228,229,230,231,
643    232,233,234,235,236,237,238,239,
644    240,241,242,243,244,245,246,247,
645    248,249,250,251,252,253,254,255,
646    0,1,2,3,4,5,6,7,
647    8,9,10,11,12,13,14,15,
648    16,17,18,19,20,21,22,23,
649    24,25,26,27,28,29,30,31,
650    32,33,34,35,36,37,38,39,
651    40,41,42,43,44,45,46,47,
652    48,49,50,51,52,53,54,55,
653    56,57,58,59,60,61,62,63,
654    64,97,98,99,100,101,102,103,
655    104,105,106,107,108,109,110,111,
656    112,113,114,115,116,117,118,119,
657    120,121,122,91,92,93,94,95,
658    96,65,66,67,68,69,70,71,
659    72,73,74,75,76,77,78,79,
660    80,81,82,83,84,85,86,87,
661    88,89,90,123,124,125,126,127,
662    128,129,130,131,132,133,134,135,
663    136,137,138,139,140,141,142,143,
664    144,145,146,147,148,149,150,151,
665    152,153,154,155,156,157,158,159,
666    160,161,162,163,164,165,166,167,
667    168,169,170,171,172,173,174,175,
668    176,177,178,179,180,181,182,183,
669    184,185,186,187,188,189,190,191,
670    224,225,226,227,228,229,230,231,
671    232,233,234,235,236,237,238,239,
672    240,241,242,243,244,245,246,215,
673    248,249,250,251,252,253,254,223,
674    192,193,194,195,196,197,198,199,
675    200,201,202,203,204,205,206,207,
676    208,209,210,211,212,213,214,247,
677    216,217,218,219,220,221,222,255,
678    0,62,0,0,1,0,0,0,
679    0,0,0,0,0,0,0,0,
680    32,0,0,0,1,0,0,0,
681    0,0,0,0,0,0,0,0,
682    0,0,0,0,0,0,255,3,
683    126,0,0,0,126,0,0,0,
684    0,0,0,0,0,0,0,0,
685    0,0,0,0,0,0,0,0,
686    0,0,0,0,0,0,255,3,
687    0,0,0,0,0,0,0,0,
688    0,0,0,0,0,0,12,2,
689    0,0,0,0,0,0,0,0,
690    0,0,0,0,0,0,0,0,
691    254,255,255,7,0,0,0,0,
692    0,0,0,0,0,0,0,0,
693    255,255,127,127,0,0,0,0,
694    0,0,0,0,0,0,0,0,
695    0,0,0,0,254,255,255,7,
696    0,0,0,0,0,4,32,4,
697    0,0,0,128,255,255,127,255,
698    0,0,0,0,0,0,255,3,
699    254,255,255,135,254,255,255,7,
700    0,0,0,0,0,4,44,6,
701    255,255,127,255,255,255,127,255,
702    0,0,0,0,254,255,255,255,
703    255,255,255,255,255,255,255,127,
704    0,0,0,0,254,255,255,255,
705    255,255,255,255,255,255,255,255,
706    0,2,0,0,255,255,255,255,
707    255,255,255,255,255,255,255,127,
708    0,0,0,0,255,255,255,255,
709    255,255,255,255,255,255,255,255,
710    0,0,0,0,254,255,0,252,
711    1,0,0,248,1,0,0,120,
712    0,0,0,0,254,255,255,255,
713    0,0,128,0,0,0,128,0,
714    255,255,255,255,0,0,0,0,
715    0,0,0,0,0,0,0,128,
716    255,255,255,255,0,0,0,0,
717    0,0,0,0,0,0,0,0,
718    128,0,0,0,0,0,0,0,
719    0,1,1,0,1,1,0,0,
720    0,0,0,0,0,0,0,0,
721    0,0,0,0,0,0,0,0,
722    1,0,0,0,128,0,0,0,
723    128,128,128,128,0,0,128,0,
724    28,28,28,28,28,28,28,28,
725    28,28,0,0,0,0,0,128,
726    0,26,26,26,26,26,26,18,
727    18,18,18,18,18,18,18,18,
728    18,18,18,18,18,18,18,18,
729    18,18,18,128,128,0,128,16,
730    0,26,26,26,26,26,26,18,
731    18,18,18,18,18,18,18,18,
732    18,18,18,18,18,18,18,18,
733    18,18,18,128,128,0,0,0,
734    0,0,0,0,0,1,0,0,
735    0,0,0,0,0,0,0,0,
736    0,0,0,0,0,0,0,0,
737    0,0,0,0,0,0,0,0,
738    1,0,0,0,0,0,0,0,
739    0,0,18,0,0,0,0,0,
740    0,0,20,20,0,18,0,0,
741    0,20,18,0,0,0,0,0,
742    18,18,18,18,18,18,18,18,
743    18,18,18,18,18,18,18,18,
744    18,18,18,18,18,18,18,0,
745    18,18,18,18,18,18,18,18,
746    18,18,18,18,18,18,18,18,
747    18,18,18,18,18,18,18,18,
748    18,18,18,18,18,18,18,0,
749    18,18,18,18,18,18,18,18
750    };
751    
     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  
       {  
       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);  
       exit(1);  
       }  
752    
     memcpy(new_buffer, buffer, buffer_size);  
     memcpy(new_pbuffer, pbuffer, buffer_size);  
753    
     buffer_size = new_buffer_size;  
754    
755      start = new_buffer + (start - buffer);  #ifndef HAVE_STRERROR
756      here = new_buffer + (here - buffer);  /*************************************************
757    *     Provide strerror() for non-ANSI libraries  *
758    *************************************************/
759    
760      free(buffer);  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
761      free(dbuffer);  in their libraries, but can provide the same facility by this simple
762      free(pbuffer);  alternative function. */
763    
764      buffer = new_buffer;  extern int   sys_nerr;
765      dbuffer = new_dbuffer;  extern char *sys_errlist[];
     pbuffer = new_pbuffer;  
     }  
   }  
766    
767  return NULL;  /* Control never gets here */  char *
768    strerror(int n)
769    {
770    if (n < 0 || n >= sys_nerr) return "unknown error number";
771    return sys_errlist[n];
772  }  }
773    #endif /* HAVE_STRERROR */
   
   
   
   
774    
775    
776  /*************************************************  /*************************************************
777  *          Read number from string               *  *         JIT memory callback                    *
778  *************************************************/  *************************************************/
779    
780  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  static pcre_jit_stack* jit_callback(void *arg)
 around with conditional compilation, just do the job by hand. It is only used  
 for unpicking arguments, so just keep it simple.  
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
   
 Returns:        the unsigned long  
 */  
   
 static int  
 get_value(unsigned char *str, unsigned char **endptr)  
781  {  {
782  int result = 0;  return (pcre_jit_stack *)arg;
 while(*str != 0 && isspace(*str)) str++;  
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
783  }  }
784    
785    
   
   
786  /*************************************************  /*************************************************
787  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
788  *************************************************/  *************************************************/
# Line 294  Returns:      >  0 => the number of byte Line 801  Returns:      >  0 => the number of byte
801  #if !defined NOUTF8  #if !defined NOUTF8
802    
803  static int  static int
804  utf82ord(unsigned char *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
805  {  {
806  int c = *utf8bytes++;  int c = *utf8bytes++;
807  int d = c;  int d = c;
# Line 355  Returns:     number of characters placed Line 862  Returns:     number of characters placed
862  #if !defined NOUTF8  #if !defined NOUTF8
863    
864  static int  static int
865  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
866  {  {
867  register int i, j;  register int i, j;
868  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 374  return i + 1; Line 881  return i + 1;
881    
882    
883    
884    #ifdef SUPPORT_PCRE16
885  /*************************************************  /*************************************************
886  *             Print character string             *  *         Convert a string to 16-bit             *
887  *************************************************/  *************************************************/
888    
889  /* Character string printing function. Must handle UTF-8 strings in utf8  /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
890  mode. Yields number of characters printed. If handed a NULL file, just counts  8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
891  chars without printing. */  double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
892    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
893    result is always left in buffer16.
894    
895  static int pchars(unsigned char *p, int length, FILE *f)  Arguments:
896      p          points to a byte string
897      utf        true if UTF-8 (to be converted to UTF-16)
898      len        number of bytes in the string (excluding trailing zero)
899    
900    Returns:     number of 16-bit data items used (excluding trailing zero)
901                 OR -1 if a UTF-8 string is malformed
902    */
903    
904    static int
905    to16(pcre_uint8 *p, int utf, int len)
906    {
907    pcre_uint16 *pp;
908    
909    if (buffer16_size < 2*len + 2)
910      {
911      if (buffer16 != NULL) free(buffer16);
912      buffer16_size = 2*len + 2;
913      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
914      if (buffer16 == NULL)
915        {
916        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
917        exit(1);
918        }
919      }
920    
921    pp = buffer16;
922    
923    if (!utf)
924      {
925      while (len-- > 0) *pp++ = *p++;
926      }
927    
928    else
929      {
930      int c;
931      while (len > 0)
932        {
933        int chlen = utf82ord(p, &c);
934        if (chlen <= 0) return -1;
935        p += chlen;
936        len -= chlen;
937        if (c < 0x10000) *pp++ = c; else
938          {
939          c -= 0x10000;
940          *pp++ = 0xD800 | (c >> 10);
941          *pp++ = 0xDC00 | (c & 0x3ff);
942          }
943        }
944      }
945    
946    *pp = 0;
947    return pp - buffer16;
948    }
949    #endif
950    
951    
952    /*************************************************
953    *        Read or extend an input line            *
954    *************************************************/
955    
956    /* Input lines are read into buffer, but both patterns and data lines can be
957    continued over multiple input lines. In addition, if the buffer fills up, we
958    want to automatically expand it so as to be able to handle extremely large
959    lines that are needed for certain stress tests. When the input buffer is
960    expanded, the other two buffers must also be expanded likewise, and the
961    contents of pbuffer, which are a copy of the input for callouts, must be
962    preserved (for when expansion happens for a data line). This is not the most
963    optimal way of handling this, but hey, this is just a test program!
964    
965    Arguments:
966      f            the file to read
967      start        where in buffer to start (this *must* be within buffer)
968      prompt       for stdin or readline()
969    
970    Returns:       pointer to the start of new data
971                   could be a copy of start, or could be moved
972                   NULL if no data read and EOF reached
973    */
974    
975    static pcre_uint8 *
976    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
977    {
978    pcre_uint8 *here = start;
979    
980    for (;;)
981      {
982      int rlen = (int)(buffer_size - (here - buffer));
983    
984      if (rlen > 1000)
985        {
986        int dlen;
987    
988        /* If libreadline support is required, use readline() to read a line if the
989        input is a terminal. Note that readline() removes the trailing newline, so
990        we must put it back again, to be compatible with fgets(). */
991    
992    #ifdef SUPPORT_LIBREADLINE
993        if (isatty(fileno(f)))
994          {
995          size_t len;
996          char *s = readline(prompt);
997          if (s == NULL) return (here == start)? NULL : start;
998          len = strlen(s);
999          if (len > 0) add_history(s);
1000          if (len > rlen - 1) len = rlen - 1;
1001          memcpy(here, s, len);
1002          here[len] = '\n';
1003          here[len+1] = 0;
1004          free(s);
1005          }
1006        else
1007    #endif
1008    
1009        /* Read the next line by normal means, prompting if the file is stdin. */
1010    
1011          {
1012          if (f == stdin) printf("%s", prompt);
1013          if (fgets((char *)here, rlen,  f) == NULL)
1014            return (here == start)? NULL : start;
1015          }
1016    
1017        dlen = (int)strlen((char *)here);
1018        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1019        here += dlen;
1020        }
1021    
1022      else
1023        {
1024        int new_buffer_size = 2*buffer_size;
1025        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1026        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1027        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1028    
1029        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1030          {
1031          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1032          exit(1);
1033          }
1034    
1035        memcpy(new_buffer, buffer, buffer_size);
1036        memcpy(new_pbuffer, pbuffer, buffer_size);
1037    
1038        buffer_size = new_buffer_size;
1039    
1040        start = new_buffer + (start - buffer);
1041        here = new_buffer + (here - buffer);
1042    
1043        free(buffer);
1044        free(dbuffer);
1045        free(pbuffer);
1046    
1047        buffer = new_buffer;
1048        dbuffer = new_dbuffer;
1049        pbuffer = new_pbuffer;
1050        }
1051      }
1052    
1053    return NULL;  /* Control never gets here */
1054    }
1055    
1056    
1057    
1058    /*************************************************
1059    *          Read number from string               *
1060    *************************************************/
1061    
1062    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1063    around with conditional compilation, just do the job by hand. It is only used
1064    for unpicking arguments, so just keep it simple.
1065    
1066    Arguments:
1067      str           string to be converted
1068      endptr        where to put the end pointer
1069    
1070    Returns:        the converted value, as an int
1071    */
1072    
1073    static int
1074    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1075    {
1076    int result = 0;
1077    while(*str != 0 && isspace(*str)) str++;
1078    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1079    *endptr = str;
1080    return(result);
1081    }
1082    
1083    
1084    
1085    /*************************************************
1086    *             Print one character                *
1087    *************************************************/
1088    
1089    /* Print a single character either literally, or as a hex escape. */
1090    
1091    static int pchar(int c, FILE *f)
1092    {
1093    if (PRINTOK(c))
1094      {
1095      if (f != NULL) fprintf(f, "%c", c);
1096      return 1;
1097      }
1098    
1099    if (c < 0x100)
1100      {
1101      if (use_utf)
1102        {
1103        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1104        return 6;
1105        }
1106      else
1107        {
1108        if (f != NULL) fprintf(f, "\\x%02x", c);
1109        return 4;
1110        }
1111      }
1112    
1113    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1114    return (c <= 0x000000ff)? 6 :
1115           (c <= 0x00000fff)? 7 :
1116           (c <= 0x0000ffff)? 8 :
1117           (c <= 0x000fffff)? 9 : 10;
1118    }
1119    
1120    
1121    
1122    #ifdef SUPPORT_PCRE8
1123    /*************************************************
1124    *         Print 8-bit character string           *
1125    *************************************************/
1126    
1127    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1128    If handed a NULL file, just counts chars without printing. */
1129    
1130    static int pchars(pcre_uint8 *p, int length, FILE *f)
1131  {  {
1132  int c = 0;  int c = 0;
1133  int yield = 0;  int yield = 0;
# Line 390  int yield = 0; Line 1135  int yield = 0;
1135  while (length-- > 0)  while (length-- > 0)
1136    {    {
1137  #if !defined NOUTF8  #if !defined NOUTF8
1138    if (use_utf8)    if (use_utf)
1139      {      {
1140      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1141      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1142        {        {
1143        length -= rc - 1;        length -= rc - 1;
1144        p += rc;        p += rc;
1145        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1146        continue;        continue;
1147        }        }
1148      }      }
1149  #endif  #endif
1150      c = *p++;
1151      yield += pchar(c, f);
1152      }
1153    
1154     /* Not UTF-8, or malformed UTF-8  */  return yield;
1155    }
1156    #endif
1157    
1158    c = *p++;  
1159    if (PRINTHEX(c))  
1160      {  #ifdef SUPPORT_PCRE16
1161      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1162      yield++;  *           Print 16-bit character string        *
1163      }  *************************************************/
1164    else  
1165    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1166    If handed a NULL file, just counts chars without printing. */
1167    
1168    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1169    {
1170    int yield = 0;
1171    
1172    while (length-- > 0)
1173      {
1174      int c = *p++ & 0xffff;
1175    #if !defined NOUTF8
1176      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1177      {      {
1178      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1179      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1180          {
1181          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1182          length--;
1183          p++;
1184          }
1185      }      }
1186    #endif
1187      yield += pchar(c, f);
1188    }    }
1189    
1190  return yield;  return yield;
1191  }  }
1192    #endif
1193    
1194    
1195    
# Line 462  if (callout_extra) Line 1218  if (callout_extra)
1218      else      else
1219        {        {
1220        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1221        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject + cb->offset_vector[i],
1222          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1223        fprintf(f, "\n");        fprintf(f, "\n");
1224        }        }
# Line 475  printed lengths of the substrings. */ Line 1231  printed lengths of the substrings. */
1231    
1232  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1233    
1234  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, cb->start_match, f);
1235  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject + cb->start_match,
1236    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1237    
1238  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1239    
1240  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject + cb->current_position,
1241    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1242    
1243  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 518  fprintf(outfile, "%.*s", (cb->next_item_ Line 1274  fprintf(outfile, "%.*s", (cb->next_item_
1274  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1275  first_callout = 0;  first_callout = 0;
1276    
1277    if (cb->mark != last_callout_mark)
1278      {
1279      fprintf(outfile, "Latest Mark: %s\n",
1280        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1281      last_callout_mark = cb->mark;
1282      }
1283    
1284  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1285    {    {
1286    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 537  return (cb->callout_number != callout_fa Line 1300  return (cb->callout_number != callout_fa
1300  *            Local malloc functions              *  *            Local malloc functions              *
1301  *************************************************/  *************************************************/
1302    
1303  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1304  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1305    show_malloc variable is set only during matching. */
1306    
1307  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1308  {  {
1309  void *block = malloc(size);  void *block = malloc(size);
1310  gotten_store = size;  gotten_store = size;
1311    if (first_gotten_store == 0) first_gotten_store = size;
1312  if (show_malloc)  if (show_malloc)
1313    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1314  return block;  return block;
# Line 556  if (show_malloc) Line 1321  if (show_malloc)
1321  free(block);  free(block);
1322  }  }
1323    
   
1324  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1325    
1326  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 579  free(block); Line 1343  free(block);
1343  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1344  *************************************************/  *************************************************/
1345    
1346  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1347    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1348    value, but the code is defensive. */
1349    
1350  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1351  {  {
1352  int rc;  int rc;
1353  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1354    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1355    #ifdef SUPPORT_PCRE16
1356      rc = pcre16_fullinfo(re, study, option, ptr);
1357    #else
1358      rc = PCRE_ERROR_BADMODE;
1359    #endif
1360    else
1361    #ifdef SUPPORT_PCRE8
1362      rc = pcre_fullinfo(re, study, option, ptr);
1363    #else
1364      rc = PCRE_ERROR_BADMODE;
1365    #endif
1366    
1367    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1368      use_pcre16? "16" : "", option);
1369  }  }
1370    
1371    
1372    
1373  /*************************************************  /*************************************************
1374  *         Byte flipping function                 *  *             Swap byte functions                *
1375  *************************************************/  *************************************************/
1376    
1377  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16
1378  byteflip(unsigned long int value, int n)  and pcre_uint32 value.
1379    
1380    Arguments:
1381      value        any number
1382    
1383    Returns:       the byte swapped value
1384    */
1385    
1386    static pcre_uint32
1387    swap_uint32(pcre_uint32 value)
1388  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1389  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1390         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1391         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1392         ((value & 0xff000000) >> 24);         (value >> 24);
1393    }
1394    
1395    static pcre_uint16
1396    swap_uint16(pcre_uint16 value)
1397    {
1398    return (value >> 8) | (value << 8);
1399  }  }
1400    
1401    
1402    
1403    /*************************************************
1404    *        Flip bytes in a compiled pattern        *
1405    *************************************************/
1406    
1407    /* This function is called if the 'F' option was present on a pattern that is
1408    to be written to a file. We flip the bytes of all the integer fields in the
1409    regex data block and the study block. In 16-bit mode this also flips relevant
1410    bytes in the pattern itself. This is to make it possible to test PCRE's
1411    ability to reload byte-flipped patterns, e.g. those compiled on a different
1412    architecture. */
1413    
1414    static void
1415    regexflip(pcre *ere, pcre_extra *extra)
1416    {
1417    real_pcre *re = (real_pcre *)ere;
1418    int op;
1419    
1420    #ifdef SUPPORT_PCRE16
        /* ptr and length are taken from the header fields here, before those
        fields are byte-swapped below. The first pass of the main loop swaps
        "length" 16-bit units starting at ptr, that is, the name table. */
1421    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1422    int length = re->name_count * re->name_entry_size;
1423    #ifdef SUPPORT_UTF
1424    BOOL utf = (re->options & PCRE_UTF16) != 0;
        /* Set by the switch below when the opcode just read is one of the
        character/repeat opcodes listed there and utf is true; it makes the
        next pass check for a following surrogate unit. */
1425    BOOL utf16_char = FALSE;
1426    #endif /* SUPPORT_UTF */
1427    #endif /* SUPPORT_PCRE16 */
1428    
1429    /* Always flip the bytes in the main data block and study blocks. */
1430    
1431    re->magic_number = REVERSED_MAGIC_NUMBER;
1432    re->size = swap_uint32(re->size);
1433    re->options = swap_uint32(re->options);
1434    re->flags = swap_uint16(re->flags);
1435    re->top_bracket = swap_uint16(re->top_bracket);
1436    re->top_backref = swap_uint16(re->top_backref);
1437    re->first_char = swap_uint16(re->first_char);
1438    re->req_char = swap_uint16(re->req_char);
1439    re->name_table_offset = swap_uint16(re->name_table_offset);
1440    re->name_entry_size = swap_uint16(re->name_entry_size);
1441    re->name_count = swap_uint16(re->name_count);
1442    
1443    if (extra != NULL)
1444      {
1445      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1446      rsd->size = swap_uint32(rsd->size);
1447      rsd->flags = swap_uint32(rsd->flags);
1448      rsd->minlength = swap_uint32(rsd->minlength);
1449      }
1450    
1451    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1452    in the name table, if present, and then in the pattern itself. */
1453    
1454    #ifdef SUPPORT_PCRE16
1455    if (!use_pcre16) return;
1456    
        /* Each pass: (1) swap the "length" units left pending by the previous
        pass, (2) read and swap the next opcode, (3) let the switch decide how
        many operand units the next pass must swap. The loop ends only via the
        return on OP_END. */
1457    while(TRUE)
1458      {
1459      /* Swap previous characters. */
1460      while (length-- > 0)
1461        {
1462        *ptr = swap_uint16(*ptr);
1463        ptr++;
1464        }
1465    #ifdef SUPPORT_UTF
1466      if (utf16_char)
1467        {
          /* NOTE(review): ptr[-1] was byte-swapped by the loop just above, so
          this surrogate test is applied to the swapped unit - confirm that is
          what is intended. */
1468        if ((ptr[-1] & 0xfc00) == 0xd800)
1469          {
1470          /* We know that there is only one extra character in UTF-16. */
1471          *ptr = swap_uint16(*ptr);
1472          ptr++;
1473          }
1474        }
1475      utf16_char = FALSE;
1476    #endif /* SUPPORT_UTF */
1477    
1478      /* Get next opcode. */
1479    
1480      length = 0;
          /* op keeps the value as read; the stored copy is swapped and ptr
          moves on to the first operand unit. */
1481      op = *ptr;
1482      *ptr++ = swap_uint16(op);
1483    
1484      switch (op)
1485        {
1486        case OP_END:
1487        return;
1488    
1489        case OP_CHAR:
1490        case OP_CHARI:
1491        case OP_NOT:
1492        case OP_NOTI:
1493        case OP_STAR:
1494        case OP_MINSTAR:
1495        case OP_PLUS:
1496        case OP_MINPLUS:
1497        case OP_QUERY:
1498        case OP_MINQUERY:
1499        case OP_UPTO:
1500        case OP_MINUPTO:
1501        case OP_EXACT:
1502        case OP_POSSTAR:
1503        case OP_POSPLUS:
1504        case OP_POSQUERY:
1505        case OP_POSUPTO:
1506        case OP_STARI:
1507        case OP_MINSTARI:
1508        case OP_PLUSI:
1509        case OP_MINPLUSI:
1510        case OP_QUERYI:
1511        case OP_MINQUERYI:
1512        case OP_UPTOI:
1513        case OP_MINUPTOI:
1514        case OP_EXACTI:
1515        case OP_POSSTARI:
1516        case OP_POSPLUSI:
1517        case OP_POSQUERYI:
1518        case OP_POSUPTOI:
1519        case OP_NOTSTAR:
1520        case OP_NOTMINSTAR:
1521        case OP_NOTPLUS:
1522        case OP_NOTMINPLUS:
1523        case OP_NOTQUERY:
1524        case OP_NOTMINQUERY:
1525        case OP_NOTUPTO:
1526        case OP_NOTMINUPTO:
1527        case OP_NOTEXACT:
1528        case OP_NOTPOSSTAR:
1529        case OP_NOTPOSPLUS:
1530        case OP_NOTPOSQUERY:
1531        case OP_NOTPOSUPTO:
1532        case OP_NOTSTARI:
1533        case OP_NOTMINSTARI:
1534        case OP_NOTPLUSI:
1535        case OP_NOTMINPLUSI:
1536        case OP_NOTQUERYI:
1537        case OP_NOTMINQUERYI:
1538        case OP_NOTUPTOI:
1539        case OP_NOTMINUPTOI:
1540        case OP_NOTEXACTI:
1541        case OP_NOTPOSSTARI:
1542        case OP_NOTPOSPLUSI:
1543        case OP_NOTPOSQUERYI:
1544        case OP_NOTPOSUPTOI:
1545    #ifdef SUPPORT_UTF
1546        if (utf) utf16_char = TRUE;
1547    #endif
            /* Operand units to swap on the next pass: the table entry for
            this opcode minus the opcode unit itself. */
1548        length = OP_lengths16[op] - 1;
1549        break;
1550    
1551        case OP_CLASS:
1552        case OP_NCLASS:
1553        /* Skip the character bit map. */
            /* The 32-byte map is stepped over without being swapped. */
1554        ptr += 32/sizeof(pcre_uint16);
1555        length = 0;
1556        break;
1557    
1558        case OP_XCLASS:
1559        /* Reverse the size of the XCLASS instance. */
1560        ptr++;
1561        *ptr = swap_uint16(*ptr);
1562        if (LINK_SIZE > 1)
1563          {
1564          /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1565          ptr++;
1566          *ptr = swap_uint16(*ptr);
1567          }
1568        ptr++;
1569    
            /* NOTE(review): the units read through ptr[-LINK_SIZE] here are the
            ones that were byte-swapped a few lines above - confirm that the
            length computed from them is the intended one. */
1570        if (LINK_SIZE > 1)
1571          length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1572            (1 + LINK_SIZE + 1);
1573        else
1574          length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1575    
            /* op is reused for the unit at ptr, which is tested against
            XCL_MAP below. ptr is not advanced past this unit here. */
1576        op = *ptr;
1577        *ptr = swap_uint16(op);
1578        if ((op & XCL_MAP) != 0)
1579          {
1580          /* Skip the character bit map. */
1581          ptr += 32/sizeof(pcre_uint16);
1582          length -= 32/sizeof(pcre_uint16);
1583          }
1584        break;
1585    
1586        default:
            /* Any other opcode: all of its operand units are swapped as plain
            16-bit values on the next pass. */
1587        length = OP_lengths16[op] - 1;
1588        break;
1589        }
1590      }
1591    /* Control should never reach here in 16 bit mode. */
1592    #endif /* SUPPORT_PCRE16 */
1593    }
1594    
1595    
1596    
1597  /*************************************************  /*************************************************
1598  *        Check match or recursion limit          *  *        Check match or recursion limit          *
1599  *************************************************/  *************************************************/
1600    
1601  static int  static int
1602  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1603    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1604    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1605  {  {
# Line 627  for (;;) Line 1614  for (;;)
1614    {    {
1615    *limit = mid;    *limit = mid;
1616    
1617    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1618      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1619    
1620    if (count == errnumber)    if (count == errnumber)
# Line 659  return count; Line 1646  return count;
1646    
1647    
1648  /*************************************************  /*************************************************
1649    *         Case-independent strncmp() function    *
1650    *************************************************/
1651    
1652    /*
1653    Arguments:
1654      s         first string
1655      t         second string
1656      n         number of characters to compare
1657    
1658    Returns:    < 0, = 0, or > 0, according to the comparison
1659    */
1660    
1661    static int
1662    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1663    {
1664    while (n--)
1665      {
1666      int c = tolower(*s++) - tolower(*t++);
1667      if (c) return c;
1668      }
1669    return 0;
1670    }
1671    
1672    
1673    
1674    /*************************************************
1675  *         Check newline indicator                *  *         Check newline indicator                *
1676  *************************************************/  *************************************************/
1677    
1678  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1679  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  a message and return 0 if there is no match.
1680    
1681  Arguments:  Arguments:
1682    p           points after the leading '<'    p           points after the leading '<'
# Line 673  Returns:      appropriate PCRE_NEWLINE_x Line 1686  Returns:      appropriate PCRE_NEWLINE_x
1686  */  */
1687    
1688  static int  static int
1689  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1690  {  {
1691  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1692  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1693  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1694  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1695    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1696    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1697    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1698  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1699  return 0;  return 0;
1700  }  }
# Line 692  return 0; Line 1708  return 0;
1708  static void  static void
1709  usage(void)  usage(void)
1710  {  {
1711  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1712    printf("Input and output default to stdin and stdout.\n");
1713    #ifdef SUPPORT_LIBREADLINE
1714    printf("If input is a terminal, readline() is used to read from it.\n");
1715    #else
1716    printf("This version of pcretest is not linked with readline().\n");
1717    #endif
1718    printf("\nOptions:\n");
1719    #ifdef SUPPORT_PCRE16
1720    printf("  -16      use 16-bit interface\n");
1721    #endif
1722  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1723  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1724  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 701  printf("  -dfa     force DFA matching fo Line 1727  printf("  -dfa     force DFA matching fo
1727  #endif  #endif
1728  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
1729  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
1730           "  -M       find MATCH_LIMIT minimum for each subject\n"
1731         "  -m       output memory used information\n"         "  -m       output memory used information\n"
1732         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
1733  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 708  printf("  -p       use POSIX interface\n Line 1735  printf("  -p       use POSIX interface\n
1735  #endif  #endif
1736  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1737  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1738  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1739           "  -s+      force each pattern to be studied, using JIT if available\n"
1740         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1741  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1742  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 730  int main(int argc, char **argv) Line 1758  int main(int argc, char **argv)
1758  FILE *infile = stdin;  FILE *infile = stdin;
1759  int options = 0;  int options = 0;
1760  int study_options = 0;  int study_options = 0;
1761    int default_find_match_limit = FALSE;
1762  int op = 1;  int op = 1;
1763  int timeit = 0;  int timeit = 0;
1764  int timeitm = 0;  int timeitm = 0;
1765  int showinfo = 0;  int showinfo = 0;
1766  int showstore = 0;  int showstore = 0;
1767    int force_study = -1;
1768    int force_study_options = 0;
1769  int quiet = 0;  int quiet = 0;
1770  int size_offsets = 45;  int size_offsets = 45;
1771  int size_offsets_max;  int size_offsets_max;
# Line 748  int all_use_dfa = 0; Line 1779  int all_use_dfa = 0;
1779  int yield = 0;  int yield = 0;
1780  int stack_size;  int stack_size;
1781    
1782    pcre_jit_stack *jit_stack = NULL;
1783    
1784  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1785  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1786    
1787  uschar copynames[1024];  pcre_uchar copynames[1024];
1788  uschar getnames[1024];  pcre_uchar getnames[1024];
   
 uschar *copynamesptr;  
 uschar *getnamesptr;  
1789    
1790  /* Get buffers from malloc() so that Electric Fence will check their misuse  pcre_uchar *copynamesptr;
1791  when I am debugging. They grow automatically when very long lines are read. */  pcre_uchar *getnamesptr;
1792    
1793  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
1794  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
1795  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
1796    
1797    buffer = (pcre_uint8 *)malloc(buffer_size);
1798    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1799    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1800    
1801  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1802    
# Line 781  _setmode( _fileno( stdout ), _O_BINARY ) Line 1815  _setmode( _fileno( stdout ), _O_BINARY )
1815    
1816  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1817    {    {
1818    unsigned char *endptr;    pcre_uint8 *endptr;
1819    
1820      if (strcmp(argv[op], "-m") == 0) showstore = 1;
1821      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1822      else if (strcmp(argv[op], "-s+") == 0)
1823        {
1824        force_study = 1;
1825        force_study_options = PCRE_STUDY_JIT_COMPILE;
1826        }
1827    #ifdef SUPPORT_PCRE16
1828      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1829    #endif
1830    
   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)  
     showstore = 1;  
1831    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1832    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1833    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1834    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1835      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1836  #if !defined NODFA  #if !defined NODFA
1837    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1838  #endif  #endif
1839    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1840        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1841          *endptr == 0))          *endptr == 0))
1842      {      {
1843      op++;      op++;
# Line 803  while (argc > 1 && argv[op][0] == '-') Line 1847  while (argc > 1 && argv[op][0] == '-')
1847      {      {
1848      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
1849      int temp;      int temp;
1850      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1851                       *endptr == 0))                       *endptr == 0))
1852        {        {
1853        timeitm = temp;        timeitm = temp;
# Line 814  while (argc > 1 && argv[op][0] == '-') Line 1858  while (argc > 1 && argv[op][0] == '-')
1858      if (both) timeit = timeitm;      if (both) timeit = timeitm;
1859      }      }
1860    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1861        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1862          *endptr == 0))          *endptr == 0))
1863      {      {
1864  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1865      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1866      exit(1);      exit(1);
1867  #else  #else
# Line 841  while (argc > 1 && argv[op][0] == '-') Line 1885  while (argc > 1 && argv[op][0] == '-')
1885    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1886      {      {
1887      int rc;      int rc;
1888        unsigned long int lrc;
1889      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1890      printf("Compiled with\n");      printf("Compiled with\n");
1891    
1892    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1893    are set, either both UTFs are supported or both are not supported. */
1894    
1895    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1896        printf("  8-bit and 16-bit support\n");
1897        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1898        if (rc)
1899          printf("  UTF-8 and UTF-16 support\n");
1900        else
1901          printf("  No UTF-8 or UTF-16 support\n");
1902    #elif defined SUPPORT_PCRE8
1903        printf("  8-bit support only\n");
1904      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1905      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1906    #else
1907        printf("  16-bit support only\n");
1908        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1909        printf("  %sUTF-16 support\n", rc? "" : "No ");
1910    #endif
1911    
1912      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1913      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1914        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1915        if (rc)
1916          printf("  Just-in-time compiler support\n");
1917        else
1918          printf("  No just-in-time compiler support\n");
1919      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1920      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1921        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1922        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1923          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1924          (rc == -2)? "ANYCRLF" :
1925        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1926        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1927        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1928                                         "all Unicode newlines");
1929      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1930      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1931      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1932      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1933      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1934      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1935      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1936      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1937      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1938      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1939      goto EXIT;      goto EXIT;
# Line 887  offsets = (int *)malloc(size_offsets_max Line 1962  offsets = (int *)malloc(size_offsets_max
1962  if (offsets == NULL)  if (offsets == NULL)
1963    {    {
1964    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1965      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1966    yield = 1;    yield = 1;
1967    goto EXIT;    goto EXIT;
1968    }    }
# Line 918  if (argc > 2) Line 1993  if (argc > 2)
1993    
1994  /* Set alternative malloc function */  /* Set alternative malloc function */
1995    
1996    #ifdef SUPPORT_PCRE8
1997  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1998  pcre_free = new_free;  pcre_free = new_free;
1999  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2000  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2001    #endif
2002    
2003    #ifdef SUPPORT_PCRE16
2004    pcre16_malloc = new_malloc;
2005    pcre16_free = new_free;
2006    pcre16_stack_malloc = stack_malloc;
2007    pcre16_stack_free = stack_free;
2008    #endif
2009    
2010  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2011    
# Line 940  while (!done) Line 2024  while (!done)
2024  #endif  #endif
2025    
2026    const char *error;    const char *error;
2027    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2028    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2029    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2030      const pcre_uint8 *tables = NULL;
2031    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2032    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2033      int do_allcaps = 0;
2034      int do_mark = 0;
2035    int do_study = 0;    int do_study = 0;
2036      int no_force_study = 0;
2037    int do_debug = debug;    int do_debug = debug;
   int debug_lengths = 1;  
2038    int do_G = 0;    int do_G = 0;
2039    int do_g = 0;    int do_g = 0;
2040    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2041    int do_showrest = 0;    int do_showrest = 0;
2042      int do_showcaprest = 0;
2043    int do_flip = 0;    int do_flip = 0;
2044    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2045    
2046    use_utf8 = 0;    use_utf = 0;
2047      debug_lengths = 1;
2048    
2049    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
2050    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2051    fflush(outfile);    fflush(outfile);
2052    
# Line 971  while (!done) Line 2059  while (!done)
2059    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2060      {      {
2061      unsigned long int magic, get_options;      unsigned long int magic, get_options;
2062      uschar sbuf[8];      pcre_uint8 sbuf[8];
2063      FILE *f;      FILE *f;
2064    
2065      p++;      p++;
# Line 994  while (!done) Line 2082  while (!done)
2082        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2083    
2084      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
2085      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2086    
2087      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2088    
2089      magic = ((real_pcre *)re)->magic_number;      magic = ((real_pcre *)re)->magic_number;
2090      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2091        {        {
2092        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2093          {          {
2094          do_flip = 1;          do_flip = 1;
2095          }          }
# Line 1013  while (!done) Line 2101  while (!done)
2101          }          }
2102        }        }
2103    
2104      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2105        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
2106    
2107      /* Need to know if UTF-8 for printing data strings */      /* Now see if there is any following study data. */
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2108    
2109      if (true_study_size != 0)      if (true_study_size != 0)
2110        {        {
# Line 1037  while (!done) Line 2120  while (!done)
2120          {          {
2121          FAIL_READ:          FAIL_READ:
2122          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2123          if (extra != NULL) new_free(extra);          if (extra != NULL)
2124              {
2125              PCRE_FREE_STUDY(extra);
2126              }
2127          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2128          fclose(f);          fclose(f);
2129          continue;          continue;
# Line 1047  while (!done) Line 2133  while (!done)
2133        }        }
2134      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2135    
2136        /* Flip the necessary bytes. */
2137        if (do_flip)
2138          {
2139          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2140          }
2141    
2142        /* Need to know if UTF-8 for printing data strings */
2143    
2144        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2145        use_utf = (get_options & PCRE_UTF8) != 0;
2146    
2147      fclose(f);      fclose(f);
2148      goto SHOW_INFO;      goto SHOW_INFO;
2149      }      }
2150    
2151    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2152    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2153    
2154    delimiter = *p++;    delimiter = *p++;
2155    
2156    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2157      {      {
2158      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2159      goto SKIP_DATA;      goto SKIP_DATA;
2160      }      }
2161    
2162    pp = p;    pp = p;
2163    poffset = p - buffer;    poffset = (int)(p - buffer);
2164    
2165    for(;;)    for(;;)
2166      {      {
# Line 1074  while (!done) Line 2171  while (!done)
2171        pp++;        pp++;
2172        }        }
2173      if (*pp != 0) break;      if (*pp != 0) break;
2174      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
2175        {        {
2176        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2177        done = 1;        done = 1;
# Line 1118  while (!done) Line 2214  while (!done)
2214        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2215        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2216    
2217        case '+': do_showrest = 1; break;        case '+':
2218          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2219          break;
2220    
2221          case '=': do_allcaps = 1; break;
2222        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2223        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2224        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1128  while (!done) Line 2228  while (!done)
2228        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2229        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2230        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2231          case 'K': do_mark = 1; break;
2232        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2233        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2234    
# Line 1135  while (!done) Line 2236  while (!done)
2236        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2237  #endif  #endif
2238    
2239        case 'S': do_study = 1; break;        case 'S':
2240          if (do_study == 0)
2241            {
2242            do_study = 1;
2243            if (*pp == '+')
2244              {
2245              study_options |= PCRE_STUDY_JIT_COMPILE;
2246              pp++;
2247              }
2248            }
2249          else
2250            {
2251            do_study = 0;
2252            no_force_study = 1;
2253            }
2254          break;
2255    
2256        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2257          case 'W': options |= PCRE_UCP; break;
2258        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2259          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2260        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2261        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2262        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2263    
2264          case 'T':
2265          switch (*pp++)
2266            {
2267            case '0': tables = tables0; break;
2268            case '1': tables = tables1; break;
2269    
2270            case '\r':
2271            case '\n':
2272            case ' ':
2273            case 0:
2274            fprintf(outfile, "** Missing table number after /T\n");
2275            goto SKIP_DATA;
2276    
2277            default:
2278            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2279            goto SKIP_DATA;
2280            }
2281          break;
2282    
2283        case 'L':        case 'L':
2284        ppp = pp;        ppp = pp;
2285        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1167  while (!done) Line 2305  while (!done)
2305    
2306        case '<':        case '<':
2307          {          {
2308          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2309          if (x == 0) goto SKIP_DATA;            {
2310          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2311          while (*pp++ != '>');            pp += 3;
2312              }
2313            else
2314              {
2315              int x = check_newline(pp, outfile);
2316              if (x == 0) goto SKIP_DATA;
2317              options |= x;
2318              while (*pp++ != '>');
2319              }
2320          }          }
2321        break;        break;
2322    
# Line 1187  while (!done) Line 2333  while (!done)
2333    
2334    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2335    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2336    local character tables. */    local character tables. Neither does it have 16-bit support. */
2337    
2338  #if !defined NOPOSIX  #if !defined NOPOSIX
2339    if (posix || do_posix)    if (posix || do_posix)
# Line 1200  while (!done) Line 2346  while (!done)
2346      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2347      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2348      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2349        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2350        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2351    
2352        first_gotten_store = 0;
2353      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2354    
2355      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1220  while (!done) Line 2369  while (!done)
2369  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2370    
2371      {      {
2372        unsigned long int get_options;
2373    
2374        /* In 16-bit mode, convert the input. */
2375    
2376    #ifdef SUPPORT_PCRE16
2377        if (use_pcre16)
2378          {
2379          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2380            {
2381            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2382              "converted to UTF-16\n");
2383            goto SKIP_DATA;
2384            }
2385          p = (pcre_uint8 *)buffer16;
2386          }
2387    #endif
2388    
2389        /* Compile many times when timing */
2390    
2391      if (timeit > 0)      if (timeit > 0)
2392        {        {
2393        register int i;        register int i;
# Line 1227  while (!done) Line 2395  while (!done)
2395        clock_t start_time = clock();        clock_t start_time = clock();
2396        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2397          {          {
2398          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2399          if (re != NULL) free(re);          if (re != NULL) free(re);
2400          }          }
2401        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1236  while (!done) Line 2404  while (!done)
2404            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2405        }        }
2406    
2407      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2408        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2409    
2410      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2411      if non-interactive. */      if non-interactive. */
# Line 1249  while (!done) Line 2418  while (!done)
2418          {          {
2419          for (;;)          for (;;)
2420            {            {
2421            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2422              {              {
2423              done = 1;              done = 1;
2424              goto CONTINUE;              goto CONTINUE;
# Line 1263  while (!done) Line 2432  while (!done)
2432        goto CONTINUE;        goto CONTINUE;
2433        }        }
2434    
2435      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2436      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2437      returns only limited data. Check that it agrees with the newer one. */      lines. */
2438    
2439      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2440        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2441    
2442      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2443      and remember the store that was got. */      and remember the store that was got. */
2444    
2445      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
2446      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2447    
2448        /* Output code size information if requested */
2449    
2450        if (log_store)
2451          fprintf(outfile, "Memory allocation (code space): %d\n",
2452            (int)(first_gotten_store -
2453                  sizeof(real_pcre) -
2454                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2455    
2456      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
2457      help with the matching. */      help with the matching, unless the pattern has the SS option, which
2458        suppresses the effect of /S (used for a few test patterns where studying is
2459        never sensible). */
2460    
2461      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
2462        {        {
2463        if (timeit > 0)        if (timeit > 0)
2464          {          {
# Line 1290  while (!done) Line 2466  while (!done)
2466          clock_t time_taken;          clock_t time_taken;
2467          clock_t start_time = clock();          clock_t start_time = clock();
2468          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2469            extra = pcre_study(re, study_options, &error);            {
2470              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2471              }
2472          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2473          if (extra != NULL) free(extra);          if (extra != NULL)
2474              {
2475              PCRE_FREE_STUDY(extra);
2476              }
2477          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2478            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2479              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2480          }          }
2481        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2482        if (error != NULL)        if (error != NULL)
2483          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2484        else if (extra != NULL)        else if (extra != NULL)
2485            {
2486          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2487            if (log_store)
2488              {
2489              size_t jitsize;
2490              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2491              if (jitsize != 0)
2492                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2493              }
2494            }
2495        }        }
2496    
2497      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
2498    
2499      if (do_flip)      if (do_mark)
2500        {        {
2501        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));  
   
       if (extra != NULL)  
2502          {          {
2503          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2504          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
2505          }          }
2506          extra->mark = &markptr;
2507          extra->flags |= PCRE_EXTRA_MARK;
2508        }        }
2509    
2510      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
2511    
2512      SHOW_INFO:      SHOW_INFO:
2513    
2514      if (do_debug)      if (do_debug)
2515        {        {
2516        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2517    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2518          if (use_pcre16)
2519            pcre16_printint(re, outfile, debug_lengths);
2520          else
2521            pcre_printint(re, outfile, debug_lengths);
2522    #elif defined SUPPORT_PCRE8
2523        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
2524    #else
2525          pcre16_printint(re, outfile, debug_lengths);
2526    #endif
2527        }        }
2528    
2529        /* We already have the options in get_options (see above) */
2530    
2531      if (do_showinfo)      if (do_showinfo)
2532        {        {
2533        unsigned long int get_options, all_options;        unsigned long int all_options;
2534  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2535        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2536  #endif  #endif
2537        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2538            hascrorlf;
2539        int nameentrysize, namecount;        int nameentrysize, namecount;
2540        const uschar *nametable;        const pcre_uchar *nametable;
2541    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2542        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2543        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2544        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1362  while (!done) Line 2547  while (!done)
2547        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2548        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2549        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2550          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2551          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2552          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2553    
2554          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2555          that it gives the same results as the new function. */
2556    
2557  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2558        old_count = pcre_info(re, &old_options, &old_first_char);        if (!use_pcre16)
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
2559          {          {
2560          if (old_count != count) fprintf(outfile,          old_count = pcre_info(re, &old_options, &old_first_char);
2561            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (count < 0) fprintf(outfile,
2562              old_count);            "Error %d from pcre_info()\n", count);
2563            else
2564          if (old_first_char != first_char) fprintf(outfile,            {
2565            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            if (old_count != count) fprintf(outfile,
2566              first_char, old_first_char);              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2567                  old_count);
2568          if (old_options != (int)get_options) fprintf(outfile,  
2569            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            if (old_first_char != first_char) fprintf(outfile,
2570              get_options, old_options);              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2571                  first_char, old_first_char);
2572    
2573              if (old_options != (int)get_options) fprintf(outfile,
2574                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2575                  get_options, old_options);
2576              }
2577          }          }
2578  #endif  #endif
2579    
# Line 1403  while (!done) Line 2597  while (!done)
2597            }            }
2598          }          }
2599    
2600        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2601        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2602    
2603        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2604        if (do_flip)        if (do_flip) all_options = swap_uint32(all_options);
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
2605    
2606        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2607          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2608            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2609            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2610            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2611            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2612            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2613            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2614              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2615              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2616            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2617            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2618            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2619            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2620            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2621            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2622              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2623              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2624            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2625    
2626          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2627    
2628        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
2629          {          {
2630          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1445  while (!done) Line 2639  while (!done)
2639          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
2640          break;          break;
2641    
2642            case PCRE_NEWLINE_ANYCRLF:
2643            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2644            break;
2645    
2646          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
2647          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
2648          break;          break;
# Line 1463  while (!done) Line 2661  while (!done)
2661          }          }
2662        else        else
2663          {          {
2664          int ch = first_char & 255;          const char *caseless =
2665          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2666            "" : " (caseless)";            "" : " (caseless)";
2667          if (PRINTHEX(ch))  
2668            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
2669              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2670          else          else
2671            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
2672              fprintf(outfile, "First char = ");
2673              pchar(first_char, outfile);
2674              fprintf(outfile, "%s\n", caseless);
2675              }
2676          }          }
2677    
2678        if (need_char < 0)        if (need_char < 0)
# Line 1478  while (!done) Line 2681  while (!done)
2681          }          }
2682        else        else
2683          {          {
2684          int ch = need_char & 255;          const char *caseless =
2685          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2686            "" : " (caseless)";            "" : " (caseless)";
2687          if (PRINTHEX(ch))  
2688            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
2689              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2690          else          else
2691            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2692          }          }
2693    
2694        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2695        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2696        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2697        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2698          information unless -i or -d was also present. This means that, except
2699          when auto-callouts are involved, the output from runs with and without
2700          -s should be identical. */
2701    
2702        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2703          {          {
2704          if (extra == NULL)          if (extra == NULL)
2705            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2706          else          else
2707            {            {
2708            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2709            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2710    
2711              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2712              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2713    
2714              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2715            if (start_bits == NULL)            if (start_bits == NULL)
2716              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2717            else            else
2718              {              {
2719              int i;              int i;
# Line 1517  while (!done) Line 2728  while (!done)
2728                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2729                    c = 2;                    c = 2;
2730                    }                    }
2731                  if (PRINTHEX(i) && i != ' ')                  if (PRINTOK(i) && i != ' ')
2732                    {                    {
2733                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2734                    c += 2;                    c += 2;
# Line 1532  while (!done) Line 2743  while (!done)
2743              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2744              }              }
2745            }            }
2746    
2747            /* Show this only if the JIT was set by /S, not by -s. */
2748    
2749            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2750              {
2751              int jit;
2752              new_info(re, extra, PCRE_INFO_JIT, &jit);
2753              if (jit)
2754                fprintf(outfile, "JIT study was successful\n");
2755              else
2756    #ifdef SUPPORT_JIT
2757                fprintf(outfile, "JIT study was not successful\n");
2758    #else
2759                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2760    #endif
2761              }
2762          }          }
2763        }        }
2764    
# Line 1548  while (!done) Line 2775  while (!done)
2775          }          }
2776        else        else
2777          {          {
2778          uschar sbuf[8];          pcre_uint8 sbuf[8];
2779          sbuf[0] = (true_size >> 24)  & 255;  
2780          sbuf[1] = (true_size >> 16)  & 255;          if (do_flip) regexflip(re, extra);
2781          sbuf[2] = (true_size >>  8)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2782          sbuf[3] = (true_size)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2783            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2784          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
2785          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2786          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2787          sbuf[7] = (true_study_size)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2788            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2789    
2790          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2791              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1566  while (!done) Line 2794  while (!done)
2794            }            }
2795          else          else
2796            {            {
2797            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2798    
2799              /* If there is study data, write it. */
2800    
2801            if (extra != NULL)            if (extra != NULL)
2802              {              {
2803              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1576  while (!done) Line 2807  while (!done)
2807                  strerror(errno));                  strerror(errno));
2808                }                }
2809              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2810              }              }
2811            }            }
2812          fclose(f);          fclose(f);
2813          }          }
2814    
2815        new_free(re);        new_free(re);
2816        if (extra != NULL) new_free(extra);        if (extra != NULL)
2817        if (tables != NULL) new_free((void *)tables);          {
2818            PCRE_FREE_STUDY(extra);
2819            }
2820          if (locale_set)
2821            {
2822            new_free((void *)tables);
2823            setlocale(LC_CTYPE, "C");
2824            locale_set = 0;
2825            }
2826        continue;  /* With next regex */        continue;  /* With next regex */
2827        }        }
2828      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1593  while (!done) Line 2831  while (!done)
2831    
2832    for (;;)    for (;;)
2833      {      {
2834      uschar *q;      pcre_uint8 *q;
2835      uschar *bptr = dbuffer;      pcre_uint8 *bptr;
2836      int *use_offsets = offsets;      int *use_offsets = offsets;
2837      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2838      int callout_data = 0;      int callout_data = 0;
2839      int callout_data_set = 0;      int callout_data_set = 0;
2840      int count, c;      int count, c;
2841      int copystrings = 0;      int copystrings = 0;
2842      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2843      int getstrings = 0;      int getstrings = 0;
2844      int getlist = 0;      int getlist = 0;
2845      int gmatched = 0;      int gmatched = 0;
2846      int start_offset = 0;      int start_offset = 0;
2847        int start_offset_sign = 1;
2848      int g_notempty = 0;      int g_notempty = 0;
2849      int use_dfa = 0;      int use_dfa = 0;
2850    
# Line 1619  while (!done) Line 2858  while (!done)
2858    
2859      pcre_callout = callout;      pcre_callout = callout;
2860      first_callout = 1;      first_callout = 1;
2861        last_callout_mark = NULL;
2862      callout_extra = 0;      callout_extra = 0;
2863      callout_count = 0;      callout_count = 0;
2864      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1631  while (!done) Line 2871  while (!done)
2871      len = 0;      len = 0;
2872      for (;;)      for (;;)
2873        {        {
2874        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
2875          {          {
2876          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2877              {
2878              fprintf(outfile, "\n");
2879              break;
2880              }
2881          done = 1;          done = 1;
2882          goto CONTINUE;          goto CONTINUE;
2883          }          }
# Line 1650  while (!done) Line 2893  while (!done)
2893      p = buffer;      p = buffer;
2894      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2895    
2896      q = dbuffer;      bptr = q = dbuffer;
2897      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2898        {        {
2899        int i = 0;        int i = 0;
# Line 1674  while (!done) Line 2917  while (!done)
2917            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2918    
2919  #if !defined NOUTF8  #if !defined NOUTF8
2920          if (use_utf8 && c > 255)          if (use_utf && c > 255)
2921            {            {
2922            unsigned char buff8[8];            pcre_uint8 buff8[8];
2923            int ii, utn;            int ii, utn;
2924            utn = ord2utf8(c, buff8);            utn = ord2utf8(c, buff8);
2925            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
# Line 1692  while (!done) Line 2935  while (!done)
2935  #if !defined NOUTF8  #if !defined NOUTF8
2936          if (*p == '{')          if (*p == '{')
2937            {            {
2938            unsigned char *pt = p;            pcre_uint8 *pt = p;
2939            c = 0;            c = 0;
2940            while (isxdigit(*(++pt)))  
2941              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2942              when isxdigit() is a macro that refers to its argument more than
2943              once. This is banned by the C Standard, but apparently happens in at
2944              least one MacOS environment. */
2945    
2946              for (pt++; isxdigit(*pt); pt++)
2947                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2948            if (*pt == '}')            if (*pt == '}')
2949              {              {
2950              unsigned char buff8[8];              pcre_uint8 buff8[8];
2951              int ii, utn;              int ii, utn;
2952              utn = ord2utf8(c, buff8);              if (use_utf)
2953              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2954              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2955                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2956                  c = buff8[ii];   /* Last byte */
2957                  }
2958                else
2959                 {
2960                 if (c > 255)
2961                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2962                     "UTF-8 mode is not enabled.\n"
2963                     "** Truncation will probably give the wrong result.\n", c);
2964                 }
2965              p = pt + 1;              p = pt + 1;
2966              break;              break;
2967              }              }
# Line 1715  while (!done) Line 2974  while (!done)
2974          c = 0;          c = 0;
2975          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2976            {            {
2977            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2978            p++;            p++;
2979            }            }
2980          break;          break;
# Line 1725  while (!done) Line 2984  while (!done)
2984          continue;          continue;
2985    
2986          case '>':          case '>':
2987            if (*p == '-')
2988              {
2989              start_offset_sign = -1;
2990              p++;
2991              }
2992          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2993            start_offset *= start_offset_sign;
2994          continue;          continue;
2995    
2996          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1744  while (!done) Line 3009  while (!done)
3009            }            }
3010          else if (isalnum(*p))          else if (isalnum(*p))
3011            {            {
3012            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
3013            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
3014            *npp++ = 0;            *npp++ = 0;
3015            *npp = 0;            *npp = 0;
# Line 1798  while (!done) Line 3063  while (!done)
3063  #endif  #endif
3064            use_dfa = 1;            use_dfa = 1;
3065          continue;          continue;
3066    #endif
3067    
3068    #if !defined NODFA
3069          case 'F':          case 'F':
3070          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3071          continue;          continue;
# Line 1812  while (!done) Line 3079  while (!done)
3079            }            }
3080          else if (isalnum(*p))          else if (isalnum(*p))
3081            {            {
3082            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
3083            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
3084            *npp++ = 0;            *npp++ = 0;
3085            *npp = 0;            *npp = 0;
# Line 1823  while (!done) Line 3090  while (!done)
3090            }            }
3091          continue;          continue;
3092    
3093            case 'J':
3094            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3095            if (extra != NULL
3096                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3097                && extra->executable_jit != NULL)
3098              {
3099              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3100              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3101              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3102              }
3103            continue;
3104    
3105          case 'L':          case 'L':
3106          getlist = 1;          getlist = 1;
3107          continue;          continue;
# Line 1832  while (!done) Line 3111  while (!done)
3111          continue;          continue;
3112    
3113          case 'N':          case 'N':
3114          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3115              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3116            else
3117              options |= PCRE_NOTEMPTY;
3118          continue;          continue;
3119    
3120          case 'O':          case 'O':
# Line 1845  while (!done) Line 3127  while (!done)
3127            if (offsets == NULL)            if (offsets == NULL)
3128              {              {
3129              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
3130                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
3131              yield = 1;              yield = 1;
3132              goto EXIT;              goto EXIT;
3133              }              }
# Line 1855  while (!done) Line 3137  while (!done)
3137          continue;          continue;
3138    
3139          case 'P':          case 'P':
3140          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3141              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3142          continue;          continue;
3143    
3144          case 'Q':          case 'Q':
# Line 1890  while (!done) Line 3173  while (!done)
3173          show_malloc = 1;          show_malloc = 1;
3174          continue;          continue;
3175    
3176            case 'Y':
3177            options |= PCRE_NO_START_OPTIMIZE;
3178            continue;
3179    
3180          case 'Z':          case 'Z':
3181          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3182          continue;          continue;
# Line 1910  while (!done) Line 3197  while (!done)
3197        *q++ = c;        *q++ = c;
3198        }        }
3199      *q = 0;      *q = 0;
3200      len = q - dbuffer;      len = (int)(q - dbuffer);
3201    
3202        /* Move the data to the end of the buffer so that a read over the end of
3203        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3204        we are using the POSIX interface, we must include the terminating zero. */
3205    
3206    #if !defined NOPOSIX
3207        if (posix || do_posix)
3208          {
3209          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3210          bptr += buffer_size - len - 1;
3211          }
3212        else
3213    #endif
3214          {
3215          memmove(bptr + buffer_size - len, bptr, len);
3216          bptr += buffer_size - len;
3217          }
3218    
3219      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3220        {        {
# Line 1931  while (!done) Line 3235  while (!done)
3235          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3236        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3237        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3238          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3239    
3240        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3241    
# Line 1952  while (!done) Line 3257  while (!done)
3257            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3258              {              {
3259              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3260              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3261                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3262              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3263              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3264                {                {
3265                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3266                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3267                  outfile);                  outfile);
3268                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3269                }                }
# Line 1966  while (!done) Line 3271  while (!done)
3271            }            }
3272          }          }
3273        free(pmatch);        free(pmatch);
3274          goto NEXT_DATA;
3275        }        }
3276    
3277    #endif  /* !defined NOPOSIX */
3278    
3279      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3280    
3281      else  #ifdef SUPPORT_PCRE16
3282  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3283          {
3284          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3285          if (len < 0)
3286            {
3287            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3288              "converted to UTF-16\n");
3289            goto NEXT_DATA;
3290            }
3291          bptr = (pcre_uint8 *)buffer16;
3292          }
3293    #endif
3294    
3295      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3296        {        {
3297          markptr = NULL;
3298    
3299        if (timeitm > 0)        if (timeitm > 0)
3300          {          {
3301          register int i;          register int i;
# Line 1986  while (!done) Line 3307  while (!done)
3307            {            {
3308            int workspace[1000];            int workspace[1000];
3309            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3310              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3311                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
3312                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
3313            }            }
# Line 1994  while (!done) Line 3315  while (!done)
3315  #endif  #endif
3316    
3317          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3318            count = pcre_exec(re, extra, (char *)bptr, len,            {
3319              PCRE_EXEC(count, re, extra, bptr, len,
3320              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
3321              }
3322          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3323          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3324            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2005  while (!done) Line 3327  while (!done)
3327    
3328        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3329        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3330        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3331          running of pcre_exec(), so disable the JIT optimization. This makes it
3332          possible to run the same set of tests with and without JIT externally
3333          requested. */
3334    
3335        if (find_match_limit)        if (find_match_limit)
3336          {          {
# Line 2014  while (!done) Line 3339  while (!done)
3339            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3340            extra->flags = 0;            extra->flags = 0;
3341            }            }
3342            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3343    
3344          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3345            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2037  while (!done) Line 3363  while (!done)
3363            }            }
3364          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3365          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3366          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3367            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3368          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3369          }          }
# Line 2049  while (!done) Line 3375  while (!done)
3375        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3376          {          {
3377          int workspace[1000];          int workspace[1000];
3378          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3379            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
3380            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
3381          if (count == 0)          if (count == 0)
# Line 2062  while (!done) Line 3388  while (!done)
3388    
3389        else        else
3390          {          {
3391          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3392            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3393          if (count == 0)          if (count == 0)
3394            {            {
3395            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2097  while (!done) Line 3423  while (!done)
3423              }              }
3424            }            }
3425    
3426            /* do_allcaps requests showing of all captures in the pattern, to check
3427            unset ones at the end. */
3428    
3429            if (do_allcaps)
3430              {
3431              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3432              count++;   /* Allow for full match */
3433              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3434              }
3435    
3436            /* Output the captured substrings */
3437    
3438          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3439            {            {
3440            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
3441                {
3442                if (use_offsets[i] != -1)
3443                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3444                    use_offsets[i], i);
3445                if (use_offsets[i+1] != -1)
3446                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3447                    use_offsets[i+1], i+1);
3448              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3449                }
3450            else            else
3451              {              {
3452              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3453              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr + use_offsets[i],
3454                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3455              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3456              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3457                {                {
3458                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3459                  {                PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3460                  fprintf(outfile, " 0+ ");                  outfile);
3461                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3462                }                }
3463              }              }
3464            }            }
3465    
3466            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3467    
3468          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3469            {            {
3470            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2193  while (!done) Line 3538  while (!done)
3538                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3539              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3540                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3541              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3542              }              }
3543            }            }
# Line 2203  while (!done) Line 3547  while (!done)
3547    
3548        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
3549          {          {
3550          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
3551  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
3552          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
3553            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
3554              bptr + use_offsets[0]);            fprintf(outfile, ": ");
3555  #endif            PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3556                outfile);
3557              }
3558          fprintf(outfile, "\n");          fprintf(outfile, "\n");
3559          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
3560          }          }
# Line 2218  while (!done) Line 3564  while (!done)
3564        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
3565        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
3566    
3567        Complication arises in the case when the newline option is "any".        Complication arises in the case when the newline convention is "any",
3568        If the previous match was at the end of a line terminated by CRLF, an        "crlf", or "anycrlf". If the previous match was at the end of a line
3569        advance of one character just passes the \r, whereas we should prefer the        terminated by CRLF, an advance of one character just passes the \r,
3570        longer newline sequence, as does the code in pcre_exec(). Fudge the        whereas we should prefer the longer newline sequence, as does the code in
3571        offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
3572          newline setting in the pattern; if none was set, use pcre_config() to
3573          find the default.
3574    
3575        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
3576        character, not one byte. */        character, not one byte. */
# Line 2238  while (!done) Line 3586  while (!done)
3586              {              {
3587              int d;              int d;
3588              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3589              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
3590                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
3591                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
3592                        (d == 10)? PCRE_NEWLINE_LF :
3593                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3594                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
3595                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
3596              }              }
3597            if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3598                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3599                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3600                  &&
3601                start_offset < len - 1 &&                start_offset < len - 1 &&
3602                bptr[start_offset] == '\r' &&                bptr[start_offset] == '\r' &&
3603                bptr[start_offset+1] == '\n')                bptr[start_offset+1] == '\n')
3604              onechar++;              onechar++;
3605            else if (use_utf8)            else if (use_utf)
3606              {              {
3607              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3608                {                {
3609                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3610                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3611                }                }
3612              }              }
3613            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3614            }            }
3615          else          else
3616            {            {
3617            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3618              {              {
3619              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3620                if (gmatched == 0)
3621                  {
3622                  if (markptr == NULL) fprintf(outfile, "No match\n");
3623                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3624                  }
3625                break;
3626    
3627                case PCRE_ERROR_BADUTF8:
3628                case PCRE_ERROR_SHORTUTF8:
3629                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3630                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3631                if (use_size_offsets >= 2)
3632                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3633                    use_offsets[1]);
3634                fprintf(outfile, "\n");
3635                break;
3636    
3637                default:
3638                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3639                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3640                else
3641                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3642                break;
3643              }              }
3644            else fprintf(outfile, "Error %d\n", count);  
3645            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3646            }            }
3647          }          }
# Line 2276  while (!done) Line 3651  while (!done)
3651        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3652    
3653        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3654        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3655        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
3656        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3657        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3658        character. */        character. */
3659    
# Line 2287  while (!done) Line 3662  while (!done)
3662        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3663          {          {
3664          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3665          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3666          }          }
3667    
3668        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2313  while (!done) Line 3688  while (!done)
3688  #endif  #endif
3689    
3690    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3691    if (extra != NULL) new_free(extra);    if (extra != NULL)
3692    if (tables != NULL)      {
3693        PCRE_FREE_STUDY(extra);
3694        }
3695      if (locale_set)
3696      {      {
3697      new_free((void *)tables);      new_free((void *)tables);
3698      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3699      locale_set = 0;      locale_set = 0;
3700      }      }
3701      if (jit_stack != NULL)
3702        {
3703        pcre_jit_stack_free(jit_stack);
3704        jit_stack = NULL;
3705        }
3706    }    }
3707    
3708  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 2334  free(dbuffer); Line 3717  free(dbuffer);
3717  free(pbuffer);  free(pbuffer);
3718  free(offsets);  free(offsets);
3719    
3720    #ifdef SUPPORT_PCRE16
3721    if (buffer16 != NULL) free(buffer16);
3722    #endif
3723    
3724  return yield;  return yield;
3725  }  }
3726    

Legend:
Removed from v.146  
changed lines
  Added in v.813

  ViewVC Help
Powered by ViewVC 1.1.5