/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC code/branches/pcre16/pcretest.c revision 809 by zherczeg, Mon Dec 19 11:04:45 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  */  */
38    
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <stdio.h>  #include <stdio.h>
46  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 49  POSSIBILITY OF SUCH DAMAGE.
49  #include <locale.h>  #include <locale.h>
50  #include <errno.h>  #include <errno.h>
51    
52  /* We need the internal info for displaying the results of pcre_study(). Also  #ifdef SUPPORT_LIBREADLINE
53  for getting the opcodes for showing compiled code. */  #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
60    
61    /* A number of things vary for Windows builds. Originally, pcretest opened its
62    input and output without "b"; then I was told that "b" was needed in some
63    environments, so it was added for release 5.0 to both the input and output. (It
64    makes no difference on Unix-like systems.) Later I was told that it is wrong
65    for the input on Windows. I've now abstracted the modes into two macros that
66    are set here, to make it easier to fiddle with them, and removed "b" from the
67    input mode under Windows. */
68    
69    #if defined(_WIN32) || defined(WIN32)
70    #include <io.h>                /* For _setmode() */
71    #include <fcntl.h>             /* For _O_BINARY */
72    #define INPUT_MODE   "r"
73    #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89    /* Not Windows */
90    
91  #define PCRE_SPY        /* For Win32 build, import data, not export */  #else
92  #include "internal.h"  #include <sys/time.h>          /* These two includes are needed */
93    #include <sys/resource.h>      /* for setrlimit(). */
94    #define INPUT_MODE   "rb"
95    #define OUTPUT_MODE  "wb"
96    #endif
97    
98    
99    /* We have to include pcre_internal.h because we need the internal info for
100    displaying the results of pcre_study() and we also need to know about the
101    internal macros, structures, and other internal data values; pcretest has
102    "inside information" compared to a program that strictly follows the PCRE API.
103    
104    Although pcre_internal.h does itself include pcre.h, we explicitly include it
105    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106    appropriately for an application, not for building PCRE. */
107    
108    #include "pcre.h"
109    #include "pcre_internal.h"
110    
111    /* The pcre_printint() function, which prints the internal form of a compiled
112    regex, is held in a separate file so that (a) it can be compiled in either
113    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123    /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129    #define _pcre_utf8_table1      utf8_table1
130    #define _pcre_utf8_table1_size utf8_table1_size
131    #define _pcre_utf8_table2      utf8_table2
132    #define _pcre_utf8_table3      utf8_table3
133    #define _pcre_utf8_table4      utf8_table4
134    #define _pcre_utt              utt
135    #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137    #define _pcre_OP_lengths       OP_lengths
138    
139    #include "pcre_tables.c"
140    
141    /* The definition of the macro PRINTABLE, which determines whether to print an
142    output character as-is or as a hex value when showing compiled patterns, is
143    the same as in the printint.src file. We use it here in cases when the locale
144    has not been explicitly changed, so as to get consistent output from systems
145    that differ in their output from isprint() even in the "C" locale. */
146    
147    #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149    #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 58  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163    /* It is also possible, originally for the benefit of a version that was
164    imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165    without the interface to the DFA matcher (NODFA), and without the doublecheck
166    of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167    out the UTF8 support if PCRE is built without it. */
168    
169    #ifndef SUPPORT_UTF8
170    #ifndef NOUTF8
171    #define NOUTF8
172    #endif
173    #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    #define PCHARS8(lv, p, len, f) \
185      lv = pchars((pcre_uint8 *)p, len, f)
186    
187    #define PCHARSV8(p, len, f) \
188      (void)pchars((pcre_uint8 *)p, len, f)
189    
190    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191      re = pcre_compile((char *)pat, options, error, erroffset, tables)
192    
193    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194        offsets, size_offsets) \
195      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196        offsets, size_offsets)
197    
198    #define PCRE_STUDY8(extra, re, options, error) \
199      extra = pcre_study(re, options, error)
200    
201    #define PCRE_FREE_STUDY8(extra) \
202      pcre_free_study(extra)
203    
204    #endif /* SUPPORT_PCRE8 */
205    
206    
207    #ifdef SUPPORT_PCRE16
208    #define PCHARS16(lv, p, len, f) \
209      lv = pchars16((PCRE_SPTR16)p, len, f)
210    
211    #define PCHARSV16(p, len, f) \
212      (void)pchars16((PCRE_SPTR16)p, len, f)
213    
214    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
215      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
216    
217    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
218        offsets, size_offsets) \
219      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
220        options, offsets, size_offsets)
221    
222    #define PCRE_STUDY16(extra, re, options, error) \
223      extra = pcre16_study(re, options, error)
224    
225    #define PCRE_FREE_STUDY16(extra) \
226      pcre16_free_study(extra)
227    
228    #endif /* SUPPORT_PCRE16 */
229    
230    
231    /* ----- Both modes are supported; a runtime test is needed ----- */
232    
233    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
234    
235    #define PCHARS(lv, p, len, f) \
236      if (use_pcre16) \
237        PCHARS16(lv, p, len, f); \
238      else \
239        PCHARS8(lv, p, len, f)
240    
241    #define PCHARSV(p, len, f) \
242      if (use_pcre16) \
243        PCHARSV16(p, len, f); \
244      else \
245        PCHARSV8(p, len, f)
246    
247    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
248      if (use_pcre16) \
249        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
250      else \
251        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
252    
253    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
254        offsets, size_offsets) \
255      if (use_pcre16) \
256        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
257          offsets, size_offsets); \
258      else \
259        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
260          offsets, size_offsets)
261    
262    #define PCRE_STUDY(extra, re, options, error) \
263      if (use_pcre16) \
264        PCRE_STUDY16(extra, re, options, error); \
265      else \
266        PCRE_STUDY8(extra, re, options, error)
267    
268    #define PCRE_FREE_STUDY(extra) \
269      if (use_pcre16) \
270        PCRE_FREE_STUDY16(extra); \
271      else \
272        PCRE_FREE_STUDY8(extra)
273    
274    /* ----- Only 8-bit mode is supported ----- */
275    
276    #elif defined SUPPORT_PCRE8
277    #define PCHARS           PCHARS8
278    #define PCHARSV          PCHARSV8
279    #define PCRE_COMPILE     PCRE_COMPILE8
280    #define PCRE_EXEC        PCRE_EXEC8
281    #define PCRE_STUDY       PCRE_STUDY8
282    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
283    
284    /* ----- Only 16-bit mode is supported ----- */
285    
286    #else
287    #define PCHARS           PCHARS16
288    #define PCHARSV          PCHARSV16
289    #define PCRE_COMPILE     PCRE_COMPILE16
290    #define PCRE_EXEC        PCRE_EXEC16
291    #define PCRE_STUDY       PCRE_STUDY16
292    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
293    #endif
294    
295    /* ----- End of mode-specific function call macros ----- */
296    
297    
298    /* Other parameters */
299    
300  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
301  #ifdef CLK_TCK  #ifdef CLK_TCK
302  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 66  Makefile. */ Line 305  Makefile. */
305  #endif  #endif
306  #endif  #endif
307    
308  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
309    
310  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
311    
312    /* Static variables */
313    
314  static FILE *outfile;  static FILE *outfile;
315  static int log_store = 0;  static int log_store = 0;
# Line 79  static int callout_count; Line 317  static int callout_count;
317  static int callout_extra;  static int callout_extra;
318  static int callout_fail_count;  static int callout_fail_count;
319  static int callout_fail_id;  static int callout_fail_id;
320    static int debug_lengths;
321  static int first_callout;  static int first_callout;
322    static int locale_set = 0;
323  static int show_malloc;  static int show_malloc;
324  static int use_utf8;  static int use_utf8;
325  static size_t gotten_store;  static size_t gotten_store;
326    static size_t first_gotten_store = 0;
327    static const unsigned char *last_callout_mark = NULL;
328    
329  static uschar *pbuffer = NULL;  /* The buffers grow automatically if very long input lines are encountered. */
330    
331    static int buffer_size = 50000;
332    static pcre_uint8 *buffer = NULL;
333    static pcre_uint8 *dbuffer = NULL;
334    static pcre_uint8 *pbuffer = NULL;
335    
336    #ifdef SUPPORT_PCRE16
337    static int buffer16_size = 0;
338    static pcre_uint16 *buffer16 = NULL;
339    #endif
340    
341  static const int utf8_table1[] = {  /* If we have 8-bit support, default use_pcre16 to false; if there is also
342    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  16-bit support, it can be changed by an option. If there is no 8-bit support,
343    there must be 16-bit support, so default it to 1. */
344    
345  static const int utf8_table2[] = {  #ifdef SUPPORT_PCRE8
346    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  static int use_pcre16 = 0;
347    #else
348    static int use_pcre16 = 1;
349    #endif
350    
351  static const int utf8_table3[] = {  /* Textual explanations for runtime error codes */
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
352    
353    static const char *errtexts[] = {
354      NULL,  /* 0 is no error */
355      NULL,  /* NOMATCH is handled specially */
356      "NULL argument passed",
357      "bad option value",
358      "magic number missing",
359      "unknown opcode - pattern overwritten?",
360      "no more memory",
361      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
362      "match limit exceeded",
363      "callout error code",
364      NULL,  /* BADUTF8 is handled specially */
365      "bad UTF-8 offset",
366      NULL,  /* PARTIAL is handled specially */
367      "not used - internal error",
368      "internal error - pattern overwritten?",
369      "bad count value",
370      "item unsupported for DFA matching",
371      "backreference condition or recursion test not supported for DFA matching",
372      "match limit not supported for DFA matching",
373      "workspace size exceeded in DFA matching",
374      "too much recursion for DFA matching",
375      "recursion limit exceeded",
376      "not used - internal error",
377      "invalid combination of newline options",
378      "bad offset value",
379      NULL,  /* SHORTUTF8 is handled specially */
380      "nested recursion at the same subject position",
381      "JIT stack limit reached",
382      "pattern compiled in wrong mode (8-bit/16-bit error)"
383    };
384    
385    
386  /*************************************************  /*************************************************
387  *         Print compiled regex                   *  *         Alternate character tables             *
388  *************************************************/  *************************************************/
389    
390  /* The code for doing this is held in a separate file that is also included in  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
391  pcre.c when it is compiled with the debug switch. It defines a function called  using the default tables of the library. However, the T option can be used to
392  print_internals(), which uses a table of opcode lengths defined by the macro  select alternate sets of tables, for different kinds of testing. Note also that
393  OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates  the L (locale) option also adjusts the tables. */
394  Unicode property names to numbers; this is kept in a separate file. */  
395    /* This is the set of tables distributed as default with PCRE. It recognizes
396  static uschar OP_lengths[] = { OP_LENGTHS };  only ASCII characters. */
397    
398  #include "ucp.h"  static const pcre_uint8 tables0[] = {
399  #include "ucptypetable.c"  
400  #include "printint.c"  /* This table is a lower casing table. */
401    
402        0,  1,  2,  3,  4,  5,  6,  7,
403        8,  9, 10, 11, 12, 13, 14, 15,
404       16, 17, 18, 19, 20, 21, 22, 23,
405       24, 25, 26, 27, 28, 29, 30, 31,
406       32, 33, 34, 35, 36, 37, 38, 39,
407       40, 41, 42, 43, 44, 45, 46, 47,
408       48, 49, 50, 51, 52, 53, 54, 55,
409       56, 57, 58, 59, 60, 61, 62, 63,
410       64, 97, 98, 99,100,101,102,103,
411      104,105,106,107,108,109,110,111,
412      112,113,114,115,116,117,118,119,
413      120,121,122, 91, 92, 93, 94, 95,
414       96, 97, 98, 99,100,101,102,103,
415      104,105,106,107,108,109,110,111,
416      112,113,114,115,116,117,118,119,
417      120,121,122,123,124,125,126,127,
418      128,129,130,131,132,133,134,135,
419      136,137,138,139,140,141,142,143,
420      144,145,146,147,148,149,150,151,
421      152,153,154,155,156,157,158,159,
422      160,161,162,163,164,165,166,167,
423      168,169,170,171,172,173,174,175,
424      176,177,178,179,180,181,182,183,
425      184,185,186,187,188,189,190,191,
426      192,193,194,195,196,197,198,199,
427      200,201,202,203,204,205,206,207,
428      208,209,210,211,212,213,214,215,
429      216,217,218,219,220,221,222,223,
430      224,225,226,227,228,229,230,231,
431      232,233,234,235,236,237,238,239,
432      240,241,242,243,244,245,246,247,
433      248,249,250,251,252,253,254,255,
434    
435    /* This table is a case flipping table. */
436    
437        0,  1,  2,  3,  4,  5,  6,  7,
438        8,  9, 10, 11, 12, 13, 14, 15,
439       16, 17, 18, 19, 20, 21, 22, 23,
440       24, 25, 26, 27, 28, 29, 30, 31,
441       32, 33, 34, 35, 36, 37, 38, 39,
442       40, 41, 42, 43, 44, 45, 46, 47,
443       48, 49, 50, 51, 52, 53, 54, 55,
444       56, 57, 58, 59, 60, 61, 62, 63,
445       64, 97, 98, 99,100,101,102,103,
446      104,105,106,107,108,109,110,111,
447      112,113,114,115,116,117,118,119,
448      120,121,122, 91, 92, 93, 94, 95,
449       96, 65, 66, 67, 68, 69, 70, 71,
450       72, 73, 74, 75, 76, 77, 78, 79,
451       80, 81, 82, 83, 84, 85, 86, 87,
452       88, 89, 90,123,124,125,126,127,
453      128,129,130,131,132,133,134,135,
454      136,137,138,139,140,141,142,143,
455      144,145,146,147,148,149,150,151,
456      152,153,154,155,156,157,158,159,
457      160,161,162,163,164,165,166,167,
458      168,169,170,171,172,173,174,175,
459      176,177,178,179,180,181,182,183,
460      184,185,186,187,188,189,190,191,
461      192,193,194,195,196,197,198,199,
462      200,201,202,203,204,205,206,207,
463      208,209,210,211,212,213,214,215,
464      216,217,218,219,220,221,222,223,
465      224,225,226,227,228,229,230,231,
466      232,233,234,235,236,237,238,239,
467      240,241,242,243,244,245,246,247,
468      248,249,250,251,252,253,254,255,
469    
470    /* This table contains bit maps for various character classes. Each map is 32
471    bytes long and the bits run from the least significant end of each byte. The
472    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
473    graph, print, punct, and cntrl. Other classes are built from combinations. */
474    
475      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
476      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
477      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
479    
480      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
481      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
482      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
484    
485      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
486      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
489    
490      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
492      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
494    
495      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
497      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499    
500      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
501      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
502      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
504    
505      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
506      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
507      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509    
510      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
511      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
512      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
514    
515      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
516      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
517      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519    
520      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
521      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
522      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524    
525    /* This table identifies various classes of character by individual bits:
526      0x01   white space character
527      0x02   letter
528      0x04   decimal digit
529      0x08   hexadecimal digit
530      0x10   alphanumeric or '_'
531      0x80   regular expression metacharacter or binary zero
532    */
533    
534      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
535      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
536      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
537      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
538      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
539      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
540      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
541      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
542      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
543      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
544      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
545      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
546      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
547      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
548      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
549      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
550      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
551      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
552      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
553      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
554      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
555      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
556      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
557      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
558      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
559      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
560      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
561      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
562      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
563      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
564      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
566    
567    /* This is a set of tables that came originally from a Windows user. It seems to
568    be at least an approximation of ISO 8859. In particular, there are characters
569    greater than 128 that are marked as spaces, letters, etc. */
570    
571    static const pcre_uint8 tables1[] = {
572    0,1,2,3,4,5,6,7,
573    8,9,10,11,12,13,14,15,
574    16,17,18,19,20,21,22,23,
575    24,25,26,27,28,29,30,31,
576    32,33,34,35,36,37,38,39,
577    40,41,42,43,44,45,46,47,
578    48,49,50,51,52,53,54,55,
579    56,57,58,59,60,61,62,63,
580    64,97,98,99,100,101,102,103,
581    104,105,106,107,108,109,110,111,
582    112,113,114,115,116,117,118,119,
583    120,121,122,91,92,93,94,95,
584    96,97,98,99,100,101,102,103,
585    104,105,106,107,108,109,110,111,
586    112,113,114,115,116,117,118,119,
587    120,121,122,123,124,125,126,127,
588    128,129,130,131,132,133,134,135,
589    136,137,138,139,140,141,142,143,
590    144,145,146,147,148,149,150,151,
591    152,153,154,155,156,157,158,159,
592    160,161,162,163,164,165,166,167,
593    168,169,170,171,172,173,174,175,
594    176,177,178,179,180,181,182,183,
595    184,185,186,187,188,189,190,191,
596    224,225,226,227,228,229,230,231,
597    232,233,234,235,236,237,238,239,
598    240,241,242,243,244,245,246,215,
599    248,249,250,251,252,253,254,223,
600    224,225,226,227,228,229,230,231,
601    232,233,234,235,236,237,238,239,
602    240,241,242,243,244,245,246,247,
603    248,249,250,251,252,253,254,255,
604    0,1,2,3,4,5,6,7,
605    8,9,10,11,12,13,14,15,
606    16,17,18,19,20,21,22,23,
607    24,25,26,27,28,29,30,31,
608    32,33,34,35,36,37,38,39,
609    40,41,42,43,44,45,46,47,
610    48,49,50,51,52,53,54,55,
611    56,57,58,59,60,61,62,63,
612    64,97,98,99,100,101,102,103,
613    104,105,106,107,108,109,110,111,
614    112,113,114,115,116,117,118,119,
615    120,121,122,91,92,93,94,95,
616    96,65,66,67,68,69,70,71,
617    72,73,74,75,76,77,78,79,
618    80,81,82,83,84,85,86,87,
619    88,89,90,123,124,125,126,127,
620    128,129,130,131,132,133,134,135,
621    136,137,138,139,140,141,142,143,
622    144,145,146,147,148,149,150,151,
623    152,153,154,155,156,157,158,159,
624    160,161,162,163,164,165,166,167,
625    168,169,170,171,172,173,174,175,
626    176,177,178,179,180,181,182,183,
627    184,185,186,187,188,189,190,191,
628    224,225,226,227,228,229,230,231,
629    232,233,234,235,236,237,238,239,
630    240,241,242,243,244,245,246,215,
631    248,249,250,251,252,253,254,223,
632    192,193,194,195,196,197,198,199,
633    200,201,202,203,204,205,206,207,
634    208,209,210,211,212,213,214,247,
635    216,217,218,219,220,221,222,255,
636    0,62,0,0,1,0,0,0,
637    0,0,0,0,0,0,0,0,
638    32,0,0,0,1,0,0,0,
639    0,0,0,0,0,0,0,0,
640    0,0,0,0,0,0,255,3,
641    126,0,0,0,126,0,0,0,
642    0,0,0,0,0,0,0,0,
643    0,0,0,0,0,0,0,0,
644    0,0,0,0,0,0,255,3,
645    0,0,0,0,0,0,0,0,
646    0,0,0,0,0,0,12,2,
647    0,0,0,0,0,0,0,0,
648    0,0,0,0,0,0,0,0,
649    254,255,255,7,0,0,0,0,
650    0,0,0,0,0,0,0,0,
651    255,255,127,127,0,0,0,0,
652    0,0,0,0,0,0,0,0,
653    0,0,0,0,254,255,255,7,
654    0,0,0,0,0,4,32,4,
655    0,0,0,128,255,255,127,255,
656    0,0,0,0,0,0,255,3,
657    254,255,255,135,254,255,255,7,
658    0,0,0,0,0,4,44,6,
659    255,255,127,255,255,255,127,255,
660    0,0,0,0,254,255,255,255,
661    255,255,255,255,255,255,255,127,
662    0,0,0,0,254,255,255,255,
663    255,255,255,255,255,255,255,255,
664    0,2,0,0,255,255,255,255,
665    255,255,255,255,255,255,255,127,
666    0,0,0,0,255,255,255,255,
667    255,255,255,255,255,255,255,255,
668    0,0,0,0,254,255,0,252,
669    1,0,0,248,1,0,0,120,
670    0,0,0,0,254,255,255,255,
671    0,0,128,0,0,0,128,0,
672    255,255,255,255,0,0,0,0,
673    0,0,0,0,0,0,0,128,
674    255,255,255,255,0,0,0,0,
675    0,0,0,0,0,0,0,0,
676    128,0,0,0,0,0,0,0,
677    0,1,1,0,1,1,0,0,
678    0,0,0,0,0,0,0,0,
679    0,0,0,0,0,0,0,0,
680    1,0,0,0,128,0,0,0,
681    128,128,128,128,0,0,128,0,
682    28,28,28,28,28,28,28,28,
683    28,28,0,0,0,0,0,128,
684    0,26,26,26,26,26,26,18,
685    18,18,18,18,18,18,18,18,
686    18,18,18,18,18,18,18,18,
687    18,18,18,128,128,0,128,16,
688    0,26,26,26,26,26,26,18,
689    18,18,18,18,18,18,18,18,
690    18,18,18,18,18,18,18,18,
691    18,18,18,128,128,0,0,0,
692    0,0,0,0,0,1,0,0,
693    0,0,0,0,0,0,0,0,
694    0,0,0,0,0,0,0,0,
695    0,0,0,0,0,0,0,0,
696    1,0,0,0,0,0,0,0,
697    0,0,18,0,0,0,0,0,
698    0,0,20,20,0,18,0,0,
699    0,20,18,0,0,0,0,0,
700    18,18,18,18,18,18,18,18,
701    18,18,18,18,18,18,18,18,
702    18,18,18,18,18,18,18,0,
703    18,18,18,18,18,18,18,18,
704    18,18,18,18,18,18,18,18,
705    18,18,18,18,18,18,18,18,
706    18,18,18,18,18,18,18,0,
707    18,18,18,18,18,18,18,18
708    };
709    
710    
711    
712    
713    #ifndef HAVE_STRERROR
714  /*************************************************  /*************************************************
715  *          Read number from string               *  *     Provide strerror() for non-ANSI libraries  *
716  *************************************************/  *************************************************/
717    
718  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
719  around with conditional compilation, just do the job by hand. It is only used  in their libraries, but can provide the same facility by this simple
720  for unpicking the -o argument, so just keep it simple.  alternative function. */
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
721    
722  Returns:        the unsigned long  extern int   sys_nerr;
723  */  extern char *sys_errlist[];
724    
725  static int  char *
726  get_value(unsigned char *str, unsigned char **endptr)  strerror(int n)
727  {  {
728  int result = 0;  if (n < 0 || n >= sys_nerr) return "unknown error number";
729  while(*str != 0 && isspace(*str)) str++;  return sys_errlist[n];
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
730  }  }
731    #endif /* HAVE_STRERROR */
732    
733    
734  /*************************************************  /*************************************************
735  *       Convert character value to UTF-8         *  *         JIT memory callback                    *
736  *************************************************/  *************************************************/
737    
738  /* This function takes an integer value in the range 0 - 0x7fffffff  static pcre_jit_stack* jit_callback(void *arg)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
739  {  {
740  register int i, j;  return (pcre_jit_stack *)arg;
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
741  }  }
742    
743    
# Line 188  return i + 1; Line 749  return i + 1;
749  and returns the value of the character.  and returns the value of the character.
750    
751  Argument:  Argument:
752    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
753    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
754    
755  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
756             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
757  */  */
758    
759    #if !defined NOUTF8
760    
761  static int  static int
762  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
763  {  {
764  int c = *buffer++;  int c = *utf8bytes++;
765  int d = c;  int d = c;
766  int i, j, s;  int i, j, s;
767    
# Line 218  d = (c & utf8_table3[i]) << s; Line 781  d = (c & utf8_table3[i]) << s;
781    
782  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
783    {    {
784    c = *buffer++;    c = *utf8bytes++;
785    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
786    s -= 6;    s -= 6;
787    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 226  for (j = 0; j < i; j++) Line 789  for (j = 0; j < i; j++)
789    
790  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
791    
792  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
793    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
794  if (j != i) return -(i+1);  if (j != i) return -(i+1);
795    
# Line 236  if (j != i) return -(i+1); Line 799  if (j != i) return -(i+1);
799  return i+1;  return i+1;
800  }  }
801    
802    #endif
803    
804    
805    
806  /*************************************************  /*************************************************
807  *             Print character string             *  *       Convert character value to UTF-8         *
808  *************************************************/  *************************************************/
809    
810  /* Character string printing function. Must handle UTF-8 strings in utf8  /* This function takes an integer value in the range 0 - 0x7fffffff
811  mode. Yields number of characters printed. If handed a NULL file, just counts  and encodes it as a UTF-8 character in 0 to 6 bytes.
812  chars without printing. */  
813    Arguments:
814      cvalue     the character value
815      utf8bytes  pointer to buffer for result - at least 6 bytes long
816    
817  static int pchars(unsigned char *p, int length, FILE *f)  Returns:     number of characters placed in the buffer
818    */
819    
820    #if !defined NOUTF8
821    
822    static int
823    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
824  {  {
825  int c;  register int i, j;
826    for (i = 0; i < utf8_table1_size; i++)
827      if (cvalue <= utf8_table1[i]) break;
828    utf8bytes += i;
829    for (j = i; j > 0; j--)
830     {
831     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
832     cvalue >>= 6;
833     }
834    *utf8bytes = utf8_table2[i] | cvalue;
835    return i + 1;
836    }
837    
838    #endif
839    
840    
841    
842    #ifdef SUPPORT_PCRE16
843    /*************************************************
844    *         Convert a string to 16-bit             *
845    *************************************************/
846    
847    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
848    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
849    double: values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2 bytes in
850    UTF-16, while higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
851    result is always left in buffer16. */
852    
853    static int
854    to16(pcre_uint8 *p, int utf, int len)
855    {
856    pcre_uint16 *pp;
857    
858    if (buffer16_size < 2*len + 2)
859      {
860      if (buffer16 != NULL) free(buffer16);
861      buffer16_size = 2*len + 2;
862      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
863      if (buffer16 == NULL)
864        {
865        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
866        exit(1);
867        }
868      }
869    
870    pp = buffer16;
871    
872    if (!utf)
873      {
874      while (len-- > 0) *pp++ = *p++;
875      }
876    
877    else
878      {
879      int c;
880      while (len > 0)
881        {
882        int chlen = utf82ord(p, &c);
883        p += chlen;
884        len -= chlen;
885        if (c < 0x10000) *pp++ = c; else
886          {
887          c -= 0x10000;
888          *pp++ = 0xD800 | (c >> 10);
889          *pp++ = 0xDC00 | (c & 0x3ff);
890          }
891        }
892      }
893    
894    *pp = 0;
895    return pp - buffer16;
896    }
897    #endif
898    
899    
900    /*************************************************
901    *        Read or extend an input line            *
902    *************************************************/
903    
904    /* Input lines are read into buffer, but both patterns and data lines can be
905    continued over multiple input lines. In addition, if the buffer fills up, we
906    want to automatically expand it so as to be able to handle extremely large
907    lines that are needed for certain stress tests. When the input buffer is
908    expanded, the other two buffers must also be expanded likewise, and the
909    contents of pbuffer, which are a copy of the input for callouts, must be
910    preserved (for when expansion happens for a data line). This is not the most
911    optimal way of handling this, but hey, this is just a test program!
912    
913    Arguments:
914      f            the file to read
915      start        where in buffer to start (this *must* be within buffer)
916      prompt       for stdin or readline()
917    
918    Returns:       pointer to the start of new data
919                   could be a copy of start, or could be moved
920                   NULL if no data read and EOF reached
921    */
922    
923    static pcre_uint8 *
924    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
925    {
926    pcre_uint8 *here = start;
927    
928    for (;;)
929      {
930      int rlen = (int)(buffer_size - (here - buffer));
931    
932      if (rlen > 1000)
933        {
934        int dlen;
935    
936        /* If libreadline support is required, use readline() to read a line if the
937        input is a terminal. Note that readline() removes the trailing newline, so
938        we must put it back again, to be compatible with fgets(). */
939    
940    #ifdef SUPPORT_LIBREADLINE
941        if (isatty(fileno(f)))
942          {
943          size_t len;
944          char *s = readline(prompt);
945          if (s == NULL) return (here == start)? NULL : start;
946          len = strlen(s);
947          if (len > 0) add_history(s);
948          if (len > rlen - 1) len = rlen - 1;
949          memcpy(here, s, len);
950          here[len] = '\n';
951          here[len+1] = 0;
952          free(s);
953          }
954        else
955    #endif
956    
957        /* Read the next line by normal means, prompting if the file is stdin. */
958    
959          {
960          if (f == stdin) printf("%s", prompt);
961          if (fgets((char *)here, rlen,  f) == NULL)
962            return (here == start)? NULL : start;
963          }
964    
965        dlen = (int)strlen((char *)here);
966        if (dlen > 0 && here[dlen - 1] == '\n') return start;
967        here += dlen;
968        }
969    
970      else
971        {
972        int new_buffer_size = 2*buffer_size;
973        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
974        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
975        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
976    
977        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
978          {
979          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
980          exit(1);
981          }
982    
983        memcpy(new_buffer, buffer, buffer_size);
984        memcpy(new_pbuffer, pbuffer, buffer_size);
985    
986        buffer_size = new_buffer_size;
987    
988        start = new_buffer + (start - buffer);
989        here = new_buffer + (here - buffer);
990    
991        free(buffer);
992        free(dbuffer);
993        free(pbuffer);
994    
995        buffer = new_buffer;
996        dbuffer = new_dbuffer;
997        pbuffer = new_pbuffer;
998        }
999      }
1000    
1001    return NULL;  /* Control never gets here */
1002    }
1003    
1004    
1005    
1006    /*************************************************
1007    *          Read number from string               *
1008    *************************************************/
1009    
1010    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1011    around with conditional compilation, just do the job by hand. It is only used
1012    for unpicking arguments, so just keep it simple.
1013    
1014    Arguments:
1015      str           string to be converted
1016      endptr        where to put the end pointer
1017    
1018    Returns:        the converted value, as an int
1019    */
1020    
1021    static int
1022    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1023    {
1024    int result = 0;
1025    while(*str != 0 && isspace(*str)) str++;
1026    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1027    *endptr = str;
1028    return(result);
1029    }
1030    
1031    
1032    
1033    #ifdef SUPPORT_PCRE8
1034    /*************************************************
1035    *         Print 8-bit character string           *
1036    *************************************************/
1037    
1038    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1039    If handed a NULL file, just counts chars without printing. */
1040    
1041    static int pchars(pcre_uint8 *p, int length, FILE *f)
1042    {
1043    int c = 0;
1044  int yield = 0;  int yield = 0;
1045    
1046  while (length-- > 0)  while (length-- > 0)
1047    {    {
1048    #if !defined NOUTF8
1049    if (use_utf8)    if (use_utf8)
1050      {      {
1051      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 261  while (length-- > 0) Line 1054  while (length-- > 0)
1054        {        {
1055        length -= rc - 1;        length -= rc - 1;
1056        p += rc;        p += rc;
1057        if (c < 256 && isprint(c))        if (PRINTOK(c))
1058          {          {
1059          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
1060          yield++;          yield++;
1061          }          }
1062        else        else
1063          {          {
1064          int n;          int n = 4;
1065          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
1066          yield += n;          yield += (n <= 0x000000ff)? 2 :
1067                     (n <= 0x00000fff)? 3 :
1068                     (n <= 0x0000ffff)? 4 :
1069                     (n <= 0x000fffff)? 5 : 6;
1070          }          }
1071        continue;        continue;
1072        }        }
1073      }      }
1074    #endif
1075    
1076     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
1077    
1078    if (isprint(c = *(p++)))    c = *p++;
1079      if (PRINTOK(c))
1080      {      {
1081      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
1082      yield++;      yield++;
# Line 292  while (length-- > 0) Line 1090  while (length-- > 0)
1090    
1091  return yield;  return yield;
1092  }  }
1093    #endif
1094    
1095    
1096    
1097    #ifdef SUPPORT_PCRE16
1098    /*************************************************
1099    *           Print 16-bit character string        *
1100    *************************************************/
1101    
1102    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1103    If handed a NULL file, just counts chars without printing. */
1104    
1105    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1106    {
1107    int yield = 0;
1108    
1109    while (length-- > 0)
1110      {
1111      int c = *p++ & 0xffff;
1112    
1113    #if !defined NOUTF8
1114      if (use_utf8 && c >= 0xD800 && c < 0xDC00 && length > 0)
1115        {
1116        int d = *p & 0xffff;
1117        if (d >= 0xDC00 && d < 0xDFFF)
1118          {
1119          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1120          length--;
1121          p++;
1122          }
1123        }
1124    #endif
1125    
1126      if (PRINTOK(c))
1127        {
1128        if (f != NULL) fprintf(f, "%c", c);
1129        yield++;
1130        }
1131      else
1132        {
1133        yield += 4;
1134        if (c < 0x100)
1135          {
1136          if (f != NULL) fprintf(f, "\\x%02x", c);
1137          }
1138        else
1139          {
1140          if (f != NULL) fprintf(f, "\\x{%02x}", c);
1141          yield += (c <= 0x000000ff)? 2 :
1142                   (c <= 0x00000fff)? 3 :
1143                   (c <= 0x0000ffff)? 4 :
1144                   (c <= 0x000fffff)? 5 : 6;
1145          }
1146        }
1147      }
1148    
1149    return yield;
1150    }
1151    #endif
1152    
1153    
1154    
# Line 320  if (callout_extra) Line 1177  if (callout_extra)
1177      else      else
1178        {        {
1179        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1180        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject + cb->offset_vector[i],
1181          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1182        fprintf(f, "\n");        fprintf(f, "\n");
1183        }        }
# Line 333  printed lengths of the substrings. */ Line 1190  printed lengths of the substrings. */
1190    
1191  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1192    
1193  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, cb->start_match, f);
1194  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject + cb->start_match,
1195    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1196    
1197  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1198    
1199  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject + cb->current_position,
1200    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1201    
1202  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 376  fprintf(outfile, "%.*s", (cb->next_item_ Line 1233  fprintf(outfile, "%.*s", (cb->next_item_
1233  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1234  first_callout = 0;  first_callout = 0;
1235    
1236    if (cb->mark != last_callout_mark)
1237      {
1238      fprintf(outfile, "Latest Mark: %s\n",
1239        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1240      last_callout_mark = cb->mark;
1241      }
1242    
1243  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1244    {    {
1245    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 395  return (cb->callout_number != callout_fa Line 1259  return (cb->callout_number != callout_fa
1259  *            Local malloc functions              *  *            Local malloc functions              *
1260  *************************************************/  *************************************************/
1261    
1262  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1263  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1264    show_malloc variable is set only during matching. */
1265    
1266  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1267  {  {
1268  void *block = malloc(size);  void *block = malloc(size);
1269  gotten_store = size;  gotten_store = size;
1270    if (first_gotten_store == 0) first_gotten_store = size;
1271  if (show_malloc)  if (show_malloc)
1272    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1273  return block;  return block;
1274  }  }
1275    
# Line 414  if (show_malloc) Line 1280  if (show_malloc)
1280  free(block);  free(block);
1281  }  }
1282    
   
1283  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1284    
1285  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
1286  {  {
1287  void *block = malloc(size);  void *block = malloc(size);
1288  if (show_malloc)  if (show_malloc)
1289    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1290  return block;  return block;
1291  }  }
1292    
# Line 437  free(block); Line 1302  free(block);
1302  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1303  *************************************************/  *************************************************/
1304    
1305  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1306    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1307    value, but the code is defensive. */
1308    
1309  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1310  {  {
1311  int rc;  int rc;
1312  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1313    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1314    #ifdef SUPPORT_PCRE16
1315      rc = pcre16_fullinfo(re, study, option, ptr);
1316    #else
1317      rc = PCRE_ERROR_BADMODE;
1318    #endif
1319    else
1320    #ifdef SUPPORT_PCRE8
1321      rc = pcre_fullinfo(re, study, option, ptr);
1322    #else
1323      rc = PCRE_ERROR_BADMODE;
1324    #endif
1325    
1326    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1327      use_pcre16? "16" : "", option);
1328  }  }
1329    
1330    
# Line 452  if ((rc = pcre_fullinfo(re, study, optio Line 1333  if ((rc = pcre_fullinfo(re, study, optio
1333  *         Byte flipping function                 *  *         Byte flipping function                 *
1334  *************************************************/  *************************************************/
1335    
1336  static long int  static unsigned long int
1337  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
1338  {  {
1339  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
1340  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 466  return ((value & 0x000000ff) << 24) | Line 1347  return ((value & 0x000000ff) << 24) |
1347    
1348    
1349  /*************************************************  /*************************************************
1350    *        Check match or recursion limit          *
1351    *************************************************/
1352    
1353    static int
1354    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1355      int start_offset, int options, int *use_offsets, int use_size_offsets,
1356      int flag, unsigned long int *limit, int errnumber, const char *msg)
1357    {
1358    int count;
1359    int min = 0;
1360    int mid = 64;
1361    int max = -1;
1362    
1363    extra->flags |= flag;
1364    
1365    for (;;)
1366      {
1367      *limit = mid;
1368    
1369      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1370        use_offsets, use_size_offsets);
1371    
1372      if (count == errnumber)
1373        {
1374        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1375        min = mid;
1376        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1377        }
1378    
1379      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1380                             count == PCRE_ERROR_PARTIAL)
1381        {
1382        if (mid == min + 1)
1383          {
1384          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1385          break;
1386          }
1387        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1388        max = mid;
1389        mid = (min + mid)/2;
1390        }
1391      else break;    /* Some other error */
1392      }
1393    
1394    extra->flags &= ~flag;
1395    return count;
1396    }
1397    
1398    
1399    
1400    /*************************************************
1401    *         Case-independent strncmp() function    *
1402    *************************************************/
1403    
1404    /*
1405    Arguments:
1406      s         first string
1407      t         second string
1408      n         number of characters to compare
1409    
1410    Returns:    < 0, = 0, or > 0, according to the comparison
1411    */
1412    
1413    static int
1414    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1415    {
1416    while (n--)
1417      {
1418      int c = tolower(*s++) - tolower(*t++);
1419      if (c) return c;
1420      }
1421    return 0;
1422    }
1423    
1424    
1425    
1426    /*************************************************
1427    *         Check newline indicator                *
1428    *************************************************/
1429    
1430    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1431    a message and return 0 if there is no match.
1432    
1433    Arguments:
1434      p           points after the leading '<'
1435      f           file for error message
1436    
1437    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1438    */
1439    
1440    static int
1441    check_newline(pcre_uint8 *p, FILE *f)
1442    {
1443    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1444    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1445    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1446    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1447    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1448    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1449    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1450    fprintf(f, "Unknown newline type at: <%s\n", p);
1451    return 0;
1452    }
1453    
1454    
1455    
1456    /*************************************************
1457    *             Usage function                     *
1458    *************************************************/
1459    
/* Write the command-line summary to stdout. The lines for -16, -dfa, -p and
the readline remark depend on the corresponding compile-time settings. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs("  -16      use 16-bit interface\n", stdout);
#endif
fputs("  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1497    
1498    
1499    
1500    /*************************************************
1501  *                Main Program                    *  *                Main Program                    *
1502  *************************************************/  *************************************************/
1503    
# Line 478  int main(int argc, char **argv) Line 1510  int main(int argc, char **argv)
1510  FILE *infile = stdin;  FILE *infile = stdin;
1511  int options = 0;  int options = 0;
1512  int study_options = 0;  int study_options = 0;
1513    int default_find_match_limit = FALSE;
1514  int op = 1;  int op = 1;
1515  int timeit = 0;  int timeit = 0;
1516    int timeitm = 0;
1517  int showinfo = 0;  int showinfo = 0;
1518  int showstore = 0;  int showstore = 0;
1519    int force_study = -1;
1520    int force_study_options = 0;
1521    int quiet = 0;
1522  int size_offsets = 45;  int size_offsets = 45;
1523  int size_offsets_max;  int size_offsets_max;
1524  int *offsets;  int *offsets = NULL;
1525  #if !defined NOPOSIX  #if !defined NOPOSIX
1526  int posix = 0;  int posix = 0;
1527  #endif  #endif
1528  int debug = 0;  int debug = 0;
1529  int done = 0;  int done = 0;
1530    int all_use_dfa = 0;
1531    int yield = 0;
1532    int stack_size;
1533    
1534  unsigned char *buffer;  pcre_jit_stack *jit_stack = NULL;
 unsigned char *dbuffer;  
1535    
1536  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* These vectors store, end-to-end, a list of captured substring names. Assume
1537  when I am debugging. */  that 1024 is plenty long enough for the few names we'll be testing. */
1538    
1539  buffer = (unsigned char *)malloc(BUFFER_SIZE);  pcre_uchar copynames[1024];
1540  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  pcre_uchar getnames[1024];
 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1541    
1542  #if defined(_WIN32) || defined(WIN32)  pcre_uchar *copynamesptr;
1543  _setmode( _fileno( stdout ), 0x8000 );  pcre_uchar *getnamesptr;
1544  #endif  /* defined(_WIN32) || defined(WIN32) */  
1545    /* Get buffers from malloc() so that valgrind will check their misuse when
1546    debugging. They grow automatically when very long lines are read. The 16-bit
1547    buffer (buffer16) is obtained only if needed. */
1548    
1549    buffer = (pcre_uint8 *)malloc(buffer_size);
1550    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1551    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1552    
1553    /* The outfile variable is static so that new_malloc can use it. */
1554    
1555  outfile = stdout;  outfile = stdout;
1556    
1557    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1558    library to translate CRLF into a single LF character. At least, that's what
1559    I've been told: never having used Windows I take this all on trust. Originally
1560    it set 0x8000, but then I was advised that _O_BINARY was better. */
1561    
1562    #if defined(_WIN32) || defined(WIN32)
1563    _setmode( _fileno( stdout ), _O_BINARY );
1564    #endif
1565    
1566  /* Scan options */  /* Scan options */
1567    
1568  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1569    {    {
1570    unsigned char *endptr;    pcre_uint8 *endptr;
1571    
1572    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1573      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1574    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-s+") == 0)
1575        {
1576        force_study = 1;
1577        force_study_options = PCRE_STUDY_JIT_COMPILE;
1578        }
1579    #ifdef SUPPORT_PCRE16
1580      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1581    #endif
1582    
1583      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1584      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1585    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1586    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1587      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1588    #if !defined NODFA
1589      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1590    #endif
1591    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1592        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1593          *endptr == 0))          *endptr == 0))
1594      {      {
1595      op++;      op++;
1596      argc--;      argc--;
1597      }      }
1598      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1599        {
1600        int both = argv[op][2] == 0;
1601        int temp;
1602        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1603                         *endptr == 0))
1604          {
1605          timeitm = temp;
1606          op++;
1607          argc--;
1608          }
1609        else timeitm = LOOPREPEAT;
1610        if (both) timeit = timeitm;
1611        }
1612      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1613          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1614            *endptr == 0))
1615        {
1616    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1617        printf("PCRE: -S not supported on this OS\n");
1618        exit(1);
1619    #else
1620        int rc;
1621        struct rlimit rlim;
1622        getrlimit(RLIMIT_STACK, &rlim);
1623        rlim.rlim_cur = stack_size * 1024 * 1024;
1624        rc = setrlimit(RLIMIT_STACK, &rlim);
1625        if (rc != 0)
1626          {
1627        printf("PCRE: setrlimit() failed with error %d\n", rc);
1628        exit(1);
1629          }
1630        op++;
1631        argc--;
1632    #endif
1633        }
1634  #if !defined NOPOSIX  #if !defined NOPOSIX
1635    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1636  #endif  #endif
1637    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1638      {      {
1639      int rc;      int rc;
1640        unsigned long int lrc;
1641      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1642      printf("Compiled with\n");      printf("Compiled with\n");
1643    
1644    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1645    are set, either both UTFs are supported or both are not supported. */
1646    
1647    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1648        printf("  8-bit and 16-bit support\n");
1649        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1650        if (rc)
1651          printf("  UTF-8 and UTF-16 support\n");
1652        else
1653          printf("  No UTF-8 or UTF-16 support\n");
1654    #elif defined SUPPORT_PCRE8
1655        printf("  8-bit support only\n");
1656      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1657      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1658    #else
1659        printf("  16-bit support only\n");
1660        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1661        printf("  %sUTF-16 support\n", rc? "" : "No ");
1662    #endif
1663    
1664      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1665      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1666        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1667        if (rc)
1668          printf("  Just-in-time compiler support\n");
1669        else
1670          printf("  No just-in-time compiler support\n");
1671      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1672      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1673        in EBCDIC environments. CR is 13 and NL is 10. */
1674        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1675          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1676          (rc == -2)? "ANYCRLF" :
1677          (rc == -1)? "ANY" : "???");
1678        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1679        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1680                                         "all Unicode newlines");
1681      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1682      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1683      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1684      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1685      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1686      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1687        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1688        printf("  Default recursion depth limit = %ld\n", lrc);
1689      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1690      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1691      exit(0);      goto EXIT;
1692        }
1693      else if (strcmp(argv[op], "-help") == 0 ||
1694               strcmp(argv[op], "--help") == 0)
1695        {
1696        usage();
1697        goto EXIT;
1698      }      }
1699    else    else
1700      {      {
1701      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1702      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1703      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
1704      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1705      }      }
1706    op++;    op++;
1707    argc--;    argc--;
# Line 580  offsets = (int *)malloc(size_offsets_max Line 1714  offsets = (int *)malloc(size_offsets_max
1714  if (offsets == NULL)  if (offsets == NULL)
1715    {    {
1716    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1717      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1718    return 1;    yield = 1;
1719      goto EXIT;
1720    }    }
1721    
1722  /* Sort out the input and output files */  /* Sort out the input and output files */
1723    
1724  if (argc > 1)  if (argc > 1)
1725    {    {
1726    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1727    if (infile == NULL)    if (infile == NULL)
1728      {      {
1729      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1730      return 1;      yield = 1;
1731        goto EXIT;
1732      }      }
1733    }    }
1734    
1735  if (argc > 2)  if (argc > 2)
1736    {    {
1737    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1738    if (outfile == NULL)    if (outfile == NULL)
1739      {      {
1740      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1741      return 1;      yield = 1;
1742        goto EXIT;
1743      }      }
1744    }    }
1745    
1746  /* Set alternative malloc function */  /* Set alternative malloc function */
1747    
1748    #ifdef SUPPORT_PCRE8
1749  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1750  pcre_free = new_free;  pcre_free = new_free;
1751  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1752  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1753    #endif
1754    
1755    #ifdef SUPPORT_PCRE16
1756    pcre16_malloc = new_malloc;
1757    pcre16_free = new_free;
1758    pcre16_stack_malloc = stack_malloc;
1759    pcre16_stack_free = stack_free;
1760    #endif
1761    
1762  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1763    
1764  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1765    
1766  /* Main loop */  /* Main loop */
1767    
# Line 630  while (!done) Line 1776  while (!done)
1776  #endif  #endif
1777    
1778    const char *error;    const char *error;
1779    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
1780    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
1781    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
1782      const pcre_uint8 *tables = NULL;
1783    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1784    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1785      int do_allcaps = 0;
1786      int do_mark = 0;
1787    int do_study = 0;    int do_study = 0;
1788      int no_force_study = 0;
1789    int do_debug = debug;    int do_debug = debug;
1790    int do_G = 0;    int do_G = 0;
1791    int do_g = 0;    int do_g = 0;
1792    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1793    int do_showrest = 0;    int do_showrest = 0;
1794      int do_showcaprest = 0;
1795    int do_flip = 0;    int do_flip = 0;
1796    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1797    
1798    use_utf8 = 0;    use_utf8 = 0;
1799      debug_lengths = 1;
1800    
1801    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1802    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1803    fflush(outfile);    fflush(outfile);
1804    
# Line 659  while (!done) Line 1810  while (!done)
1810    
1811    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1812      {      {
1813      unsigned long int magic;      unsigned long int magic, get_options;
1814      uschar sbuf[8];      pcre_uint8 sbuf[8];
1815      FILE *f;      FILE *f;
1816    
1817      p++;      p++;
# Line 683  while (!done) Line 1834  while (!done)
1834        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1835    
1836      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1837      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1838    
1839      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1840    
# Line 702  while (!done) Line 1853  while (!done)
1853          }          }
1854        }        }
1855    
1856      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1857        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1858    
1859      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1860    
1861      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1862      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1863    
1864      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1865    
1866      if (true_study_size != 0)      if (true_study_size != 0)
1867        {        {
# Line 726  while (!done) Line 1877  while (!done)
1877          {          {
1878          FAIL_READ:          FAIL_READ:
1879          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1880          if (extra != NULL) new_free(extra);          if (extra != NULL)
1881              {
1882              PCRE_FREE_STUDY(extra);
1883              }
1884          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1885          fclose(f);          fclose(f);
1886          continue;          continue;
# Line 747  while (!done) Line 1901  while (!done)
1901    
1902    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1903      {      {
1904      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1905      goto SKIP_DATA;      goto SKIP_DATA;
1906      }      }
1907    
1908    pp = p;    pp = p;
1909      poffset = (int)(p - buffer);
1910    
1911    for(;;)    for(;;)
1912      {      {
# Line 762  while (!done) Line 1917  while (!done)
1917        pp++;        pp++;
1918        }        }
1919      if (*pp != 0) break;      if (*pp != 0) break;
1920        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1921        {        {
1922        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1923        done = 1;        done = 1;
# Line 780  while (!done) Line 1926  while (!done)
1926      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1927      }      }
1928    
1929      /* The buffer may have moved while being extended; reset the start of data
1930      pointer to the correct relative point in the buffer. */
1931    
1932      p = buffer + poffset;
1933    
1934    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1935    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1936    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 802  while (!done) Line 1953  while (!done)
1953      {      {
1954      switch (*pp++)      switch (*pp++)
1955        {        {
1956          case 'f': options |= PCRE_FIRSTLINE; break;
1957        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1958        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1959        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1960        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1961        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1962    
1963        case '+': do_showrest = 1; break;        case '+':
1964          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1965          break;
1966    
1967          case '=': do_allcaps = 1; break;
1968        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1969          case 'B': do_debug = 1; break;
1970        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1971        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1972        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1973        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1974        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1975        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1976          case 'J': options |= PCRE_DUPNAMES; break;
1977          case 'K': do_mark = 1; break;
1978        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1979        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1980    
# Line 823  while (!done) Line 1982  while (!done)
1982        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1983  #endif  #endif
1984    
1985        case 'S': do_study = 1; break;        case 'S':
1986          if (do_study == 0)
1987            {
1988            do_study = 1;
1989            if (*pp == '+')
1990              {
1991              study_options |= PCRE_STUDY_JIT_COMPILE;
1992              pp++;
1993              }
1994            }
1995          else
1996            {
1997            do_study = 0;
1998            no_force_study = 1;
1999            }
2000          break;
2001    
2002        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2003          case 'W': options |= PCRE_UCP; break;
2004        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2005          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2006          case 'Z': debug_lengths = 0; break;
2007        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
2008        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2009    
2010          case 'T':
2011          switch (*pp++)
2012            {
2013            case '0': tables = tables0; break;
2014            case '1': tables = tables1; break;
2015    
2016            case '\r':
2017            case '\n':
2018            case ' ':
2019            case 0:
2020            fprintf(outfile, "** Missing table number after /T\n");
2021            goto SKIP_DATA;
2022    
2023            default:
2024            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2025            goto SKIP_DATA;
2026            }
2027          break;
2028    
2029        case 'L':        case 'L':
2030        ppp = pp;        ppp = pp;
2031        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2032          /* The 0 (NUL) test is just in case this is an unterminated line. */
2033          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2034        *ppp = 0;        *ppp = 0;
2035        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2036          {          {
2037          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2038          goto SKIP_DATA;          goto SKIP_DATA;
2039          }          }
2040          locale_set = 1;
2041        tables = pcre_maketables();        tables = pcre_maketables();
2042        pp = ppp;        pp = ppp;
2043        break;        break;
# Line 849  while (!done) Line 2049  while (!done)
2049        *pp = 0;        *pp = 0;
2050        break;        break;
2051    
2052        case '\n': case ' ': break;        case '<':
2053            {
2054            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2055              {
2056              options |= PCRE_JAVASCRIPT_COMPAT;
2057              pp += 3;
2058              }
2059            else
2060              {
2061              int x = check_newline(pp, outfile);
2062              if (x == 0) goto SKIP_DATA;
2063              options |= x;
2064              while (*pp++ != '>');
2065              }
2066            }
2067          break;
2068    
2069          case '\r':                      /* So that it works in Windows */
2070          case '\n':
2071          case ' ':
2072          break;
2073    
2074        default:        default:
2075        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 859  while (!done) Line 2079  while (!done)
2079    
2080    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2081    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2082    local character tables. */    local character tables. Neither does it have 16-bit support. */
2083    
2084  #if !defined NOPOSIX  #if !defined NOPOSIX
2085    if (posix || do_posix)    if (posix || do_posix)
# Line 869  while (!done) Line 2089  while (!done)
2089    
2090      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2091      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2092        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2093        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2094        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2095        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2096        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2097    
2098        first_gotten_store = 0;
2099      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2100    
2101      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 876  while (!done) Line 2103  while (!done)
2103    
2104      if (rc != 0)      if (rc != 0)
2105        {        {
2106        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2107        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2108        goto SKIP_DATA;        goto SKIP_DATA;
2109        }        }
# Line 888  while (!done) Line 2115  while (!done)
2115  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2116    
2117      {      {
2118      if (timeit)      unsigned long int get_options;
2119    
2120        /* In 16-bit mode, convert the input. */
2121    
2122    #ifdef SUPPORT_PCRE16
2123        if (use_pcre16)
2124          {
2125          (void)to16(p, options & PCRE_UTF8, (int)strlen((char *)p));
2126          p = (pcre_uint8 *)buffer16;
2127          }
2128    #endif
2129    
2130        /* Compile many times when timing */
2131    
2132        if (timeit > 0)
2133        {        {
2134        register int i;        register int i;
2135        clock_t time_taken;        clock_t time_taken;
2136        clock_t start_time = clock();        clock_t start_time = clock();
2137        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2138          {          {
2139          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2140          if (re != NULL) free(re);          if (re != NULL) free(re);
2141          }          }
2142        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2143        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2144          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
2145            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2146        }        }
2147    
2148      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2149        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2150    
2151      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2152      if non-interactive. */      if non-interactive. */
# Line 917  while (!done) Line 2159  while (!done)
2159          {          {
2160          for (;;)          for (;;)
2161            {            {
2162            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2163              {              {
2164              done = 1;              done = 1;
2165              goto CONTINUE;              goto CONTINUE;
# Line 931  while (!done) Line 2173  while (!done)
2173        goto CONTINUE;        goto CONTINUE;
2174        }        }
2175    
2176      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2177      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2178      returns only limited data. Check that it agrees with the newer one. */      lines. */
2179    
2180      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2181        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2182    
2183      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2184      and remember the store that was got. */      and remember the store that was got. */
2185    
2186      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
2187      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2188    
2189        /* Output code size information if requested */
2190    
2191        if (log_store)
2192          fprintf(outfile, "Memory allocation (code space): %d\n",
2193            (int)(first_gotten_store -
2194                  sizeof(real_pcre) -
2195                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2196    
2197      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
2198      help with the matching. */      help with the matching, unless the pattern has the SS option, which
2199        suppresses the effect of /S (used for a few test patterns where studying is
2200        never sensible). */
2201    
2202      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
2203        {        {
2204        if (timeit)        if (timeit > 0)
2205          {          {
2206          register int i;          register int i;
2207          clock_t time_taken;          clock_t time_taken;
2208          clock_t start_time = clock();          clock_t start_time = clock();
2209          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
2210            extra = pcre_study(re, study_options, &error);            {
2211              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2212              }
2213          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2214          if (extra != NULL) free(extra);          if (extra != NULL)
2215          fprintf(outfile, "  Study time %.3f milliseconds\n",            {
2216            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            PCRE_FREE_STUDY(extra);
2217              }
2218            fprintf(outfile, "  Study time %.4f milliseconds\n",
2219              (((double)time_taken * 1000.0) / (double)timeit) /
2220              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2221          }          }
2222        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2223        if (error != NULL)        if (error != NULL)
2224          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2225        else if (extra != NULL)        else if (extra != NULL)
2226            {
2227          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2228            if (log_store)
2229              {
2230              size_t jitsize;
2231              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2232              if (jitsize != 0)
2233                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2234              }
2235            }
2236          }
2237    
2238        /* If /K was present, we set up for handling MARK data. */
2239    
2240        if (do_mark)
2241          {
2242          if (extra == NULL)
2243            {
2244            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2245            extra->flags = 0;
2246            }
2247          extra->mark = &markptr;
2248          extra->flags |= PCRE_EXTRA_MARK;
2249        }        }
2250    
2251      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
# Line 980  while (!done) Line 2256  while (!done)
2256      if (do_flip)      if (do_flip)
2257        {        {
2258        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
2259        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
2260            byteflip(rre->magic_number, sizeof(rre->magic_number));
2261        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
2262        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
2263        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2264        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
2265        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2266        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
2267        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2268          rre->first_char =
2269            (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2270          rre->req_char =
2271            (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2272          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2273          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
2274        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2275          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
2276        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2277            sizeof(rre->name_count));
2278    
2279        if (extra != NULL)        if (extra != NULL)
2280          {          {
2281          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2282          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2283          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2284            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2285          }          }
2286        }        }
2287    
2288      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
2289    
2290      SHOW_INFO:      SHOW_INFO:
2291    
2292        if (do_debug)
2293          {
2294          fprintf(outfile, "------------------------------------------------------------------\n");
2295    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2296          if (use_pcre16)
2297            pcre16_printint(re, outfile, debug_lengths);
2298          else
2299            pcre_printint(re, outfile, debug_lengths);
2300    #elif defined SUPPORT_PCRE8
2301          pcre_printint(re, outfile, debug_lengths);
2302    #else
2303          pcre16_printint(re, outfile, debug_lengths);
2304    #endif
2305          }
2306    
2307        /* We already have the options in get_options (see above) */
2308    
2309      if (do_showinfo)      if (do_showinfo)
2310        {        {
2311        unsigned long int get_options, all_options;        unsigned long int all_options;
2312    #if !defined NOINFOCHECK
2313        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2314        int count, backrefmax, first_char, need_char;  #endif
2315          int count, backrefmax, first_char, need_char, okpartial, jchanged,
2316            hascrorlf;
2317        int nameentrysize, namecount;        int nameentrysize, namecount;
2318        const uschar *nametable;        const pcre_uchar *nametable;
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
2319    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2320        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2321        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2322        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1028  while (!done) Line 2325  while (!done)
2325        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2326        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2327        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2328          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2329        old_count = pcre_info(re, &old_options, &old_first_char);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2330        if (count < 0) fprintf(outfile,        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2331          "Error %d from pcre_info()\n", count);  
2332        else        /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2333          {        that it gives the same results as the new function. */
2334          if (old_count != count) fprintf(outfile,  
2335            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  #if !defined NOINFOCHECK
2336              old_count);        if (!use_pcre16)
2337            {
2338          if (old_first_char != first_char) fprintf(outfile,          old_count = pcre_info(re, &old_options, &old_first_char);
2339            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",          if (count < 0) fprintf(outfile,
2340              first_char, old_first_char);            "Error %d from pcre_info()\n", count);
2341            else
2342          if (old_options != (int)get_options) fprintf(outfile,            {
2343            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            if (old_count != count) fprintf(outfile,
2344              get_options, old_options);              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2345                  old_count);
2346    
2347              if (old_first_char != first_char) fprintf(outfile,
2348                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2349                  first_char, old_first_char);
2350    
2351              if (old_options != (int)get_options) fprintf(outfile,
2352                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2353                  get_options, old_options);
2354              }
2355          }          }
2356    #endif
2357    
2358        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
2359          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2360          size, regex_gotten_store);          (int)size, (int)regex_gotten_store);
2361    
2362        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
2363        if (backrefmax > 0)        if (backrefmax > 0)
# Line 1067  while (!done) Line 2375  while (!done)
2375            }            }
2376          }          }
2377    
2378        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2379        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2380    
2381        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2382        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
2383    
2384        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2385          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2386            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2387            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2388            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2389            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2390              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2391            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2392              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2393              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2394            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2395            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2396            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2397              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2398            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2399            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2400              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2401              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2402              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2403    
2404          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2405    
2406          switch (get_options & PCRE_NEWLINE_BITS)
2407            {
2408            case PCRE_NEWLINE_CR:
2409            fprintf(outfile, "Forced newline sequence: CR\n");
2410            break;
2411    
2412            case PCRE_NEWLINE_LF:
2413            fprintf(outfile, "Forced newline sequence: LF\n");
2414            break;
2415    
2416            case PCRE_NEWLINE_CRLF:
2417            fprintf(outfile, "Forced newline sequence: CRLF\n");
2418            break;
2419    
2420            case PCRE_NEWLINE_ANYCRLF:
2421            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2422            break;
2423    
2424            case PCRE_NEWLINE_ANY:
2425            fprintf(outfile, "Forced newline sequence: ANY\n");
2426            break;
2427    
2428        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          default:
2429          fprintf(outfile, "Case state changes\n");          break;
2430            }
2431    
2432        if (first_char == -1)        if (first_char == -1)
2433          {          {
2434          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
2435          }          }
2436        else if (first_char < 0)        else if (first_char < 0)
2437          {          {
# Line 1105  while (!done) Line 2439  while (!done)
2439          }          }
2440        else        else
2441          {          {
2442          int ch = first_char & 255;          const char *caseless =
2443          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2444            "" : " (caseless)";            "" : " (caseless)";
2445          if (isprint(ch))  
2446            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
2447              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2448          else          else
2449            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2450          }          }
2451    
2452        if (need_char < 0)        if (need_char < 0)
# Line 1120  while (!done) Line 2455  while (!done)
2455          }          }
2456        else        else
2457          {          {
2458          int ch = need_char & 255;          const char *caseless =
2459          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2460            "" : " (caseless)";            "" : " (caseless)";
2461          if (isprint(ch))  
2462            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
2463              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2464          else          else
2465            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2466          }          }
2467    
2468        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2469        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2470        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2471        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2472          information unless -i or -d was also present. This means that, except
2473          when auto-callouts are involved, the output from runs with and without
2474          -s should be identical. */
2475    
2476        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2477          {          {
2478          if (extra == NULL)          if (extra == NULL)
2479            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2480          else          else
2481            {            {
2482            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2483            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2484    
2485              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2486              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2487    
2488              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2489            if (start_bits == NULL)            if (start_bits == NULL)
2490              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2491            else            else
2492              {              {
2493              int i;              int i;
# Line 1159  while (!done) Line 2502  while (!done)
2502                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2503                    c = 2;                    c = 2;
2504                    }                    }
2505                  if (isprint(i) && i != ' ')                  if (PRINTOK(i) && i != ' ')
2506                    {                    {
2507                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2508                    c += 2;                    c += 2;
# Line 1174  while (!done) Line 2517  while (!done)
2517              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2518              }              }
2519            }            }
2520    
2521            /* Show this only if the JIT was set by /S, not by -s. */
2522    
2523            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2524              {
2525              int jit;
2526              new_info(re, extra, PCRE_INFO_JIT, &jit);
2527              if (jit)
2528                fprintf(outfile, "JIT study was successful\n");
2529              else
2530    #ifdef SUPPORT_JIT
2531                fprintf(outfile, "JIT study was not successful\n");
2532    #else
2533                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2534    #endif
2535              }
2536          }          }
2537        }        }
2538    
# Line 1190  while (!done) Line 2549  while (!done)
2549          }          }
2550        else        else
2551          {          {
2552          uschar sbuf[8];          pcre_uint8 sbuf[8];
2553          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2554          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2555          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2556          sbuf[3] = (true_size)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
2557    
2558          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2559          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2560          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2561          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2562    
2563          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2564              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1208  while (!done) Line 2567  while (!done)
2567            }            }
2568          else          else
2569            {            {
2570            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2571    
2572              /* If there is study data, write it. */
2573    
2574            if (extra != NULL)            if (extra != NULL)
2575              {              {
2576              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1222  while (!done) Line 2584  while (!done)
2584            }            }
2585          fclose(f);          fclose(f);
2586          }          }
2587    
2588          new_free(re);
2589          if (extra != NULL)
2590            {
2591            PCRE_FREE_STUDY(extra);
2592            }
2593          if (locale_set)
2594            {
2595            new_free((void *)tables);
2596            setlocale(LC_CTYPE, "C");
2597            locale_set = 0;
2598            }
2599        continue;  /* With next regex */        continue;  /* With next regex */
2600        }        }
2601      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1230  while (!done) Line 2604  while (!done)
2604    
2605    for (;;)    for (;;)
2606      {      {
2607      unsigned char *q;      pcre_uint8 *q;
2608      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
2609      int *use_offsets = offsets;      int *use_offsets = offsets;
2610      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2611      int callout_data = 0;      int callout_data = 0;
2612      int callout_data_set = 0;      int callout_data_set = 0;
2613      int count, c;      int count, c;
2614      int copystrings = 0;      int copystrings = 0;
2615      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2616      int getstrings = 0;      int getstrings = 0;
2617      int getlist = 0;      int getlist = 0;
2618      int gmatched = 0;      int gmatched = 0;
2619      int start_offset = 0;      int start_offset = 0;
2620        int start_offset_sign = 1;
2621      int g_notempty = 0;      int g_notempty = 0;
2622        int use_dfa = 0;
2623    
2624      options = 0;      options = 0;
2625    
2626        *copynames = 0;
2627        *getnames = 0;
2628    
2629        copynamesptr = copynames;
2630        getnamesptr = getnames;
2631    
2632      pcre_callout = callout;      pcre_callout = callout;
2633      first_callout = 1;      first_callout = 1;
2634        last_callout_mark = NULL;
2635      callout_extra = 0;      callout_extra = 0;
2636      callout_count = 0;      callout_count = 0;
2637      callout_fail_count = 999999;      callout_fail_count = 999999;
2638      callout_fail_id = -1;      callout_fail_id = -1;
2639      show_malloc = 0;      show_malloc = 0;
2640    
2641      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2642      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2643    
2644        len = 0;
2645        for (;;)
2646        {        {
2647        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2648        goto CONTINUE;          {
2649            if (len > 0)    /* Reached EOF without hitting a newline */
2650              {
2651              fprintf(outfile, "\n");
2652              break;
2653              }
2654            done = 1;
2655            goto CONTINUE;
2656            }
2657          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2658          len = (int)strlen((char *)buffer);
2659          if (buffer[len-1] == '\n') break;
2660        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2661    
     len = (int)strlen((char *)buffer);  
2662      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2663      buffer[len] = 0;      buffer[len] = 0;
2664      if (len == 0) break;      if (len == 0) break;
# Line 1271  while (!done) Line 2666  while (!done)
2666      p = buffer;      p = buffer;
2667      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2668    
2669      q = dbuffer;      bptr = q = dbuffer;
2670      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2671        {        {
2672        int i = 0;        int i = 0;
# Line 1293  while (!done) Line 2688  while (!done)
2688          c -= '0';          c -= '0';
2689          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2690            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2691    
2692    #if !defined NOUTF8
2693            if (use_utf8 && c > 255)
2694              {
2695              pcre_uint8 buff8[8];
2696              int ii, utn;
2697              utn = ord2utf8(c, buff8);
2698              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2699              c = buff8[ii];   /* Last byte */
2700              }
2701    #endif
2702          break;          break;
2703    
2704          case 'x':          case 'x':
2705    
2706          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2707    
2708    #if !defined NOUTF8
2709          if (*p == '{')          if (*p == '{')
2710            {            {
2711            unsigned char *pt = p;            pcre_uint8 *pt = p;
2712            c = 0;            c = 0;
2713            while (isxdigit(*(++pt)))  
2714              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2715              when isxdigit() is a macro that refers to its argument more than
2716              once. This is banned by the C Standard, but apparently happens in at
2717              least one MacOS environment. */
2718    
2719              for (pt++; isxdigit(*pt); pt++)
2720                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2721            if (*pt == '}')            if (*pt == '}')
2722              {              {
2723              unsigned char buff8[8];              pcre_uint8 buff8[8];
2724              int ii, utn;              int ii, utn;
2725              utn = ord2utf8(c, buff8);              if (use_utf8)
2726              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2727              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2728                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2729                  c = buff8[ii];   /* Last byte */
2730                  }
2731                else
2732                 {
2733                 if (c > 255)
2734                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2735                     "UTF-8 mode is not enabled.\n"
2736                     "** Truncation will probably give the wrong result.\n", c);
2737                 }
2738              p = pt + 1;              p = pt + 1;
2739              break;              break;
2740              }              }
2741            /* Not correct form; fall through */            /* Not correct form; fall through */
2742            }            }
2743    #endif
2744    
2745          /* Ordinary \x */          /* Ordinary \x */
2746    
2747          c = 0;          c = 0;
2748          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2749            {            {
2750            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2751            p++;            p++;
2752            }            }
2753          break;          break;
# Line 1333  while (!done) Line 2757  while (!done)
2757          continue;          continue;
2758    
2759          case '>':          case '>':
2760            if (*p == '-')
2761              {
2762              start_offset_sign = -1;
2763              p++;
2764              }
2765          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2766            start_offset *= start_offset_sign;
2767          continue;          continue;
2768    
2769          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1352  while (!done) Line 2782  while (!done)
2782            }            }
2783          else if (isalnum(*p))          else if (isalnum(*p))
2784            {            {
2785            uschar name[256];            pcre_uchar *npp = copynamesptr;
           uschar *npp = name;  
2786            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2787              *npp++ = 0;
2788            *npp = 0;            *npp = 0;
2789            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2790            if (n < 0)            if (n < 0)
2791              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2792            else copystrings |= 1 << n;            copynamesptr = npp;
2793            }            }
2794          else if (*p == '+')          else if (*p == '+')
2795            {            {
# Line 1397  while (!done) Line 2827  while (!done)
2827            }            }
2828          continue;          continue;
2829    
2830    #if !defined NODFA
2831            case 'D':
2832    #if !defined NOPOSIX
2833            if (posix || do_posix)
2834              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2835            else
2836    #endif
2837              use_dfa = 1;
2838            continue;
2839    #endif
2840    
2841    #if !defined NODFA
2842            case 'F':
2843            options |= PCRE_DFA_SHORTEST;
2844            continue;
2845    #endif
2846    
2847          case 'G':          case 'G':
2848          if (isdigit(*p))          if (isdigit(*p))
2849            {            {
# Line 1405  while (!done) Line 2852  while (!done)
2852            }            }
2853          else if (isalnum(*p))          else if (isalnum(*p))
2854            {            {
2855            uschar name[256];            pcre_uchar *npp = getnamesptr;
           uschar *npp = name;  
2856            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2857              *npp++ = 0;
2858            *npp = 0;            *npp = 0;
2859            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2860            if (n < 0)            if (n < 0)
2861              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2862            else getstrings |= 1 << n;            getnamesptr = npp;
2863              }
2864            continue;
2865    
2866            case 'J':
2867            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2868            if (extra != NULL
2869                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2870                && extra->executable_jit != NULL)
2871              {
2872              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2873              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2874              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2875            }            }
2876          continue;          continue;
2877    
# Line 1425  while (!done) Line 2884  while (!done)
2884          continue;          continue;
2885    
2886          case 'N':          case 'N':
2887          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2888              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2889            else
2890              options |= PCRE_NOTEMPTY;
2891          continue;          continue;
2892    
2893          case 'O':          case 'O':
# Line 1438  while (!done) Line 2900  while (!done)
2900            if (offsets == NULL)            if (offsets == NULL)
2901              {              {
2902              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2903                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2904              return 1;              yield = 1;
2905                goto EXIT;
2906              }              }
2907            }            }
2908          use_size_offsets = n;          use_size_offsets = n;
# Line 1447  while (!done) Line 2910  while (!done)
2910          continue;          continue;
2911    
2912          case 'P':          case 'P':
2913          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2914              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2915            continue;
2916    
2917            case 'Q':
2918            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2919            if (extra == NULL)
2920              {
2921              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2922              extra->flags = 0;
2923              }
2924            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2925            extra->match_limit_recursion = n;
2926          continue;          continue;
2927    
2928            case 'q':
2929            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2930            if (extra == NULL)
2931              {
2932              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2933              extra->flags = 0;
2934              }
2935            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2936            extra->match_limit = n;
2937            continue;
2938    
2939    #if !defined NODFA
2940            case 'R':
2941            options |= PCRE_DFA_RESTART;
2942            continue;
2943    #endif
2944    
2945          case 'S':          case 'S':
2946          show_malloc = 1;          show_malloc = 1;
2947          continue;          continue;
2948    
2949            case 'Y':
2950            options |= PCRE_NO_START_OPTIMIZE;
2951            continue;
2952    
2953          case 'Z':          case 'Z':
2954          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2955          continue;          continue;
# Line 1461  while (!done) Line 2957  while (!done)
2957          case '?':          case '?':
2958          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2959          continue;          continue;
2960    
2961            case '<':
2962              {
2963              int x = check_newline(p, outfile);
2964              if (x == 0) goto NEXT_DATA;
2965              options |= x;
2966              while (*p++ != '>');
2967              }
2968            continue;
2969          }          }
2970        *q++ = c;        *q++ = c;
2971        }        }
2972      *q = 0;      *q = 0;
2973      len = q - dbuffer;      len = (int)(q - dbuffer);
2974    
2975        /* Move the data to the end of the buffer so that a read over the end of
2976        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2977        we are using the POSIX interface, we must include the terminating zero. */
2978    
2979    #if !defined NOPOSIX
2980        if (posix || do_posix)
2981          {
2982          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2983          bptr += buffer_size - len - 1;
2984          }
2985        else
2986    #endif
2987          {
2988          memmove(bptr + buffer_size - len, bptr, len);
2989          bptr += buffer_size - len;
2990          }
2991    
2992        if ((all_use_dfa || use_dfa) && find_match_limit)
2993          {
2994          printf("**Match limit not relevant for DFA matching: ignored\n");
2995          find_match_limit = 0;
2996          }
2997    
2998      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2999      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
# Line 1480  while (!done) Line 3008  while (!done)
3008          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3009        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3010        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3011          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3012    
3013        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3014    
3015        if (rc != 0)        if (rc != 0)
3016          {          {
3017          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3018          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3019          }          }
3020          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3021                  != 0)
3022            {
3023            fprintf(outfile, "Matched with REG_NOSUB\n");
3024            }
3025        else        else
3026          {          {
3027          size_t i;          size_t i;
# Line 1496  while (!done) Line 3030  while (!done)
3030            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3031              {              {
3032              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3033              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3034                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3035              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3036              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3037                {                {
3038                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3039                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3040                  outfile);                  outfile);
3041                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3042                }                }
# Line 1510  while (!done) Line 3044  while (!done)
3044            }            }
3045          }          }
3046        free(pmatch);        free(pmatch);
3047          goto NEXT_DATA;
3048        }        }
3049    
3050    #endif  /* !defined NOPOSIX */
3051    
3052      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3053    
3054      else  #ifdef SUPPORT_PCRE16
3055  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3056          {
3057          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3058          bptr = (pcre_uint8 *)buffer16;
3059          }
3060    #endif
3061    
3062      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3063        {        {
3064        if (timeit)        markptr = NULL;
3065    
3066          if (timeitm > 0)
3067          {          {
3068          register int i;          register int i;
3069          clock_t time_taken;          clock_t time_taken;
3070          clock_t start_time = clock();          clock_t start_time = clock();
3071          for (i = 0; i < LOOPREPEAT; i++)  
3072            count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
3073            if (all_use_dfa || use_dfa)
3074              {
3075              int workspace[1000];
3076              for (i = 0; i < timeitm; i++)
3077                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3078                  options | g_notempty, use_offsets, use_size_offsets, workspace,
3079                  sizeof(workspace)/sizeof(int));
3080              }
3081            else
3082    #endif
3083    
3084            for (i = 0; i < timeitm; i++)
3085              {
3086              PCRE_EXEC(count, re, extra, bptr, len,
3087              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
3088              }
3089          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3090          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3091            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
3092              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3093          }          }
3094    
3095        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3096        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
3097          for the recursion limit. The match limits are relevant only to the normal
3098          running of pcre_exec(), so disable the JIT optimization. This makes it
3099          possible to run the same set of tests with and without JIT externally
3100          requested. */
3101    
3102        if (find_match_limit)        if (find_match_limit)
3103          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
3104          if (extra == NULL)          if (extra == NULL)
3105            {            {
3106            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3107            extra->flags = 0;            extra->flags = 0;
3108            }            }
3109          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
3110    
3111          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
3112              options|g_notempty, use_offsets, use_size_offsets,
3113              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3114              PCRE_ERROR_MATCHLIMIT, "match()");
3115    
3116            count = check_match_limit(re, extra, bptr, len, start_offset,
3117              options|g_notempty, use_offsets, use_size_offsets,
3118              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3119              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3120          }          }
3121    
3122        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1589  while (!done) Line 3130  while (!done)
3130            }            }
3131          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3132          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3133          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3134            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3135          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3136          }          }
# Line 1597  while (!done) Line 3138  while (!done)
3138        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
3139        value of match_limit. */        value of match_limit. */
3140    
3141        else  #if !defined NODFA
3142          else if (all_use_dfa || use_dfa)
3143          {          {
3144          count = pcre_exec(re, extra, (char *)bptr, len,          int workspace[1000];
3145            start_offset, options | g_notempty, use_offsets, use_size_offsets);          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3146              options | g_notempty, use_offsets, use_size_offsets, workspace,
3147              sizeof(workspace)/sizeof(int));
3148            if (count == 0)
3149              {
3150              fprintf(outfile, "Matched, but too many subsidiary matches\n");
3151              count = use_size_offsets/2;
3152              }
3153          }          }
3154    #endif
3155    
3156        if (count == 0)        else
3157          {          {
3158          fprintf(outfile, "Matched, but too many substrings\n");          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3159          count = use_size_offsets/3;            options | g_notempty, use_offsets, use_size_offsets);
3160            if (count == 0)
3161              {
3162              fprintf(outfile, "Matched, but too many substrings\n");
3163              count = use_size_offsets/3;
3164              }
3165          }          }
3166    
3167        /* Matched */        /* Matched */
3168    
3169        if (count >= 0)        if (count >= 0)
3170          {          {
3171          int i;          int i, maxcount;
3172    
3173    #if !defined NODFA
3174            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3175    #endif
3176              maxcount = use_size_offsets/3;
3177    
3178            /* This is a check against a lunatic return value. */
3179    
3180            if (count > maxcount)
3181              {
3182              fprintf(outfile,
3183                "** PCRE error: returned count %d is too big for offset size %d\n",
3184                count, use_size_offsets);
3185              count = use_size_offsets/3;
3186              if (do_g || do_G)
3187                {
3188                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3189                do_g = do_G = FALSE;        /* Break g/G loop */
3190                }
3191              }
3192    
3193            /* do_allcaps requests showing of all captures in the pattern, to check
3194            unset ones at the end. */
3195    
3196            if (do_allcaps)
3197              {
3198              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3199              count++;   /* Allow for full match */
3200              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3201              }
3202    
3203            /* Output the captured substrings */
3204    
3205          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3206            {            {
3207            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
3208                {
3209                if (use_offsets[i] != -1)
3210                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3211                    use_offsets[i], i);
3212                if (use_offsets[i+1] != -1)
3213                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3214                    use_offsets[i+1], i+1);
3215              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3216                }
3217            else            else
3218              {              {
3219              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3220              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr + use_offsets[i],
3221                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3222              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3223              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3224                {                {
3225                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3226                  {                PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3227                  fprintf(outfile, " 0+ ");                  outfile);
3228                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3229                }                }
3230              }              }
3231            }            }
3232    
3233            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3234    
3235          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3236            {            {
3237            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
3238              {              {
3239              char copybuffer[16];              char copybuffer[256];
3240              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3241                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
3242              if (rc < 0)              if (rc < 0)
# Line 1651  while (!done) Line 3246  while (!done)
3246              }              }
3247            }            }
3248    
3249            for (copynamesptr = copynames;
3250                 *copynamesptr != 0;
3251                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3252              {
3253              char copybuffer[256];
3254              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3255                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3256              if (rc < 0)
3257                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3258              else
3259                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3260              }
3261    
3262          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3263            {            {
3264            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1663  while (!done) Line 3271  while (!done)
3271              else              else
3272                {                {
3273                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
3274                pcre_free_substring(substring);                pcre_free_substring(substring);
3275                }                }
3276              }              }
3277            }            }
3278    
3279            for (getnamesptr = getnames;
3280                 *getnamesptr != 0;
3281                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3282              {
3283              const char *substring;
3284              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3285                count, (char *)getnamesptr, &substring);
3286              if (rc < 0)
3287                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3288              else
3289                {
3290                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
3291                pcre_free_substring(substring);
3292                }
3293              }
3294    
3295          if (getlist)          if (getlist)
3296            {            {
3297            const char **stringlist;            const char **stringlist;
# Line 1682  while (!done) Line 3305  while (!done)
3305                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3306              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3307                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3308              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3309              }              }
3310            }            }
# Line 1692  while (!done) Line 3314  while (!done)
3314    
3315        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
3316          {          {
3317          fprintf(outfile, "Partial match\n");          if (markptr == NULL) fprintf(outfile, "Partial match");
3318              else fprintf(outfile, "Partial match, mark=%s", markptr);
3319            if (use_size_offsets > 1)
3320              {
3321              fprintf(outfile, ": ");
3322              PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3323                outfile);
3324              }
3325            fprintf(outfile, "\n");
3326          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
3327          }          }
3328    
3329        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
3330        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
3331        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
3332        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
3333        offset values to achieve this. We won't be at the end of the string -  
3334        that was checked before setting g_notempty. */        Complication arises in the case when the newline convention is "any",
3335          "crlf", or "anycrlf". If the previous match was at the end of a line
3336          terminated by CRLF, an advance of one character just passes the \r,
3337          whereas we should prefer the longer newline sequence, as does the code in
3338          pcre_exec(). Fudge the offset value to achieve this. We check for a
3339          newline setting in the pattern; if none was set, use pcre_config() to
3340          find the default.
3341    
3342          Otherwise, in the case of UTF-8 matching, the advance must be one
3343          character, not one byte. */
3344    
3345        else        else
3346          {          {
3347          if (g_notempty != 0)          if (g_notempty != 0)
3348            {            {
3349            int onechar = 1;            int onechar = 1;
3350              unsigned int obits = ((real_pcre *)re)->options;
3351            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
3352            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
3353                {
3354                int d;
3355                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3356                /* Note that these values are always the ASCII ones, even in
3357                EBCDIC environments. CR = 13, NL = 10. */
3358                obits = (d == 13)? PCRE_NEWLINE_CR :
3359                        (d == 10)? PCRE_NEWLINE_LF :
3360                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3361                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
3362                        (d == -1)? PCRE_NEWLINE_ANY : 0;
3363                }
3364              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3365                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3366                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3367                  &&
3368                  start_offset < len - 1 &&
3369                  bptr[start_offset] == '\r' &&
3370                  bptr[start_offset+1] == '\n')
3371                onechar++;
3372              else if (use_utf8)
3373              {              {
3374              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3375                {                {
3376                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3377                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3378                }                }
3379              }              }
3380            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3381            }            }
3382          else          else
3383            {            {
3384            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3385              {              {
3386              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3387                if (gmatched == 0)
3388                  {
3389                  if (markptr == NULL) fprintf(outfile, "No match\n");
3390                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3391                  }
3392                break;
3393    
3394                case PCRE_ERROR_BADUTF8:
3395                case PCRE_ERROR_SHORTUTF8:
3396                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3397                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3398                if (use_size_offsets >= 2)
3399                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3400                    use_offsets[1]);
3401                fprintf(outfile, "\n");
3402                break;
3403    
3404                default:
3405                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3406                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3407                else
3408                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3409                break;
3410              }              }
3411            else fprintf(outfile, "Error %d\n", count);  
3412            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3413            }            }
3414          }          }
# Line 1737  while (!done) Line 3418  while (!done)
3418        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3419    
3420        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3421        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3422        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
3423        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3424        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3425        character. */        character. */
3426    
3427        g_notempty = 0;        g_notempty = 0;
3428    
3429        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3430          {          {
3431          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3432          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3433          }          }
3434    
3435        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1762  while (!done) Line 3444  while (!done)
3444          len -= use_offsets[1];          len -= use_offsets[1];
3445          }          }
3446        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
3447    
3448        NEXT_DATA: continue;
3449      }    /* End of loop for data lines */      }    /* End of loop for data lines */
3450    
3451    CONTINUE:    CONTINUE:
# Line 1770  while (!done) Line 3454  while (!done)
3454    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
3455  #endif  #endif
3456    
3457    if (re != NULL) free(re);    if (re != NULL) new_free(re);
3458    if (extra != NULL) free(extra);    if (extra != NULL)
   if (tables != NULL)  
3459      {      {
3460      free((void *)tables);      PCRE_FREE_STUDY(extra);
3461        }
3462      if (locale_set)
3463        {
3464        new_free((void *)tables);
3465      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3466        locale_set = 0;
3467        }
3468      if (jit_stack != NULL)
3469        {
3470        pcre_jit_stack_free(jit_stack);
3471        jit_stack = NULL;
3472      }      }
3473    }    }
3474    
3475  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
3476  return 0;  
3477    EXIT:
3478    
3479    if (infile != NULL && infile != stdin) fclose(infile);
3480    if (outfile != NULL && outfile != stdout) fclose(outfile);
3481    
3482    free(buffer);
3483    free(dbuffer);
3484    free(pbuffer);
3485    free(offsets);
3486    
3487    #ifdef SUPPORT_PCRE16
3488    if (buffer16 != NULL) free(buffer16);
3489    #endif
3490    
3491    return yield;
3492  }  }
3493    
3494  /* End */  /* End of pcretest.c */

Legend:
Removed from v.75  
changed lines
  Added in v.809

  ViewVC Help
Powered by ViewVC 1.1.5