/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC code/branches/pcre16/pcretest.c revision 805 by ph10, Wed Dec 14 16:49:20 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places.  been extended and consequently is now rather, er, *very* untidy in places.
8    
9  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
10  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  /* We need the internal info for displaying the results of pcre_study(). Also  #ifdef SUPPORT_LIBREADLINE
52  for getting the opcodes for showing compiled code. */  #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef __BORLANDC__
85  #include "internal.h"  #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90    #else
91    #include <sys/time.h>          /* These two includes are needed */
92    #include <sys/resource.h>      /* for setrlimit(). */
93    #define INPUT_MODE   "rb"
94    #define OUTPUT_MODE  "wb"
95    #endif
96    
97    
98    /* We have to include pcre_internal.h because we need the internal info for
99    displaying the results of pcre_study() and we also need to know about the
100    internal macros, structures, and other internal data values; pcretest has
101    "inside information" compared to a program that strictly follows the PCRE API.
102    
103    Although pcre_internal.h does itself include pcre.h, we explicitly include it
104    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105    appropriately for an application, not for building PCRE. */
106    
107    #include "pcre.h"
108    #include "pcre_internal.h"
109    
110    /* The pcre_printint() function, which prints the internal form of a compiled
111    regex, is held in a separate file so that (a) it can be compiled in either
112    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
113    when that is compiled in debug mode. */
114    
115    #ifdef SUPPORT_PCRE8
116    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
117    #endif
118    #ifdef SUPPORT_PCRE16
119    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
120    #endif
121    
122    /* We need access to some of the data tables that PCRE uses. So as not to have
123    to keep two copies, we include the source file here, changing the names of the
124    external symbols to prevent clashes. */
125    
126    #define _pcre_ucp_gentype      ucp_gentype
127    #define _pcre_ucp_typerange    ucp_typerange
128    #define _pcre_utf8_table1      utf8_table1
129    #define _pcre_utf8_table1_size utf8_table1_size
130    #define _pcre_utf8_table2      utf8_table2
131    #define _pcre_utf8_table3      utf8_table3
132    #define _pcre_utf8_table4      utf8_table4
133    #define _pcre_utt              utt
134    #define _pcre_utt_size         utt_size
135    #define _pcre_utt_names        utt_names
136    #define _pcre_OP_lengths       OP_lengths
137    
138    #include "pcre_tables.c"
139    
140    /* The definition of the macro PRINTABLE, which determines whether to print an
141    output character as-is or as a hex value when showing compiled patterns, is
142    the same as in the printint.src file. We use it here in cases when the locale
143    has not been explicitly changed, so as to get consistent output from systems
144    that differ in their output from isprint() even in the "C" locale. */
145    
146    #ifdef EBCDIC
147    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
148    #else
149    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
150    #endif
151    
152    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
153    
154  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
155  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 58  Makefile. */ Line 159  Makefile. */
159  #include "pcreposix.h"  #include "pcreposix.h"
160  #endif  #endif
161    
162    /* It is also possible, for the benefit of the version currently imported into
163    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
164    interface to the DFA matcher (NODFA), and without the doublecheck of the old
165    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
166    UTF8 support if PCRE is built without it. */
167    
168    #ifndef SUPPORT_UTF8
169    #ifndef NOUTF8
170    #define NOUTF8
171    #endif
172    #endif
173    
174    
175    /* Other parameters */
176    
177  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
178  #ifdef CLK_TCK  #ifdef CLK_TCK
179  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 66  Makefile. */ Line 182  Makefile. */
182  #endif  #endif
183  #endif  #endif
184    
185  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
186    
187  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
188    
189    /* Static variables */
190    
191  static FILE *outfile;  static FILE *outfile;
192  static int log_store = 0;  static int log_store = 0;
# Line 79  static int callout_count; Line 194  static int callout_count;
194  static int callout_extra;  static int callout_extra;
195  static int callout_fail_count;  static int callout_fail_count;
196  static int callout_fail_id;  static int callout_fail_id;
197    static int debug_lengths;
198  static int first_callout;  static int first_callout;
199    static int locale_set = 0;
200  static int show_malloc;  static int show_malloc;
201  static int use_utf8;  static int use_utf8;
202  static size_t gotten_store;  static size_t gotten_store;
203    static size_t first_gotten_store = 0;
204    static const unsigned char *last_callout_mark = NULL;
205    
206    static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
207    
208    /* The buffers grow automatically if very long input lines are encountered. */
209    
210    static int buffer_size = 50000;
211    static pcre_uint8 *buffer = NULL;
212    static pcre_uint8 *dbuffer = NULL;
213    static pcre_uint8 *pbuffer = NULL;
214    
215    #ifdef SUPPORT_PCRE16
216    static int buffer16_size = 0;
217    static pcre_uint16 *buffer16 = NULL;
218    #endif
219    
220    /* Textual explanations for runtime error codes */
221    
222    static const char *errtexts[] = {
223      NULL,  /* 0 is no error */
224      NULL,  /* NOMATCH is handled specially */
225      "NULL argument passed",
226      "bad option value",
227      "magic number missing",
228      "unknown opcode - pattern overwritten?",
229      "no more memory",
230      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
231      "match limit exceeded",
232      "callout error code",
233      NULL,  /* BADUTF8 is handled specially */
234      "bad UTF-8 offset",
235      NULL,  /* PARTIAL is handled specially */
236      "not used - internal error",
237      "internal error - pattern overwritten?",
238      "bad count value",
239      "item unsupported for DFA matching",
240      "backreference condition or recursion test not supported for DFA matching",
241      "match limit not supported for DFA matching",
242      "workspace size exceeded in DFA matching",
243      "too much recursion for DFA matching",
244      "recursion limit exceeded",
245      "not used - internal error",
246      "invalid combination of newline options",
247      "bad offset value",
248      NULL,  /* SHORTUTF8 is handled specially */
249      "nested recursion at the same subject position",
250      "JIT stack limit reached",
251      "pattern compiled in wrong mode (8-bit/16-bit error)"
252    };
253    
 static uschar *pbuffer = NULL;  
254    
255    /*************************************************
256    *         Alternate character tables             *
257    *************************************************/
258    
259  static const int utf8_table1[] = {  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
260    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  using the default tables of the library. However, the T option can be used to
261    select alternate sets of tables, for different kinds of testing. Note also that
262    the L (locale) option also adjusts the tables. */
263    
264    /* This is the set of tables distributed as default with PCRE. It recognizes
265    only ASCII characters. */
266    
267    static const unsigned char tables0[] = {
268    
269    /* This table is a lower casing table. */
270    
271        0,  1,  2,  3,  4,  5,  6,  7,
272        8,  9, 10, 11, 12, 13, 14, 15,
273       16, 17, 18, 19, 20, 21, 22, 23,
274       24, 25, 26, 27, 28, 29, 30, 31,
275       32, 33, 34, 35, 36, 37, 38, 39,
276       40, 41, 42, 43, 44, 45, 46, 47,
277       48, 49, 50, 51, 52, 53, 54, 55,
278       56, 57, 58, 59, 60, 61, 62, 63,
279       64, 97, 98, 99,100,101,102,103,
280      104,105,106,107,108,109,110,111,
281      112,113,114,115,116,117,118,119,
282      120,121,122, 91, 92, 93, 94, 95,
283       96, 97, 98, 99,100,101,102,103,
284      104,105,106,107,108,109,110,111,
285      112,113,114,115,116,117,118,119,
286      120,121,122,123,124,125,126,127,
287      128,129,130,131,132,133,134,135,
288      136,137,138,139,140,141,142,143,
289      144,145,146,147,148,149,150,151,
290      152,153,154,155,156,157,158,159,
291      160,161,162,163,164,165,166,167,
292      168,169,170,171,172,173,174,175,
293      176,177,178,179,180,181,182,183,
294      184,185,186,187,188,189,190,191,
295      192,193,194,195,196,197,198,199,
296      200,201,202,203,204,205,206,207,
297      208,209,210,211,212,213,214,215,
298      216,217,218,219,220,221,222,223,
299      224,225,226,227,228,229,230,231,
300      232,233,234,235,236,237,238,239,
301      240,241,242,243,244,245,246,247,
302      248,249,250,251,252,253,254,255,
303    
304    /* This table is a case flipping table. */
305    
306        0,  1,  2,  3,  4,  5,  6,  7,
307        8,  9, 10, 11, 12, 13, 14, 15,
308       16, 17, 18, 19, 20, 21, 22, 23,
309       24, 25, 26, 27, 28, 29, 30, 31,
310       32, 33, 34, 35, 36, 37, 38, 39,
311       40, 41, 42, 43, 44, 45, 46, 47,
312       48, 49, 50, 51, 52, 53, 54, 55,
313       56, 57, 58, 59, 60, 61, 62, 63,
314       64, 97, 98, 99,100,101,102,103,
315      104,105,106,107,108,109,110,111,
316      112,113,114,115,116,117,118,119,
317      120,121,122, 91, 92, 93, 94, 95,
318       96, 65, 66, 67, 68, 69, 70, 71,
319       72, 73, 74, 75, 76, 77, 78, 79,
320       80, 81, 82, 83, 84, 85, 86, 87,
321       88, 89, 90,123,124,125,126,127,
322      128,129,130,131,132,133,134,135,
323      136,137,138,139,140,141,142,143,
324      144,145,146,147,148,149,150,151,
325      152,153,154,155,156,157,158,159,
326      160,161,162,163,164,165,166,167,
327      168,169,170,171,172,173,174,175,
328      176,177,178,179,180,181,182,183,
329      184,185,186,187,188,189,190,191,
330      192,193,194,195,196,197,198,199,
331      200,201,202,203,204,205,206,207,
332      208,209,210,211,212,213,214,215,
333      216,217,218,219,220,221,222,223,
334      224,225,226,227,228,229,230,231,
335      232,233,234,235,236,237,238,239,
336      240,241,242,243,244,245,246,247,
337      248,249,250,251,252,253,254,255,
338    
339    /* This table contains bit maps for various character classes. Each map is 32
340    bytes long and the bits run from the least significant end of each byte. The
341    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
342    graph, print, punct, and cntrl. Other classes are built from combinations. */
343    
344      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
345      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348    
349      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
350      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353    
354      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
355      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358    
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363    
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368    
369      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
370      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373    
374      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
375      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
378    
379      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
380      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
381      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
382      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
383    
384      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
385      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
386      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
387      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
388    
389      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
390      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
391      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
392      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
393    
394    /* This table identifies various classes of character by individual bits:
395      0x01   white space character
396      0x02   letter
397      0x04   decimal digit
398      0x08   hexadecimal digit
399      0x10   alphanumeric or '_'
400      0x80   regular expression metacharacter or binary zero
401    */
402    
403  static const int utf8_table2[] = {    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
404    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
407      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
408      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
409      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
410      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
411      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
412      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
413      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
414      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
415      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
416      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
417      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
418      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
419      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
420      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
421      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
422      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
423      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
424      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
425      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
426      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
427      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
428      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
429      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
430      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
431      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
432      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
433      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
434      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
435    
436    /* This is a set of tables that came originally from a Windows user. It seems to
437    be at least an approximation of ISO 8859. In particular, there are characters
438    greater than 128 that are marked as spaces, letters, etc. */
439    
440    static const unsigned char tables1[] = {
441    0,1,2,3,4,5,6,7,
442    8,9,10,11,12,13,14,15,
443    16,17,18,19,20,21,22,23,
444    24,25,26,27,28,29,30,31,
445    32,33,34,35,36,37,38,39,
446    40,41,42,43,44,45,46,47,
447    48,49,50,51,52,53,54,55,
448    56,57,58,59,60,61,62,63,
449    64,97,98,99,100,101,102,103,
450    104,105,106,107,108,109,110,111,
451    112,113,114,115,116,117,118,119,
452    120,121,122,91,92,93,94,95,
453    96,97,98,99,100,101,102,103,
454    104,105,106,107,108,109,110,111,
455    112,113,114,115,116,117,118,119,
456    120,121,122,123,124,125,126,127,
457    128,129,130,131,132,133,134,135,
458    136,137,138,139,140,141,142,143,
459    144,145,146,147,148,149,150,151,
460    152,153,154,155,156,157,158,159,
461    160,161,162,163,164,165,166,167,
462    168,169,170,171,172,173,174,175,
463    176,177,178,179,180,181,182,183,
464    184,185,186,187,188,189,190,191,
465    224,225,226,227,228,229,230,231,
466    232,233,234,235,236,237,238,239,
467    240,241,242,243,244,245,246,215,
468    248,249,250,251,252,253,254,223,
469    224,225,226,227,228,229,230,231,
470    232,233,234,235,236,237,238,239,
471    240,241,242,243,244,245,246,247,
472    248,249,250,251,252,253,254,255,
473    0,1,2,3,4,5,6,7,
474    8,9,10,11,12,13,14,15,
475    16,17,18,19,20,21,22,23,
476    24,25,26,27,28,29,30,31,
477    32,33,34,35,36,37,38,39,
478    40,41,42,43,44,45,46,47,
479    48,49,50,51,52,53,54,55,
480    56,57,58,59,60,61,62,63,
481    64,97,98,99,100,101,102,103,
482    104,105,106,107,108,109,110,111,
483    112,113,114,115,116,117,118,119,
484    120,121,122,91,92,93,94,95,
485    96,65,66,67,68,69,70,71,
486    72,73,74,75,76,77,78,79,
487    80,81,82,83,84,85,86,87,
488    88,89,90,123,124,125,126,127,
489    128,129,130,131,132,133,134,135,
490    136,137,138,139,140,141,142,143,
491    144,145,146,147,148,149,150,151,
492    152,153,154,155,156,157,158,159,
493    160,161,162,163,164,165,166,167,
494    168,169,170,171,172,173,174,175,
495    176,177,178,179,180,181,182,183,
496    184,185,186,187,188,189,190,191,
497    224,225,226,227,228,229,230,231,
498    232,233,234,235,236,237,238,239,
499    240,241,242,243,244,245,246,215,
500    248,249,250,251,252,253,254,223,
501    192,193,194,195,196,197,198,199,
502    200,201,202,203,204,205,206,207,
503    208,209,210,211,212,213,214,247,
504    216,217,218,219,220,221,222,255,
505    0,62,0,0,1,0,0,0,
506    0,0,0,0,0,0,0,0,
507    32,0,0,0,1,0,0,0,
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,255,3,
510    126,0,0,0,126,0,0,0,
511    0,0,0,0,0,0,0,0,
512    0,0,0,0,0,0,0,0,
513    0,0,0,0,0,0,255,3,
514    0,0,0,0,0,0,0,0,
515    0,0,0,0,0,0,12,2,
516    0,0,0,0,0,0,0,0,
517    0,0,0,0,0,0,0,0,
518    254,255,255,7,0,0,0,0,
519    0,0,0,0,0,0,0,0,
520    255,255,127,127,0,0,0,0,
521    0,0,0,0,0,0,0,0,
522    0,0,0,0,254,255,255,7,
523    0,0,0,0,0,4,32,4,
524    0,0,0,128,255,255,127,255,
525    0,0,0,0,0,0,255,3,
526    254,255,255,135,254,255,255,7,
527    0,0,0,0,0,4,44,6,
528    255,255,127,255,255,255,127,255,
529    0,0,0,0,254,255,255,255,
530    255,255,255,255,255,255,255,127,
531    0,0,0,0,254,255,255,255,
532    255,255,255,255,255,255,255,255,
533    0,2,0,0,255,255,255,255,
534    255,255,255,255,255,255,255,127,
535    0,0,0,0,255,255,255,255,
536    255,255,255,255,255,255,255,255,
537    0,0,0,0,254,255,0,252,
538    1,0,0,248,1,0,0,120,
539    0,0,0,0,254,255,255,255,
540    0,0,128,0,0,0,128,0,
541    255,255,255,255,0,0,0,0,
542    0,0,0,0,0,0,0,128,
543    255,255,255,255,0,0,0,0,
544    0,0,0,0,0,0,0,0,
545    128,0,0,0,0,0,0,0,
546    0,1,1,0,1,1,0,0,
547    0,0,0,0,0,0,0,0,
548    0,0,0,0,0,0,0,0,
549    1,0,0,0,128,0,0,0,
550    128,128,128,128,0,0,128,0,
551    28,28,28,28,28,28,28,28,
552    28,28,0,0,0,0,0,128,
553    0,26,26,26,26,26,26,18,
554    18,18,18,18,18,18,18,18,
555    18,18,18,18,18,18,18,18,
556    18,18,18,128,128,0,128,16,
557    0,26,26,26,26,26,26,18,
558    18,18,18,18,18,18,18,18,
559    18,18,18,18,18,18,18,18,
560    18,18,18,128,128,0,0,0,
561    0,0,0,0,0,1,0,0,
562    0,0,0,0,0,0,0,0,
563    0,0,0,0,0,0,0,0,
564    0,0,0,0,0,0,0,0,
565    1,0,0,0,0,0,0,0,
566    0,0,18,0,0,0,0,0,
567    0,0,20,20,0,18,0,0,
568    0,20,18,0,0,0,0,0,
569    18,18,18,18,18,18,18,18,
570    18,18,18,18,18,18,18,18,
571    18,18,18,18,18,18,18,0,
572    18,18,18,18,18,18,18,18,
573    18,18,18,18,18,18,18,18,
574    18,18,18,18,18,18,18,18,
575    18,18,18,18,18,18,18,0,
576    18,18,18,18,18,18,18,18
577    };
578    
 static const int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
579    
580    
581    
582    #ifndef HAVE_STRERROR
583  /*************************************************  /*************************************************
584  *         Print compiled regex                   *  *     Provide strerror() for non-ANSI libraries  *
585  *************************************************/  *************************************************/
586    
587  /* The code for doing this is held in a separate file that is also included in  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
588  pcre.c when it is compiled with the debug switch. It defines a function called  in their libraries, but can provide the same facility by this simple
589  print_internals(), which uses a table of opcode lengths defined by the macro  alternative function. */
590  OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates  
591  Unicode property names to numbers; this is kept in a separate file. */  extern int   sys_nerr;
592    extern char *sys_errlist[];
593  static uschar OP_lengths[] = { OP_LENGTHS };  
594    char *
595  #include "ucp.h"  strerror(int n)
596  #include "ucptypetable.c"  {
597  #include "printint.c"  if (n < 0 || n >= sys_nerr) return "unknown error number";
598    return sys_errlist[n];
599    }
600    #endif /* HAVE_STRERROR */
601    
602    
603    /*************************************************
604    *         JIT memory callback                    *
605    *************************************************/
606    
607    static pcre_jit_stack* jit_callback(void *arg)
608    {
609    return (pcre_jit_stack *)arg;
610    }
611    
612    
613    #ifdef SUPPORT_PCRE16
614    /*************************************************
615    *         Convert a string to 16-bit             *
616    *************************************************/
617    
618    /* The result is always left in buffer16. */
619    
620    static int
621    to16(unsigned char *p, int utf)
622    {
623    pcre_uint16 *pp;
624    int len = (int)strlen((char *)p) + 1;
625    
626    if (buffer16_size < 2*len)
627      {
628      if (buffer16 != NULL) free(buffer16);
629      buffer16_size = 2*len;
630      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
631      if (buffer16 == NULL)
632        {
633        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
634        exit(1);
635        }
636      }
637    
638    pp = buffer16;
639    
640    if (!utf)
641      {
642      while (*p != 0) *pp++ = *p++;
643      *pp++ = 0;
644      }
645    
646    else
647      {
648    fprintf(stderr, "pcretest: no support yet for UTF-16\n");
649    exit(1);
650      }
651    
652    return pp - buffer16;
653    }
654    #endif
655    
656    
657    /*************************************************
658    *        Read or extend an input line            *
659    *************************************************/
660    
661    /* Input lines are read into buffer, but both patterns and data lines can be
662    continued over multiple input lines. In addition, if the buffer fills up, we
663    want to automatically expand it so as to be able to handle extremely large
664    lines that are needed for certain stress tests. When the input buffer is
665    expanded, the other two buffers must also be expanded likewise, and the
666    contents of pbuffer, which are a copy of the input for callouts, must be
667    preserved (for when expansion happens for a data line). This is not the most
668    optimal way of handling this, but hey, this is just a test program!
669    
670    Arguments:
671      f            the file to read
672      start        where in buffer to start (this *must* be within buffer)
673      prompt       for stdin or readline()
674    
675    Returns:       pointer to the start of new data
676                   could be a copy of start, or could be moved
677                   NULL if no data read and EOF reached
678    */
679    
680    static pcre_uint8 *
681    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
682    {
683    pcre_uint8 *here = start;
684    
685    for (;;)
686      {
687      int rlen = (int)(buffer_size - (here - buffer));
688    
689      if (rlen > 1000)
690        {
691        int dlen;
692    
693        /* If libreadline support is required, use readline() to read a line if the
694        input is a terminal. Note that readline() removes the trailing newline, so
695        we must put it back again, to be compatible with fgets(). */
696    
697    #ifdef SUPPORT_LIBREADLINE
698        if (isatty(fileno(f)))
699          {
700          size_t len;
701          char *s = readline(prompt);
702          if (s == NULL) return (here == start)? NULL : start;
703          len = strlen(s);
704          if (len > 0) add_history(s);
705          if (len > rlen - 1) len = rlen - 1;
706          memcpy(here, s, len);
707          here[len] = '\n';
708          here[len+1] = 0;
709          free(s);
710          }
711        else
712    #endif
713    
714        /* Read the next line by normal means, prompting if the file is stdin. */
715    
716          {
717          if (f == stdin) printf("%s", prompt);
718          if (fgets((char *)here, rlen,  f) == NULL)
719            return (here == start)? NULL : start;
720          }
721    
722        dlen = (int)strlen((char *)here);
723        if (dlen > 0 && here[dlen - 1] == '\n') return start;
724        here += dlen;
725        }
726    
727      else
728        {
729        int new_buffer_size = 2*buffer_size;
730        pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
731        pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
732        pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
733    
734        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
735          {
736          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
737          exit(1);
738          }
739    
740        memcpy(new_buffer, buffer, buffer_size);
741        memcpy(new_pbuffer, pbuffer, buffer_size);
742    
743        buffer_size = new_buffer_size;
744    
745        start = new_buffer + (start - buffer);
746        here = new_buffer + (here - buffer);
747    
748        free(buffer);
749        free(dbuffer);
750        free(pbuffer);
751    
752        buffer = new_buffer;
753        dbuffer = new_dbuffer;
754        pbuffer = new_pbuffer;
755        }
756      }
757    
758    return NULL;  /* Control never gets here */
759    }
760    
761    
762    
763    
764    
765    
766    
# Line 122  static uschar OP_lengths[] = { OP_LENGTH Line 770  static uschar OP_lengths[] = { OP_LENGTH
770    
771  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
772  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
773  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
774    
775  Arguments:  Arguments:
776    str           string to be converted    str           string to be converted
# Line 143  return(result); Line 791  return(result);
791    
792    
793    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
794    
795  /*************************************************  /*************************************************
796  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 188  return i + 1; Line 800  return i + 1;
800  and returns the value of the character.  and returns the value of the character.
801    
802  Argument:  Argument:
803    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
804    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
805    
806  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
807             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
808  */  */
809    
810    #if !defined NOUTF8
811    
812  static int  static int
813  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
814  {  {
815  int c = *buffer++;  int c = *utf8bytes++;
816  int d = c;  int d = c;
817  int i, j, s;  int i, j, s;
818    
# Line 218  d = (c & utf8_table3[i]) << s; Line 832  d = (c & utf8_table3[i]) << s;
832    
833  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
834    {    {
835    c = *buffer++;    c = *utf8bytes++;
836    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
837    s -= 6;    s -= 6;
838    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 226  for (j = 0; j < i; j++) Line 840  for (j = 0; j < i; j++)
840    
841  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
842    
843  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
844    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
845  if (j != i) return -(i+1);  if (j != i) return -(i+1);
846    
# Line 236  if (j != i) return -(i+1); Line 850  if (j != i) return -(i+1);
850  return i+1;  return i+1;
851  }  }
852    
853    #endif
854    
855    
856    
857    /*************************************************
858    *       Convert character value to UTF-8         *
859    *************************************************/
860    
861    /* This function takes an integer value in the range 0 - 0x7fffffff
862    and encodes it as a UTF-8 character in 0 to 6 bytes.
863    
864    Arguments:
865      cvalue     the character value
866      utf8bytes  pointer to buffer for result - at least 6 bytes long
867    
868    Returns:     number of characters placed in the buffer
869    */
870    
871    #if !defined NOUTF8
872    
873    static int
874    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
875    {
876    register int i, j;
877    for (i = 0; i < utf8_table1_size; i++)
878      if (cvalue <= utf8_table1[i]) break;
879    utf8bytes += i;
880    for (j = i; j > 0; j--)
881     {
882     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
883     cvalue >>= 6;
884     }
885    *utf8bytes = utf8_table2[i] | cvalue;
886    return i + 1;
887    }
888    
889    #endif
890    
891    
892    
893  /*************************************************  /*************************************************
# Line 248  chars without printing. */ Line 900  chars without printing. */
900    
901  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
902  {  {
903  int c;  int c = 0;
904  int yield = 0;  int yield = 0;
905    
906  while (length-- > 0)  while (length-- > 0)
907    {    {
908    #if !defined NOUTF8
909    if (use_utf8)    if (use_utf8)
910      {      {
911      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 261  while (length-- > 0) Line 914  while (length-- > 0)
914        {        {
915        length -= rc - 1;        length -= rc - 1;
916        p += rc;        p += rc;
917        if (c < 256 && isprint(c))        if (PRINTHEX(c))
918          {          {
919          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
920          yield++;          yield++;
921          }          }
922        else        else
923          {          {
924          int n;          int n = 4;
925          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
926          yield += n;          yield += (n <= 0x000000ff)? 2 :
927                     (n <= 0x00000fff)? 3 :
928                     (n <= 0x0000ffff)? 4 :
929                     (n <= 0x000fffff)? 5 : 6;
930          }          }
931        continue;        continue;
932        }        }
933      }      }
934    #endif
935    
936     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
937    
938    if (isprint(c = *(p++)))    c = *p++;
939      if (PRINTHEX(c))
940      {      {
941      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
942      yield++;      yield++;
# Line 376  fprintf(outfile, "%.*s", (cb->next_item_ Line 1034  fprintf(outfile, "%.*s", (cb->next_item_
1034  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1035  first_callout = 0;  first_callout = 0;
1036    
1037    if (cb->mark != last_callout_mark)
1038      {
1039      fprintf(outfile, "Latest Mark: %s\n",
1040        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1041      last_callout_mark = cb->mark;
1042      }
1043    
1044  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1045    {    {
1046    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 395  return (cb->callout_number != callout_fa Line 1060  return (cb->callout_number != callout_fa
1060  *            Local malloc functions              *  *            Local malloc functions              *
1061  *************************************************/  *************************************************/
1062    
1063  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1064  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1065    show_malloc variable is set only during matching. */
1066    
1067  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1068  {  {
1069  void *block = malloc(size);  void *block = malloc(size);
1070  gotten_store = size;  gotten_store = size;
1071    if (first_gotten_store == 0) first_gotten_store = size;
1072  if (show_malloc)  if (show_malloc)
1073    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1074  return block;  return block;
1075  }  }
1076    
# Line 414  if (show_malloc) Line 1081  if (show_malloc)
1081  free(block);  free(block);
1082  }  }
1083    
   
1084  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1085    
1086  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
1087  {  {
1088  void *block = malloc(size);  void *block = malloc(size);
1089  if (show_malloc)  if (show_malloc)
1090    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1091  return block;  return block;
1092  }  }
1093    
# Line 442  free(block); Line 1108  free(block);
1108  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1109  {  {
1110  int rc;  int rc;
1111  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
1112    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1113  }  }
1114    
# Line 452  if ((rc = pcre_fullinfo(re, study, optio Line 1118  if ((rc = pcre_fullinfo(re, study, optio
1118  *         Byte flipping function                 *  *         Byte flipping function                 *
1119  *************************************************/  *************************************************/
1120    
1121  static long int  static unsigned long int
1122  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
1123  {  {
1124  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
1125  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 466  return ((value & 0x000000ff) << 24) | Line 1132  return ((value & 0x000000ff) << 24) |
1132    
1133    
1134  /*************************************************  /*************************************************
1135    *        Check match or recursion limit          *
1136    *************************************************/
1137    
1138    static int
1139    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1140      int start_offset, int options, int *use_offsets, int use_size_offsets,
1141      int flag, unsigned long int *limit, int errnumber, const char *msg)
1142    {
1143    int count;
1144    int min = 0;
1145    int mid = 64;
1146    int max = -1;
1147    
1148    extra->flags |= flag;
1149    
1150    for (;;)
1151      {
1152      *limit = mid;
1153    
1154      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1155        use_offsets, use_size_offsets);
1156    
1157      if (count == errnumber)
1158        {
1159        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1160        min = mid;
1161        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1162        }
1163    
1164      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1165                             count == PCRE_ERROR_PARTIAL)
1166        {
1167        if (mid == min + 1)
1168          {
1169          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1170          break;
1171          }
1172        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1173        max = mid;
1174        mid = (min + mid)/2;
1175        }
1176      else break;    /* Some other error */
1177      }
1178    
1179    extra->flags &= ~flag;
1180    return count;
1181    }
1182    
1183    
1184    
1185    /*************************************************
1186    *         Case-independent strncmp() function    *
1187    *************************************************/
1188    
1189    /*
1190    Arguments:
1191      s         first string
1192      t         second string
1193      n         number of characters to compare
1194    
1195    Returns:    < 0, = 0, or > 0, according to the comparison
1196    */
1197    
1198    static int
1199    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1200    {
1201    while (n--)
1202      {
1203      int c = tolower(*s++) - tolower(*t++);
1204      if (c) return c;
1205      }
1206    return 0;
1207    }
1208    
1209    
1210    
1211    /*************************************************
1212    *         Check newline indicator                *
1213    *************************************************/
1214    
1215    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1216    a message and return 0 if there is no match.
1217    
1218    Arguments:
1219      p           points after the leading '<'
1220      f           file for error message
1221    
1222    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1223    */
1224    
1225    static int
1226    check_newline(pcre_uint8 *p, FILE *f)
1227    {
1228    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1229    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1230    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1231    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1232    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1233    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1234    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1235    fprintf(f, "Unknown newline type at: <%s\n", p);
1236    return 0;
1237    }
1238    
1239    
1240    
1241    /*************************************************
1242    *             Usage function                     *
1243    *************************************************/
1244    
1245    static void
1246    usage(void)
1247    {
1248    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1249    printf("Input and output default to stdin and stdout.\n");
1250    #ifdef SUPPORT_LIBREADLINE
1251    printf("If input is a terminal, readline() is used to read from it.\n");
1252    #else
1253    printf("This version of pcretest is not linked with readline().\n");
1254    #endif
1255    printf("\nOptions:\n");
1256    #ifdef SUPPORT_PCRE16
1257    printf("  -16      use 16-bit interface\n");
1258    #endif
1259    printf("  -b       show compiled code (bytecode)\n");
1260    printf("  -C       show PCRE compile-time options and exit\n");
1261    printf("  -d       debug: show compiled code and information (-b and -i)\n");
1262    #if !defined NODFA
1263    printf("  -dfa     force DFA matching for all subjects\n");
1264    #endif
1265    printf("  -help    show usage information\n");
1266    printf("  -i       show information about compiled patterns\n"
1267           "  -M       find MATCH_LIMIT minimum for each subject\n"
1268           "  -m       output memory used information\n"
1269           "  -o <n>   set size of offsets vector to <n>\n");
1270    #if !defined NOPOSIX
1271    printf("  -p       use POSIX interface\n");
1272    #endif
1273    printf("  -q       quiet: do not output PCRE version number at start\n");
1274    printf("  -S <n>   set stack size to <n> megabytes\n");
1275    printf("  -s       force each pattern to be studied at basic level\n"
1276           "  -s+      force each pattern to be studied, using JIT if available\n"
1277           "  -t       time compilation and execution\n");
1278    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1279    printf("  -tm      time execution (matching) only\n");
1280    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
1281    }
1282    
1283    
1284    
1285    /*************************************************
1286  *                Main Program                    *  *                Main Program                    *
1287  *************************************************/  *************************************************/
1288    
# Line 478  int main(int argc, char **argv) Line 1295  int main(int argc, char **argv)
1295  FILE *infile = stdin;  FILE *infile = stdin;
1296  int options = 0;  int options = 0;
1297  int study_options = 0;  int study_options = 0;
1298    int default_find_match_limit = FALSE;
1299  int op = 1;  int op = 1;
1300  int timeit = 0;  int timeit = 0;
1301    int timeitm = 0;
1302  int showinfo = 0;  int showinfo = 0;
1303  int showstore = 0;  int showstore = 0;
1304    int force_study = -1;
1305    int force_study_options = 0;
1306    int quiet = 0;
1307  int size_offsets = 45;  int size_offsets = 45;
1308  int size_offsets_max;  int size_offsets_max;
1309  int *offsets;  int *offsets = NULL;
1310  #if !defined NOPOSIX  #if !defined NOPOSIX
1311  int posix = 0;  int posix = 0;
1312  #endif  #endif
1313  int debug = 0;  int debug = 0;
1314  int done = 0;  int done = 0;
1315    int all_use_dfa = 0;
1316    int use_pcre16 = 0;
1317    int yield = 0;
1318    int stack_size;
1319    
1320  unsigned char *buffer;  pcre_jit_stack *jit_stack = NULL;
 unsigned char *dbuffer;  
1321    
1322  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* These vectors store, end-to-end, a list of captured substring names. Assume
1323  when I am debugging. */  that 1024 is plenty long enough for the few names we'll be testing. */
1324    
1325  buffer = (unsigned char *)malloc(BUFFER_SIZE);  pcre_uchar copynames[1024];
1326  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  pcre_uchar getnames[1024];
 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1327    
1328  #if defined(_WIN32) || defined(WIN32)  pcre_uchar *copynamesptr;
1329  _setmode( _fileno( stdout ), 0x8000 );  pcre_uchar *getnamesptr;
1330  #endif  /* defined(_WIN32) || defined(WIN32) */  
1331    /* Get buffers from malloc() so that valgrind will check their misuse when
1332    debugging. They grow automatically when very long lines are read. The 16-bit
1333    buffer (buffer16) is obtained only if needed. */
1334    
1335    buffer = (pcre_uint8 *)malloc(buffer_size);
1336    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1337    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1338    
1339    /* The outfile variable is static so that new_malloc can use it. */
1340    
1341  outfile = stdout;  outfile = stdout;
1342    
1343    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1344    library to translate CRLF into a single LF character. At least, that's what
1345    I've been told: never having used Windows I take this all on trust. Originally
1346    it set 0x8000, but then I was advised that _O_BINARY was better. */
1347    
1348    #if defined(_WIN32) || defined(WIN32)
1349    _setmode( _fileno( stdout ), _O_BINARY );
1350    #endif
1351    
1352  /* Scan options */  /* Scan options */
1353    
1354  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1355    {    {
1356    unsigned char *endptr;    unsigned char *endptr;
1357    
1358    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1359      showstore = 1;    else if (strcmp(argv[op], "-m") == 0) showstore = 1;
1360    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1361      else if (strcmp(argv[op], "-s+") == 0)
1362        {
1363        force_study = 1;
1364        force_study_options = PCRE_STUDY_JIT_COMPILE;
1365        }
1366      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1367      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1368    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1369    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1370      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1371    #if !defined NODFA
1372      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1373    #endif
1374    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1375        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1376          *endptr == 0))          *endptr == 0))
# Line 529  while (argc > 1 && argv[op][0] == '-') Line 1378  while (argc > 1 && argv[op][0] == '-')
1378      op++;      op++;
1379      argc--;      argc--;
1380      }      }
1381      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1382        {
1383        int both = argv[op][2] == 0;
1384        int temp;
1385        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1386                         *endptr == 0))
1387          {
1388          timeitm = temp;
1389          op++;
1390          argc--;
1391          }
1392        else timeitm = LOOPREPEAT;
1393        if (both) timeit = timeitm;
1394        }
1395      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1396          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1397            *endptr == 0))
1398        {
1399    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1400        printf("PCRE: -S not supported on this OS\n");
1401        exit(1);
1402    #else
1403        int rc;
1404        struct rlimit rlim;
1405        getrlimit(RLIMIT_STACK, &rlim);
1406        rlim.rlim_cur = stack_size * 1024 * 1024;
1407        rc = setrlimit(RLIMIT_STACK, &rlim);
1408        if (rc != 0)
1409          {
1410        printf("PCRE: setrlimit() failed with error %d\n", rc);
1411        exit(1);
1412          }
1413        op++;
1414        argc--;
1415    #endif
1416        }
1417  #if !defined NOPOSIX  #if !defined NOPOSIX
1418    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1419  #endif  #endif
1420    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1421      {      {
1422      int rc;      int rc;
1423        unsigned long int lrc;
1424      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1425      printf("Compiled with\n");      printf("Compiled with\n");
1426    
1427    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */
1428    
1429    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1430        printf("  8-bit and 16-bit support\n");
1431        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1432        printf("  %sUTF-8 support\n", rc? "" : "No ");
1433        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1434        printf("  %sUTF-16 support\n", rc? "" : "No ");
1435    #elif defined SUPPORT_PCRE8
1436        printf("  8-bit support only\n");
1437      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1438      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1439    #else
1440        printf("  16-bit support only\n");
1441        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1442        printf("  %sUTF-16 support\n", rc? "" : "No ");
1443    #endif
1444    
1445      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1446      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1447        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1448        if (rc)
1449          printf("  Just-in-time compiler support\n");
1450        else
1451          printf("  No just-in-time compiler support\n");
1452      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1453      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1454        in EBCDIC environments. CR is 13 and NL is 10. */
1455        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1456          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1457          (rc == -2)? "ANYCRLF" :
1458          (rc == -1)? "ANY" : "???");
1459        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1460        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1461                                         "all Unicode newlines");
1462      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1463      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1464      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1465      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1466      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1467      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1468        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1469        printf("  Default recursion depth limit = %ld\n", lrc);
1470      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1471      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1472      exit(0);      goto EXIT;
1473        }
1474      else if (strcmp(argv[op], "-help") == 0 ||
1475               strcmp(argv[op], "--help") == 0)
1476        {
1477        usage();
1478        goto EXIT;
1479      }      }
1480    else    else
1481      {      {
1482      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1483      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1484      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
1485      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1486      }      }
1487    op++;    op++;
1488    argc--;    argc--;
1489    }    }
1490    
1491    /* Select which fullinfo function to use. */
1492    
1493    fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
1494    
1495  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1496    
1497  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
# Line 580  offsets = (int *)malloc(size_offsets_max Line 1499  offsets = (int *)malloc(size_offsets_max
1499  if (offsets == NULL)  if (offsets == NULL)
1500    {    {
1501    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1502      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1503    return 1;    yield = 1;
1504      goto EXIT;
1505    }    }
1506    
1507  /* Sort out the input and output files */  /* Sort out the input and output files */
1508    
1509  if (argc > 1)  if (argc > 1)
1510    {    {
1511    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1512    if (infile == NULL)    if (infile == NULL)
1513      {      {
1514      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1515      return 1;      yield = 1;
1516        goto EXIT;
1517      }      }
1518    }    }
1519    
1520  if (argc > 2)  if (argc > 2)
1521    {    {
1522    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1523    if (outfile == NULL)    if (outfile == NULL)
1524      {      {
1525      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1526      return 1;      yield = 1;
1527        goto EXIT;
1528      }      }
1529    }    }
1530    
1531  /* Set alternative malloc function */  /* Set alternative malloc function */
1532    
1533    #ifdef SUPPORT_PCRE8
1534  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1535  pcre_free = new_free;  pcre_free = new_free;
1536  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1537  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1538    #endif
1539    
1540  /* Heading line, then prompt for first regex if stdin */  #ifdef SUPPORT_PCRE16
1541    pcre16_malloc = new_malloc;
1542    pcre16_free = new_free;
1543    pcre16_stack_malloc = stack_malloc;
1544    pcre16_stack_free = stack_free;
1545    #endif
1546    
1547  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  /* Heading line unless quiet, then prompt for first regex if stdin */
1548    
1549    if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1550    
1551  /* Main loop */  /* Main loop */
1552    
# Line 630  while (!done) Line 1561  while (!done)
1561  #endif  #endif
1562    
1563    const char *error;    const char *error;
1564      unsigned char *markptr;
1565    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1566    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1567    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1568    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1569    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1570      int do_allcaps = 0;
1571      int do_mark = 0;
1572    int do_study = 0;    int do_study = 0;
1573      int no_force_study = 0;
1574    int do_debug = debug;    int do_debug = debug;
1575    int do_G = 0;    int do_G = 0;
1576    int do_g = 0;    int do_g = 0;
1577    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1578    int do_showrest = 0;    int do_showrest = 0;
1579      int do_showcaprest = 0;
1580    int do_flip = 0;    int do_flip = 0;
1581    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1582    
1583    use_utf8 = 0;    use_utf8 = 0;
1584      debug_lengths = 1;
1585    
1586    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1587    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1588    fflush(outfile);    fflush(outfile);
1589    
# Line 659  while (!done) Line 1595  while (!done)
1595    
1596    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1597      {      {
1598      unsigned long int magic;      unsigned long int magic, get_options;
1599      uschar sbuf[8];      pcre_uint8 sbuf[8];
1600      FILE *f;      FILE *f;
1601    
1602      p++;      p++;
# Line 683  while (!done) Line 1619  while (!done)
1619        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1620    
1621      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1622      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1623    
1624      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1625    
# Line 702  while (!done) Line 1638  while (!done)
1638          }          }
1639        }        }
1640    
1641      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1642        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1643    
1644      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1645    
1646      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1647      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1648    
1649      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1650    
1651      if (true_study_size != 0)      if (true_study_size != 0)
1652        {        {
# Line 726  while (!done) Line 1662  while (!done)
1662          {          {
1663          FAIL_READ:          FAIL_READ:
1664          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1665          if (extra != NULL) new_free(extra);          if (extra != NULL) pcre_free_study(extra);
1666          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1667          fclose(f);          fclose(f);
1668          continue;          continue;
# Line 747  while (!done) Line 1683  while (!done)
1683    
1684    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1685      {      {
1686      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1687      goto SKIP_DATA;      goto SKIP_DATA;
1688      }      }
1689    
1690    pp = p;    pp = p;
1691      poffset = (int)(p - buffer);
1692    
1693    for(;;)    for(;;)
1694      {      {
# Line 762  while (!done) Line 1699  while (!done)
1699        pp++;        pp++;
1700        }        }
1701      if (*pp != 0) break;      if (*pp != 0) break;
1702        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1703        {        {
1704        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1705        done = 1;        done = 1;
# Line 780  while (!done) Line 1708  while (!done)
1708      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1709      }      }
1710    
1711      /* The buffer may have moved while being extended; reset the start of data
1712      pointer to the correct relative point in the buffer. */
1713    
1714      p = buffer + poffset;
1715    
1716    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1717    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1718    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 802  while (!done) Line 1735  while (!done)
1735      {      {
1736      switch (*pp++)      switch (*pp++)
1737        {        {
1738          case 'f': options |= PCRE_FIRSTLINE; break;
1739        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1740        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1741        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1742        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1743        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1744    
1745        case '+': do_showrest = 1; break;        case '+':
1746          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1747          break;
1748    
1749          case '=': do_allcaps = 1; break;
1750        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1751          case 'B': do_debug = 1; break;
1752        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1753        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1754        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1755        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1756        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1757        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1758          case 'J': options |= PCRE_DUPNAMES; break;
1759          case 'K': do_mark = 1; break;
1760        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1761        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1762    
# Line 823  while (!done) Line 1764  while (!done)
1764        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1765  #endif  #endif
1766    
1767        case 'S': do_study = 1; break;        case 'S':
1768          if (do_study == 0)
1769            {
1770            do_study = 1;
1771            if (*pp == '+')
1772              {
1773              study_options |= PCRE_STUDY_JIT_COMPILE;
1774              pp++;
1775              }
1776            }
1777          else
1778            {
1779            do_study = 0;
1780            no_force_study = 1;
1781            }
1782          break;
1783    
1784        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1785          case 'W': options |= PCRE_UCP; break;
1786        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1787          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1788          case 'Z': debug_lengths = 0; break;
1789        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1790        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1791    
1792          case 'T':
1793          switch (*pp++)
1794            {
1795            case '0': tables = tables0; break;
1796            case '1': tables = tables1; break;
1797    
1798            case '\r':
1799            case '\n':
1800            case ' ':
1801            case 0:
1802            fprintf(outfile, "** Missing table number after /T\n");
1803            goto SKIP_DATA;
1804    
1805            default:
1806            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1807            goto SKIP_DATA;
1808            }
1809          break;
1810    
1811        case 'L':        case 'L':
1812        ppp = pp;        ppp = pp;
1813        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1814          /* The '0' test is just in case this is an unterminated line. */
1815          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1816        *ppp = 0;        *ppp = 0;
1817        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1818          {          {
1819          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1820          goto SKIP_DATA;          goto SKIP_DATA;
1821          }          }
1822          locale_set = 1;
1823        tables = pcre_maketables();        tables = pcre_maketables();
1824        pp = ppp;        pp = ppp;
1825        break;        break;
# Line 849  while (!done) Line 1831  while (!done)
1831        *pp = 0;        *pp = 0;
1832        break;        break;
1833    
1834        case '\n': case ' ': break;        case '<':
1835            {
1836            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1837              {
1838              options |= PCRE_JAVASCRIPT_COMPAT;
1839              pp += 3;
1840              }
1841            else
1842              {
1843              int x = check_newline(pp, outfile);
1844              if (x == 0) goto SKIP_DATA;
1845              options |= x;
1846              while (*pp++ != '>');
1847              }
1848            }
1849          break;
1850    
1851          case '\r':                      /* So that it works in Windows */
1852          case '\n':
1853          case ' ':
1854          break;
1855    
1856        default:        default:
1857        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 859  while (!done) Line 1861  while (!done)
1861    
1862    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1863    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1864    local character tables. */    local character tables. Neither does it have 16-bit support. */
1865    
1866  #if !defined NOPOSIX  #if !defined NOPOSIX
1867    if (posix || do_posix)    if (posix || do_posix)
# Line 869  while (!done) Line 1871  while (!done)
1871    
1872      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1873      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1874        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1875        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1876        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1877        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1878        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1879    
1880        first_gotten_store = 0;
1881      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1882    
1883      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 876  while (!done) Line 1885  while (!done)
1885    
1886      if (rc != 0)      if (rc != 0)
1887        {        {
1888        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1889        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1890        goto SKIP_DATA;        goto SKIP_DATA;
1891        }        }
# Line 888  while (!done) Line 1897  while (!done)
1897  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1898    
1899      {      {
1900      if (timeit)      unsigned long int get_options;
1901    
1902        /* In 16-bit mode, convert the input. The space needed for a non-UTF string
1903        is exactly double the 8-bit size. For a UTF-8 string, the size needed for
1904        UTF-16 is no more than double, because up to 0xffff uses no more than 3
1905        bytes in UTF-8 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8
1906        and up to 4 bytes in UTF-16. */
1907    
1908    #ifdef SUPPORT_PCRE16
1909        if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
1910    #endif
1911    
1912        /* Compile many times when timing */
1913    
1914        if (timeit > 0)
1915        {        {
1916        register int i;        register int i;
1917        clock_t time_taken;        clock_t time_taken;
1918        clock_t start_time = clock();        clock_t start_time = clock();
1919        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1920          {          {
1921          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  #ifdef SUPPORT_PCRE16
1922            if (use_pcre16)
1923              re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1924            else
1925    #endif
1926              re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1927          if (re != NULL) free(re);          if (re != NULL) free(re);
1928          }          }
1929        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1930        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1931          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1932            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1933        }        }
1934    
1935      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
1936    
1937    #ifdef SUPPORT_PCRE16
1938        if (use_pcre16)
1939          re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1940        else
1941    #endif
1942          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1943    
1944      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1945      if non-interactive. */      if non-interactive. */
# Line 917  while (!done) Line 1952  while (!done)
1952          {          {
1953          for (;;)          for (;;)
1954            {            {
1955            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1956              {              {
1957              done = 1;              done = 1;
1958              goto CONTINUE;              goto CONTINUE;
# Line 931  while (!done) Line 1966  while (!done)
1966        goto CONTINUE;        goto CONTINUE;
1967        }        }
1968    
1969      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1970      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1971      returns only limited data. Check that it agrees with the newer one. */      lines. */
1972    
1973      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1974        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
1975    
1976      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
1977      and remember the store that was got. */      and remember the store that was got. */
1978    
1979      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
1980      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1981    
1982        /* Output code size information if requested */
1983    
1984      /* If /S was present, study the regexp to generate additional info to      if (log_store)
1985      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
1986            (int)(first_gotten_store -
1987                  sizeof(real_pcre) -
1988                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1989    
1990      if (do_study)      /* If -s or /S was present, study the regex to generate additional info to
1991        help with the matching, unless the pattern has the SS option, which
1992        suppresses the effect of /S (used for a few test patterns where studying is
1993        never sensible). */
1994    
1995        if (do_study || (force_study >= 0 && !no_force_study))
1996        {        {
1997        if (timeit)        if (timeit > 0)
1998          {          {
1999          register int i;          register int i;
2000          clock_t time_taken;          clock_t time_taken;
2001          clock_t start_time = clock();          clock_t start_time = clock();
2002          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
2003            extra = pcre_study(re, study_options, &error);            {
2004              if (use_pcre16)
2005                extra = pcre16_study(re, study_options | force_study_options, &error);
2006              else
2007                extra = pcre_study(re, study_options | force_study_options, &error);
2008              }
2009          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2010          if (extra != NULL) free(extra);          if (extra != NULL) pcre_free_study(extra);
2011          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2012            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
2013              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2014          }          }
2015        extra = pcre_study(re, study_options, &error);        if (use_pcre16)
2016            extra = pcre16_study(re, study_options | force_study_options, &error);
2017          else
2018            extra = pcre_study(re, study_options | force_study_options, &error);
2019        if (error != NULL)        if (error != NULL)
2020          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2021        else if (extra != NULL)        else if (extra != NULL)
2022            {
2023          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2024            if (log_store)
2025              {
2026              size_t jitsize;
2027              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2028              if (jitsize != 0)
2029                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2030              }
2031            }
2032          }
2033    
2034        /* If /K was present, we set up for handling MARK data. */
2035    
2036        if (do_mark)
2037          {
2038          if (extra == NULL)
2039            {
2040            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2041            extra->flags = 0;
2042            }
2043          extra->mark = &markptr;
2044          extra->flags |= PCRE_EXTRA_MARK;
2045        }        }
2046    
2047      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
# Line 980  while (!done) Line 2052  while (!done)
2052      if (do_flip)      if (do_flip)
2053        {        {
2054        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
2055        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
2056            byteflip(rre->magic_number, sizeof(rre->magic_number));
2057        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
2058        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
2059        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2060        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
2061        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2062        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
2063        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2064          rre->first_char =
2065            (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2066          rre->req_char =
2067            (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2068          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2069          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
2070        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2071          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
2072        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2073            sizeof(rre->name_count));
2074    
2075        if (extra != NULL)        if (extra != NULL)
2076          {          {
2077          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2078          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2079          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2080            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2081          }          }
2082        }        }
2083    
2084      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
2085    
2086      SHOW_INFO:      SHOW_INFO:
2087    
2088        if (do_debug)
2089          {
2090          fprintf(outfile, "------------------------------------------------------------------\n");
2091          if (use_pcre16)
2092            pcre16_printint(re, outfile, debug_lengths);
2093          else
2094            pcre_printint(re, outfile, debug_lengths);
2095          }
2096    
2097        /* We already have the options in get_options (see above) */
2098    
2099      if (do_showinfo)      if (do_showinfo)
2100        {        {
2101        unsigned long int get_options, all_options;        unsigned long int all_options;
2102    #if !defined NOINFOCHECK
2103        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2104        int count, backrefmax, first_char, need_char;  #endif
2105          int count, backrefmax, first_char, need_char, okpartial, jchanged,
2106            hascrorlf;
2107        int nameentrysize, namecount;        int nameentrysize, namecount;
2108        const uschar *nametable;        const pcre_uchar *nametable;
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
2109    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2110        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2111        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2112        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1028  while (!done) Line 2115  while (!done)
2115        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2116        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2117        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2118          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2119        old_count = pcre_info(re, &old_options, &old_first_char);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2120        if (count < 0) fprintf(outfile,        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2121          "Error %d from pcre_info()\n", count);  
2122        else        /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2123          {        that it gives the same results as the new function. */
2124          if (old_count != count) fprintf(outfile,  
2125            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  #if !defined NOINFOCHECK
2126              old_count);        if (!use_pcre16)
2127            {
2128          if (old_first_char != first_char) fprintf(outfile,          old_count = pcre_info(re, &old_options, &old_first_char);
2129            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",          if (count < 0) fprintf(outfile,
2130              first_char, old_first_char);            "Error %d from pcre_info()\n", count);
2131            else
2132          if (old_options != (int)get_options) fprintf(outfile,            {
2133            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            if (old_count != count) fprintf(outfile,
2134              get_options, old_options);              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2135          }                old_count);
2136    
2137              if (old_first_char != first_char) fprintf(outfile,
2138                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2139                  first_char, old_first_char);
2140    
2141              if (old_options != (int)get_options) fprintf(outfile,
2142                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2143                  get_options, old_options);
2144              }
2145            }
2146    #endif
2147    
2148        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
2149          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2150          size, regex_gotten_store);          (int)size, (int)regex_gotten_store);
2151    
2152        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
2153        if (backrefmax > 0)        if (backrefmax > 0)
# Line 1067  while (!done) Line 2165  while (!done)
2165            }            }
2166          }          }
2167    
2168        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2169        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2170    
2171        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2172        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
2173    
2174        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2175          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2176            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2177            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2178            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2179            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2180              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2181            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2182              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2183              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2184            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2185            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2186            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2187              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2188            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2189            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2190              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2191              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2192              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2193    
2194          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2195    
2196          switch (get_options & PCRE_NEWLINE_BITS)
2197            {
2198            case PCRE_NEWLINE_CR:
2199            fprintf(outfile, "Forced newline sequence: CR\n");
2200            break;
2201    
2202        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
2203          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
2204            break;
2205    
2206            case PCRE_NEWLINE_CRLF:
2207            fprintf(outfile, "Forced newline sequence: CRLF\n");
2208            break;
2209    
2210            case PCRE_NEWLINE_ANYCRLF:
2211            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2212            break;
2213    
2214            case PCRE_NEWLINE_ANY:
2215            fprintf(outfile, "Forced newline sequence: ANY\n");
2216            break;
2217    
2218            default:
2219            break;
2220            }
2221    
2222        if (first_char == -1)        if (first_char == -1)
2223          {          {
2224          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
2225          }          }
2226        else if (first_char < 0)        else if (first_char < 0)
2227          {          {
# Line 1105  while (!done) Line 2229  while (!done)
2229          }          }
2230        else        else
2231          {          {
2232          int ch = first_char & 255;          const char *caseless =
2233          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2234            "" : " (caseless)";            "" : " (caseless)";
2235          if (isprint(ch))  
2236            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(first_char))
2237              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2238          else          else
2239            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2240          }          }
2241    
2242        if (need_char < 0)        if (need_char < 0)
# Line 1120  while (!done) Line 2245  while (!done)
2245          }          }
2246        else        else
2247          {          {
2248          int ch = need_char & 255;          const char *caseless =
2249          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2250            "" : " (caseless)";            "" : " (caseless)";
2251          if (isprint(ch))  
2252            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(need_char))
2253              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2254          else          else
2255            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2256          }          }
2257    
2258        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2259        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2260        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2261        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2262          information unless -i or -d was also present. This means that, except
2263          when auto-callouts are involved, the output from runs with and without
2264          -s should be identical. */
2265    
2266        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2267          {          {
2268          if (extra == NULL)          if (extra == NULL)
2269            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2270          else          else
2271            {            {
2272            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2273            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2274    
2275              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2276              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2277    
2278              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2279            if (start_bits == NULL)            if (start_bits == NULL)
2280              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2281            else            else
2282              {              {
2283              int i;              int i;
# Line 1159  while (!done) Line 2292  while (!done)
2292                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2293                    c = 2;                    c = 2;
2294                    }                    }
2295                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
2296                    {                    {
2297                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2298                    c += 2;                    c += 2;
# Line 1174  while (!done) Line 2307  while (!done)
2307              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2308              }              }
2309            }            }
2310    
2311            /* Show this only if the JIT was set by /S, not by -s. */
2312    
2313            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2314              {
2315              int jit;
2316              new_info(re, extra, PCRE_INFO_JIT, &jit);
2317              if (jit)
2318                fprintf(outfile, "JIT study was successful\n");
2319              else
2320    #ifdef SUPPORT_JIT
2321                fprintf(outfile, "JIT study was not successful\n");
2322    #else
2323                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2324    #endif
2325              }
2326          }          }
2327        }        }
2328    
# Line 1190  while (!done) Line 2339  while (!done)
2339          }          }
2340        else        else
2341          {          {
2342          uschar sbuf[8];          pcre_uint8 sbuf[8];
2343          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2344          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2345          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2346          sbuf[3] = (true_size)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
2347    
2348          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2349          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2350          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2351          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2352    
2353          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2354              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1208  while (!done) Line 2357  while (!done)
2357            }            }
2358          else          else
2359            {            {
2360            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2361    
2362              /* If there is study data, write it. */
2363    
2364            if (extra != NULL)            if (extra != NULL)
2365              {              {
2366              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1222  while (!done) Line 2374  while (!done)
2374            }            }
2375          fclose(f);          fclose(f);
2376          }          }
2377    
2378          new_free(re);
2379          if (extra != NULL) pcre_free_study(extra);
2380          if (locale_set)
2381            {
2382            new_free((void *)tables);
2383            setlocale(LC_CTYPE, "C");
2384            locale_set = 0;
2385            }
2386        continue;  /* With next regex */        continue;  /* With next regex */
2387        }        }
2388      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1230  while (!done) Line 2391  while (!done)
2391    
2392    for (;;)    for (;;)
2393      {      {
2394      unsigned char *q;      pcre_uint8 *q;
2395      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
2396      int *use_offsets = offsets;      int *use_offsets = offsets;
2397      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2398      int callout_data = 0;      int callout_data = 0;
2399      int callout_data_set = 0;      int callout_data_set = 0;
2400      int count, c;      int count, c;
2401      int copystrings = 0;      int copystrings = 0;
2402      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2403      int getstrings = 0;      int getstrings = 0;
2404      int getlist = 0;      int getlist = 0;
2405      int gmatched = 0;      int gmatched = 0;
2406      int start_offset = 0;      int start_offset = 0;
2407        int start_offset_sign = 1;
2408      int g_notempty = 0;      int g_notempty = 0;
2409        int use_dfa = 0;
2410    
2411      options = 0;      options = 0;
2412    
2413        *copynames = 0;
2414        *getnames = 0;
2415    
2416        copynamesptr = copynames;
2417        getnamesptr = getnames;
2418    
2419      pcre_callout = callout;      pcre_callout = callout;
2420      first_callout = 1;      first_callout = 1;
2421        last_callout_mark = NULL;
2422      callout_extra = 0;      callout_extra = 0;
2423      callout_count = 0;      callout_count = 0;
2424      callout_fail_count = 999999;      callout_fail_count = 999999;
2425      callout_fail_id = -1;      callout_fail_id = -1;
2426      show_malloc = 0;      show_malloc = 0;
2427    
2428      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2429      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2430    
2431        len = 0;
2432        for (;;)
2433        {        {
2434        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2435        goto CONTINUE;          {
2436            if (len > 0)    /* Reached EOF without hitting a newline */
2437              {
2438              fprintf(outfile, "\n");
2439              break;
2440              }
2441            done = 1;
2442            goto CONTINUE;
2443            }
2444          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2445          len = (int)strlen((char *)buffer);
2446          if (buffer[len-1] == '\n') break;
2447        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2448    
     len = (int)strlen((char *)buffer);  
2449      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2450      buffer[len] = 0;      buffer[len] = 0;
2451      if (len == 0) break;      if (len == 0) break;
# Line 1271  while (!done) Line 2453  while (!done)
2453      p = buffer;      p = buffer;
2454      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2455    
2456      q = dbuffer;      bptr = q = dbuffer;
2457      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2458        {        {
2459        int i = 0;        int i = 0;
# Line 1293  while (!done) Line 2475  while (!done)
2475          c -= '0';          c -= '0';
2476          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2477            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2478    
2479    #if !defined NOUTF8
2480            if (use_utf8 && c > 255)
2481              {
2482              unsigned char buff8[8];
2483              int ii, utn;
2484              utn = ord2utf8(c, buff8);
2485              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2486              c = buff8[ii];   /* Last byte */
2487              }
2488    #endif
2489          break;          break;
2490    
2491          case 'x':          case 'x':
2492    
2493          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2494    
2495    #if !defined NOUTF8
2496          if (*p == '{')          if (*p == '{')
2497            {            {
2498            unsigned char *pt = p;            unsigned char *pt = p;
2499            c = 0;            c = 0;
2500            while (isxdigit(*(++pt)))  
2501              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2502              when isxdigit() is a macro that refers to its argument more than
2503              once. This is banned by the C Standard, but apparently happens in at
2504              least one MacOS environment. */
2505    
2506              for (pt++; isxdigit(*pt); pt++)
2507                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2508            if (*pt == '}')            if (*pt == '}')
2509              {              {
2510              unsigned char buff8[8];              unsigned char buff8[8];
2511              int ii, utn;              int ii, utn;
2512              utn = ord2utf8(c, buff8);              if (use_utf8)
2513              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2514              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2515                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2516                  c = buff8[ii];   /* Last byte */
2517                  }
2518                else
2519                 {
2520                 if (c > 255)
2521                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2522                     "UTF-8 mode is not enabled.\n"
2523                     "** Truncation will probably give the wrong result.\n", c);
2524                 }
2525              p = pt + 1;              p = pt + 1;
2526              break;              break;
2527              }              }
2528            /* Not correct form; fall through */            /* Not correct form; fall through */
2529            }            }
2530    #endif
2531    
2532          /* Ordinary \x */          /* Ordinary \x */
2533    
2534          c = 0;          c = 0;
2535          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2536            {            {
2537            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2538            p++;            p++;
2539            }            }
2540          break;          break;
# Line 1333  while (!done) Line 2544  while (!done)
2544          continue;          continue;
2545    
2546          case '>':          case '>':
2547            if (*p == '-')
2548              {
2549              start_offset_sign = -1;
2550              p++;
2551              }
2552          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2553            start_offset *= start_offset_sign;
2554          continue;          continue;
2555    
2556          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1352  while (!done) Line 2569  while (!done)
2569            }            }
2570          else if (isalnum(*p))          else if (isalnum(*p))
2571            {            {
2572            uschar name[256];            pcre_uchar *npp = copynamesptr;
           uschar *npp = name;  
2573            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2574              *npp++ = 0;
2575            *npp = 0;            *npp = 0;
2576            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2577            if (n < 0)            if (n < 0)
2578              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2579            else copystrings |= 1 << n;            copynamesptr = npp;
2580            }            }
2581          else if (*p == '+')          else if (*p == '+')
2582            {            {
# Line 1397  while (!done) Line 2614  while (!done)
2614            }            }
2615          continue;          continue;
2616    
2617    #if !defined NODFA
2618            case 'D':
2619    #if !defined NOPOSIX
2620            if (posix || do_posix)
2621              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2622            else
2623    #endif
2624              use_dfa = 1;
2625            continue;
2626    #endif
2627    
2628    #if !defined NODFA
2629            case 'F':
2630            options |= PCRE_DFA_SHORTEST;
2631            continue;
2632    #endif
2633    
2634          case 'G':          case 'G':
2635          if (isdigit(*p))          if (isdigit(*p))
2636            {            {
# Line 1405  while (!done) Line 2639  while (!done)
2639            }            }
2640          else if (isalnum(*p))          else if (isalnum(*p))
2641            {            {
2642            uschar name[256];            pcre_uchar *npp = getnamesptr;
           uschar *npp = name;  
2643            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2644              *npp++ = 0;
2645            *npp = 0;            *npp = 0;
2646            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2647            if (n < 0)            if (n < 0)
2648              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2649            else getstrings |= 1 << n;            getnamesptr = npp;
2650              }
2651            continue;
2652    
2653            case 'J':
2654            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2655            if (extra != NULL
2656                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2657                && extra->executable_jit != NULL)
2658              {
2659              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2660              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2661              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2662            }            }
2663          continue;          continue;
2664    
# Line 1425  while (!done) Line 2671  while (!done)
2671          continue;          continue;
2672    
2673          case 'N':          case 'N':
2674          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2675              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2676            else
2677              options |= PCRE_NOTEMPTY;
2678          continue;          continue;
2679    
2680          case 'O':          case 'O':
# Line 1438  while (!done) Line 2687  while (!done)
2687            if (offsets == NULL)            if (offsets == NULL)
2688              {              {
2689              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2690                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2691              return 1;              yield = 1;
2692                goto EXIT;
2693              }              }
2694            }            }
2695          use_size_offsets = n;          use_size_offsets = n;
# Line 1447  while (!done) Line 2697  while (!done)
2697          continue;          continue;
2698    
2699          case 'P':          case 'P':
2700          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2701              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2702            continue;
2703    
2704            case 'Q':
2705            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2706            if (extra == NULL)
2707              {
2708              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2709              extra->flags = 0;
2710              }
2711            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2712            extra->match_limit_recursion = n;
2713            continue;
2714    
2715            case 'q':
2716            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2717            if (extra == NULL)
2718              {
2719              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2720              extra->flags = 0;
2721              }
2722            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2723            extra->match_limit = n;
2724            continue;
2725    
2726    #if !defined NODFA
2727            case 'R':
2728            options |= PCRE_DFA_RESTART;
2729          continue;          continue;
2730    #endif
2731    
2732          case 'S':          case 'S':
2733          show_malloc = 1;          show_malloc = 1;
2734          continue;          continue;
2735    
2736            case 'Y':
2737            options |= PCRE_NO_START_OPTIMIZE;
2738            continue;
2739    
2740          case 'Z':          case 'Z':
2741          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2742          continue;          continue;
# Line 1461  while (!done) Line 2744  while (!done)
2744          case '?':          case '?':
2745          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2746          continue;          continue;
2747    
2748            case '<':
2749              {
2750              int x = check_newline(p, outfile);
2751              if (x == 0) goto NEXT_DATA;
2752              options |= x;
2753              while (*p++ != '>');
2754              }
2755            continue;
2756          }          }
2757        *q++ = c;        *q++ = c;
2758        }        }
2759      *q = 0;      *q = 0;
2760      len = q - dbuffer;      len = (int)(q - dbuffer);
2761    
2762        /* Move the data to the end of the buffer so that a read over the end of
2763        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2764        we are using the POSIX interface, we must include the terminating zero. */
2765    
2766    #if !defined NOPOSIX
2767        if (posix || do_posix)
2768          {
2769          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2770          bptr += buffer_size - len - 1;
2771          }
2772        else
2773    #endif
2774          {
2775          memmove(bptr + buffer_size - len, bptr, len);
2776          bptr += buffer_size - len;
2777          }
2778    
2779        if ((all_use_dfa || use_dfa) && find_match_limit)
2780          {
2781          printf("**Match limit not relevant for DFA matching: ignored\n");
2782          find_match_limit = 0;
2783          }
2784    
2785      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2786      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
# Line 1480  while (!done) Line 2795  while (!done)
2795          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2796        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2797        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2798          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2799    
2800        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2801    
2802        if (rc != 0)        if (rc != 0)
2803          {          {
2804          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2805          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2806          }          }
2807          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2808                  != 0)
2809            {
2810            fprintf(outfile, "Matched with REG_NOSUB\n");
2811            }
2812        else        else
2813          {          {
2814          size_t i;          size_t i;
# Line 1499  while (!done) Line 2820  while (!done)
2820              (void)pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2821                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2822              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2823              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2824                {                {
2825                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2826                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2827                  outfile);                  outfile);
2828                fprintf(outfile, "\n");                fprintf(outfile, "\n");
# Line 1519  while (!done) Line 2840  while (!done)
2840    
2841      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2842        {        {
2843        if (timeit)        markptr = NULL;
2844    
2845          if (timeitm > 0)
2846          {          {
2847          register int i;          register int i;
2848          clock_t time_taken;          clock_t time_taken;
2849          clock_t start_time = clock();          clock_t start_time = clock();
2850          for (i = 0; i < LOOPREPEAT; i++)  
2851    #ifdef SUPPORT_PCRE16
2852            if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
2853    #endif
2854    
2855    
2856    #if !defined NODFA
2857            if (all_use_dfa || use_dfa)
2858              {
2859              int workspace[1000];
2860              for (i = 0; i < timeitm; i++)
2861                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2862                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2863                  sizeof(workspace)/sizeof(int));
2864              }
2865            else
2866    #endif
2867    
2868            for (i = 0; i < timeitm; i++)
2869            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2870              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2871    
2872          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2873          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2874            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2875              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2876          }          }
2877    
2878        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2879        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2880          for the recursion limit. The match limits are relevant only to the normal
2881          running of pcre_exec(), so disable the JIT optimization. This makes it
2882          possible to run the same set of tests with and without JIT externally
2883          requested. */
2884    
2885        if (find_match_limit)        if (find_match_limit)
2886          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2887          if (extra == NULL)          if (extra == NULL)
2888            {            {
2889            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2890            extra->flags = 0;            extra->flags = 0;
2891            }            }
2892          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2893    
2894          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2895              options|g_notempty, use_offsets, use_size_offsets,
2896              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2897              PCRE_ERROR_MATCHLIMIT, "match()");
2898    
2899            count = check_match_limit(re, extra, bptr, len, start_offset,
2900              options|g_notempty, use_offsets, use_size_offsets,
2901              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2902              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2903          }          }
2904    
2905        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1597  while (!done) Line 2921  while (!done)
2921        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2922        value of match_limit. */        value of match_limit. */
2923    
2924        else  #if !defined NODFA
2925          else if (all_use_dfa || use_dfa)
2926          {          {
2927          count = pcre_exec(re, extra, (char *)bptr, len,          int workspace[1000];
2928            start_offset, options | g_notempty, use_offsets, use_size_offsets);          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2929              options | g_notempty, use_offsets, use_size_offsets, workspace,
2930              sizeof(workspace)/sizeof(int));
2931            if (count == 0)
2932              {
2933              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2934              count = use_size_offsets/2;
2935              }
2936          }          }
2937    #endif
2938    
2939        if (count == 0)        else
2940          {          {
2941          fprintf(outfile, "Matched, but too many substrings\n");          if (use_pcre16)
2942          count = use_size_offsets/3;            count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
2943                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2944            else
2945              count = pcre_exec(re, extra, (char *)bptr, len,
2946                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2947            if (count == 0)
2948              {
2949              fprintf(outfile, "Matched, but too many substrings\n");
2950              count = use_size_offsets/3;
2951              }
2952          }          }
2953    
2954        /* Matched */        /* Matched */
2955    
2956        if (count >= 0)        if (count >= 0)
2957          {          {
2958          int i;          int i, maxcount;
2959    
2960    #if !defined NODFA
2961            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2962    #endif
2963              maxcount = use_size_offsets/3;
2964    
2965            /* This is a check against a lunatic return value. */
2966    
2967            if (count > maxcount)
2968              {
2969              fprintf(outfile,
2970                "** PCRE error: returned count %d is too big for offset size %d\n",
2971                count, use_size_offsets);
2972              count = use_size_offsets/3;
2973              if (do_g || do_G)
2974                {
2975                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2976                do_g = do_G = FALSE;        /* Break g/G loop */
2977                }
2978              }
2979    
2980            /* do_allcaps requests showing of all captures in the pattern, to check
2981            unset ones at the end. */
2982    
2983            if (do_allcaps)
2984              {
2985              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2986              count++;   /* Allow for full match */
2987              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2988              }
2989    
2990            /* Output the captured substrings */
2991    
2992          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2993            {            {
2994            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2995                {
2996                if (use_offsets[i] != -1)
2997                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2998                    use_offsets[i], i);
2999                if (use_offsets[i+1] != -1)
3000                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3001                    use_offsets[i+1], i+1);
3002              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3003                }
3004            else            else
3005              {              {
3006              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3007              (void)pchars(bptr + use_offsets[i],              (void)pchars(bptr + use_offsets[i],
3008                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3009              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3010              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3011                {                {
3012                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3013                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
3014                  fprintf(outfile, " 0+ ");                  outfile);
3015                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3016                }                }
3017              }              }
3018            }            }
3019    
3020            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3021    
3022          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3023            {            {
3024            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
3025              {              {
3026              char copybuffer[16];              char copybuffer[256];
3027              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3028                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
3029              if (rc < 0)              if (rc < 0)
# Line 1651  while (!done) Line 3033  while (!done)
3033              }              }
3034            }            }
3035    
3036            for (copynamesptr = copynames;
3037                 *copynamesptr != 0;
3038                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3039              {
3040              char copybuffer[256];
3041              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3042                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3043              if (rc < 0)
3044                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3045              else
3046                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3047              }
3048    
3049          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3050            {            {
3051            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1663  while (!done) Line 3058  while (!done)
3058              else              else
3059                {                {
3060                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
3061                pcre_free_substring(substring);                pcre_free_substring(substring);
3062                }                }
3063              }              }
3064            }            }
3065    
3066            for (getnamesptr = getnames;
3067                 *getnamesptr != 0;
3068                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3069              {
3070              const char *substring;
3071              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3072                count, (char *)getnamesptr, &substring);
3073              if (rc < 0)
3074                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3075              else
3076                {
3077                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
3078                pcre_free_substring(substring);
3079                }
3080              }
3081    
3082          if (getlist)          if (getlist)
3083            {            {
3084            const char **stringlist;            const char **stringlist;
# Line 1682  while (!done) Line 3092  while (!done)
3092                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3093              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3094                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3095              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3096              }              }
3097            }            }
# Line 1692  while (!done) Line 3101  while (!done)
3101    
3102        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
3103          {          {
3104          fprintf(outfile, "Partial match\n");          if (markptr == NULL) fprintf(outfile, "Partial match");
3105              else fprintf(outfile, "Partial match, mark=%s", markptr);
3106            if (use_size_offsets > 1)
3107              {
3108              fprintf(outfile, ": ");
3109              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3110                outfile);
3111              }
3112            fprintf(outfile, "\n");
3113          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
3114          }          }
3115    
3116        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
3117        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
3118        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
3119        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
3120        offset values to achieve this. We won't be at the end of the string -  
3121        that was checked before setting g_notempty. */        Complication arises in the case when the newline convention is "any",
3122          "crlf", or "anycrlf". If the previous match was at the end of a line
3123          terminated by CRLF, an advance of one character just passes the \r,
3124          whereas we should prefer the longer newline sequence, as does the code in
3125          pcre_exec(). Fudge the offset value to achieve this. We check for a
3126          newline setting in the pattern; if none was set, use pcre_config() to
3127          find the default.
3128    
3129          Otherwise, in the case of UTF-8 matching, the advance must be one
3130          character, not one byte. */
3131    
3132        else        else
3133          {          {
3134          if (g_notempty != 0)          if (g_notempty != 0)
3135            {            {
3136            int onechar = 1;            int onechar = 1;
3137              unsigned int obits = ((real_pcre *)re)->options;
3138            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
3139            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
3140                {
3141                int d;
3142                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3143                /* Note that these values are always the ASCII ones, even in
3144                EBCDIC environments. CR = 13, NL = 10. */
3145                obits = (d == 13)? PCRE_NEWLINE_CR :
3146                        (d == 10)? PCRE_NEWLINE_LF :
3147                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3148                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
3149                        (d == -1)? PCRE_NEWLINE_ANY : 0;
3150                }
3151              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3152                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3153                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3154                  &&
3155                  start_offset < len - 1 &&
3156                  bptr[start_offset] == '\r' &&
3157                  bptr[start_offset+1] == '\n')
3158                onechar++;
3159              else if (use_utf8)
3160              {              {
3161              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3162                {                {
3163                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3164                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3165                }                }
3166              }              }
3167            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3168            }            }
3169          else          else
3170            {            {
3171            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3172              {              {
3173              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3174                if (gmatched == 0)
3175                  {
3176                  if (markptr == NULL) fprintf(outfile, "No match\n");
3177                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3178                  }
3179                break;
3180    
3181                case PCRE_ERROR_BADUTF8:
3182                case PCRE_ERROR_SHORTUTF8:
3183                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3184                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3185                if (use_size_offsets >= 2)
3186                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3187                    use_offsets[1]);
3188                fprintf(outfile, "\n");
3189                break;
3190    
3191                default:
3192                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3193                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3194                else
3195                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3196                break;
3197              }              }
3198            else fprintf(outfile, "Error %d\n", count);  
3199            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3200            }            }
3201          }          }
# Line 1737  while (!done) Line 3205  while (!done)
3205        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3206    
3207        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3208        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3209        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
3210        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3211        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3212        character. */        character. */
3213    
3214        g_notempty = 0;        g_notempty = 0;
3215    
3216        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3217          {          {
3218          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3219          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3220          }          }
3221    
3222        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1762  while (!done) Line 3231  while (!done)
3231          len -= use_offsets[1];          len -= use_offsets[1];
3232          }          }
3233        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
3234    
3235        NEXT_DATA: continue;
3236      }    /* End of loop for data lines */      }    /* End of loop for data lines */
3237    
3238    CONTINUE:    CONTINUE:
# Line 1770  while (!done) Line 3241  while (!done)
3241    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
3242  #endif  #endif
3243    
3244    if (re != NULL) free(re);    if (re != NULL) new_free(re);
3245    if (extra != NULL) free(extra);    if (extra != NULL) pcre_free_study(extra);
3246    if (tables != NULL)    if (locale_set)
3247      {      {
3248      free((void *)tables);      new_free((void *)tables);
3249      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3250        locale_set = 0;
3251        }
3252      if (jit_stack != NULL)
3253        {
3254        pcre_jit_stack_free(jit_stack);
3255        jit_stack = NULL;
3256      }      }
3257    }    }
3258    
3259  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
3260  return 0;  
3261    EXIT:
3262    
3263    if (infile != NULL && infile != stdin) fclose(infile);
3264    if (outfile != NULL && outfile != stdout) fclose(outfile);
3265    
3266    free(buffer);
3267    free(dbuffer);
3268    free(pbuffer);
3269    free(offsets);
3270    
3271    #ifdef SUPPORT_PCRE16
3272    if (buffer16 != NULL) free(buffer16);
3273    #endif
3274    
3275    return yield;
3276  }  }
3277    
3278  /* End */  /* End of pcretest.c */

Legend:
Removed from v.75  
changed lines
  Added in v.805

  ViewVC Help
Powered by ViewVC 1.1.5