/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 788 by ph10, Tue Dec 6 15:38:01 2011 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68  /* We need the internal info for displaying the results of pcre_study() and  #if defined(_WIN32) || defined(WIN32)
69  other internal data; pcretest also uses some of the fixed tables, and generally  #include <io.h>                /* For _setmode() */
70  has "inside information" compared to a program that strictly follows the PCRE  #include <fcntl.h>             /* For _O_BINARY */
71  API. */  #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90    #else
91    #include <sys/time.h>          /* These two includes are needed */
92    #include <sys/resource.h>      /* for setrlimit(). */
93    #define INPUT_MODE   "rb"
94    #define OUTPUT_MODE  "wb"
95    #endif
96    
97    
98    /* We have to include pcre_internal.h because we need the internal info for
99    displaying the results of pcre_study() and we also need to know about the
100    internal macros, structures, and other internal data values; pcretest has
101    "inside information" compared to a program that strictly follows the PCRE API.
102    
103    Although pcre_internal.h does itself include pcre.h, we explicitly include it
104    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105    appropriately for an application, not for building PCRE. */
106    
107    #include "pcre.h"
108  #include "pcre_internal.h"  #include "pcre_internal.h"
109    
110    /* We need access to some of the data tables that PCRE uses. So as not to have
111    to keep two copies, we include the source file here, changing the names of the
112    external symbols to prevent clashes. */
113    
114    #define _pcre_ucp_gentype      ucp_gentype
115    #define _pcre_ucp_typerange    ucp_typerange
116    #define _pcre_utf8_table1      utf8_table1
117    #define _pcre_utf8_table1_size utf8_table1_size
118    #define _pcre_utf8_table2      utf8_table2
119    #define _pcre_utf8_table3      utf8_table3
120    #define _pcre_utf8_table4      utf8_table4
121    #define _pcre_utf8_char_sizes  utf8_char_sizes
122    #define _pcre_utt              utt
123    #define _pcre_utt_size         utt_size
124    #define _pcre_utt_names        utt_names
125    #define _pcre_OP_lengths       OP_lengths
126    
127    #include "pcre_tables.c"
128    
129    /* We also need the pcre_printint() function for printing out compiled
130    patterns. This function is in a separate file so that it can be included in
131    pcre_compile.c when that module is compiled with debugging enabled. It needs to
132    know which case is being compiled. */
133    
134    #define COMPILING_PCRETEST
135    #include "pcre_printint.src"
136    
137    /* The definition of the macro PRINTABLE, which determines whether to print an
138    output character as-is or as a hex value when showing compiled patterns, is
139    contained in the printint.src file. We use it here also, in cases when the
140    locale has not been explicitly changed, so as to get consistent output from
141    systems that differ in their output from isprint() even in the "C" locale. */
142    
143    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144    
145  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
146  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 150  Makefile. */
150  #include "pcreposix.h"  #include "pcreposix.h"
151  #endif  #endif
152    
153    /* It is also possible, for the benefit of the version currently imported into
154    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155    interface to the DFA matcher (NODFA), and without the doublecheck of the old
156    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157    UTF8 support if PCRE is built without it. */
158    
159    #ifndef SUPPORT_UTF8
160    #ifndef NOUTF8
161    #define NOUTF8
162    #endif
163    #endif
164    
165    
166    /* Other parameters */
167    
168  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
169  #ifdef CLK_TCK  #ifdef CLK_TCK
170  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 70  Makefile. */ Line 173  Makefile. */
173  #endif  #endif
174  #endif  #endif
175    
176  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
177    
178  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
179    
180    /* Static variables */
181    
182  static FILE *outfile;  static FILE *outfile;
183  static int log_store = 0;  static int log_store = 0;
# Line 83  static int callout_count; Line 185  static int callout_count;
185  static int callout_extra;  static int callout_extra;
186  static int callout_fail_count;  static int callout_fail_count;
187  static int callout_fail_id;  static int callout_fail_id;
188    static int debug_lengths;
189  static int first_callout;  static int first_callout;
190    static int locale_set = 0;
191  static int show_malloc;  static int show_malloc;
192  static int use_utf8;  static int use_utf8;
193  static size_t gotten_store;  static size_t gotten_store;
194    static size_t first_gotten_store = 0;
195    static const unsigned char *last_callout_mark = NULL;
196    
197    /* The buffers grow automatically if very long input lines are encountered. */
198    
199    static int buffer_size = 50000;
200    static uschar *buffer = NULL;
201    static uschar *dbuffer = NULL;
202  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
203    
204    /* Textual explanations for runtime error codes */
205    
206    static const char *errtexts[] = {
207      NULL,  /* 0 is no error */
208      NULL,  /* NOMATCH is handled specially */
209      "NULL argument passed",
210      "bad option value",
211      "magic number missing",
212      "unknown opcode - pattern overwritten?",
213      "no more memory",
214      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
215      "match limit exceeded",
216      "callout error code",
217      NULL,  /* BADUTF8 is handled specially */
218      "bad UTF-8 offset",
219      NULL,  /* PARTIAL is handled specially */
220      "not used - internal error",
221      "internal error - pattern overwritten?",
222      "bad count value",
223      "item unsupported for DFA matching",
224      "backreference condition or recursion test not supported for DFA matching",
225      "match limit not supported for DFA matching",
226      "workspace size exceeded in DFA matching",
227      "too much recursion for DFA matching",
228      "recursion limit exceeded",
229      "not used - internal error",
230      "invalid combination of newline options",
231      "bad offset value",
232      NULL,  /* SHORTUTF8 is handled specially */
233      "nested recursion at the same subject position",
234      "JIT stack limit reached"
235    };
236    
237    
238    /*************************************************
239    *         Alternate character tables             *
240    *************************************************/
241    
242    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
243    using the default tables of the library. However, the T option can be used to
244    select alternate sets of tables, for different kinds of testing. Note also that
245    the L (locale) option also adjusts the tables. */
246    
247    /* This is the set of tables distributed as default with PCRE. It recognizes
248    only ASCII characters. */
249    
250    static const unsigned char tables0[] = {
251    
252    /* This table is a lower casing table. */
253    
254        0,  1,  2,  3,  4,  5,  6,  7,
255        8,  9, 10, 11, 12, 13, 14, 15,
256       16, 17, 18, 19, 20, 21, 22, 23,
257       24, 25, 26, 27, 28, 29, 30, 31,
258       32, 33, 34, 35, 36, 37, 38, 39,
259       40, 41, 42, 43, 44, 45, 46, 47,
260       48, 49, 50, 51, 52, 53, 54, 55,
261       56, 57, 58, 59, 60, 61, 62, 63,
262       64, 97, 98, 99,100,101,102,103,
263      104,105,106,107,108,109,110,111,
264      112,113,114,115,116,117,118,119,
265      120,121,122, 91, 92, 93, 94, 95,
266       96, 97, 98, 99,100,101,102,103,
267      104,105,106,107,108,109,110,111,
268      112,113,114,115,116,117,118,119,
269      120,121,122,123,124,125,126,127,
270      128,129,130,131,132,133,134,135,
271      136,137,138,139,140,141,142,143,
272      144,145,146,147,148,149,150,151,
273      152,153,154,155,156,157,158,159,
274      160,161,162,163,164,165,166,167,
275      168,169,170,171,172,173,174,175,
276      176,177,178,179,180,181,182,183,
277      184,185,186,187,188,189,190,191,
278      192,193,194,195,196,197,198,199,
279      200,201,202,203,204,205,206,207,
280      208,209,210,211,212,213,214,215,
281      216,217,218,219,220,221,222,223,
282      224,225,226,227,228,229,230,231,
283      232,233,234,235,236,237,238,239,
284      240,241,242,243,244,245,246,247,
285      248,249,250,251,252,253,254,255,
286    
287    /* This table is a case flipping table. */
288    
289        0,  1,  2,  3,  4,  5,  6,  7,
290        8,  9, 10, 11, 12, 13, 14, 15,
291       16, 17, 18, 19, 20, 21, 22, 23,
292       24, 25, 26, 27, 28, 29, 30, 31,
293       32, 33, 34, 35, 36, 37, 38, 39,
294       40, 41, 42, 43, 44, 45, 46, 47,
295       48, 49, 50, 51, 52, 53, 54, 55,
296       56, 57, 58, 59, 60, 61, 62, 63,
297       64, 97, 98, 99,100,101,102,103,
298      104,105,106,107,108,109,110,111,
299      112,113,114,115,116,117,118,119,
300      120,121,122, 91, 92, 93, 94, 95,
301       96, 65, 66, 67, 68, 69, 70, 71,
302       72, 73, 74, 75, 76, 77, 78, 79,
303       80, 81, 82, 83, 84, 85, 86, 87,
304       88, 89, 90,123,124,125,126,127,
305      128,129,130,131,132,133,134,135,
306      136,137,138,139,140,141,142,143,
307      144,145,146,147,148,149,150,151,
308      152,153,154,155,156,157,158,159,
309      160,161,162,163,164,165,166,167,
310      168,169,170,171,172,173,174,175,
311      176,177,178,179,180,181,182,183,
312      184,185,186,187,188,189,190,191,
313      192,193,194,195,196,197,198,199,
314      200,201,202,203,204,205,206,207,
315      208,209,210,211,212,213,214,215,
316      216,217,218,219,220,221,222,223,
317      224,225,226,227,228,229,230,231,
318      232,233,234,235,236,237,238,239,
319      240,241,242,243,244,245,246,247,
320      248,249,250,251,252,253,254,255,
321    
322    /* This table contains bit maps for various character classes. Each map is 32
323    bytes long and the bits run from the least significant end of each byte. The
324    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
325    graph, print, punct, and cntrl. Other classes are built from combinations. */
326    
327      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
333      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
334      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336    
337      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341    
342      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346    
347      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
349      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351    
352      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
353      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
354      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356    
357      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
358      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361    
362      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
363      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366    
367      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
368      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371    
372      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
376    
377    /* This table identifies various classes of character by individual bits:
378      0x01   white space character
379      0x02   letter
380      0x04   decimal digit
381      0x08   hexadecimal digit
382      0x10   alphanumeric or '_'
383      0x80   regular expression metacharacter or binary zero
384    */
385    
386      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
387      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
388      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
389      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
390      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
391      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
392      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
393      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
394      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
395      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
396      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
397      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
398      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
399      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
400      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
401      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
402      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
403      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
404      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
407      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
408      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
409      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
410      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
411      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
412      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
413      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
414      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
415      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
416      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
417      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
418    
419    /* This is a set of tables that came originally from a Windows user. It seems to
420    be at least an approximation of ISO 8859. In particular, there are characters
421    greater than 128 that are marked as spaces, letters, etc. */
422    
423    static const unsigned char tables1[] = {
424    0,1,2,3,4,5,6,7,
425    8,9,10,11,12,13,14,15,
426    16,17,18,19,20,21,22,23,
427    24,25,26,27,28,29,30,31,
428    32,33,34,35,36,37,38,39,
429    40,41,42,43,44,45,46,47,
430    48,49,50,51,52,53,54,55,
431    56,57,58,59,60,61,62,63,
432    64,97,98,99,100,101,102,103,
433    104,105,106,107,108,109,110,111,
434    112,113,114,115,116,117,118,119,
435    120,121,122,91,92,93,94,95,
436    96,97,98,99,100,101,102,103,
437    104,105,106,107,108,109,110,111,
438    112,113,114,115,116,117,118,119,
439    120,121,122,123,124,125,126,127,
440    128,129,130,131,132,133,134,135,
441    136,137,138,139,140,141,142,143,
442    144,145,146,147,148,149,150,151,
443    152,153,154,155,156,157,158,159,
444    160,161,162,163,164,165,166,167,
445    168,169,170,171,172,173,174,175,
446    176,177,178,179,180,181,182,183,
447    184,185,186,187,188,189,190,191,
448    224,225,226,227,228,229,230,231,
449    232,233,234,235,236,237,238,239,
450    240,241,242,243,244,245,246,215,
451    248,249,250,251,252,253,254,223,
452    224,225,226,227,228,229,230,231,
453    232,233,234,235,236,237,238,239,
454    240,241,242,243,244,245,246,247,
455    248,249,250,251,252,253,254,255,
456    0,1,2,3,4,5,6,7,
457    8,9,10,11,12,13,14,15,
458    16,17,18,19,20,21,22,23,
459    24,25,26,27,28,29,30,31,
460    32,33,34,35,36,37,38,39,
461    40,41,42,43,44,45,46,47,
462    48,49,50,51,52,53,54,55,
463    56,57,58,59,60,61,62,63,
464    64,97,98,99,100,101,102,103,
465    104,105,106,107,108,109,110,111,
466    112,113,114,115,116,117,118,119,
467    120,121,122,91,92,93,94,95,
468    96,65,66,67,68,69,70,71,
469    72,73,74,75,76,77,78,79,
470    80,81,82,83,84,85,86,87,
471    88,89,90,123,124,125,126,127,
472    128,129,130,131,132,133,134,135,
473    136,137,138,139,140,141,142,143,
474    144,145,146,147,148,149,150,151,
475    152,153,154,155,156,157,158,159,
476    160,161,162,163,164,165,166,167,
477    168,169,170,171,172,173,174,175,
478    176,177,178,179,180,181,182,183,
479    184,185,186,187,188,189,190,191,
480    224,225,226,227,228,229,230,231,
481    232,233,234,235,236,237,238,239,
482    240,241,242,243,244,245,246,215,
483    248,249,250,251,252,253,254,223,
484    192,193,194,195,196,197,198,199,
485    200,201,202,203,204,205,206,207,
486    208,209,210,211,212,213,214,247,
487    216,217,218,219,220,221,222,255,
488    0,62,0,0,1,0,0,0,
489    0,0,0,0,0,0,0,0,
490    32,0,0,0,1,0,0,0,
491    0,0,0,0,0,0,0,0,
492    0,0,0,0,0,0,255,3,
493    126,0,0,0,126,0,0,0,
494    0,0,0,0,0,0,0,0,
495    0,0,0,0,0,0,0,0,
496    0,0,0,0,0,0,255,3,
497    0,0,0,0,0,0,0,0,
498    0,0,0,0,0,0,12,2,
499    0,0,0,0,0,0,0,0,
500    0,0,0,0,0,0,0,0,
501    254,255,255,7,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    255,255,127,127,0,0,0,0,
504    0,0,0,0,0,0,0,0,
505    0,0,0,0,254,255,255,7,
506    0,0,0,0,0,4,32,4,
507    0,0,0,128,255,255,127,255,
508    0,0,0,0,0,0,255,3,
509    254,255,255,135,254,255,255,7,
510    0,0,0,0,0,4,44,6,
511    255,255,127,255,255,255,127,255,
512    0,0,0,0,254,255,255,255,
513    255,255,255,255,255,255,255,127,
514    0,0,0,0,254,255,255,255,
515    255,255,255,255,255,255,255,255,
516    0,2,0,0,255,255,255,255,
517    255,255,255,255,255,255,255,127,
518    0,0,0,0,255,255,255,255,
519    255,255,255,255,255,255,255,255,
520    0,0,0,0,254,255,0,252,
521    1,0,0,248,1,0,0,120,
522    0,0,0,0,254,255,255,255,
523    0,0,128,0,0,0,128,0,
524    255,255,255,255,0,0,0,0,
525    0,0,0,0,0,0,0,128,
526    255,255,255,255,0,0,0,0,
527    0,0,0,0,0,0,0,0,
528    128,0,0,0,0,0,0,0,
529    0,1,1,0,1,1,0,0,
530    0,0,0,0,0,0,0,0,
531    0,0,0,0,0,0,0,0,
532    1,0,0,0,128,0,0,0,
533    128,128,128,128,0,0,128,0,
534    28,28,28,28,28,28,28,28,
535    28,28,0,0,0,0,0,128,
536    0,26,26,26,26,26,26,18,
537    18,18,18,18,18,18,18,18,
538    18,18,18,18,18,18,18,18,
539    18,18,18,128,128,0,128,16,
540    0,26,26,26,26,26,26,18,
541    18,18,18,18,18,18,18,18,
542    18,18,18,18,18,18,18,18,
543    18,18,18,128,128,0,0,0,
544    0,0,0,0,0,1,0,0,
545    0,0,0,0,0,0,0,0,
546    0,0,0,0,0,0,0,0,
547    0,0,0,0,0,0,0,0,
548    1,0,0,0,0,0,0,0,
549    0,0,18,0,0,0,0,0,
550    0,0,20,20,0,18,0,0,
551    0,20,18,0,0,0,0,0,
552    18,18,18,18,18,18,18,18,
553    18,18,18,18,18,18,18,18,
554    18,18,18,18,18,18,18,0,
555    18,18,18,18,18,18,18,18,
556    18,18,18,18,18,18,18,18,
557    18,18,18,18,18,18,18,18,
558    18,18,18,18,18,18,18,0,
559    18,18,18,18,18,18,18,18
560    };
561    
562    
563    
564    
565    #ifndef HAVE_STRERROR
566    /*************************************************
567    *     Provide strerror() for non-ANSI libraries  *
568    *************************************************/
569    
570    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
571    in their libraries, but can provide the same facility by this simple
572    alternative function. */
573    
574    extern int   sys_nerr;
575    extern char *sys_errlist[];
576    
577    char *
578    strerror(int n)
579    {
580    if (n < 0 || n >= sys_nerr) return "unknown error number";
581    return sys_errlist[n];
582    }
583    #endif /* HAVE_STRERROR */
584    
585    
586    /*************************************************
587    *         JIT memory callback                    *
588    *************************************************/
589    
590    static pcre_jit_stack* jit_callback(void *arg)
591    {
592    return (pcre_jit_stack *)arg;
593    }
594    
595    
596    /*************************************************
597    *        Read or extend an input line            *
598    *************************************************/
599    
600    /* Input lines are read into buffer, but both patterns and data lines can be
601    continued over multiple input lines. In addition, if the buffer fills up, we
602    want to automatically expand it so as to be able to handle extremely large
603    lines that are needed for certain stress tests. When the input buffer is
604    expanded, the other two buffers must also be expanded likewise, and the
605    contents of pbuffer, which are a copy of the input for callouts, must be
606    preserved (for when expansion happens for a data line). This is not the most
607    optimal way of handling this, but hey, this is just a test program!
608    
609    Arguments:
610      f            the file to read
611      start        where in buffer to start (this *must* be within buffer)
612      prompt       for stdin or readline()
613    
614    Returns:       pointer to the start of new data
615                   could be a copy of start, or could be moved
616                   NULL if no data read and EOF reached
617    */
618    
619    static uschar *
620    extend_inputline(FILE *f, uschar *start, const char *prompt)
621    {
622    uschar *here = start;
623    
624    for (;;)
625      {
626      int rlen = (int)(buffer_size - (here - buffer));
627    
628      if (rlen > 1000)
629        {
630        int dlen;
631    
632        /* If libreadline support is required, use readline() to read a line if the
633        input is a terminal. Note that readline() removes the trailing newline, so
634        we must put it back again, to be compatible with fgets(). */
635    
636    #ifdef SUPPORT_LIBREADLINE
637        if (isatty(fileno(f)))
638          {
639          size_t len;
640          char *s = readline(prompt);
641          if (s == NULL) return (here == start)? NULL : start;
642          len = strlen(s);
643          if (len > 0) add_history(s);
644          if (len > rlen - 1) len = rlen - 1;
645          memcpy(here, s, len);
646          here[len] = '\n';
647          here[len+1] = 0;
648          free(s);
649          }
650        else
651    #endif
652    
653        /* Read the next line by normal means, prompting if the file is stdin. */
654    
655          {
656          if (f == stdin) printf("%s", prompt);
657          if (fgets((char *)here, rlen,  f) == NULL)
658            return (here == start)? NULL : start;
659          }
660    
661        dlen = (int)strlen((char *)here);
662        if (dlen > 0 && here[dlen - 1] == '\n') return start;
663        here += dlen;
664        }
665    
666      else
667        {
668        int new_buffer_size = 2*buffer_size;
669        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
670        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
671        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
672    
673        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
674          {
675          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
676          exit(1);
677          }
678    
679        memcpy(new_buffer, buffer, buffer_size);
680        memcpy(new_pbuffer, pbuffer, buffer_size);
681    
682        buffer_size = new_buffer_size;
683    
684        start = new_buffer + (start - buffer);
685        here = new_buffer + (here - buffer);
686    
687        free(buffer);
688        free(dbuffer);
689        free(pbuffer);
690    
691        buffer = new_buffer;
692        dbuffer = new_dbuffer;
693        pbuffer = new_pbuffer;
694        }
695      }
696    
697    return NULL;  /* Control never gets here */
698    }
699    
700    
701    
702    
703    
704    
705    
706  /*************************************************  /*************************************************
# Line 98  static uschar *pbuffer = NULL; Line 709  static uschar *pbuffer = NULL;
709    
710  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
711  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
712  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
713    
714  Arguments:  Arguments:
715    str           string to be converted    str           string to be converted
# Line 128  return(result); Line 739  return(result);
739  and returns the value of the character.  and returns the value of the character.
740    
741  Argument:  Argument:
742    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
743    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
744    
745  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
746             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
747  */  */
748    
749    #if !defined NOUTF8
750    
751  static int  static int
752  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
753  {  {
754  int c = *buffer++;  int c = *utf8bytes++;
755  int d = c;  int d = c;
756  int i, j, s;  int i, j, s;
757    
# Line 154  if (i == 0 || i == 6) return 0;        / Line 767  if (i == 0 || i == 6) return 0;        /
767  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
768    
769  s = 6*i;  s = 6*i;
770  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
771    
772  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
773    {    {
774    c = *buffer++;    c = *utf8bytes++;
775    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
776    s -= 6;    s -= 6;
777    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 166  for (j = 0; j < i; j++) Line 779  for (j = 0; j < i; j++)
779    
780  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
781    
782  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
783    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
784  if (j != i) return -(i+1);  if (j != i) return -(i+1);
785    
786  /* Valid value */  /* Valid value */
# Line 176  if (j != i) return -(i+1); Line 789  if (j != i) return -(i+1);
789  return i+1;  return i+1;
790  }  }
791    
792    #endif
793    
794    
795    
796    /*************************************************
797    *       Convert character value to UTF-8         *
798    *************************************************/
799    
800    /* This function takes an integer value in the range 0 - 0x7fffffff
801    and encodes it as a UTF-8 character in 1 to 6 bytes.
802    
803    Arguments:
804      cvalue     the character value
805      utf8bytes  pointer to buffer for result - at least 6 bytes long
806    
807    Returns:     number of bytes placed in the buffer
808    */
809    
810    #if !defined NOUTF8
811    
812    static int
813    ord2utf8(int cvalue, uschar *utf8bytes)
814    {
815    register int i, j;
816    for (i = 0; i < utf8_table1_size; i++)
817      if (cvalue <= utf8_table1[i]) break;
818    utf8bytes += i;
819    for (j = i; j > 0; j--)
820     {
821     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
822     cvalue >>= 6;
823     }
824    *utf8bytes = utf8_table2[i] | cvalue;
825    return i + 1;
826    }
827    
828    #endif
829    
830    
831    
832  /*************************************************  /*************************************************
# Line 188  chars without printing. */ Line 839  chars without printing. */
839    
840  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
841  {  {
842  int c;  int c = 0;
843  int yield = 0;  int yield = 0;
844    
845  while (length-- > 0)  while (length-- > 0)
846    {    {
847    #if !defined NOUTF8
848    if (use_utf8)    if (use_utf8)
849      {      {
850      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 201  while (length-- > 0) Line 853  while (length-- > 0)
853        {        {
854        length -= rc - 1;        length -= rc - 1;
855        p += rc;        p += rc;
856        if (c < 256 && isprint(c))        if (PRINTHEX(c))
857          {          {
858          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
859          yield++;          yield++;
860          }          }
861        else        else
862          {          {
863          int n;          int n = 4;
864          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
865          yield += n;          yield += (n <= 0x000000ff)? 2 :
866                     (n <= 0x00000fff)? 3 :
867                     (n <= 0x0000ffff)? 4 :
868                     (n <= 0x000fffff)? 5 : 6;
869          }          }
870        continue;        continue;
871        }        }
872      }      }
873    #endif
874    
875     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
876    
877    if (isprint(c = *(p++)))    c = *p++;
878      if (PRINTHEX(c))
879      {      {
880      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
881      yield++;      yield++;
# Line 316  fprintf(outfile, "%.*s", (cb->next_item_ Line 973  fprintf(outfile, "%.*s", (cb->next_item_
973  fprintf(outfile, "\n");  fprintf(outfile, "\n");
974  first_callout = 0;  first_callout = 0;
975    
976    if (cb->mark != last_callout_mark)
977      {
978      fprintf(outfile, "Latest Mark: %s\n",
979        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
980      last_callout_mark = cb->mark;
981      }
982    
983  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
984    {    {
985    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 335  return (cb->callout_number != callout_fa Line 999  return (cb->callout_number != callout_fa
999  *            Local malloc functions              *  *            Local malloc functions              *
1000  *************************************************/  *************************************************/
1001    
1002  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1003  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1004    show_malloc variable is set only during matching. */
1005    
1006  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1007  {  {
1008  void *block = malloc(size);  void *block = malloc(size);
1009  gotten_store = size;  gotten_store = size;
1010    if (first_gotten_store == 0) first_gotten_store = size;
1011  if (show_malloc)  if (show_malloc)
1012    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1013  return block;  return block;
# Line 354  if (show_malloc) Line 1020  if (show_malloc)
1020  free(block);  free(block);
1021  }  }
1022    
   
1023  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1024    
1025  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 392  if ((rc = pcre_fullinfo(re, study, optio Line 1057  if ((rc = pcre_fullinfo(re, study, optio
1057  *         Byte flipping function                 *  *         Byte flipping function                 *
1058  *************************************************/  *************************************************/
1059    
1060  static long int  static unsigned long int
1061  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
1062  {  {
1063  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
1064  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 406  return ((value & 0x000000ff) << 24) | Line 1071  return ((value & 0x000000ff) << 24) |
1071    
1072    
1073  /*************************************************  /*************************************************
1074    *        Check match or recursion limit          *
1075    *************************************************/
1076    
/* Search for the smallest limit value at which pcre_exec() stops returning
errnumber for the given subject, and print it as "Minimum <msg> limit".

Arguments:
  re, extra, bptr, len, start_offset, options, use_offsets, use_size_offsets
               passed straight through to pcre_exec()
  flag         bit that is set in extra->flags for the duration of the search
  limit        where each trial value is stored before calling pcre_exec();
               presumably this points at the limit field inside extra that
               flag enables - TODO confirm at the call sites
  errnumber    the pcre_exec() return value that means the limit was hit
  msg          text inserted into the "Minimum %s limit" output line

Returns:       the return value of the last pcre_exec() call
*/
1077    static int
1078    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1079      int start_offset, int options, int *use_offsets, int use_size_offsets,
1080      int flag, unsigned long int *limit, int errnumber, const char *msg)
1081    {
1082    int count;
1083    int min = 0;  /* largest trial value seen to fail with errnumber (0 before any failure) */
1084    int mid = 64;  /* current trial value */
1085    int max = -1;  /* smallest trial value seen not to hit the limit; -1 until one is found */
1086    
1087    extra->flags |= flag;
1088    
1089    for (;;)
1090      {
1091      *limit = mid;
1092    
1093      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1094        use_offsets, use_size_offsets);
1095    
1096      if (count == errnumber)  /* limit was hit: the trial value is too small */
1097        {
1098        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099        min = mid;
/* Next trial: step straight to max when the trial was max - 1; otherwise
bisect between min and max once an upper bound exists; otherwise keep doubling. */
1100        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1101        }
1102    
1103      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1104                             count == PCRE_ERROR_PARTIAL)  /* the match ran to completion within the limit */
1105        {
1106        if (mid == min + 1)  /* min failed and min + 1 works, so mid is the minimum */
1107          {
1108          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1109          break;
1110          }
1111        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1112        max = mid;
1113        mid = (min + mid)/2;  /* bisect downwards between the failing and working values */
1114        }
1115      else break;    /* Some other error */
1116      }
1117    
1118    extra->flags &= ~flag;  /* clear the flag bit that was set for the search */
1119    return count;
1120    }
1121    
1122    
1123    
1124    /*************************************************
1125    *         Case-independent strncmp() function    *
1126    *************************************************/
1127    
1128    /*
1129    Arguments:
1130      s         first string
1131      t         second string
1132      n         number of characters to compare
1133    
1134    Returns:    < 0, = 0, or > 0, according to the comparison
1135    */
1136    
/* Compare the first n characters of s and t after converting each character
with tolower(); return the difference of the first pair that differs, or 0 if
all n pairs are equal.
NOTE(review): unlike strncmp(), the loop does not stop at a terminating zero,
so both strings must either differ within n characters or have n readable
characters. */
1137    static int
1138    strncmpic(uschar *s, uschar *t, int n)
1139    {
1140    while (n--)
1141      {
1142      int c = tolower(*s++) - tolower(*t++);  /* case-folded difference of the current pair */
1143      if (c) return c;
1144      }
1145    return 0;  /* no difference found in n characters */
1146    }
1147    
1148    
1149    
1150    /*************************************************
1151    *         Check newline indicator                *
1152    *************************************************/
1153    
1154    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1155    a message and return 0 if there is no match.
1156    
1157    Arguments:
1158      p           points after the leading '<'
1159      f           file for error message
1160    
1161    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1162    */
1163    
/* Match the text at p against each known keyword, ignoring case, and return
the corresponding PCRE_NEWLINE_xxx or PCRE_BSR_xxx flag. Each comparison
includes the closing angle bracket, so the keyword must be complete. If nothing
matches, a message is written to f and 0 is returned. */
1164    static int
1165    check_newline(uschar *p, FILE *f)
1166    {
1167    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1168    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1169    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1170    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1171    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
/* The two bsr_ keywords return PCRE_BSR_xxx flags rather than newline flags. */
1172    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1173    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1174    fprintf(f, "Unknown newline type at: <%s\n", p);  /* p is printed up to its terminating zero */
1175    return 0;
1176    }
1177    
1178    
1179    
1180    /*************************************************
1181    *             Usage function                     *
1182    *************************************************/
1183    
/* Print the command-line synopsis and the option list to stdout. Some lines
depend on how the program was built (SUPPORT_LIBREADLINE, NODFA, NOPOSIX).
Takes no arguments and returns nothing. */
1184    static void
1185    usage(void)
1186    {
1187    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1188    printf("Input and output default to stdin and stdout.\n");
/* Report whether this build reads terminal input through readline(). */
1189    #ifdef SUPPORT_LIBREADLINE
1190    printf("If input is a terminal, readline() is used to read from it.\n");
1191    #else
1192    printf("This version of pcretest is not linked with readline().\n");
1193    #endif
1194    printf("\nOptions:\n");
1195    printf("  -b       show compiled code (bytecode)\n");
1196    printf("  -C       show PCRE compile-time options and exit\n");
1197    printf("  -d       debug: show compiled code and information (-b and -i)\n");
/* -dfa is listed only when NODFA is not defined. */
1198    #if !defined NODFA
1199    printf("  -dfa     force DFA matching for all subjects\n");
1200    #endif
1201    printf("  -help    show usage information\n");
1202    printf("  -i       show information about compiled patterns\n"
1203           "  -M       find MATCH_LIMIT minimum for each subject\n"
1204           "  -m       output memory used information\n"
1205           "  -o <n>   set size of offsets vector to <n>\n");
/* -p is listed only when NOPOSIX is not defined. */
1206    #if !defined NOPOSIX
1207    printf("  -p       use POSIX interface\n");
1208    #endif
1209    printf("  -q       quiet: do not output PCRE version number at start\n");
1210    printf("  -S <n>   set stack size to <n> megabytes\n");
1211    printf("  -s       force each pattern to be studied at basic level\n"
1212           "  -s+      force each pattern to be studied, using JIT if available\n"
1213           "  -t       time compilation and execution\n");
1214    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1215    printf("  -tm      time execution (matching) only\n");
1216    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
1217    }
1218    
1219    
1220    
1221    /*************************************************
1222  *                Main Program                    *  *                Main Program                    *
1223  *************************************************/  *************************************************/
1224    
# Line 418  int main(int argc, char **argv) Line 1231  int main(int argc, char **argv)
1231  FILE *infile = stdin;  FILE *infile = stdin;
1232  int options = 0;  int options = 0;
1233  int study_options = 0;  int study_options = 0;
1234    int default_find_match_limit = FALSE;
1235  int op = 1;  int op = 1;
1236  int timeit = 0;  int timeit = 0;
1237    int timeitm = 0;
1238  int showinfo = 0;  int showinfo = 0;
1239  int showstore = 0;  int showstore = 0;
1240    int force_study = -1;
1241    int force_study_options = 0;
1242    int quiet = 0;
1243  int size_offsets = 45;  int size_offsets = 45;
1244  int size_offsets_max;  int size_offsets_max;
1245  int *offsets = NULL;  int *offsets = NULL;
# Line 432  int debug = 0; Line 1250  int debug = 0;
1250  int done = 0;  int done = 0;
1251  int all_use_dfa = 0;  int all_use_dfa = 0;
1252  int yield = 0;  int yield = 0;
1253    int stack_size;
1254    
1255    pcre_jit_stack *jit_stack = NULL;
1256    
1257    
1258    /* These vectors store, end-to-end, a list of captured substring names. Assume
1259    that 1024 is plenty long enough for the few names we'll be testing. */
1260    
1261    uschar copynames[1024];
1262    uschar getnames[1024];
1263    
1264  unsigned char *buffer;  uschar *copynamesptr;
1265  unsigned char *dbuffer;  uschar *getnamesptr;
1266    
1267  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1268  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
1269    
1270  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
1271  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
1272  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1273    
1274  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
1275    
1276  outfile = stdout;  outfile = stdout;
1277    
1278    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1279    library to translate CRLF into a single LF character. At least, that's what
1280    I've been told: never having used Windows I take this all on trust. Originally
1281    it set 0x8000, but then I was advised that _O_BINARY was better. */
1282    
1283    #if defined(_WIN32) || defined(WIN32)
1284    _setmode( _fileno( stdout ), _O_BINARY );
1285    #endif
1286    
1287  /* Scan options */  /* Scan options */
1288    
1289  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1290    {    {
1291    unsigned char *endptr;    unsigned char *endptr;
1292    
1293    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1294      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1295    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-s+") == 0)
1296        {
1297        force_study = 1;
1298        force_study_options = PCRE_STUDY_JIT_COMPILE;
1299        }
1300      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1301      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1302    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1303    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1304      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1305    #if !defined NODFA
1306    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1307    #endif
1308    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1309        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1310          *endptr == 0))          *endptr == 0))
# Line 472  while (argc > 1 && argv[op][0] == '-') Line 1312  while (argc > 1 && argv[op][0] == '-')
1312      op++;      op++;
1313      argc--;      argc--;
1314      }      }
1315      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1316        {
1317        int both = argv[op][2] == 0;
1318        int temp;
1319        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1320                         *endptr == 0))
1321          {
1322          timeitm = temp;
1323          op++;
1324          argc--;
1325          }
1326        else timeitm = LOOPREPEAT;
1327        if (both) timeit = timeitm;
1328        }
1329      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1330          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1331            *endptr == 0))
1332        {
1333    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1334        printf("PCRE: -S not supported on this OS\n");
1335        exit(1);
1336    #else
1337        int rc;
1338        struct rlimit rlim;
1339        getrlimit(RLIMIT_STACK, &rlim);
1340        rlim.rlim_cur = stack_size * 1024 * 1024;
1341        rc = setrlimit(RLIMIT_STACK, &rlim);
1342        if (rc != 0)
1343          {
1344        printf("PCRE: setrlimit() failed with error %d\n", rc);
1345        exit(1);
1346          }
1347        op++;
1348        argc--;
1349    #endif
1350        }
1351  #if !defined NOPOSIX  #if !defined NOPOSIX
1352    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1353  #endif  #endif
1354    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1355      {      {
1356      int rc;      int rc;
1357        unsigned long int lrc;
1358      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1359      printf("Compiled with\n");      printf("Compiled with\n");
1360      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1361      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1362      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1363      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1364        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1365        if (rc)
1366          printf("  Just-in-time compiler support\n");
1367        else
1368          printf("  No just-in-time compiler support\n");
1369      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1370      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1371        in EBCDIC environments. CR is 13 and NL is 10. */
1372        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1373          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1374          (rc == -2)? "ANYCRLF" :
1375          (rc == -1)? "ANY" : "???");
1376        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1377        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1378                                         "all Unicode newlines");
1379      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1380      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1381      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1382      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1383      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1384      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1385        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1386        printf("  Default recursion depth limit = %ld\n", lrc);
1387      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1388      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1389      exit(0);      goto EXIT;
1390        }
1391      else if (strcmp(argv[op], "-help") == 0 ||
1392               strcmp(argv[op], "--help") == 0)
1393        {
1394        usage();
1395        goto EXIT;
1396      }      }
1397    else    else
1398      {      {
1399      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1400      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
     printf("  -dfa   force DFA matching for all subjects\n");  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
1401      yield = 1;      yield = 1;
1402      goto EXIT;      goto EXIT;
1403      }      }
# Line 525  offsets = (int *)malloc(size_offsets_max Line 1412  offsets = (int *)malloc(size_offsets_max
1412  if (offsets == NULL)  if (offsets == NULL)
1413    {    {
1414    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1415      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1416    yield = 1;    yield = 1;
1417    goto EXIT;    goto EXIT;
1418    }    }
# Line 534  if (offsets == NULL) Line 1421  if (offsets == NULL)
1421    
1422  if (argc > 1)  if (argc > 1)
1423    {    {
1424    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1425    if (infile == NULL)    if (infile == NULL)
1426      {      {
1427      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 545  if (argc > 1) Line 1432  if (argc > 1)
1432    
1433  if (argc > 2)  if (argc > 2)
1434    {    {
1435    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1436    if (outfile == NULL)    if (outfile == NULL)
1437      {      {
1438      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 561  pcre_free = new_free; Line 1448  pcre_free = new_free;
1448  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1449  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1450    
1451  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1452    
1453  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1454    
1455  /* Main loop */  /* Main loop */
1456    
# Line 578  while (!done) Line 1465  while (!done)
1465  #endif  #endif
1466    
1467    const char *error;    const char *error;
1468      unsigned char *markptr;
1469    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1470    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1471    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1472    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1473    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1474      int do_allcaps = 0;
1475      int do_mark = 0;
1476    int do_study = 0;    int do_study = 0;
1477      int no_force_study = 0;
1478    int do_debug = debug;    int do_debug = debug;
1479    int do_G = 0;    int do_G = 0;
1480    int do_g = 0;    int do_g = 0;
1481    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1482    int do_showrest = 0;    int do_showrest = 0;
1483      int do_showcaprest = 0;
1484    int do_flip = 0;    int do_flip = 0;
1485    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1486    
1487    use_utf8 = 0;    use_utf8 = 0;
1488      debug_lengths = 1;
1489    
1490    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1491    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1492    fflush(outfile);    fflush(outfile);
1493    
# Line 607  while (!done) Line 1499  while (!done)
1499    
1500    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1501      {      {
1502      unsigned long int magic;      unsigned long int magic, get_options;
1503      uschar sbuf[8];      uschar sbuf[8];
1504      FILE *f;      FILE *f;
1505    
# Line 631  while (!done) Line 1523  while (!done)
1523        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1524    
1525      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1526      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1527    
1528      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1529    
# Line 650  while (!done) Line 1542  while (!done)
1542          }          }
1543        }        }
1544    
1545      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1546        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1547    
1548      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1549    
1550      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1551      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1552    
1553      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1554    
1555      if (true_study_size != 0)      if (true_study_size != 0)
1556        {        {
# Line 674  while (!done) Line 1566  while (!done)
1566          {          {
1567          FAIL_READ:          FAIL_READ:
1568          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1569          if (extra != NULL) new_free(extra);          if (extra != NULL) pcre_free_study(extra);
1570          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1571          fclose(f);          fclose(f);
1572          continue;          continue;
# Line 695  while (!done) Line 1587  while (!done)
1587    
1588    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1589      {      {
1590      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1591      goto SKIP_DATA;      goto SKIP_DATA;
1592      }      }
1593    
1594    pp = p;    pp = p;
1595      poffset = (int)(p - buffer);
1596    
1597    for(;;)    for(;;)
1598      {      {
# Line 710  while (!done) Line 1603  while (!done)
1603        pp++;        pp++;
1604        }        }
1605      if (*pp != 0) break;      if (*pp != 0) break;
1606        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1607        {        {
1608        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1609        done = 1;        done = 1;
# Line 728  while (!done) Line 1612  while (!done)
1612      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1613      }      }
1614    
1615      /* The buffer may have moved while being extended; reset the start of data
1616      pointer to the correct relative point in the buffer. */
1617    
1618      p = buffer + poffset;
1619    
1620    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1621    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1622    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 757  while (!done) Line 1646  while (!done)
1646        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1647        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1648    
1649        case '+': do_showrest = 1; break;        case '+':
1650          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1651          break;
1652    
1653          case '=': do_allcaps = 1; break;
1654        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1655          case 'B': do_debug = 1; break;
1656        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1657        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1658        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1659        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1660        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1661        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1662          case 'J': options |= PCRE_DUPNAMES; break;
1663          case 'K': do_mark = 1; break;
1664        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1665        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1666    
# Line 772  while (!done) Line 1668  while (!done)
1668        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1669  #endif  #endif
1670    
1671        case 'S': do_study = 1; break;        case 'S':
1672          if (do_study == 0)
1673            {
1674            do_study = 1;
1675            if (*pp == '+')
1676              {
1677              study_options |= PCRE_STUDY_JIT_COMPILE;
1678              pp++;
1679              }
1680            }
1681          else
1682            {
1683            do_study = 0;
1684            no_force_study = 1;
1685            }
1686          break;
1687    
1688        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1689          case 'W': options |= PCRE_UCP; break;
1690        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1691          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1692          case 'Z': debug_lengths = 0; break;
1693        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1694        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1695    
1696          case 'T':
1697          switch (*pp++)
1698            {
1699            case '0': tables = tables0; break;
1700            case '1': tables = tables1; break;
1701    
1702            case '\r':
1703            case '\n':
1704            case ' ':
1705            case 0:
1706            fprintf(outfile, "** Missing table number after /T\n");
1707            goto SKIP_DATA;
1708    
1709            default:
1710            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1711            goto SKIP_DATA;
1712            }
1713          break;
1714    
1715        case 'L':        case 'L':
1716        ppp = pp;        ppp = pp;
1717        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1718        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1719          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1720        *ppp = 0;        *ppp = 0;
1721        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1722          {          {
1723          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1724          goto SKIP_DATA;          goto SKIP_DATA;
1725          }          }
1726          locale_set = 1;
1727        tables = pcre_maketables();        tables = pcre_maketables();
1728        pp = ppp;        pp = ppp;
1729        break;        break;
# Line 799  while (!done) Line 1735  while (!done)
1735        *pp = 0;        *pp = 0;
1736        break;        break;
1737    
1738          case '<':
1739            {
1740            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1741              {
1742              options |= PCRE_JAVASCRIPT_COMPAT;
1743              pp += 3;
1744              }
1745            else
1746              {
1747              int x = check_newline(pp, outfile);
1748              if (x == 0) goto SKIP_DATA;
1749              options |= x;
1750              while (*pp++ != '>');
1751              }
1752            }
1753          break;
1754    
1755        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1756        case '\n':        case '\n':
1757        case ' ':        case ' ':
# Line 823  while (!done) Line 1776  while (!done)
1776      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1777      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1778      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1779        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1780        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1781        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1782        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1783    
1784        first_gotten_store = 0;
1785      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1786    
1787      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 830  while (!done) Line 1789  while (!done)
1789    
1790      if (rc != 0)      if (rc != 0)
1791        {        {
1792        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1793        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1794        goto SKIP_DATA;        goto SKIP_DATA;
1795        }        }
# Line 842  while (!done) Line 1801  while (!done)
1801  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1802    
1803      {      {
1804      if (timeit)      unsigned long int get_options;
1805    
1806        if (timeit > 0)
1807        {        {
1808        register int i;        register int i;
1809        clock_t time_taken;        clock_t time_taken;
1810        clock_t start_time = clock();        clock_t start_time = clock();
1811        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1812          {          {
1813          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1814          if (re != NULL) free(re);          if (re != NULL) free(re);
1815          }          }
1816        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1817        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1818          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1819            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1820        }        }
1821    
1822        first_gotten_store = 0;
1823      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1824    
1825      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 871  while (!done) Line 1833  while (!done)
1833          {          {
1834          for (;;)          for (;;)
1835            {            {
1836            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1837              {              {
1838              done = 1;              done = 1;
1839              goto CONTINUE;              goto CONTINUE;
# Line 885  while (!done) Line 1847  while (!done)
1847        goto CONTINUE;        goto CONTINUE;
1848        }        }
1849    
1850      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1851      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1852      returns only limited data. Check that it agrees with the newer one. */      lines. */
1853    
1854      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1855        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
1856    
1857      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
1858      and remember the store that was got. */      and remember the store that was got. */
1859    
1860      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
1861      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1862    
1863      /* If /S was present, study the regexp to generate additional info to      /* Output code size information if requested */
     help with the matching. */  
1864    
1865      if (do_study)      if (log_store)
1866          fprintf(outfile, "Memory allocation (code space): %d\n",
1867            (int)(first_gotten_store -
1868                  sizeof(real_pcre) -
1869                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1870    
1871        /* If -s or /S was present, study the regex to generate additional info to
1872        help with the matching, unless the pattern has the SS option, which
1873        suppresses the effect of /S (used for a few test patterns where studying is
1874        never sensible). */
1875    
1876        if (do_study || (force_study >= 0 && !no_force_study))
1877        {        {
1878        if (timeit)        if (timeit > 0)
1879          {          {
1880          register int i;          register int i;
1881          clock_t time_taken;          clock_t time_taken;
1882          clock_t start_time = clock();          clock_t start_time = clock();
1883          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1884            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options | force_study_options, &error);
1885          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1886          if (extra != NULL) free(extra);          if (extra != NULL) pcre_free_study(extra);
1887          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1888            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1889              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1890          }          }
1891        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options | force_study_options, &error);
1892        if (error != NULL)        if (error != NULL)
1893          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
1894        else if (extra != NULL)        else if (extra != NULL)
1895            {
1896          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1897            if (log_store)
1898              {
1899              size_t jitsize;
1900              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
1901              if (jitsize != 0)
1902                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
1903              }
1904            }
1905          }
1906    
1907        /* If /K was present, we set up for handling MARK data. */
1908    
1909        if (do_mark)
1910          {
1911          if (extra == NULL)
1912            {
1913            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1914            extra->flags = 0;
1915            }
1916          extra->mark = &markptr;
1917          extra->flags |= PCRE_EXTRA_MARK;
1918        }        }
1919    
1920      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
# Line 934  while (!done) Line 1925  while (!done)
1925      if (do_flip)      if (do_flip)
1926        {        {
1927        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1928        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1929            byteflip(rre->magic_number, sizeof(rre->magic_number));
1930        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1931        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1932        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1933        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1934        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1935        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1936        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1937          rre->first_byte =
1938            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1939          rre->req_byte =
1940            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1941          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1942          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1943        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1944          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1945        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1946            sizeof(rre->name_count));
1947    
1948        if (extra != NULL)        if (extra != NULL)
1949          {          {
1950          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1951          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1952          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1953            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1954          }          }
1955        }        }
1956    
1957      /* Extract information from the compiled data if required */      /* Extract information from the compiled data if required. There are now
1958        two info-returning functions. The old one has a limited interface and
1959        returns only limited data. Check that it agrees with the newer one. */
1960    
1961      SHOW_INFO:      SHOW_INFO:
1962    
1963        if (do_debug)
1964          {
1965          fprintf(outfile, "------------------------------------------------------------------\n");
1966          pcre_printint(re, outfile, debug_lengths);
1967          }
1968    
1969        /* We already have the options in get_options (see above) */
1970    
1971      if (do_showinfo)      if (do_showinfo)
1972        {        {
1973        unsigned long int get_options, all_options;        unsigned long int all_options;
1974    #if !defined NOINFOCHECK
1975        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1976        int count, backrefmax, first_char, need_char;  #endif
1977          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1978            hascrorlf;
1979        int nameentrysize, namecount;        int nameentrysize, namecount;
1980        const uschar *nametable;        const uschar *nametable;
1981    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         _pcre_printint(re, outfile);  
         }  
   
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1982        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1983        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1984        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 982  while (!done) Line 1987  while (!done)
1987        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1988        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1989        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1990          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1991          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1992          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1993    
1994    #if !defined NOINFOCHECK
1995        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1996        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1997          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1000  while (!done) Line 2009  while (!done)
2009            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2010              get_options, old_options);              get_options, old_options);
2011          }          }
2012    #endif
2013    
2014        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
2015          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1021  while (!done) Line 2031  while (!done)
2031            }            }
2032          }          }
2033    
2034        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2035        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2036    
2037        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2038        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
2039    
2040        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2041          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2042            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2043            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2044            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2045            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2046            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2047            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2048              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2049              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2050            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2051            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2052            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2053              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2054            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2055            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2056              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2057              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2058              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2059    
2060          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2061    
2062          switch (get_options & PCRE_NEWLINE_BITS)
2063            {
2064            case PCRE_NEWLINE_CR:
2065            fprintf(outfile, "Forced newline sequence: CR\n");
2066            break;
2067    
2068            case PCRE_NEWLINE_LF:
2069            fprintf(outfile, "Forced newline sequence: LF\n");
2070            break;
2071    
2072        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_CRLF:
2073          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
2074            break;
2075    
2076            case PCRE_NEWLINE_ANYCRLF:
2077            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2078            break;
2079    
2080            case PCRE_NEWLINE_ANY:
2081            fprintf(outfile, "Forced newline sequence: ANY\n");
2082            break;
2083    
2084            default:
2085            break;
2086            }
2087    
2088        if (first_char == -1)        if (first_char == -1)
2089          {          {
2090          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
2091          }          }
2092        else if (first_char < 0)        else if (first_char < 0)
2093          {          {
# Line 1063  while (!done) Line 2098  while (!done)
2098          int ch = first_char & 255;          int ch = first_char & 255;
2099          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2100            "" : " (caseless)";            "" : " (caseless)";
2101          if (isprint(ch))          if (PRINTHEX(ch))
2102            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2103          else          else
2104            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1078  while (!done) Line 2113  while (!done)
2113          int ch = need_char & 255;          int ch = need_char & 255;
2114          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2115            "" : " (caseless)";            "" : " (caseless)";
2116          if (isprint(ch))          if (PRINTHEX(ch))
2117            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2118          else          else
2119            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1087  while (!done) Line 2122  while (!done)
2122        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2123        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2124        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2125        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2126          information unless -i or -d was also present. This means that, except
2127          when auto-callouts are involved, the output from runs with and without
2128          -s should be identical. */
2129    
2130        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2131          {          {
2132          if (extra == NULL)          if (extra == NULL)
2133            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2134          else          else
2135            {            {
2136            uschar *start_bits = NULL;            uschar *start_bits = NULL;
2137            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2138    
2139              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2140              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2141    
2142              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2143            if (start_bits == NULL)            if (start_bits == NULL)
2144              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2145            else            else
2146              {              {
2147              int i;              int i;
# Line 1114  while (!done) Line 2156  while (!done)
2156                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2157                    c = 2;                    c = 2;
2158                    }                    }
2159                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
2160                    {                    {
2161                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2162                    c += 2;                    c += 2;
# Line 1129  while (!done) Line 2171  while (!done)
2171              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2172              }              }
2173            }            }
2174    
2175            /* Show this only if the JIT was set by /S, not by -s. */
2176    
2177            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2178              {
2179              int jit;
2180              new_info(re, extra, PCRE_INFO_JIT, &jit);
2181              if (jit)
2182                fprintf(outfile, "JIT study was successful\n");
2183              else
2184    #ifdef SUPPORT_JIT
2185                fprintf(outfile, "JIT study was not successful\n");
2186    #else
2187                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2188    #endif
2189              }
2190          }          }
2191        }        }
2192    
# Line 1146  while (!done) Line 2204  while (!done)
2204        else        else
2205          {          {
2206          uschar sbuf[8];          uschar sbuf[8];
2207          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
2208          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
2209          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
2210          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
2211    
2212          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2213          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2214          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2215          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
2216    
2217          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2218              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1163  while (!done) Line 2221  while (!done)
2221            }            }
2222          else          else
2223            {            {
2224            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2225    
2226              /* If there is study data, write it. */
2227    
2228            if (extra != NULL)            if (extra != NULL)
2229              {              {
2230              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1179  while (!done) Line 2240  while (!done)
2240          }          }
2241    
2242        new_free(re);        new_free(re);
2243        if (extra != NULL) new_free(extra);        if (extra != NULL) pcre_free_study(extra);
2244        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2245            {
2246            new_free((void *)tables);
2247            setlocale(LC_CTYPE, "C");
2248            locale_set = 0;
2249            }
2250        continue;  /* With next regex */        continue;  /* With next regex */
2251        }        }
2252      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1189  while (!done) Line 2255  while (!done)
2255    
2256    for (;;)    for (;;)
2257      {      {
2258      unsigned char *q;      uschar *q;
2259      unsigned char *bptr = dbuffer;      uschar *bptr;
2260      int *use_offsets = offsets;      int *use_offsets = offsets;
2261      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2262      int callout_data = 0;      int callout_data = 0;
2263      int callout_data_set = 0;      int callout_data_set = 0;
2264      int count, c;      int count, c;
2265      int copystrings = 0;      int copystrings = 0;
2266      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2267      int getstrings = 0;      int getstrings = 0;
2268      int getlist = 0;      int getlist = 0;
2269      int gmatched = 0;      int gmatched = 0;
2270      int start_offset = 0;      int start_offset = 0;
2271        int start_offset_sign = 1;
2272      int g_notempty = 0;      int g_notempty = 0;
2273      int use_dfa = 0;      int use_dfa = 0;
2274    
2275      options = 0;      options = 0;
2276    
2277        *copynames = 0;
2278        *getnames = 0;
2279    
2280        copynamesptr = copynames;
2281        getnamesptr = getnames;
2282    
2283      pcre_callout = callout;      pcre_callout = callout;
2284      first_callout = 1;      first_callout = 1;
2285        last_callout_mark = NULL;
2286      callout_extra = 0;      callout_extra = 0;
2287      callout_count = 0;      callout_count = 0;
2288      callout_fail_count = 999999;      callout_fail_count = 999999;
2289      callout_fail_id = -1;      callout_fail_id = -1;
2290      show_malloc = 0;      show_malloc = 0;
2291    
2292      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2293      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2294    
2295        len = 0;
2296        for (;;)
2297        {        {
2298        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2299        goto CONTINUE;          {
2300            if (len > 0)    /* Reached EOF without hitting a newline */
2301              {
2302              fprintf(outfile, "\n");
2303              break;
2304              }
2305            done = 1;
2306            goto CONTINUE;
2307            }
2308          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2309          len = (int)strlen((char *)buffer);
2310          if (buffer[len-1] == '\n') break;
2311        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2312    
     len = (int)strlen((char *)buffer);  
2313      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2314      buffer[len] = 0;      buffer[len] = 0;
2315      if (len == 0) break;      if (len == 0) break;
# Line 1231  while (!done) Line 2317  while (!done)
2317      p = buffer;      p = buffer;
2318      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2319    
2320      q = dbuffer;      bptr = q = dbuffer;
2321      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2322        {        {
2323        int i = 0;        int i = 0;
# Line 1253  while (!done) Line 2339  while (!done)
2339          c -= '0';          c -= '0';
2340          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2341            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2342    
2343    #if !defined NOUTF8
2344            if (use_utf8 && c > 255)
2345              {
2346              unsigned char buff8[8];
2347              int ii, utn;
2348              utn = ord2utf8(c, buff8);
2349              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2350              c = buff8[ii];   /* Last byte */
2351              }
2352    #endif
2353          break;          break;
2354    
2355          case 'x':          case 'x':
2356    
2357          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2358    
2359    #if !defined NOUTF8
2360          if (*p == '{')          if (*p == '{')
2361            {            {
2362            unsigned char *pt = p;            unsigned char *pt = p;
2363            c = 0;            c = 0;
2364            while (isxdigit(*(++pt)))  
2365              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2366              when isxdigit() is a macro that refers to its argument more than
2367              once. This is banned by the C Standard, but apparently happens in at
2368              least one MacOS environment. */
2369    
2370              for (pt++; isxdigit(*pt); pt++)
2371                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2372            if (*pt == '}')            if (*pt == '}')
2373              {              {
2374              unsigned char buff8[8];              unsigned char buff8[8];
2375              int ii, utn;              int ii, utn;
2376              utn = _pcre_ord2utf8(c, buff8);              if (use_utf8)
2377              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2378              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2379                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2380                  c = buff8[ii];   /* Last byte */
2381                  }
2382                else
2383                 {
2384                 if (c > 255)
2385                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2386                     "UTF-8 mode is not enabled.\n"
2387                     "** Truncation will probably give the wrong result.\n", c);
2388                 }
2389              p = pt + 1;              p = pt + 1;
2390              break;              break;
2391              }              }
2392            /* Not correct form; fall through */            /* Not correct form; fall through */
2393            }            }
2394    #endif
2395    
2396          /* Ordinary \x */          /* Ordinary \x */
2397    
2398          c = 0;          c = 0;
2399          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2400            {            {
2401            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2402            p++;            p++;
2403            }            }
2404          break;          break;
# Line 1293  while (!done) Line 2408  while (!done)
2408          continue;          continue;
2409    
2410          case '>':          case '>':
2411            if (*p == '-')
2412              {
2413              start_offset_sign = -1;
2414              p++;
2415              }
2416          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2417            start_offset *= start_offset_sign;
2418          continue;          continue;
2419    
2420          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1312  while (!done) Line 2433  while (!done)
2433            }            }
2434          else if (isalnum(*p))          else if (isalnum(*p))
2435            {            {
2436            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
2437            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2438              *npp++ = 0;
2439            *npp = 0;            *npp = 0;
2440            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2441            if (n < 0)            if (n < 0)
2442              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2443            else copystrings |= 1 << n;            copynamesptr = npp;
2444            }            }
2445          else if (*p == '+')          else if (*p == '+')
2446            {            {
# Line 1357  while (!done) Line 2478  while (!done)
2478            }            }
2479          continue;          continue;
2480    
2481    #if !defined NODFA
2482          case 'D':          case 'D':
2483    #if !defined NOPOSIX
2484          if (posix || do_posix)          if (posix || do_posix)
2485            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2486          else          else
2487    #endif
2488            use_dfa = 1;            use_dfa = 1;
2489          continue;          continue;
2490    #endif
2491    
2492    #if !defined NODFA
2493          case 'F':          case 'F':
2494          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2495          continue;          continue;
2496    #endif
2497    
2498          case 'G':          case 'G':
2499          if (isdigit(*p))          if (isdigit(*p))
# Line 1376  while (!done) Line 2503  while (!done)
2503            }            }
2504          else if (isalnum(*p))          else if (isalnum(*p))
2505            {            {
2506            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
2507            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2508              *npp++ = 0;
2509            *npp = 0;            *npp = 0;
2510            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2511            if (n < 0)            if (n < 0)
2512              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2513            else getstrings |= 1 << n;            getnamesptr = npp;
2514              }
2515            continue;
2516    
2517            case 'J':
2518            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2519            if (extra != NULL
2520                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2521                && extra->executable_jit != NULL)
2522              {
2523              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2524              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2525              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2526            }            }
2527          continue;          continue;
2528    
# Line 1396  while (!done) Line 2535  while (!done)
2535          continue;          continue;
2536    
2537          case 'N':          case 'N':
2538          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2539              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2540            else
2541              options |= PCRE_NOTEMPTY;
2542          continue;          continue;
2543    
2544          case 'O':          case 'O':
# Line 1409  while (!done) Line 2551  while (!done)
2551            if (offsets == NULL)            if (offsets == NULL)
2552              {              {
2553              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2554                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2555              yield = 1;              yield = 1;
2556              goto EXIT;              goto EXIT;
2557              }              }
# Line 1419  while (!done) Line 2561  while (!done)
2561          continue;          continue;
2562    
2563          case 'P':          case 'P':
2564          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2565              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2566            continue;
2567    
2568            case 'Q':
2569            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2570            if (extra == NULL)
2571              {
2572              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2573              extra->flags = 0;
2574              }
2575            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2576            extra->match_limit_recursion = n;
2577            continue;
2578    
2579            case 'q':
2580            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2581            if (extra == NULL)
2582              {
2583              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2584              extra->flags = 0;
2585              }
2586            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2587            extra->match_limit = n;
2588          continue;          continue;
2589    
2590    #if !defined NODFA
2591          case 'R':          case 'R':
2592          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
2593          continue;          continue;
2594    #endif
2595    
2596          case 'S':          case 'S':
2597          show_malloc = 1;          show_malloc = 1;
2598          continue;          continue;
2599    
2600            case 'Y':
2601            options |= PCRE_NO_START_OPTIMIZE;
2602            continue;
2603    
2604          case 'Z':          case 'Z':
2605          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2606          continue;          continue;
# Line 1437  while (!done) Line 2608  while (!done)
2608          case '?':          case '?':
2609          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2610          continue;          continue;
2611    
2612            case '<':
2613              {
2614              int x = check_newline(p, outfile);
2615              if (x == 0) goto NEXT_DATA;
2616              options |= x;
2617              while (*p++ != '>');
2618              }
2619            continue;
2620          }          }
2621        *q++ = c;        *q++ = c;
2622        }        }
2623      *q = 0;      *q = 0;
2624      len = q - dbuffer;      len = (int)(q - dbuffer);
2625    
2626        /* Move the data to the end of the buffer so that a read over the end of
2627        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2628        we are using the POSIX interface, we must include the terminating zero. */
2629    
2630    #if !defined NOPOSIX
2631        if (posix || do_posix)
2632          {
2633          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2634          bptr += buffer_size - len - 1;
2635          }
2636        else
2637    #endif
2638          {
2639          memmove(bptr + buffer_size - len, bptr, len);
2640          bptr += buffer_size - len;
2641          }
2642    
2643      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2644        {        {
# Line 1462  while (!done) Line 2659  while (!done)
2659          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2660        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2661        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2662          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2663    
2664        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2665    
2666        if (rc != 0)        if (rc != 0)
2667          {          {
2668          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2669          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2670          }          }
2671          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2672                  != 0)
2673            {
2674            fprintf(outfile, "Matched with REG_NOSUB\n");
2675            }
2676        else        else
2677          {          {
2678          size_t i;          size_t i;
# Line 1481  while (!done) Line 2684  while (!done)
2684              (void)pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2685                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2686              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2687              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2688                {                {
2689                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2690                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2691                  outfile);                  outfile);
2692                fprintf(outfile, "\n");                fprintf(outfile, "\n");
# Line 1501  while (!done) Line 2704  while (!done)
2704    
2705      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2706        {        {
2707        if (timeit)        markptr = NULL;
2708    
2709          if (timeitm > 0)
2710          {          {
2711          register int i;          register int i;
2712          clock_t time_taken;          clock_t time_taken;
2713          clock_t start_time = clock();          clock_t start_time = clock();
2714    
2715    #if !defined NODFA
2716          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2717            {            {
2718            int workspace[1000];            int workspace[1000];
2719            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2720              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2721                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2722                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2723            }            }
2724          else          else
2725    #endif
2726    
2727          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2728            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2729              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2730    
2731          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2732          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2733            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2734              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2735          }          }
2736    
2737        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2738        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2739          for the recursion limit. The match limits are relevant only to the normal
2740          running of pcre_exec(), so disable the JIT optimization. This makes it
2741          possible to run the same set of tests with and without JIT externally
2742          requested. */
2743    
2744        if (find_match_limit)        if (find_match_limit)
2745          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2746          if (extra == NULL)          if (extra == NULL)
2747            {            {
2748            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2749            extra->flags = 0;            extra->flags = 0;
2750            }            }
2751          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2752    
2753          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2754              options|g_notempty, use_offsets, use_size_offsets,
2755              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2756              PCRE_ERROR_MATCHLIMIT, "match()");
2757    
2758            count = check_match_limit(re, extra, bptr, len, start_offset,
2759              options|g_notempty, use_offsets, use_size_offsets,
2760              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2761              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2762          }          }
2763    
2764        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1591  while (!done) Line 2780  while (!done)
2780        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2781        value of match_limit. */        value of match_limit. */
2782    
2783    #if !defined NODFA
2784        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2785          {          {
2786          int workspace[1000];          int workspace[1000];
2787          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2788            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2789            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2790          if (count == 0)          if (count == 0)
# Line 1603  while (!done) Line 2793  while (!done)
2793            count = use_size_offsets/2;            count = use_size_offsets/2;
2794            }            }
2795          }          }
2796    #endif
2797    
2798        else        else
2799          {          {
# Line 1619  while (!done) Line 2810  while (!done)
2810    
2811        if (count >= 0)        if (count >= 0)
2812          {          {
2813          int i;          int i, maxcount;
2814    
2815    #if !defined NODFA
2816            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2817    #endif
2818              maxcount = use_size_offsets/3;
2819    
2820            /* This is a check against a lunatic return value. */
2821    
2822            if (count > maxcount)
2823              {
2824              fprintf(outfile,
2825                "** PCRE error: returned count %d is too big for offset size %d\n",
2826                count, use_size_offsets);
2827              count = use_size_offsets/3;
2828              if (do_g || do_G)
2829                {
2830                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2831                do_g = do_G = FALSE;        /* Break g/G loop */
2832                }
2833              }
2834    
2835            /* do_allcaps requests showing of all captures in the pattern, to check
2836            unset ones at the end. */
2837    
2838            if (do_allcaps)
2839              {
2840              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2841              count++;   /* Allow for full match */
2842              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2843              }
2844    
2845            /* Output the captured substrings */
2846    
2847          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2848            {            {
2849            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2850                {
2851                if (use_offsets[i] != -1)
2852                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2853                    use_offsets[i], i);
2854                if (use_offsets[i+1] != -1)
2855                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2856                    use_offsets[i+1], i+1);
2857              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2858                }
2859            else            else
2860              {              {
2861              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2862              (void)pchars(bptr + use_offsets[i],              (void)pchars(bptr + use_offsets[i],
2863                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
2864              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2865              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
2866                {                {
2867                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
2868                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2869                  fprintf(outfile, " 0+ ");                  outfile);
2870                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
2871                }                }
2872              }              }
2873            }            }
2874    
2875            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2876    
2877          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2878            {            {
2879            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2880              {              {
2881              char copybuffer[16];              char copybuffer[256];
2882              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2883                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2884              if (rc < 0)              if (rc < 0)
# Line 1657  while (!done) Line 2888  while (!done)
2888              }              }
2889            }            }
2890    
2891            for (copynamesptr = copynames;
2892                 *copynamesptr != 0;
2893                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2894              {
2895              char copybuffer[256];
2896              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2897                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2898              if (rc < 0)
2899                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2900              else
2901                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2902              }
2903    
2904          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2905            {            {
2906            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1669  while (!done) Line 2913  while (!done)
2913              else              else
2914                {                {
2915                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2916                pcre_free_substring(substring);                pcre_free_substring(substring);
2917                }                }
2918              }              }
2919            }            }
2920    
2921            for (getnamesptr = getnames;
2922                 *getnamesptr != 0;
2923                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2924              {
2925              const char *substring;
2926              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2927                count, (char *)getnamesptr, &substring);
2928              if (rc < 0)
2929                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2930              else
2931                {
2932                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2933                pcre_free_substring(substring);
2934                }
2935              }
2936    
2937          if (getlist)          if (getlist)
2938            {            {
2939            const char **stringlist;            const char **stringlist;
# Line 1688  while (!done) Line 2947  while (!done)
2947                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2948              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2949                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
2950              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
2951              }              }
2952            }            }
# Line 1698  while (!done) Line 2956  while (!done)
2956    
2957        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2958          {          {
2959          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2960          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            else fprintf(outfile, "Partial match, mark=%s", markptr);
2961            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],          if (use_size_offsets > 1)
2962              bptr + use_offsets[0]);            {
2963              fprintf(outfile, ": ");
2964              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2965                outfile);
2966              }
2967          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2968          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2969          }          }
2970    
2971        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2972        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2973        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2974        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2975        offset values to achieve this. We won't be at the end of the string -  
2976        that was checked before setting g_notempty. */        Complication arises in the case when the newline convention is "any",
2977          "crlf", or "anycrlf". If the previous match was at the end of a line
2978          terminated by CRLF, an advance of one character just passes the \r,
2979          whereas we should prefer the longer newline sequence, as does the code in
2980          pcre_exec(). Fudge the offset value to achieve this. We check for a
2981          newline setting in the pattern; if none was set, use pcre_config() to
2982          find the default.
2983    
2984          Otherwise, in the case of UTF-8 matching, the advance must be one
2985          character, not one byte. */
2986    
2987        else        else
2988          {          {
2989          if (g_notempty != 0)          if (g_notempty != 0)
2990            {            {
2991            int onechar = 1;            int onechar = 1;
2992              unsigned int obits = ((real_pcre *)re)->options;
2993            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2994            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2995                {
2996                int d;
2997                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2998                /* Note that these values are always the ASCII ones, even in
2999                EBCDIC environments. CR = 13, NL = 10. */
3000                obits = (d == 13)? PCRE_NEWLINE_CR :
3001                        (d == 10)? PCRE_NEWLINE_LF :
3002                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3003                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
3004                        (d == -1)? PCRE_NEWLINE_ANY : 0;
3005                }
3006              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3007                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3008                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3009                  &&
3010                  start_offset < len - 1 &&
3011                  bptr[start_offset] == '\r' &&
3012                  bptr[start_offset+1] == '\n')
3013                onechar++;
3014              else if (use_utf8)
3015              {              {
3016              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3017                {                {
3018                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3019                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3020                }                }
3021              }              }
3022            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3023            }            }
3024          else          else
3025            {            {
3026            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3027              {              {
3028              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3029                if (gmatched == 0)
3030                  {
3031                  if (markptr == NULL) fprintf(outfile, "No match\n");
3032                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3033                  }
3034                break;
3035    
3036                case PCRE_ERROR_BADUTF8:
3037                case PCRE_ERROR_SHORTUTF8:
3038                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3039                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3040                if (use_size_offsets >= 2)
3041                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3042                    use_offsets[1]);
3043                fprintf(outfile, "\n");
3044                break;
3045    
3046                default:
3047                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3048                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3049                else
3050                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3051                break;
3052              }              }
3053            else fprintf(outfile, "Error %d\n", count);  
3054            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3055            }            }
3056          }          }
# Line 1747  while (!done) Line 3060  while (!done)
3060        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3061    
3062        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3063        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3064        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
3065        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3066        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3067        character. */        character. */
3068    
3069        g_notempty = 0;        g_notempty = 0;
3070    
3071        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3072          {          {
3073          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3074          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3075          }          }
3076    
3077        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1772  while (!done) Line 3086  while (!done)
3086          len -= use_offsets[1];          len -= use_offsets[1];
3087          }          }
3088        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
3089    
3090        NEXT_DATA: continue;
3091      }    /* End of loop for data lines */      }    /* End of loop for data lines */
3092    
3093    CONTINUE:    CONTINUE:
# Line 1781  while (!done) Line 3097  while (!done)
3097  #endif  #endif
3098    
3099    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3100    if (extra != NULL) new_free(extra);    if (extra != NULL) pcre_free_study(extra);
3101    if (tables != NULL)    if (locale_set)
3102      {      {
3103      new_free((void *)tables);      new_free((void *)tables);
3104      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3105        locale_set = 0;
3106        }
3107      if (jit_stack != NULL)
3108        {
3109        pcre_jit_stack_free(jit_stack);
3110        jit_stack = NULL;
3111      }      }
3112    }    }
3113    

Legend:
Removed from v.77  
changed lines
  Added in v.788

  ViewVC Help
Powered by ViewVC 1.1.5