/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 172 by ph10, Tue Jun 5 10:40:13 2007 UTC code/branches/pcre16/pcretest.c revision 801 by ph10, Mon Dec 12 16:23:37 2011 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
61  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 59  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
92  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 79  appropriately for an application, not fo Line 107  appropriately for an application, not fo
107  #include "pcre.h"  #include "pcre.h"
108  #include "pcre_internal.h"  #include "pcre_internal.h"
109    
110  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
111  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
112  symbols to prevent clashes. */  external symbols to prevent clashes. */
113    
114    #define _pcre_ucp_gentype      ucp_gentype
115    #define _pcre_ucp_typerange    ucp_typerange
116  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
117  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
118  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 90  symbols to prevent clashes. */ Line 120  symbols to prevent clashes. */
120  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
121  #define _pcre_utt              utt  #define _pcre_utt              utt
122  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
123    #define _pcre_utt_names        utt_names
124  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
125    
126  #include "pcre_tables.c"  #include "pcre_tables.c"
127    
128  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
129  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
130  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
131    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We uses it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
132    
133    #define COMPILING_PCRETEST
134  #include "pcre_printint.src"  #include "pcre_printint.src"
135    
136  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
137    output character as-is or as a hex value when showing compiled patterns, is
138    contained in the printint.src file. We use it here also, in cases when the
139    locale has not been explicitly changed, so as to get consistent output from
140    systems that differ in their output from isprint() even in the "C" locale. */
141    
142    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
143    
144  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
145  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 152  static int callout_count; Line 184  static int callout_count;
184  static int callout_extra;  static int callout_extra;
185  static int callout_fail_count;  static int callout_fail_count;
186  static int callout_fail_id;  static int callout_fail_id;
187    static int debug_lengths;
188  static int first_callout;  static int first_callout;
189  static int locale_set = 0;  static int locale_set = 0;
190  static int show_malloc;  static int show_malloc;
191  static int use_utf8;  static int use_utf8;
192  static size_t gotten_store;  static size_t gotten_store;
193    static size_t first_gotten_store = 0;
194    static const unsigned char *last_callout_mark = NULL;
195    
196  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
197    
198  static int buffer_size = 50000;  static int buffer_size = 50000;
199  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
200  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
201  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
202    
203    /* Textual explanations for runtime error codes */
204    
205    static const char *errtexts[] = {  /* NOTE(review): entry order suggests index = -(error code); confirm at the lookup site */
206      NULL,  /* 0 is no error */
207      NULL,  /* NOMATCH is handled specially */
208      "NULL argument passed",
209      "bad option value",
210      "magic number missing",
211      "unknown opcode - pattern overwritten?",
212      "no more memory",
213      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
214      "match limit exceeded",
215      "callout error code",
216      NULL,  /* BADUTF8 is handled specially */
217      "bad UTF-8 offset",
218      NULL,  /* PARTIAL is handled specially */
219      "not used - internal error",
220      "internal error - pattern overwritten?",
221      "bad count value",
222      "item unsupported for DFA matching",
223      "backreference condition or recursion test not supported for DFA matching",
224      "match limit not supported for DFA matching",
225      "workspace size exceeded in DFA matching",
226      "too much recursion for DFA matching",
227      "recursion limit exceeded",
228      "not used - internal error",
229      "invalid combination of newline options",
230      "bad offset value",
231      NULL,  /* SHORTUTF8 is handled specially */
232      "nested recursion at the same subject position",
233      "JIT stack limit reached"  /* last entry of the table */
234    };
235    
236    
237    /*************************************************
238    *         Alternate character tables             *
239    *************************************************/
240    
241    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242    using the default tables of the library. However, the T option can be used to
243    select alternate sets of tables, for different kinds of testing. Note also that
244    the L (locale) option also adjusts the tables. */
245    
246    /* This is the set of tables distributed as default with PCRE. It recognizes
247    only ASCII characters. */
248    
249    static const unsigned char tables0[] = {  /* four sections: 256 + 256 + 320 + 256 bytes */
250    
251    /* This table is a lower casing table. */
252    
253        0,  1,  2,  3,  4,  5,  6,  7,
254        8,  9, 10, 11, 12, 13, 14, 15,
255       16, 17, 18, 19, 20, 21, 22, 23,
256       24, 25, 26, 27, 28, 29, 30, 31,
257       32, 33, 34, 35, 36, 37, 38, 39,
258       40, 41, 42, 43, 44, 45, 46, 47,
259       48, 49, 50, 51, 52, 53, 54, 55,
260       56, 57, 58, 59, 60, 61, 62, 63,
261       64, 97, 98, 99,100,101,102,103,
262      104,105,106,107,108,109,110,111,
263      112,113,114,115,116,117,118,119,
264      120,121,122, 91, 92, 93, 94, 95,
265       96, 97, 98, 99,100,101,102,103,
266      104,105,106,107,108,109,110,111,
267      112,113,114,115,116,117,118,119,
268      120,121,122,123,124,125,126,127,
269      128,129,130,131,132,133,134,135,
270      136,137,138,139,140,141,142,143,
271      144,145,146,147,148,149,150,151,
272      152,153,154,155,156,157,158,159,
273      160,161,162,163,164,165,166,167,
274      168,169,170,171,172,173,174,175,
275      176,177,178,179,180,181,182,183,
276      184,185,186,187,188,189,190,191,
277      192,193,194,195,196,197,198,199,
278      200,201,202,203,204,205,206,207,
279      208,209,210,211,212,213,214,215,
280      216,217,218,219,220,221,222,223,
281      224,225,226,227,228,229,230,231,
282      232,233,234,235,236,237,238,239,
283      240,241,242,243,244,245,246,247,
284      248,249,250,251,252,253,254,255,
285    
286    /* This table is a case flipping table. */
287    
288        0,  1,  2,  3,  4,  5,  6,  7,
289        8,  9, 10, 11, 12, 13, 14, 15,
290       16, 17, 18, 19, 20, 21, 22, 23,
291       24, 25, 26, 27, 28, 29, 30, 31,
292       32, 33, 34, 35, 36, 37, 38, 39,
293       40, 41, 42, 43, 44, 45, 46, 47,
294       48, 49, 50, 51, 52, 53, 54, 55,
295       56, 57, 58, 59, 60, 61, 62, 63,
296       64, 97, 98, 99,100,101,102,103,
297      104,105,106,107,108,109,110,111,
298      112,113,114,115,116,117,118,119,
299      120,121,122, 91, 92, 93, 94, 95,
300       96, 65, 66, 67, 68, 69, 70, 71,
301       72, 73, 74, 75, 76, 77, 78, 79,
302       80, 81, 82, 83, 84, 85, 86, 87,
303       88, 89, 90,123,124,125,126,127,
304      128,129,130,131,132,133,134,135,
305      136,137,138,139,140,141,142,143,
306      144,145,146,147,148,149,150,151,
307      152,153,154,155,156,157,158,159,
308      160,161,162,163,164,165,166,167,
309      168,169,170,171,172,173,174,175,
310      176,177,178,179,180,181,182,183,
311      184,185,186,187,188,189,190,191,
312      192,193,194,195,196,197,198,199,
313      200,201,202,203,204,205,206,207,
314      208,209,210,211,212,213,214,215,
315      216,217,218,219,220,221,222,223,
316      224,225,226,227,228,229,230,231,
317      232,233,234,235,236,237,238,239,
318      240,241,242,243,244,245,246,247,
319      248,249,250,251,252,253,254,255,
320    
321    /* This table contains bit maps for various character classes. Each map is 32
322    bytes long and the bits run from the least significant end of each byte. The
323    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
324    graph, print, punct, and cntrl. Other classes are built from combinations. */
325    
326      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space map */
327      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
331      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit map */
332      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
333      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
336      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit map */
337      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
341      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper map */
342      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
346      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower map */
347      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
348      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
351      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word map */
352      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
353      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
356      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph map */
357      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
361      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print map */
362      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
366      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct map */
367      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl map */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375    
376    /* This table identifies various classes of character by individual bits:
377      0x01   white space character
378      0x02   letter
379      0x04   decimal digit
380      0x08   hexadecimal digit
381      0x10   alphanumeric or '_'
382      0x80   regular expression metacharacter or binary zero
383    */
384    
385      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
386      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
387      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
388      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
389      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
390      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
391      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
392      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
393      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
394      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
395      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
396      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
397      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
398      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
399      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
400      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
401      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
402      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
403      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
404      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
407      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
408      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
409      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
410      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
411      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
412      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
413      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
414      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
415      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
416      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
417    
418    /* This is a set of tables that came originally from a Windows user. It seems to
419    be at least an approximation of ISO 8859. In particular, there are characters
420    greater than 128 that are marked as spaces, letters, etc. */
421    
422    static const unsigned char tables1[] = {  /* same 256 + 256 + 320 + 256 byte section layout as tables0 */
423    0,1,2,3,4,5,6,7,  /* bytes 0-255: lower-casing table */
424    8,9,10,11,12,13,14,15,
425    16,17,18,19,20,21,22,23,
426    24,25,26,27,28,29,30,31,
427    32,33,34,35,36,37,38,39,
428    40,41,42,43,44,45,46,47,
429    48,49,50,51,52,53,54,55,
430    56,57,58,59,60,61,62,63,
431    64,97,98,99,100,101,102,103,
432    104,105,106,107,108,109,110,111,
433    112,113,114,115,116,117,118,119,
434    120,121,122,91,92,93,94,95,
435    96,97,98,99,100,101,102,103,
436    104,105,106,107,108,109,110,111,
437    112,113,114,115,116,117,118,119,
438    120,121,122,123,124,125,126,127,
439    128,129,130,131,132,133,134,135,
440    136,137,138,139,140,141,142,143,
441    144,145,146,147,148,149,150,151,
442    152,153,154,155,156,157,158,159,
443    160,161,162,163,164,165,166,167,
444    168,169,170,171,172,173,174,175,
445    176,177,178,179,180,181,182,183,
446    184,185,186,187,188,189,190,191,
447    224,225,226,227,228,229,230,231,  /* 192-222 map to 224-254, except 215 which maps to itself */
448    232,233,234,235,236,237,238,239,
449    240,241,242,243,244,245,246,215,
450    248,249,250,251,252,253,254,223,
451    224,225,226,227,228,229,230,231,
452    232,233,234,235,236,237,238,239,
453    240,241,242,243,244,245,246,247,
454    248,249,250,251,252,253,254,255,
455    0,1,2,3,4,5,6,7,  /* bytes 256-511: case-flipping table */
456    8,9,10,11,12,13,14,15,
457    16,17,18,19,20,21,22,23,
458    24,25,26,27,28,29,30,31,
459    32,33,34,35,36,37,38,39,
460    40,41,42,43,44,45,46,47,
461    48,49,50,51,52,53,54,55,
462    56,57,58,59,60,61,62,63,
463    64,97,98,99,100,101,102,103,
464    104,105,106,107,108,109,110,111,
465    112,113,114,115,116,117,118,119,
466    120,121,122,91,92,93,94,95,
467    96,65,66,67,68,69,70,71,
468    72,73,74,75,76,77,78,79,
469    80,81,82,83,84,85,86,87,
470    88,89,90,123,124,125,126,127,
471    128,129,130,131,132,133,134,135,
472    136,137,138,139,140,141,142,143,
473    144,145,146,147,148,149,150,151,
474    152,153,154,155,156,157,158,159,
475    160,161,162,163,164,165,166,167,
476    168,169,170,171,172,173,174,175,
477    176,177,178,179,180,181,182,183,
478    184,185,186,187,188,189,190,191,
479    224,225,226,227,228,229,230,231,
480    232,233,234,235,236,237,238,239,
481    240,241,242,243,244,245,246,215,
482    248,249,250,251,252,253,254,223,
483    192,193,194,195,196,197,198,199,
484    200,201,202,203,204,205,206,207,
485    208,209,210,211,212,213,214,247,
486    216,217,218,219,220,221,222,255,
487    0,62,0,0,1,0,0,0,  /* bytes 512-831: ten 32-byte class bit maps; this one is the space map */
488    0,0,0,0,0,0,0,0,
489    32,0,0,0,1,0,0,0,
490    0,0,0,0,0,0,0,0,
491    0,0,0,0,0,0,255,3,  /* xdigit map */
492    126,0,0,0,126,0,0,0,
493    0,0,0,0,0,0,0,0,
494    0,0,0,0,0,0,0,0,
495    0,0,0,0,0,0,255,3,  /* digit map */
496    0,0,0,0,0,0,0,0,
497    0,0,0,0,0,0,12,2,
498    0,0,0,0,0,0,0,0,
499    0,0,0,0,0,0,0,0,  /* upper map */
500    254,255,255,7,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    255,255,127,127,0,0,0,0,
503    0,0,0,0,0,0,0,0,  /* lower map */
504    0,0,0,0,254,255,255,7,
505    0,0,0,0,0,4,32,4,
506    0,0,0,128,255,255,127,255,
507    0,0,0,0,0,0,255,3,  /* word map */
508    254,255,255,135,254,255,255,7,
509    0,0,0,0,0,4,44,6,
510    255,255,127,255,255,255,127,255,
511    0,0,0,0,254,255,255,255,  /* graph map */
512    255,255,255,255,255,255,255,127,
513    0,0,0,0,254,255,255,255,
514    255,255,255,255,255,255,255,255,
515    0,2,0,0,255,255,255,255,  /* print map */
516    255,255,255,255,255,255,255,127,
517    0,0,0,0,255,255,255,255,
518    255,255,255,255,255,255,255,255,
519    0,0,0,0,254,255,0,252,  /* punct map */
520    1,0,0,248,1,0,0,120,
521    0,0,0,0,254,255,255,255,
522    0,0,128,0,0,0,128,0,
523    255,255,255,255,0,0,0,0,  /* cntrl map */
524    0,0,0,0,0,0,0,128,
525    255,255,255,255,0,0,0,0,
526    0,0,0,0,0,0,0,0,
527    128,0,0,0,0,0,0,0,  /* bytes 832-1087: per-character type flags (bit meanings as listed for tables0) */
528    0,1,1,0,1,1,0,0,
529    0,0,0,0,0,0,0,0,
530    0,0,0,0,0,0,0,0,
531    1,0,0,0,128,0,0,0,
532    128,128,128,128,0,0,128,0,
533    28,28,28,28,28,28,28,28,
534    28,28,0,0,0,0,0,128,
535    0,26,26,26,26,26,26,18,
536    18,18,18,18,18,18,18,18,
537    18,18,18,18,18,18,18,18,
538    18,18,18,128,128,0,128,16,
539    0,26,26,26,26,26,26,18,
540    18,18,18,18,18,18,18,18,
541    18,18,18,18,18,18,18,18,
542    18,18,18,128,128,0,0,0,
543    0,0,0,0,0,1,0,0,  /* from here on: characters 128-255, some flagged as space, letter or digit */
544    0,0,0,0,0,0,0,0,
545    0,0,0,0,0,0,0,0,
546    0,0,0,0,0,0,0,0,
547    1,0,0,0,0,0,0,0,
548    0,0,18,0,0,0,0,0,
549    0,0,20,20,0,18,0,0,
550    0,20,18,0,0,0,0,0,
551    18,18,18,18,18,18,18,18,
552    18,18,18,18,18,18,18,18,
553    18,18,18,18,18,18,18,0,
554    18,18,18,18,18,18,18,18,
555    18,18,18,18,18,18,18,18,
556    18,18,18,18,18,18,18,18,
557    18,18,18,18,18,18,18,0,
558    18,18,18,18,18,18,18,18
559    };
560    
561    
562    
563    
564    #ifndef HAVE_STRERROR
565    /*************************************************
566    *     Provide strerror() for non-ANSI libraries  *
567    *************************************************/
568    
569    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
570    in their libraries, but can provide the same facility by this simple
571    alternative function. */
572    
573    extern int   sys_nerr;
574    extern char *sys_errlist[];
575    
576    char *
577    strerror(int n)  /* fallback definition, compiled only when HAVE_STRERROR is not defined */
578    {
579    if (n < 0 || n >= sys_nerr) return "unknown error number";  /* n is outside the valid index range of sys_errlist[] */
580    return sys_errlist[n];  /* message string stored for error number n */
581    }
582    #endif /* HAVE_STRERROR */
583    
584    
585    /*************************************************
586    *         JIT memory callback                    *
587    *************************************************/
588    
589    static pcre_jit_stack* jit_callback(void *arg)  /* arg: untyped pointer handed to the callback */
590    {
591    return (pcre_jit_stack *)arg;  /* return arg unchanged, cast to a JIT stack pointer; nothing is allocated here */
592    }
593    
594    
595  /*************************************************  /*************************************************
596  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 183  optimal way of handling this, but hey, t Line 608  optimal way of handling this, but hey, t
608  Arguments:  Arguments:
609    f            the file to read    f            the file to read
610    start        where in buffer to start (this *must* be within buffer)    start        where in buffer to start (this *must* be within buffer)
611      prompt       for stdin or readline()
612    
613  Returns:       pointer to the start of new data  Returns:       pointer to the start of new data
614                 could be a copy of start, or could be moved                 could be a copy of start, or could be moved
615                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
616  */  */
617    
618  static uschar *  static pcre_uint8 *
619  extend_inputline(FILE *f, uschar *start)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
620  {  {
621  uschar *here = start;  pcre_uint8 *here = start;
622    
623  for (;;)  for (;;)
624    {    {
625    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
626    
627    if (rlen > 1000)    if (rlen > 1000)
628      {      {
629      int dlen;      int dlen;
630      if (fgets((char *)here, rlen,  f) == NULL)  
631        return (here == start)? NULL : start;      /* If libreadline support is required, use readline() to read a line if the
632        input is a terminal. Note that readline() removes the trailing newline, so
633        we must put it back again, to be compatible with fgets(). */
634    
635    #ifdef SUPPORT_LIBREADLINE
636        if (isatty(fileno(f)))
637          {
638          size_t len;
639          char *s = readline(prompt);
640          if (s == NULL) return (here == start)? NULL : start;
641          len = strlen(s);
642          if (len > 0) add_history(s);
643          if (len > rlen - 1) len = rlen - 1;
644          memcpy(here, s, len);
645          here[len] = '\n';
646          here[len+1] = 0;
647          free(s);
648          }
649        else
650    #endif
651    
652        /* Read the next line by normal means, prompting if the file is stdin. */
653    
654          {
655          if (f == stdin) printf("%s", prompt);
656          if (fgets((char *)here, rlen,  f) == NULL)
657            return (here == start)? NULL : start;
658          }
659    
660      dlen = (int)strlen((char *)here);      dlen = (int)strlen((char *)here);
661      if (dlen > 0 && here[dlen - 1] == '\n') return start;      if (dlen > 0 && here[dlen - 1] == '\n') return start;
662      here += dlen;      here += dlen;
# Line 211  for (;;) Line 665  for (;;)
665    else    else
666      {      {
667      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
668      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
669      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671    
672      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673        {        {
# Line 355  Returns:     number of characters placed Line 809  Returns:     number of characters placed
809  #if !defined NOUTF8  #if !defined NOUTF8
810    
811  static int  static int
812  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
813  {  {
814  register int i, j;  register int i, j;
815  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 518  fprintf(outfile, "%.*s", (cb->next_item_ Line 972  fprintf(outfile, "%.*s", (cb->next_item_
972  fprintf(outfile, "\n");  fprintf(outfile, "\n");
973  first_callout = 0;  first_callout = 0;
974    
975    if (cb->mark != last_callout_mark)
976      {
977      fprintf(outfile, "Latest Mark: %s\n",
978        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979      last_callout_mark = cb->mark;
980      }
981    
982  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
983    {    {
984    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 537  return (cb->callout_number != callout_fa Line 998  return (cb->callout_number != callout_fa
998  *            Local malloc functions              *  *            Local malloc functions              *
999  *************************************************/  *************************************************/
1000    
1001  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1002  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1003    show_malloc variable is set only during matching. */
1004    
1005  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1006  {  {
1007  void *block = malloc(size);  void *block = malloc(size);
1008  gotten_store = size;  gotten_store = size;
1009    if (first_gotten_store == 0) first_gotten_store = size;
1010  if (show_malloc)  if (show_malloc)
1011    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1012  return block;  return block;
# Line 556  if (show_malloc) Line 1019  if (show_malloc)
1019  free(block);  free(block);
1020  }  }
1021    
   
1022  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1023    
1024  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 612  return ((value & 0x000000ff) << 24) | Line 1074  return ((value & 0x000000ff) << 24) |
1074  *************************************************/  *************************************************/
1075    
1076  static int  static int
1077  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1078    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1079    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1080  {  {
# Line 659  return count; Line 1121  return count;
1121    
1122    
1123  /*************************************************  /*************************************************
1124    *         Case-independent strncmp() function    *
1125    *************************************************/
1126    
1127    /*
1128    Arguments:
1129      s         first string
1130      t         second string
1131      n         number of characters to compare
1132    
1133    Returns:    < 0, = 0, or > 0, according to the comparison
1134    */
1135    
1136    static int
1137    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1138    {
1139    while (n--)  /* NOTE(review): no test for a terminating zero and no guard against negative n; up to n characters are read from both strings */
1140      {
1141      int c = tolower(*s++) - tolower(*t++);  /* difference of the lower-cased characters at this position */
1142      if (c) return c;  /* the first mismatch decides the sign of the result */
1143      }
1144    return 0;  /* all n characters compared equal, ignoring case */
1145    }
1146    
1147    
1148    
1149    /*************************************************
1150  *         Check newline indicator                *  *         Check newline indicator                *
1151  *************************************************/  *************************************************/
1152    
1153  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1154  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1155    
1156  Arguments:  Arguments:
1157    p           points after the leading '<'    p           points after the leading '<'
# Line 674  Returns:      appropriate PCRE_NEWLINE_x Line 1161  Returns:      appropriate PCRE_NEWLINE_x
1161  */  */
1162    
1163  static int  static int
1164  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1165  {  {
1166  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1167  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1168  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1169  if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1170  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1171    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1172    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1173  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1174  return 0;  return 0;
1175  }  }
# Line 694  return 0; Line 1183  return 0;
1183  static void  static void
1184  usage(void)  usage(void)
1185  {  {
1186  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1187    printf("Input and output default to stdin and stdout.\n");
1188    #ifdef SUPPORT_LIBREADLINE
1189    printf("If input is a terminal, readline() is used to read from it.\n");
1190    #else
1191    printf("This version of pcretest is not linked with readline().\n");
1192    #endif
1193    printf("\nOptions:\n");
1194  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1195  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1196  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 703  printf("  -dfa     force DFA matching fo Line 1199  printf("  -dfa     force DFA matching fo
1199  #endif  #endif
1200  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
1201  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
1202           "  -M       find MATCH_LIMIT minimum for each subject\n"
1203         "  -m       output memory used information\n"         "  -m       output memory used information\n"
1204         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
1205  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 710  printf("  -p       use POSIX interface\n Line 1207  printf("  -p       use POSIX interface\n
1207  #endif  #endif
1208  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1209  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1210  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1211           "  -s+      force each pattern to be studied, using JIT if available\n"
1212         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1213  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1214  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 732  int main(int argc, char **argv) Line 1230  int main(int argc, char **argv)
1230  FILE *infile = stdin;  FILE *infile = stdin;
1231  int options = 0;  int options = 0;
1232  int study_options = 0;  int study_options = 0;
1233    int default_find_match_limit = FALSE;
1234  int op = 1;  int op = 1;
1235  int timeit = 0;  int timeit = 0;
1236  int timeitm = 0;  int timeitm = 0;
1237  int showinfo = 0;  int showinfo = 0;
1238  int showstore = 0;  int showstore = 0;
1239    int force_study = -1;
1240    int force_study_options = 0;
1241  int quiet = 0;  int quiet = 0;
1242  int size_offsets = 45;  int size_offsets = 45;
1243  int size_offsets_max;  int size_offsets_max;
# Line 750  int all_use_dfa = 0; Line 1251  int all_use_dfa = 0;
1251  int yield = 0;  int yield = 0;
1252  int stack_size;  int stack_size;
1253    
1254    pcre_jit_stack *jit_stack = NULL;
1255    
1256    
1257  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1258  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1259    
1260  uschar copynames[1024];  pcre_uchar copynames[1024];
1261  uschar getnames[1024];  pcre_uchar getnames[1024];
1262    
1263  uschar *copynamesptr;  pcre_uchar *copynamesptr;
1264  uschar *getnamesptr;  pcre_uchar *getnamesptr;
1265    
1266  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1267  when I am debugging. They grow automatically when very long lines are read. */  when I am debugging. They grow automatically when very long lines are read. */
1268    
1269  buffer = (unsigned char *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
1270  dbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (pcre_uint8 *)malloc(buffer_size);
1271  pbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
1272    
1273  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1274    
# Line 785  while (argc > 1 && argv[op][0] == '-') Line 1289  while (argc > 1 && argv[op][0] == '-')
1289    {    {
1290    unsigned char *endptr;    unsigned char *endptr;
1291    
1292    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1293      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1294      else if (strcmp(argv[op], "-s+") == 0)
1295        {
1296        force_study = 1;
1297        force_study_options = PCRE_STUDY_JIT_COMPILE;
1298        }
1299    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1300    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1301    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1302    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1303      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1304  #if !defined NODFA  #if !defined NODFA
1305    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1306  #endif  #endif
# Line 819  while (argc > 1 && argv[op][0] == '-') Line 1329  while (argc > 1 && argv[op][0] == '-')
1329        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1330          *endptr == 0))          *endptr == 0))
1331      {      {
1332  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1333      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1334      exit(1);      exit(1);
1335  #else  #else
# Line 843  while (argc > 1 && argv[op][0] == '-') Line 1353  while (argc > 1 && argv[op][0] == '-')
1353    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1354      {      {
1355      int rc;      int rc;
1356        unsigned long int lrc;
1357      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1358      printf("Compiled with\n");      printf("Compiled with\n");
1359      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1360      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1361      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1362      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1363        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1364        if (rc)
1365          printf("  Just-in-time compiler support\n");
1366        else
1367          printf("  No just-in-time compiler support\n");
1368      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1369      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1370        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1371        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1372          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1373        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
1374        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1375        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1376        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1377                                         "all Unicode newlines");
1378      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1379      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1380      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1381      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1382      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1383      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1384      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1385      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1386      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1387      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1388      goto EXIT;      goto EXIT;
# Line 943  while (!done) Line 1464  while (!done)
1464  #endif  #endif
1465    
1466    const char *error;    const char *error;
1467      unsigned char *markptr;
1468    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1469    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1470    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1471    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1472    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1473      int do_allcaps = 0;
1474      int do_mark = 0;
1475    int do_study = 0;    int do_study = 0;
1476      int no_force_study = 0;
1477    int do_debug = debug;    int do_debug = debug;
   int debug_lengths = 1;  
1478    int do_G = 0;    int do_G = 0;
1479    int do_g = 0;    int do_g = 0;
1480    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1481    int do_showrest = 0;    int do_showrest = 0;
1482      int do_showcaprest = 0;
1483    int do_flip = 0;    int do_flip = 0;
1484    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1485    
1486    use_utf8 = 0;    use_utf8 = 0;
1487      debug_lengths = 1;
1488    
1489    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1490    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1491    fflush(outfile);    fflush(outfile);
1492    
# Line 974  while (!done) Line 1499  while (!done)
1499    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1500      {      {
1501      unsigned long int magic, get_options;      unsigned long int magic, get_options;
1502      uschar sbuf[8];      pcre_uint8 sbuf[8];
1503      FILE *f;      FILE *f;
1504    
1505      p++;      p++;
# Line 997  while (!done) Line 1522  while (!done)
1522        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1523    
1524      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1525      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1526    
1527      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1528    
# Line 1016  while (!done) Line 1541  while (!done)
1541          }          }
1542        }        }
1543    
1544      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1545        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1546    
1547      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
# Line 1024  while (!done) Line 1549  while (!done)
1549      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1550      use_utf8 = (get_options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1551    
1552      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1553    
1554      if (true_study_size != 0)      if (true_study_size != 0)
1555        {        {
# Line 1040  while (!done) Line 1565  while (!done)
1565          {          {
1566          FAIL_READ:          FAIL_READ:
1567          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1568          if (extra != NULL) new_free(extra);          if (extra != NULL) pcre_free_study(extra);
1569          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1570          fclose(f);          fclose(f);
1571          continue;          continue;
# Line 1061  while (!done) Line 1586  while (!done)
1586    
1587    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1588      {      {
1589      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1590      goto SKIP_DATA;      goto SKIP_DATA;
1591      }      }
1592    
1593    pp = p;    pp = p;
1594    poffset = p - buffer;    poffset = (int)(p - buffer);
1595    
1596    for(;;)    for(;;)
1597      {      {
# Line 1077  while (!done) Line 1602  while (!done)
1602        pp++;        pp++;
1603        }        }
1604      if (*pp != 0) break;      if (*pp != 0) break;
1605      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1606        {        {
1607        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1608        done = 1;        done = 1;
# Line 1121  while (!done) Line 1645  while (!done)
1645        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1646        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1647    
1648        case '+': do_showrest = 1; break;        case '+':
1649          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1650          break;
1651    
1652          case '=': do_allcaps = 1; break;
1653        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1654        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
1655        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1131  while (!done) Line 1659  while (!done)
1659        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1660        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1661        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1662          case 'K': do_mark = 1; break;
1663        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1664        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1665    
# Line 1138  while (!done) Line 1667  while (!done)
1667        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1668  #endif  #endif
1669    
1670        case 'S': do_study = 1; break;        case 'S':
1671          if (do_study == 0)
1672            {
1673            do_study = 1;
1674            if (*pp == '+')
1675              {
1676              study_options |= PCRE_STUDY_JIT_COMPILE;
1677              pp++;
1678              }
1679            }
1680          else
1681            {
1682            do_study = 0;
1683            no_force_study = 1;
1684            }
1685          break;
1686    
1687        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1688          case 'W': options |= PCRE_UCP; break;
1689        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1690          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1691        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1692        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1693        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1694    
1695          case 'T':
1696          switch (*pp++)
1697            {
1698            case '0': tables = tables0; break;
1699            case '1': tables = tables1; break;
1700    
1701            case '\r':
1702            case '\n':
1703            case ' ':
1704            case 0:
1705            fprintf(outfile, "** Missing table number after /T\n");
1706            goto SKIP_DATA;
1707    
1708            default:
1709            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1710            goto SKIP_DATA;
1711            }
1712          break;
1713    
1714        case 'L':        case 'L':
1715        ppp = pp;        ppp = pp;
1716        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1170  while (!done) Line 1736  while (!done)
1736    
1737        case '<':        case '<':
1738          {          {
1739          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1740          if (x == 0) goto SKIP_DATA;            {
1741          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
1742          while (*pp++ != '>');            pp += 3;
1743              }
1744            else
1745              {
1746              int x = check_newline(pp, outfile);
1747              if (x == 0) goto SKIP_DATA;
1748              options |= x;
1749              while (*pp++ != '>');
1750              }
1751          }          }
1752        break;        break;
1753    
# Line 1203  while (!done) Line 1777  while (!done)
1777      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1778      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1779      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1780        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1781        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1782    
1783        first_gotten_store = 0;
1784      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1785    
1786      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1223  while (!done) Line 1800  while (!done)
1800  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1801    
1802      {      {
1803        unsigned long int get_options;
1804    
1805      if (timeit > 0)      if (timeit > 0)
1806        {        {
1807        register int i;        register int i;
# Line 1239  while (!done) Line 1818  while (!done)
1818            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1819        }        }
1820    
1821        first_gotten_store = 0;
1822      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1823    
1824      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1252  while (!done) Line 1832  while (!done)
1832          {          {
1833          for (;;)          for (;;)
1834            {            {
1835            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1836              {              {
1837              done = 1;              done = 1;
1838              goto CONTINUE;              goto CONTINUE;
# Line 1266  while (!done) Line 1846  while (!done)
1846        goto CONTINUE;        goto CONTINUE;
1847        }        }
1848    
1849      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1850      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1851      returns only limited data. Check that it agrees with the newer one. */      lines. */
1852    
1853      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1854        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
1855    
1856      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
1857      and remember the store that was got. */      and remember the store that was got. */
1858    
1859      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
1860      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1861    
1862        /* Output code size information if requested */
1863    
1864        if (log_store)
1865          fprintf(outfile, "Memory allocation (code space): %d\n",
1866            (int)(first_gotten_store -
1867                  sizeof(real_pcre) -
1868                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1869    
1870      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
1871      help with the matching. */      help with the matching, unless the pattern has the SS option, which
1872        suppresses the effect of /S (used for a few test patterns where studying is
1873        never sensible). */
1874    
1875      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
1876        {        {
1877        if (timeit > 0)        if (timeit > 0)
1878          {          {
# Line 1293  while (!done) Line 1880  while (!done)
1880          clock_t time_taken;          clock_t time_taken;
1881          clock_t start_time = clock();          clock_t start_time = clock();
1882          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
1883            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options | force_study_options, &error);
1884          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1885          if (extra != NULL) free(extra);          if (extra != NULL) pcre_free_study(extra);
1886          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1887            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
1888              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1889          }          }
1890        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options | force_study_options, &error);
1891        if (error != NULL)        if (error != NULL)
1892          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
1893        else if (extra != NULL)        else if (extra != NULL)
1894            {
1895          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1896            if (log_store)
1897              {
1898              size_t jitsize;
1899              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
1900              if (jitsize != 0)
1901                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
1902              }
1903            }
1904          }
1905    
1906        /* If /K was present, we set up for handling MARK data. */
1907    
1908        if (do_mark)
1909          {
1910          if (extra == NULL)
1911            {
1912            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1913            extra->flags = 0;
1914            }
1915          extra->mark = &markptr;
1916          extra->flags |= PCRE_EXTRA_MARK;
1917        }        }
1918    
1919      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
# Line 1315  while (!done) Line 1924  while (!done)
1924      if (do_flip)      if (do_flip)
1925        {        {
1926        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1927        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1928            byteflip(rre->magic_number, sizeof(rre->magic_number));
1929        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1930        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1931        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1932        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1933        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1934        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1935        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1936          rre->first_char =
1937            (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
1938          rre->req_char =
1939            (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
1940          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1941          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1942        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1943          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1944        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1945            sizeof(rre->name_count));
1946    
1947        if (extra != NULL)        if (extra != NULL)
1948          {          {
1949          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1950          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1951          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1952            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1953          }          }
1954        }        }
1955    
1956      /* Extract information from the compiled data if required */      /* Extract information from the compiled data if required. There are now
1957        two info-returning functions. The old one has a limited interface and
1958        returns only limited data. Check that it agrees with the newer one. */
1959    
1960      SHOW_INFO:      SHOW_INFO:
1961    
# Line 1346  while (!done) Line 1965  while (!done)
1965        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1966        }        }
1967    
1968        /* We already have the options in get_options (see above) */
1969    
1970      if (do_showinfo)      if (do_showinfo)
1971        {        {
1972        unsigned long int get_options, all_options;        unsigned long int all_options;
1973  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1974        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1975  #endif  #endif
1976        int count, backrefmax, first_char, need_char, okpartial, jchanged;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1977            hascrorlf;
1978        int nameentrysize, namecount;        int nameentrysize, namecount;
1979        const uschar *nametable;        const pcre_uchar *nametable;
1980    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1981        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1982        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1983        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1367  while (!done) Line 1988  while (!done)
1988        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1989        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1990        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1991          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1992    
1993  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1994        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1409  while (!done) Line 2031  while (!done)
2031          }          }
2032    
2033        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2034          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2035    
2036        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2037        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2038    
2039        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2040          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2041            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2042            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2043            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2044            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2045            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2046            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2047              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2048              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2049            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2050            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2051            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2052            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2053            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2054              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2055            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2056              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2057            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2058    
2059        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1467  while (!done) Line 2094  while (!done)
2094          }          }
2095        else        else
2096          {          {
2097          int ch = first_char & 255;          const char *caseless =
2098          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2099            "" : " (caseless)";            "" : " (caseless)";
2100          if (PRINTHEX(ch))  
2101            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(first_char))
2102              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2103          else          else
2104            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2105          }          }
2106    
2107        if (need_char < 0)        if (need_char < 0)
# Line 1482  while (!done) Line 2110  while (!done)
2110          }          }
2111        else        else
2112          {          {
2113          int ch = need_char & 255;          const char *caseless =
2114          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2115            "" : " (caseless)";            "" : " (caseless)";
2116          if (PRINTHEX(ch))  
2117            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTHEX(need_char))
2118              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2119          else          else
2120            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2121          }          }
2122    
2123        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2124        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2125        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2126        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2127          information unless -i or -d was also present. This means that, except
2128          when auto-callouts are involved, the output from runs with and without
2129          -s should be identical. */
2130    
2131        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2132          {          {
2133          if (extra == NULL)          if (extra == NULL)
2134            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2135          else          else
2136            {            {
2137            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2138            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2139    
2140              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2141              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2142    
2143              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2144            if (start_bits == NULL)            if (start_bits == NULL)
2145              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2146            else            else
2147              {              {
2148              int i;              int i;
# Line 1536  while (!done) Line 2172  while (!done)
2172              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2173              }              }
2174            }            }
2175    
2176            /* Show this only if the JIT was set by /S, not by -s. */
2177    
2178            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2179              {
2180              int jit;
2181              new_info(re, extra, PCRE_INFO_JIT, &jit);
2182              if (jit)
2183                fprintf(outfile, "JIT study was successful\n");
2184              else
2185    #ifdef SUPPORT_JIT
2186                fprintf(outfile, "JIT study was not successful\n");
2187    #else
2188                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2189    #endif
2190              }
2191          }          }
2192        }        }
2193    
# Line 1552  while (!done) Line 2204  while (!done)
2204          }          }
2205        else        else
2206          {          {
2207          uschar sbuf[8];          pcre_uint8 sbuf[8];
2208          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2209          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2210          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2211          sbuf[3] = (true_size)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
2212    
2213          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2214          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2215          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2216          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2217    
2218          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2219              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1570  while (!done) Line 2222  while (!done)
2222            }            }
2223          else          else
2224            {            {
2225            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2226    
2227              /* If there is study data, write it. */
2228    
2229            if (extra != NULL)            if (extra != NULL)
2230              {              {
2231              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1580  while (!done) Line 2235  while (!done)
2235                  strerror(errno));                  strerror(errno));
2236                }                }
2237              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2238              }              }
2239            }            }
2240          fclose(f);          fclose(f);
2241          }          }
2242    
2243        new_free(re);        new_free(re);
2244        if (extra != NULL) new_free(extra);        if (extra != NULL) pcre_free_study(extra);
2245        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2246            {
2247            new_free((void *)tables);
2248            setlocale(LC_CTYPE, "C");
2249            locale_set = 0;
2250            }
2251        continue;  /* With next regex */        continue;  /* With next regex */
2252        }        }
2253      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1597  while (!done) Line 2256  while (!done)
2256    
2257    for (;;)    for (;;)
2258      {      {
2259      uschar *q;      pcre_uint8 *q;
2260      uschar *bptr;      pcre_uint8 *bptr;
2261      int *use_offsets = offsets;      int *use_offsets = offsets;
2262      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2263      int callout_data = 0;      int callout_data = 0;
2264      int callout_data_set = 0;      int callout_data_set = 0;
2265      int count, c;      int count, c;
2266      int copystrings = 0;      int copystrings = 0;
2267      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2268      int getstrings = 0;      int getstrings = 0;
2269      int getlist = 0;      int getlist = 0;
2270      int gmatched = 0;      int gmatched = 0;
2271      int start_offset = 0;      int start_offset = 0;
2272        int start_offset_sign = 1;
2273      int g_notempty = 0;      int g_notempty = 0;
2274      int use_dfa = 0;      int use_dfa = 0;
2275    
# Line 1623  while (!done) Line 2283  while (!done)
2283    
2284      pcre_callout = callout;      pcre_callout = callout;
2285      first_callout = 1;      first_callout = 1;
2286        last_callout_mark = NULL;
2287      callout_extra = 0;      callout_extra = 0;
2288      callout_count = 0;      callout_count = 0;
2289      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1635  while (!done) Line 2296  while (!done)
2296      len = 0;      len = 0;
2297      for (;;)      for (;;)
2298        {        {
2299        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
2300          {          {
2301          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2302              {
2303              fprintf(outfile, "\n");
2304              break;
2305              }
2306          done = 1;          done = 1;
2307          goto CONTINUE;          goto CONTINUE;
2308          }          }
# Line 1698  while (!done) Line 2362  while (!done)
2362            {            {
2363            unsigned char *pt = p;            unsigned char *pt = p;
2364            c = 0;            c = 0;
2365            while (isxdigit(*(++pt)))  
2366              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2367              when isxdigit() is a macro that refers to its argument more than
2368              once. This is banned by the C Standard, but apparently happens in at
2369              least one MacOS environment. */
2370    
2371              for (pt++; isxdigit(*pt); pt++)
2372                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2373            if (*pt == '}')            if (*pt == '}')
2374              {              {
2375              unsigned char buff8[8];              unsigned char buff8[8];
2376              int ii, utn;              int ii, utn;
2377              utn = ord2utf8(c, buff8);              if (use_utf8)
2378              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2379              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2380                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2381                  c = buff8[ii];   /* Last byte */
2382                  }
2383                else
2384                 {
2385                 if (c > 255)
2386                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2387                     "UTF-8 mode is not enabled.\n"
2388                     "** Truncation will probably give the wrong result.\n", c);
2389                 }
2390              p = pt + 1;              p = pt + 1;
2391              break;              break;
2392              }              }
# Line 1719  while (!done) Line 2399  while (!done)
2399          c = 0;          c = 0;
2400          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2401            {            {
2402            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2403            p++;            p++;
2404            }            }
2405          break;          break;
# Line 1729  while (!done) Line 2409  while (!done)
2409          continue;          continue;
2410    
2411          case '>':          case '>':
2412            if (*p == '-')
2413              {
2414              start_offset_sign = -1;
2415              p++;
2416              }
2417          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2418            start_offset *= start_offset_sign;
2419          continue;          continue;
2420    
2421          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1748  while (!done) Line 2434  while (!done)
2434            }            }
2435          else if (isalnum(*p))          else if (isalnum(*p))
2436            {            {
2437            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
2438            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2439            *npp++ = 0;            *npp++ = 0;
2440            *npp = 0;            *npp = 0;
# Line 1802  while (!done) Line 2488  while (!done)
2488  #endif  #endif
2489            use_dfa = 1;            use_dfa = 1;
2490          continue;          continue;
2491    #endif
2492    
2493    #if !defined NODFA
2494          case 'F':          case 'F':
2495          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2496          continue;          continue;
# Line 1816  while (!done) Line 2504  while (!done)
2504            }            }
2505          else if (isalnum(*p))          else if (isalnum(*p))
2506            {            {
2507            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
2508            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2509            *npp++ = 0;            *npp++ = 0;
2510            *npp = 0;            *npp = 0;
# Line 1827  while (!done) Line 2515  while (!done)
2515            }            }
2516          continue;          continue;
2517    
2518            case 'J':
2519            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2520            if (extra != NULL
2521                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2522                && extra->executable_jit != NULL)
2523              {
2524              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2525              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2526              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2527              }
2528            continue;
2529    
2530          case 'L':          case 'L':
2531          getlist = 1;          getlist = 1;
2532          continue;          continue;
# Line 1836  while (!done) Line 2536  while (!done)
2536          continue;          continue;
2537    
2538          case 'N':          case 'N':
2539          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2540              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2541            else
2542              options |= PCRE_NOTEMPTY;
2543          continue;          continue;
2544    
2545          case 'O':          case 'O':
# Line 1859  while (!done) Line 2562  while (!done)
2562          continue;          continue;
2563    
2564          case 'P':          case 'P':
2565          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2566              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2567          continue;          continue;
2568    
2569          case 'Q':          case 'Q':
# Line 1894  while (!done) Line 2598  while (!done)
2598          show_malloc = 1;          show_malloc = 1;
2599          continue;          continue;
2600    
2601            case 'Y':
2602            options |= PCRE_NO_START_OPTIMIZE;
2603            continue;
2604    
2605          case 'Z':          case 'Z':
2606          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2607          continue;          continue;
# Line 1914  while (!done) Line 2622  while (!done)
2622        *q++ = c;        *q++ = c;
2623        }        }
2624      *q = 0;      *q = 0;
2625      len = q - dbuffer;      len = (int)(q - dbuffer);
2626    
2627        /* Move the data to the end of the buffer so that a read over the end of
2628        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2629        we are using the POSIX interface, we must include the terminating zero. */
2630    
2631    #if !defined NOPOSIX
2632        if (posix || do_posix)
2633          {
2634          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2635          bptr += buffer_size - len - 1;
2636          }
2637        else
2638    #endif
2639          {
2640          memmove(bptr + buffer_size - len, bptr, len);
2641          bptr += buffer_size - len;
2642          }
2643    
2644      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2645        {        {
# Line 1935  while (!done) Line 2660  while (!done)
2660          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2661        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2662        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2663          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2664    
2665        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2666    
# Line 1959  while (!done) Line 2685  while (!done)
2685              (void)pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2686                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2687              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2688              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2689                {                {
2690                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2691                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2692                  outfile);                  outfile);
2693                fprintf(outfile, "\n");                fprintf(outfile, "\n");
# Line 1979  while (!done) Line 2705  while (!done)
2705    
2706      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2707        {        {
2708          markptr = NULL;
2709    
2710        if (timeitm > 0)        if (timeitm > 0)
2711          {          {
2712          register int i;          register int i;
# Line 1990  while (!done) Line 2718  while (!done)
2718            {            {
2719            int workspace[1000];            int workspace[1000];
2720            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2721              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2722                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2723                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2724            }            }
# Line 2009  while (!done) Line 2737  while (!done)
2737    
2738        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2739        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
2740        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
2741          running of pcre_exec(), so disable the JIT optimization. This makes it
2742          possible to run the same set of tests with and without JIT externally
2743          requested. */
2744    
2745        if (find_match_limit)        if (find_match_limit)
2746          {          {
# Line 2018  while (!done) Line 2749  while (!done)
2749            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2750            extra->flags = 0;            extra->flags = 0;
2751            }            }
2752            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2753    
2754          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
2755            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2053  while (!done) Line 2785  while (!done)
2785        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2786          {          {
2787          int workspace[1000];          int workspace[1000];
2788          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2789            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2790            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2791          if (count == 0)          if (count == 0)
# Line 2101  while (!done) Line 2833  while (!done)
2833              }              }
2834            }            }
2835    
2836            /* do_allcaps requests showing of all captures in the pattern, to check
2837            unset ones at the end. */
2838    
2839            if (do_allcaps)
2840              {
2841              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2842              count++;   /* Allow for full match */
2843              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2844              }
2845    
2846            /* Output the captured substrings */
2847    
2848          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2849            {            {
2850            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2851                {
2852                if (use_offsets[i] != -1)
2853                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2854                    use_offsets[i], i);
2855                if (use_offsets[i+1] != -1)
2856                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2857                    use_offsets[i+1], i+1);
2858              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2859                }
2860            else            else
2861              {              {
2862              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2863              (void)pchars(bptr + use_offsets[i],              (void)pchars(bptr + use_offsets[i],
2864                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
2865              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2866              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
2867                {                {
2868                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
2869                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2870                  fprintf(outfile, " 0+ ");                  outfile);
2871                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
2872                }                }
2873              }              }
2874            }            }
2875    
2876            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2877    
2878          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2879            {            {
2880            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2197  while (!done) Line 2948  while (!done)
2948                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2949              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2950                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
2951              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
2952              }              }
2953            }            }
# Line 2207  while (!done) Line 2957  while (!done)
2957    
2958        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2959          {          {
2960          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2961  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
2962          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
2963            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
2964              bptr + use_offsets[0]);            fprintf(outfile, ": ");
2965  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2966                outfile);
2967              }
2968          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2969          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2970          }          }
# Line 2222  while (!done) Line 2974  while (!done)
2974        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2975        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2976    
2977        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
2978        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
2979        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
2980        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
2981        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
2982          newline setting in the pattern; if none was set, use pcre_config() to
2983          find the default.
2984    
2985        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2986        character, not one byte. */        character, not one byte. */
# Line 2242  while (!done) Line 2996  while (!done)
2996              {              {
2997              int d;              int d;
2998              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2999              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
3000                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
3001                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
3002                        (d == 10)? PCRE_NEWLINE_LF :
3003                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3004                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
3005                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
3006              }              }
3007            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3008                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3009                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3010                &&                &&
3011                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2259  while (!done) Line 3016  while (!done)
3016              {              {
3017              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3018                {                {
3019                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3020                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3021                }                }
3022              }              }
3023            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3024            }            }
3025          else          else
3026            {            {
3027            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3028              {              {
3029              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3030                if (gmatched == 0)
3031                  {
3032                  if (markptr == NULL) fprintf(outfile, "No match\n");
3033                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3034                  }
3035                break;
3036    
3037                case PCRE_ERROR_BADUTF8:
3038                case PCRE_ERROR_SHORTUTF8:
3039                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3040                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3041                if (use_size_offsets >= 2)
3042                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3043                    use_offsets[1]);
3044                fprintf(outfile, "\n");
3045                break;
3046    
3047                default:
3048                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3049                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3050                else
3051                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3052                break;
3053              }              }
3054            else fprintf(outfile, "Error %d\n", count);  
3055            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3056            }            }
3057          }          }
# Line 2283  while (!done) Line 3061  while (!done)
3061        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3062    
3063        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3064        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3065        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
3066        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3067        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3068        character. */        character. */
3069    
# Line 2294  while (!done) Line 3072  while (!done)
3072        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3073          {          {
3074          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3075          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3076          }          }
3077    
3078        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2320  while (!done) Line 3098  while (!done)
3098  #endif  #endif
3099    
3100    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3101    if (extra != NULL) new_free(extra);    if (extra != NULL) pcre_free_study(extra);
3102    if (tables != NULL)    if (locale_set)
3103      {      {
3104      new_free((void *)tables);      new_free((void *)tables);
3105      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3106      locale_set = 0;      locale_set = 0;
3107      }      }
3108      if (jit_stack != NULL)
3109        {
3110        pcre_jit_stack_free(jit_stack);
3111        jit_stack = NULL;
3112        }
3113    }    }
3114    
3115  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");

Legend:
Removed from v.172  
changed lines
  Added in v.801

  ViewVC Help
Powered by ViewVC 1.1.5