/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC revision 553 by ph10, Fri Oct 22 15:57:50 2010 UTC
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37    
38    
39  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
40  #include <config.h>  #include "config.h"
41  #endif  #endif
42    
43  #include <ctype.h>  #include <ctype.h>
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
61  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 63  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82  #else  #else
83  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
84  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 83  appropriately for an application, not fo Line 99  appropriately for an application, not fo
99  #include "pcre.h"  #include "pcre.h"
100  #include "pcre_internal.h"  #include "pcre_internal.h"
101    
102  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
103  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
104  symbols to prevent clashes. */  external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
108  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
109  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 94  symbols to prevent clashes. */ Line 111  symbols to prevent clashes. */
111  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
112  #define _pcre_utt              utt  #define _pcre_utt              utt
113  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
116    
117  #include "pcre_tables.c"  #include "pcre_tables.c"
118    
119  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
120  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
121  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We uses it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
123    
124    #define COMPILING_PCRETEST
125  #include "pcre_printint.src"  #include "pcre_printint.src"
126    
127  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 156  static int callout_count; Line 175  static int callout_count;
175  static int callout_extra;  static int callout_extra;
176  static int callout_fail_count;  static int callout_fail_count;
177  static int callout_fail_id;  static int callout_fail_id;
178    static int debug_lengths;
179  static int first_callout;  static int first_callout;
180  static int locale_set = 0;  static int locale_set = 0;
181  static int show_malloc;  static int show_malloc;
# Line 170  static uschar *dbuffer = NULL; Line 190  static uschar *dbuffer = NULL;
190  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
191    
192    
193    /*************************************************
194    *         Alternate character tables             *
195    *************************************************/
196    
197    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198    using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200    the L (locale) option also adjusts the tables. */
201    
202    /* This is the set of tables distributed as default with PCRE. It recognizes
203    only ASCII characters. */
204    
205    static const unsigned char tables0[] = {
206    
207    /* This table is a lower casing table. */
208    
209        0,  1,  2,  3,  4,  5,  6,  7,
210        8,  9, 10, 11, 12, 13, 14, 15,
211       16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515    };
516    
517    
518    
519  /*************************************************  /*************************************************
520  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 187  optimal way of handling this, but hey, t Line 532  optimal way of handling this, but hey, t
532  Arguments:  Arguments:
533    f            the file to read    f            the file to read
534    start        where in buffer to start (this *must* be within buffer)    start        where in buffer to start (this *must* be within buffer)
535      prompt       for stdin or readline()
536    
537  Returns:       pointer to the start of new data  Returns:       pointer to the start of new data
538                 could be a copy of start, or could be moved                 could be a copy of start, or could be moved
# Line 194  Returns:       pointer to the start of n Line 540  Returns:       pointer to the start of n
540  */  */
541    
542  static uschar *  static uschar *
543  extend_inputline(FILE *f, uschar *start)  extend_inputline(FILE *f, uschar *start, const char *prompt)
544  {  {
545  uschar *here = start;  uschar *here = start;
546    
547  for (;;)  for (;;)
548    {    {
549    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
550    
551    if (rlen > 1000)    if (rlen > 1000)
552      {      {
553      int dlen;      int dlen;
554      if (fgets((char *)here, rlen,  f) == NULL)  
555        return (here == start)? NULL : start;      /* If libreadline support is required, use readline() to read a line if the
556        input is a terminal. Note that readline() removes the trailing newline, so
557        we must put it back again, to be compatible with fgets(). */
558    
559    #ifdef SUPPORT_LIBREADLINE
560        if (isatty(fileno(f)))
561          {
562          size_t len;
563          char *s = readline(prompt);
564          if (s == NULL) return (here == start)? NULL : start;
565          len = strlen(s);
566          if (len > 0) add_history(s);
567          if (len > rlen - 1) len = rlen - 1;
568          memcpy(here, s, len);
569          here[len] = '\n';
570          here[len+1] = 0;
571          free(s);
572          }
573        else
574    #endif
575    
576        /* Read the next line by normal means, prompting if the file is stdin. */
577    
578          {
579          if (f == stdin) printf("%s", prompt);
580          if (fgets((char *)here, rlen,  f) == NULL)
581            return (here == start)? NULL : start;
582          }
583    
584      dlen = (int)strlen((char *)here);      dlen = (int)strlen((char *)here);
585      if (dlen > 0 && here[dlen - 1] == '\n') return start;      if (dlen > 0 && here[dlen - 1] == '\n') return start;
586      here += dlen;      here += dlen;
# Line 663  return count; Line 1037  return count;
1037    
1038    
1039  /*************************************************  /*************************************************
1040    *         Case-independent strncmp() function    *
1041    *************************************************/
1042    
1043    /*
1044    Arguments:
1045      s         first string
1046      t         second string
1047      n         number of characters to compare
1048    
1049    Returns:    < 0, = 0, or > 0, according to the comparison
1050    */
1051    
static int
strncmpic(uschar *s, uschar *t, int n)
{
/* Compare at most n characters, ignoring case. Like strncmp(), the comparison
also ends when both strings reach their terminating zero together, so that
equal strings shorter than n are never read past their end. A count that is
zero or negative compares nothing and yields 0. */

while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;     /* First difference decides the result */
  if (cs == 0) break;               /* Both strings ended: they are equal */
  }
return 0;
}
1062    
1063    
1064    
1065    /*************************************************
1066  *         Check newline indicator                *  *         Check newline indicator                *
1067  *************************************************/  *************************************************/
1068    
1069  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1070  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1071    
1072  Arguments:  Arguments:
1073    p           points after the leading '<'    p           points after the leading '<'
# Line 680  Returns:      appropriate PCRE_NEWLINE_x Line 1079  Returns:      appropriate PCRE_NEWLINE_x
1079  static int  static int
1080  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
1081  {  {
1082  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1083  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1084  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1085  if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1086  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1087    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1088    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1089  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1090  return 0;  return 0;
1091  }  }
# Line 698  return 0; Line 1099  return 0;
1099  static void  static void
1100  usage(void)  usage(void)
1101  {  {
1102  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1103    printf("Input and output default to stdin and stdout.\n");
1104    #ifdef SUPPORT_LIBREADLINE
1105    printf("If input is a terminal, readline() is used to read from it.\n");
1106    #else
1107    printf("This version of pcretest is not linked with readline().\n");
1108    #endif
1109    printf("\nOptions:\n");
1110  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1111  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1112  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 707  printf("  -dfa     force DFA matching fo Line 1115  printf("  -dfa     force DFA matching fo
1115  #endif  #endif
1116  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
1117  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
1118           "  -M       find MATCH_LIMIT minimum for each subject\n"
1119         "  -m       output memory used information\n"         "  -m       output memory used information\n"
1120         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
1121  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 736  int main(int argc, char **argv) Line 1145  int main(int argc, char **argv)
1145  FILE *infile = stdin;  FILE *infile = stdin;
1146  int options = 0;  int options = 0;
1147  int study_options = 0;  int study_options = 0;
1148    int default_find_match_limit = FALSE;
1149  int op = 1;  int op = 1;
1150  int timeit = 0;  int timeit = 0;
1151  int timeitm = 0;  int timeitm = 0;
# Line 795  while (argc > 1 && argv[op][0] == '-') Line 1205  while (argc > 1 && argv[op][0] == '-')
1205    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1206    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1207    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1208      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1209  #if !defined NODFA  #if !defined NODFA
1210    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1211  #endif  #endif
# Line 847  while (argc > 1 && argv[op][0] == '-') Line 1258  while (argc > 1 && argv[op][0] == '-')
1258    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1259      {      {
1260      int rc;      int rc;
1261        unsigned long int lrc;
1262      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1263      printf("Compiled with\n");      printf("Compiled with\n");
1264      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 854  while (argc > 1 && argv[op][0] == '-') Line 1266  while (argc > 1 && argv[op][0] == '-')
1266      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1267      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1268      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1269      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1270        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1271        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1272          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1273        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
1274        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1275        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1276        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1277                                         "all Unicode newlines");
1278      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1279      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1280      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1281      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1282      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1283      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1284      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1285      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1286      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1287      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1288      goto EXIT;      goto EXIT;
# Line 947  while (!done) Line 1364  while (!done)
1364  #endif  #endif
1365    
1366    const char *error;    const char *error;
1367      unsigned char *markptr;
1368    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1369    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1370    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1371    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1372    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1373      int do_mark = 0;
1374    int do_study = 0;    int do_study = 0;
1375    int do_debug = debug;    int do_debug = debug;
   int debug_lengths = 1;  
1376    int do_G = 0;    int do_G = 0;
1377    int do_g = 0;    int do_g = 0;
1378    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 963  while (!done) Line 1381  while (!done)
1381    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1382    
1383    use_utf8 = 0;    use_utf8 = 0;
1384      debug_lengths = 1;
1385    
1386    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1387    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1388    fflush(outfile);    fflush(outfile);
1389    
# Line 1065  while (!done) Line 1483  while (!done)
1483    
1484    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1485      {      {
1486      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1487      goto SKIP_DATA;      goto SKIP_DATA;
1488      }      }
1489    
1490    pp = p;    pp = p;
1491    poffset = p - buffer;    poffset = (int)(p - buffer);
1492    
1493    for(;;)    for(;;)
1494      {      {
# Line 1081  while (!done) Line 1499  while (!done)
1499        pp++;        pp++;
1500        }        }
1501      if (*pp != 0) break;      if (*pp != 0) break;
1502      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1503        {        {
1504        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1505        done = 1;        done = 1;
# Line 1135  while (!done) Line 1552  while (!done)
1552        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1553        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1554        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1555          case 'K': do_mark = 1; break;
1556        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1557        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1558    
# Line 1144  while (!done) Line 1562  while (!done)
1562    
1563        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1564        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1565          case 'W': options |= PCRE_UCP; break;
1566        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1567        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1568        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1569        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1570    
1571          case 'T':
1572          switch (*pp++)
1573            {
1574            case '0': tables = tables0; break;
1575            case '1': tables = tables1; break;
1576    
1577            case '\r':
1578            case '\n':
1579            case ' ':
1580            case 0:
1581            fprintf(outfile, "** Missing table number after /T\n");
1582            goto SKIP_DATA;
1583    
1584            default:
1585            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1586            goto SKIP_DATA;
1587            }
1588          break;
1589    
1590        case 'L':        case 'L':
1591        ppp = pp;        ppp = pp;
1592        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1174  while (!done) Line 1612  while (!done)
1612    
1613        case '<':        case '<':
1614          {          {
1615          int x = check_newline(pp, outfile);          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1616          if (x == 0) goto SKIP_DATA;            {
1617          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
1618          while (*pp++ != '>');            pp += 3;
1619              }
1620            else
1621              {
1622              int x = check_newline(pp, outfile);
1623              if (x == 0) goto SKIP_DATA;
1624              options |= x;
1625              while (*pp++ != '>');
1626              }
1627          }          }
1628        break;        break;
1629    
# Line 1207  while (!done) Line 1653  while (!done)
1653      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1654      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1655      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1656        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1657        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1658    
1659      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1660    
# Line 1227  while (!done) Line 1675  while (!done)
1675  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1676    
1677      {      {
1678        unsigned long int get_options;
1679    
1680      if (timeit > 0)      if (timeit > 0)
1681        {        {
1682        register int i;        register int i;
# Line 1256  while (!done) Line 1706  while (!done)
1706          {          {
1707          for (;;)          for (;;)
1708            {            {
1709            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1710              {              {
1711              done = 1;              done = 1;
1712              goto CONTINUE;              goto CONTINUE;
# Line 1270  while (!done) Line 1720  while (!done)
1720        goto CONTINUE;        goto CONTINUE;
1721        }        }
1722    
1723      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1724      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1725      returns only limited data. Check that it agrees with the newer one. */      lines. */
1726    
1727        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1728        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1729    
1730        /* Print information if required. There are now two info-returning
1731        functions. The old one has a limited interface and returns only limited
1732        data. Check that it agrees with the newer one. */
1733    
1734      if (log_store)      if (log_store)
1735        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1311  while (!done) Line 1768  while (!done)
1768          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1769        }        }
1770    
1771        /* If /K was present, we set up for handling MARK data. */
1772    
1773        if (do_mark)
1774          {
1775          if (extra == NULL)
1776            {
1777            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1778            extra->flags = 0;
1779            }
1780          extra->mark = &markptr;
1781          extra->flags |= PCRE_EXTRA_MARK;
1782          }
1783    
1784      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1785      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1786      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 1319  while (!done) Line 1789  while (!done)
1789      if (do_flip)      if (do_flip)
1790        {        {
1791        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1792        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1793            byteflip(rre->magic_number, sizeof(rre->magic_number));
1794        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1795        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1796        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1797        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1798        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1799        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1800        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1801          rre->first_byte =
1802            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1803          rre->req_byte =
1804            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1805          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1806          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1807        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1808          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1809        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1810            sizeof(rre->name_count));
1811    
1812        if (extra != NULL)        if (extra != NULL)
1813          {          {
1814          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1815          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1816          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1817            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1818          }          }
1819        }        }
1820    
# Line 1350  while (!done) Line 1828  while (!done)
1828        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1829        }        }
1830    
1831        /* We already have the options in get_options (see above) */
1832    
1833      if (do_showinfo)      if (do_showinfo)
1834        {        {
1835        unsigned long int get_options, all_options;        unsigned long int all_options;
1836  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1837        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1838  #endif  #endif
1839        int count, backrefmax, first_char, need_char, okpartial, jchanged;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1840            hascrorlf;
1841        int nameentrysize, namecount;        int nameentrysize, namecount;
1842        const uschar *nametable;        const uschar *nametable;
1843    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1844        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1845        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1846        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1371  while (!done) Line 1851  while (!done)
1851        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1852        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1853        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1854          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1855    
1856  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1857        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1413  while (!done) Line 1894  while (!done)
1894          }          }
1895    
1896        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1897          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1898    
1899        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1900        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1901    
1902        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1903          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1904            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1905            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1906            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1907            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1908            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1909            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1910              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1911              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1912            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1913            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1914            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1915            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1916            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1917              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1918            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1919            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1920    
# Line 1507  while (!done) Line 1992  while (!done)
1992          else          else
1993            {            {
1994            uschar *start_bits = NULL;            uschar *start_bits = NULL;
1995            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
1996    
1997              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1998              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1999    
2000              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2001            if (start_bits == NULL)            if (start_bits == NULL)
2002              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2003            else            else
2004              {              {
2005              int i;              int i;
# Line 1557  while (!done) Line 2046  while (!done)
2046        else        else
2047          {          {
2048          uschar sbuf[8];          uschar sbuf[8];
2049          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
2050          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
2051          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
2052          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
2053    
2054          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2055          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2056          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2057          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
2058    
2059          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2060              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1592  while (!done) Line 2081  while (!done)
2081    
2082        new_free(re);        new_free(re);
2083        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2084        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2085            {
2086            new_free((void *)tables);
2087            setlocale(LC_CTYPE, "C");
2088            locale_set = 0;
2089            }
2090        continue;  /* With next regex */        continue;  /* With next regex */
2091        }        }
2092      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1609  while (!done) Line 2103  while (!done)
2103      int callout_data_set = 0;      int callout_data_set = 0;
2104      int count, c;      int count, c;
2105      int copystrings = 0;      int copystrings = 0;
2106      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2107      int getstrings = 0;      int getstrings = 0;
2108      int getlist = 0;      int getlist = 0;
2109      int gmatched = 0;      int gmatched = 0;
# Line 1639  while (!done) Line 2133  while (!done)
2133      len = 0;      len = 0;
2134      for (;;)      for (;;)
2135        {        {
2136        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
2137          {          {
2138          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2139              {
2140              fprintf(outfile, "\n");
2141              break;
2142              }
2143          done = 1;          done = 1;
2144          goto CONTINUE;          goto CONTINUE;
2145          }          }
# Line 1708  while (!done) Line 2205  while (!done)
2205              {              {
2206              unsigned char buff8[8];              unsigned char buff8[8];
2207              int ii, utn;              int ii, utn;
2208              utn = ord2utf8(c, buff8);              if (use_utf8)
2209              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2210              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2211                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2212                  c = buff8[ii];   /* Last byte */
2213                  }
2214                else
2215                 {
2216                 if (c > 255)
2217                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2218                     "UTF-8 mode is not enabled.\n"
2219                     "** Truncation will probably give the wrong result.\n", c);
2220                 }
2221              p = pt + 1;              p = pt + 1;
2222              break;              break;
2223              }              }
# Line 1806  while (!done) Line 2313  while (!done)
2313  #endif  #endif
2314            use_dfa = 1;            use_dfa = 1;
2315          continue;          continue;
2316    #endif
2317    
2318    #if !defined NODFA
2319          case 'F':          case 'F':
2320          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2321          continue;          continue;
# Line 1840  while (!done) Line 2349  while (!done)
2349          continue;          continue;
2350    
2351          case 'N':          case 'N':
2352          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2353              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2354            else
2355              options |= PCRE_NOTEMPTY;
2356          continue;          continue;
2357    
2358          case 'O':          case 'O':
# Line 1863  while (!done) Line 2375  while (!done)
2375          continue;          continue;
2376    
2377          case 'P':          case 'P':
2378          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2379              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2380          continue;          continue;
2381    
2382          case 'Q':          case 'Q':
# Line 1898  while (!done) Line 2411  while (!done)
2411          show_malloc = 1;          show_malloc = 1;
2412          continue;          continue;
2413    
2414            case 'Y':
2415            options |= PCRE_NO_START_OPTIMIZE;
2416            continue;
2417    
2418          case 'Z':          case 'Z':
2419          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2420          continue;          continue;
# Line 1918  while (!done) Line 2435  while (!done)
2435        *q++ = c;        *q++ = c;
2436        }        }
2437      *q = 0;      *q = 0;
2438      len = q - dbuffer;      len = (int)(q - dbuffer);
2439    
2440        /* Move the data to the end of the buffer so that a read over the end of
2441        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2442        we are using the POSIX interface, we must include the terminating zero. */
2443    
2444    #if !defined NOPOSIX
2445        if (posix || do_posix)
2446          {
2447          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2448          bptr += buffer_size - len - 1;
2449          }
2450        else
2451    #endif
2452          {
2453          memmove(bptr + buffer_size - len, bptr, len);
2454          bptr += buffer_size - len;
2455          }
2456    
2457      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2458        {        {
# Line 1939  while (!done) Line 2473  while (!done)
2473          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2474        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2475        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2476          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2477    
2478        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2479    
# Line 1983  while (!done) Line 2518  while (!done)
2518    
2519      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2520        {        {
2521          markptr = NULL;
2522    
2523        if (timeitm > 0)        if (timeitm > 0)
2524          {          {
2525          register int i;          register int i;
# Line 1994  while (!done) Line 2531  while (!done)
2531            {            {
2532            int workspace[1000];            int workspace[1000];
2533            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2534              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2535                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2536                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2537            }            }
# Line 2057  while (!done) Line 2594  while (!done)
2594        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2595          {          {
2596          int workspace[1000];          int workspace[1000];
2597          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2598            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2599            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2600          if (count == 0)          if (count == 0)
# Line 2128  while (!done) Line 2665  while (!done)
2665              }              }
2666            }            }
2667    
2668            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2669    
2670          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2671            {            {
2672            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2211  while (!done) Line 2750  while (!done)
2750    
2751        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2752          {          {
2753          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2754  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
2755          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
2756            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
2757              bptr + use_offsets[0]);            fprintf(outfile, ": ");
2758  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2759                outfile);
2760              }
2761          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2762          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2763          }          }
# Line 2246  while (!done) Line 2787  while (!done)
2787              {              {
2788              int d;              int d;
2789              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2790              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
2791                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
2792                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
2793                        (d == 10)? PCRE_NEWLINE_LF :
2794                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2795                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
2796                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2797              }              }
# Line 2275  while (!done) Line 2818  while (!done)
2818            {            {
2819            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2820              {              {
2821              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2822                  {
2823                  if (markptr == NULL) fprintf(outfile, "No match\n");
2824                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2825                  }
2826              }              }
2827            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2828            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 2287  while (!done) Line 2834  while (!done)
2834        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2835    
2836        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2837        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2838        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2839        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2840        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2841        character. */        character. */
2842    
# Line 2298  while (!done) Line 2845  while (!done)
2845        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2846          {          {
2847          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2848          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2849          }          }
2850    
2851        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2325  while (!done) Line 2872  while (!done)
2872    
2873    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
2874    if (extra != NULL) new_free(extra);    if (extra != NULL) new_free(extra);
2875    if (tables != NULL)    if (locale_set)
2876      {      {
2877      new_free((void *)tables);      new_free((void *)tables);
2878      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");

Legend:
Removed from v.200  
changed lines
  Added in v.553

  ViewVC Help
Powered by ViewVC 1.1.5