/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 255 by ph10, Wed Sep 19 08:50:04 2007 UTC revision 580 by ph10, Fri Nov 26 11:16:43 2010 UTC
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
61  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 63  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
92  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 83  appropriately for an application, not fo Line 107  appropriately for an application, not fo
107  #include "pcre.h"  #include "pcre.h"
108  #include "pcre_internal.h"  #include "pcre_internal.h"
109    
110  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
111  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
112  symbols to prevent clashes. */  external symbols to prevent clashes. */
113    
114    #define _pcre_ucp_gentype      ucp_gentype
115  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
116  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
117  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 101  symbols to prevent clashes. */ Line 126  symbols to prevent clashes. */
126    
127  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
128  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
129  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
130    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We uses it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
131    
132    #define COMPILING_PCRETEST
133  #include "pcre_printint.src"  #include "pcre_printint.src"
134    
135  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
136    output character as-is or as a hex value when showing compiled patterns, is
137    contained in the printint.src file. We use it here also, in cases when the
138    locale has not been explicitly changed, so as to get consistent output from
139    systems that differ in their output from isprint() even in the "C" locale. */
140    
141    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142    
143  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
144  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 172  static uschar *dbuffer = NULL; Line 198  static uschar *dbuffer = NULL;
198  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
199    
200    
201    /*************************************************
202    *         Alternate character tables             *
203    *************************************************/
204    
205    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
206    using the default tables of the library. However, the T option can be used to
207    select alternate sets of tables, for different kinds of testing. Note also that
208    the L (locale) option also adjusts the tables. */
209    
210    /* This is the set of tables distributed as default with PCRE. It recognizes
211    only ASCII characters. */
212    
213    static const unsigned char tables0[] = {
214    
215    /* This table is a lower casing table. */
216    
217        0,  1,  2,  3,  4,  5,  6,  7,
218        8,  9, 10, 11, 12, 13, 14, 15,
219       16, 17, 18, 19, 20, 21, 22, 23,
220       24, 25, 26, 27, 28, 29, 30, 31,
221       32, 33, 34, 35, 36, 37, 38, 39,
222       40, 41, 42, 43, 44, 45, 46, 47,
223       48, 49, 50, 51, 52, 53, 54, 55,
224       56, 57, 58, 59, 60, 61, 62, 63,
225       64, 97, 98, 99,100,101,102,103,
226      104,105,106,107,108,109,110,111,
227      112,113,114,115,116,117,118,119,
228      120,121,122, 91, 92, 93, 94, 95,
229       96, 97, 98, 99,100,101,102,103,
230      104,105,106,107,108,109,110,111,
231      112,113,114,115,116,117,118,119,
232      120,121,122,123,124,125,126,127,
233      128,129,130,131,132,133,134,135,
234      136,137,138,139,140,141,142,143,
235      144,145,146,147,148,149,150,151,
236      152,153,154,155,156,157,158,159,
237      160,161,162,163,164,165,166,167,
238      168,169,170,171,172,173,174,175,
239      176,177,178,179,180,181,182,183,
240      184,185,186,187,188,189,190,191,
241      192,193,194,195,196,197,198,199,
242      200,201,202,203,204,205,206,207,
243      208,209,210,211,212,213,214,215,
244      216,217,218,219,220,221,222,223,
245      224,225,226,227,228,229,230,231,
246      232,233,234,235,236,237,238,239,
247      240,241,242,243,244,245,246,247,
248      248,249,250,251,252,253,254,255,
249    
250    /* This table is a case flipping table. */
251    
252        0,  1,  2,  3,  4,  5,  6,  7,
253        8,  9, 10, 11, 12, 13, 14, 15,
254       16, 17, 18, 19, 20, 21, 22, 23,
255       24, 25, 26, 27, 28, 29, 30, 31,
256       32, 33, 34, 35, 36, 37, 38, 39,
257       40, 41, 42, 43, 44, 45, 46, 47,
258       48, 49, 50, 51, 52, 53, 54, 55,
259       56, 57, 58, 59, 60, 61, 62, 63,
260       64, 97, 98, 99,100,101,102,103,
261      104,105,106,107,108,109,110,111,
262      112,113,114,115,116,117,118,119,
263      120,121,122, 91, 92, 93, 94, 95,
264       96, 65, 66, 67, 68, 69, 70, 71,
265       72, 73, 74, 75, 76, 77, 78, 79,
266       80, 81, 82, 83, 84, 85, 86, 87,
267       88, 89, 90,123,124,125,126,127,
268      128,129,130,131,132,133,134,135,
269      136,137,138,139,140,141,142,143,
270      144,145,146,147,148,149,150,151,
271      152,153,154,155,156,157,158,159,
272      160,161,162,163,164,165,166,167,
273      168,169,170,171,172,173,174,175,
274      176,177,178,179,180,181,182,183,
275      184,185,186,187,188,189,190,191,
276      192,193,194,195,196,197,198,199,
277      200,201,202,203,204,205,206,207,
278      208,209,210,211,212,213,214,215,
279      216,217,218,219,220,221,222,223,
280      224,225,226,227,228,229,230,231,
281      232,233,234,235,236,237,238,239,
282      240,241,242,243,244,245,246,247,
283      248,249,250,251,252,253,254,255,
284    
285    /* This table contains bit maps for various character classes. Each map is 32
286    bytes long and the bits run from the least significant end of each byte. The
287    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
288    graph, print, punct, and cntrl. Other classes are built from combinations. */
289    
290      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
291      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294    
295      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
296      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
299    
300      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
301      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
304    
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
307      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
309    
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
312      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
314    
315      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
316      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
317      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
319    
320      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
321      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
322      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324    
325      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
326      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
327      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
330      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
331      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
332      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
335      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
336      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
337      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
340    /* This table identifies various classes of character by individual bits:
341      0x01   white space character
342      0x02   letter
343      0x04   decimal digit
344      0x08   hexadecimal digit
345      0x10   alphanumeric or '_'
346      0x80   regular expression metacharacter or binary zero
347    */
348    
349      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
350      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
353      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
354      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
355      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
356      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
357      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
358      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
359      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
360      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
361      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
362      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
363      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
364      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
375      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
378      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
379      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
380      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
381    
382    /* This is a set of tables that came originally from a Windows user. It seems to
383    be at least an approximation of ISO 8859. In particular, there are characters
384    greater than 128 that are marked as spaces, letters, etc. */
385    
386    static const unsigned char tables1[] = {
387    0,1,2,3,4,5,6,7,
388    8,9,10,11,12,13,14,15,
389    16,17,18,19,20,21,22,23,
390    24,25,26,27,28,29,30,31,
391    32,33,34,35,36,37,38,39,
392    40,41,42,43,44,45,46,47,
393    48,49,50,51,52,53,54,55,
394    56,57,58,59,60,61,62,63,
395    64,97,98,99,100,101,102,103,
396    104,105,106,107,108,109,110,111,
397    112,113,114,115,116,117,118,119,
398    120,121,122,91,92,93,94,95,
399    96,97,98,99,100,101,102,103,
400    104,105,106,107,108,109,110,111,
401    112,113,114,115,116,117,118,119,
402    120,121,122,123,124,125,126,127,
403    128,129,130,131,132,133,134,135,
404    136,137,138,139,140,141,142,143,
405    144,145,146,147,148,149,150,151,
406    152,153,154,155,156,157,158,159,
407    160,161,162,163,164,165,166,167,
408    168,169,170,171,172,173,174,175,
409    176,177,178,179,180,181,182,183,
410    184,185,186,187,188,189,190,191,
411    224,225,226,227,228,229,230,231,
412    232,233,234,235,236,237,238,239,
413    240,241,242,243,244,245,246,215,
414    248,249,250,251,252,253,254,223,
415    224,225,226,227,228,229,230,231,
416    232,233,234,235,236,237,238,239,
417    240,241,242,243,244,245,246,247,
418    248,249,250,251,252,253,254,255,
419    0,1,2,3,4,5,6,7,
420    8,9,10,11,12,13,14,15,
421    16,17,18,19,20,21,22,23,
422    24,25,26,27,28,29,30,31,
423    32,33,34,35,36,37,38,39,
424    40,41,42,43,44,45,46,47,
425    48,49,50,51,52,53,54,55,
426    56,57,58,59,60,61,62,63,
427    64,97,98,99,100,101,102,103,
428    104,105,106,107,108,109,110,111,
429    112,113,114,115,116,117,118,119,
430    120,121,122,91,92,93,94,95,
431    96,65,66,67,68,69,70,71,
432    72,73,74,75,76,77,78,79,
433    80,81,82,83,84,85,86,87,
434    88,89,90,123,124,125,126,127,
435    128,129,130,131,132,133,134,135,
436    136,137,138,139,140,141,142,143,
437    144,145,146,147,148,149,150,151,
438    152,153,154,155,156,157,158,159,
439    160,161,162,163,164,165,166,167,
440    168,169,170,171,172,173,174,175,
441    176,177,178,179,180,181,182,183,
442    184,185,186,187,188,189,190,191,
443    224,225,226,227,228,229,230,231,
444    232,233,234,235,236,237,238,239,
445    240,241,242,243,244,245,246,215,
446    248,249,250,251,252,253,254,223,
447    192,193,194,195,196,197,198,199,
448    200,201,202,203,204,205,206,207,
449    208,209,210,211,212,213,214,247,
450    216,217,218,219,220,221,222,255,
451    0,62,0,0,1,0,0,0,
452    0,0,0,0,0,0,0,0,
453    32,0,0,0,1,0,0,0,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,255,3,
456    126,0,0,0,126,0,0,0,
457    0,0,0,0,0,0,0,0,
458    0,0,0,0,0,0,0,0,
459    0,0,0,0,0,0,255,3,
460    0,0,0,0,0,0,0,0,
461    0,0,0,0,0,0,12,2,
462    0,0,0,0,0,0,0,0,
463    0,0,0,0,0,0,0,0,
464    254,255,255,7,0,0,0,0,
465    0,0,0,0,0,0,0,0,
466    255,255,127,127,0,0,0,0,
467    0,0,0,0,0,0,0,0,
468    0,0,0,0,254,255,255,7,
469    0,0,0,0,0,4,32,4,
470    0,0,0,128,255,255,127,255,
471    0,0,0,0,0,0,255,3,
472    254,255,255,135,254,255,255,7,
473    0,0,0,0,0,4,44,6,
474    255,255,127,255,255,255,127,255,
475    0,0,0,0,254,255,255,255,
476    255,255,255,255,255,255,255,127,
477    0,0,0,0,254,255,255,255,
478    255,255,255,255,255,255,255,255,
479    0,2,0,0,255,255,255,255,
480    255,255,255,255,255,255,255,127,
481    0,0,0,0,255,255,255,255,
482    255,255,255,255,255,255,255,255,
483    0,0,0,0,254,255,0,252,
484    1,0,0,248,1,0,0,120,
485    0,0,0,0,254,255,255,255,
486    0,0,128,0,0,0,128,0,
487    255,255,255,255,0,0,0,0,
488    0,0,0,0,0,0,0,128,
489    255,255,255,255,0,0,0,0,
490    0,0,0,0,0,0,0,0,
491    128,0,0,0,0,0,0,0,
492    0,1,1,0,1,1,0,0,
493    0,0,0,0,0,0,0,0,
494    0,0,0,0,0,0,0,0,
495    1,0,0,0,128,0,0,0,
496    128,128,128,128,0,0,128,0,
497    28,28,28,28,28,28,28,28,
498    28,28,0,0,0,0,0,128,
499    0,26,26,26,26,26,26,18,
500    18,18,18,18,18,18,18,18,
501    18,18,18,18,18,18,18,18,
502    18,18,18,128,128,0,128,16,
503    0,26,26,26,26,26,26,18,
504    18,18,18,18,18,18,18,18,
505    18,18,18,18,18,18,18,18,
506    18,18,18,128,128,0,0,0,
507    0,0,0,0,0,1,0,0,
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,0,0,
510    0,0,0,0,0,0,0,0,
511    1,0,0,0,0,0,0,0,
512    0,0,18,0,0,0,0,0,
513    0,0,20,20,0,18,0,0,
514    0,20,18,0,0,0,0,0,
515    18,18,18,18,18,18,18,18,
516    18,18,18,18,18,18,18,18,
517    18,18,18,18,18,18,18,0,
518    18,18,18,18,18,18,18,18,
519    18,18,18,18,18,18,18,18,
520    18,18,18,18,18,18,18,18,
521    18,18,18,18,18,18,18,0,
522    18,18,18,18,18,18,18,18
523    };
524    
525    
526    
527    
528    #ifndef HAVE_STRERROR
529    /*************************************************
530    *     Provide strerror() for non-ANSI libraries  *
531    *************************************************/
532    
533    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
534    in their libraries, but can provide the same facility by this simple
535    alternative function. */
536    
537    extern int   sys_nerr;
538    extern char *sys_errlist[];
539    
540    char *
541    strerror(int n)
542    {
543    if (n < 0 || n >= sys_nerr) return "unknown error number";
544    return sys_errlist[n];
545    }
546    #endif /* HAVE_STRERROR */
547    
548    
549    
550    
551  /*************************************************  /*************************************************
552  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 189  optimal way of handling this, but hey, t Line 564  optimal way of handling this, but hey, t
564  Arguments:  Arguments:
565    f            the file to read    f            the file to read
566    start        where in buffer to start (this *must* be within buffer)    start        where in buffer to start (this *must* be within buffer)
567      prompt       for stdin or readline()
568    
569  Returns:       pointer to the start of new data  Returns:       pointer to the start of new data
570                 could be a copy of start, or could be moved                 could be a copy of start, or could be moved
# Line 196  Returns:       pointer to the start of n Line 572  Returns:       pointer to the start of n
572  */  */
573    
574  static uschar *  static uschar *
575  extend_inputline(FILE *f, uschar *start)  extend_inputline(FILE *f, uschar *start, const char *prompt)
576  {  {
577  uschar *here = start;  uschar *here = start;
578    
579  for (;;)  for (;;)
580    {    {
581    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
582    
583    if (rlen > 1000)    if (rlen > 1000)
584      {      {
585      int dlen;      int dlen;
586      if (fgets((char *)here, rlen,  f) == NULL)  
587        return (here == start)? NULL : start;      /* If libreadline support is required, use readline() to read a line if the
588        input is a terminal. Note that readline() removes the trailing newline, so
589        we must put it back again, to be compatible with fgets(). */
590    
591    #ifdef SUPPORT_LIBREADLINE
592        if (isatty(fileno(f)))
593          {
594          size_t len;
595          char *s = readline(prompt);
596          if (s == NULL) return (here == start)? NULL : start;
597          len = strlen(s);
598          if (len > 0) add_history(s);
599          if (len > rlen - 1) len = rlen - 1;
600          memcpy(here, s, len);
601          here[len] = '\n';
602          here[len+1] = 0;
603          free(s);
604          }
605        else
606    #endif
607    
608        /* Read the next line by normal means, prompting if the file is stdin. */
609    
610          {
611          if (f == stdin) printf("%s", prompt);
612          if (fgets((char *)here, rlen,  f) == NULL)
613            return (here == start)? NULL : start;
614          }
615    
616      dlen = (int)strlen((char *)here);      dlen = (int)strlen((char *)here);
617      if (dlen > 0 && here[dlen - 1] == '\n') return start;      if (dlen > 0 && here[dlen - 1] == '\n') return start;
618      here += dlen;      here += dlen;
# Line 694  return 0; Line 1098  return 0;
1098  *         Check newline indicator                *  *         Check newline indicator                *
1099  *************************************************/  *************************************************/
1100    
1101  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1102  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1103    
1104  Arguments:  Arguments:
1105    p           points after the leading '<'    p           points after the leading '<'
# Line 728  return 0; Line 1131  return 0;
1131  static void  static void
1132  usage(void)  usage(void)
1133  {  {
1134  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1135    printf("Input and output default to stdin and stdout.\n");
1136    #ifdef SUPPORT_LIBREADLINE
1137    printf("If input is a terminal, readline() is used to read from it.\n");
1138    #else
1139    printf("This version of pcretest is not linked with readline().\n");
1140    #endif
1141    printf("\nOptions:\n");
1142  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1143  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1144  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 737  printf("  -dfa     force DFA matching fo Line 1147  printf("  -dfa     force DFA matching fo
1147  #endif  #endif
1148  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
1149  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
1150           "  -M       find MATCH_LIMIT minimum for each subject\n"
1151         "  -m       output memory used information\n"         "  -m       output memory used information\n"
1152         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
1153  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 766  int main(int argc, char **argv) Line 1177  int main(int argc, char **argv)
1177  FILE *infile = stdin;  FILE *infile = stdin;
1178  int options = 0;  int options = 0;
1179  int study_options = 0;  int study_options = 0;
1180    int default_find_match_limit = FALSE;
1181  int op = 1;  int op = 1;
1182  int timeit = 0;  int timeit = 0;
1183  int timeitm = 0;  int timeitm = 0;
# Line 825  while (argc > 1 && argv[op][0] == '-') Line 1237  while (argc > 1 && argv[op][0] == '-')
1237    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1238    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1239    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1240      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1241  #if !defined NODFA  #if !defined NODFA
1242    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1243  #endif  #endif
# Line 877  while (argc > 1 && argv[op][0] == '-') Line 1290  while (argc > 1 && argv[op][0] == '-')
1290    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1291      {      {
1292      int rc;      int rc;
1293        unsigned long int lrc;
1294      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1295      printf("Compiled with\n");      printf("Compiled with\n");
1296      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 884  while (argc > 1 && argv[op][0] == '-') Line 1298  while (argc > 1 && argv[op][0] == '-')
1298      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1299      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1300      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1301      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1302        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1303        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1304          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1305        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
1306        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1307      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)pcre_config(PCRE_CONFIG_BSR, &rc);
# Line 895  while (argc > 1 && argv[op][0] == '-') Line 1311  while (argc > 1 && argv[op][0] == '-')
1311      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1312      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1313      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1314      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1315      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1316      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1317      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1318      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1319      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1320      goto EXIT;      goto EXIT;
# Line 980  while (!done) Line 1396  while (!done)
1396  #endif  #endif
1397    
1398    const char *error;    const char *error;
1399      unsigned char *markptr;
1400    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1401    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1402    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1403    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1404    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1405      int do_mark = 0;
1406    int do_study = 0;    int do_study = 0;
1407    int do_debug = debug;    int do_debug = debug;
1408    int do_G = 0;    int do_G = 0;
# Line 997  while (!done) Line 1415  while (!done)
1415    use_utf8 = 0;    use_utf8 = 0;
1416    debug_lengths = 1;    debug_lengths = 1;
1417    
1418    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1419    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1420    fflush(outfile);    fflush(outfile);
1421    
# Line 1098  while (!done) Line 1515  while (!done)
1515    
1516    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1517      {      {
1518      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1519      goto SKIP_DATA;      goto SKIP_DATA;
1520      }      }
1521    
1522    pp = p;    pp = p;
1523    poffset = p - buffer;    poffset = (int)(p - buffer);
1524    
1525    for(;;)    for(;;)
1526      {      {
# Line 1114  while (!done) Line 1531  while (!done)
1531        pp++;        pp++;
1532        }        }
1533      if (*pp != 0) break;      if (*pp != 0) break;
1534      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1535        {        {
1536        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1537        done = 1;        done = 1;
# Line 1168  while (!done) Line 1584  while (!done)
1584        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1585        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1586        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1587          case 'K': do_mark = 1; break;
1588        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1589        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1590    
# Line 1177  while (!done) Line 1594  while (!done)
1594    
1595        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1596        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1597          case 'W': options |= PCRE_UCP; break;
1598        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1599          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1600        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1601        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1602        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1603    
1604          case 'T':
1605          switch (*pp++)
1606            {
1607            case '0': tables = tables0; break;
1608            case '1': tables = tables1; break;
1609    
1610            case '\r':
1611            case '\n':
1612            case ' ':
1613            case 0:
1614            fprintf(outfile, "** Missing table number after /T\n");
1615            goto SKIP_DATA;
1616    
1617            default:
1618            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1619            goto SKIP_DATA;
1620            }
1621          break;
1622    
1623        case 'L':        case 'L':
1624        ppp = pp;        ppp = pp;
1625        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1207  while (!done) Line 1645  while (!done)
1645    
1646        case '<':        case '<':
1647          {          {
1648          int x = check_newline(pp, outfile);          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1649          if (x == 0) goto SKIP_DATA;            {
1650          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
1651          while (*pp++ != '>');            pp += 3;
1652              }
1653            else
1654              {
1655              int x = check_newline(pp, outfile);
1656              if (x == 0) goto SKIP_DATA;
1657              options |= x;
1658              while (*pp++ != '>');
1659              }
1660          }          }
1661        break;        break;
1662    
# Line 1240  while (!done) Line 1686  while (!done)
1686      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1687      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1688      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1689        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1690        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1691    
1692      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1693    
# Line 1260  while (!done) Line 1708  while (!done)
1708  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1709    
1710      {      {
1711        unsigned long int get_options;
1712    
1713      if (timeit > 0)      if (timeit > 0)
1714        {        {
1715        register int i;        register int i;
# Line 1289  while (!done) Line 1739  while (!done)
1739          {          {
1740          for (;;)          for (;;)
1741            {            {
1742            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1743              {              {
1744              done = 1;              done = 1;
1745              goto CONTINUE;              goto CONTINUE;
# Line 1303  while (!done) Line 1753  while (!done)
1753        goto CONTINUE;        goto CONTINUE;
1754        }        }
1755    
1756      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1757      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1758      returns only limited data. Check that it agrees with the newer one. */      lines. */
1759    
1760        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1761        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1762    
1763        /* Print information if required. There are now two info-returning
1764        functions. The old one has a limited interface and returns only limited
1765        data. Check that it agrees with the newer one. */
1766    
1767      if (log_store)      if (log_store)
1768        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1344  while (!done) Line 1801  while (!done)
1801          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1802        }        }
1803    
1804        /* If /K was present, we set up for handling MARK data. */
1805    
1806        if (do_mark)
1807          {
1808          if (extra == NULL)
1809            {
1810            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1811            extra->flags = 0;
1812            }
1813          extra->mark = &markptr;
1814          extra->flags |= PCRE_EXTRA_MARK;
1815          }
1816    
1817      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1818      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1819      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 1352  while (!done) Line 1822  while (!done)
1822      if (do_flip)      if (do_flip)
1823        {        {
1824        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1825        rre->magic_number =        rre->magic_number =
1826          byteflip(rre->magic_number, sizeof(rre->magic_number));          byteflip(rre->magic_number, sizeof(rre->magic_number));
1827        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1828        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1829        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1830        rre->top_bracket =        rre->top_bracket =
1831          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1832        rre->top_backref =        rre->top_backref =
1833          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1834        rre->first_byte =        rre->first_byte =
1835          (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1836        rre->req_byte =        rre->req_byte =
1837          (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));          (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1838        rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,        rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1839          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1840        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1841          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1842        rre->name_count = (pcre_uint16)byteflip(rre->name_count,        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1843          sizeof(rre->name_count));          sizeof(rre->name_count));
1844    
1845        if (extra != NULL)        if (extra != NULL)
1846          {          {
1847          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1848          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1849          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1850            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1851          }          }
1852        }        }
1853    
# Line 1390  while (!done) Line 1861  while (!done)
1861        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1862        }        }
1863    
1864        /* We already have the options in get_options (see above) */
1865    
1866      if (do_showinfo)      if (do_showinfo)
1867        {        {
1868        unsigned long int get_options, all_options;        unsigned long int all_options;
1869  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1870        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1871  #endif  #endif
# Line 1401  while (!done) Line 1874  while (!done)
1874        int nameentrysize, namecount;        int nameentrysize, namecount;
1875        const uschar *nametable;        const uschar *nametable;
1876    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1877        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1878        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1879        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1461  while (!done) Line 1933  while (!done)
1933        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1934    
1935        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1936          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1937            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1938            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1939            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1475  while (!done) Line 1947  while (!done)
1947            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1948            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1949            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1950              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1951            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1952              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1953            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1954    
1955        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1552  while (!done) Line 2026  while (!done)
2026          else          else
2027            {            {
2028            uschar *start_bits = NULL;            uschar *start_bits = NULL;
2029            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2030    
2031              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2032              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2033    
2034              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2035            if (start_bits == NULL)            if (start_bits == NULL)
2036              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2037            else            else
2038              {              {
2039              int i;              int i;
# Line 1606  while (!done) Line 2084  while (!done)
2084          sbuf[1] = (uschar)((true_size >> 16) & 255);          sbuf[1] = (uschar)((true_size >> 16) & 255);
2085          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[2] = (uschar)((true_size >>  8) & 255);
2086          sbuf[3] = (uschar)((true_size) & 255);          sbuf[3] = (uschar)((true_size) & 255);
2087    
2088          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2089          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2090          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
# Line 1637  while (!done) Line 2115  while (!done)
2115    
2116        new_free(re);        new_free(re);
2117        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2118        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2119            {
2120            new_free((void *)tables);
2121            setlocale(LC_CTYPE, "C");
2122            locale_set = 0;
2123            }
2124        continue;  /* With next regex */        continue;  /* With next regex */
2125        }        }
2126      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1654  while (!done) Line 2137  while (!done)
2137      int callout_data_set = 0;      int callout_data_set = 0;
2138      int count, c;      int count, c;
2139      int copystrings = 0;      int copystrings = 0;
2140      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2141      int getstrings = 0;      int getstrings = 0;
2142      int getlist = 0;      int getlist = 0;
2143      int gmatched = 0;      int gmatched = 0;
2144      int start_offset = 0;      int start_offset = 0;
2145        int start_offset_sign = 1;
2146      int g_notempty = 0;      int g_notempty = 0;
2147      int use_dfa = 0;      int use_dfa = 0;
2148    
# Line 1684  while (!done) Line 2168  while (!done)
2168      len = 0;      len = 0;
2169      for (;;)      for (;;)
2170        {        {
2171        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
2172          {          {
2173          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2174              {
2175              fprintf(outfile, "\n");
2176              break;
2177              }
2178          done = 1;          done = 1;
2179          goto CONTINUE;          goto CONTINUE;
2180          }          }
# Line 1753  while (!done) Line 2240  while (!done)
2240              {              {
2241              unsigned char buff8[8];              unsigned char buff8[8];
2242              int ii, utn;              int ii, utn;
2243              utn = ord2utf8(c, buff8);              if (use_utf8)
2244              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2245              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2246                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2247                  c = buff8[ii];   /* Last byte */
2248                  }
2249                else
2250                 {
2251                 if (c > 255)
2252                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2253                     "UTF-8 mode is not enabled.\n"
2254                     "** Truncation will probably give the wrong result.\n", c);
2255                 }
2256              p = pt + 1;              p = pt + 1;
2257              break;              break;
2258              }              }
# Line 1778  while (!done) Line 2275  while (!done)
2275          continue;          continue;
2276    
2277          case '>':          case '>':
2278            if (*p == '-')
2279              {
2280              start_offset_sign = -1;
2281              p++;
2282              }
2283          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2284            start_offset *= start_offset_sign;
2285          continue;          continue;
2286    
2287          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1851  while (!done) Line 2354  while (!done)
2354  #endif  #endif
2355            use_dfa = 1;            use_dfa = 1;
2356          continue;          continue;
2357    #endif
2358    
2359    #if !defined NODFA
2360          case 'F':          case 'F':
2361          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2362          continue;          continue;
# Line 1885  while (!done) Line 2390  while (!done)
2390          continue;          continue;
2391    
2392          case 'N':          case 'N':
2393          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2394              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2395            else
2396              options |= PCRE_NOTEMPTY;
2397          continue;          continue;
2398    
2399          case 'O':          case 'O':
# Line 1908  while (!done) Line 2416  while (!done)
2416          continue;          continue;
2417    
2418          case 'P':          case 'P':
2419          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2420              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2421          continue;          continue;
2422    
2423          case 'Q':          case 'Q':
# Line 1943  while (!done) Line 2452  while (!done)
2452          show_malloc = 1;          show_malloc = 1;
2453          continue;          continue;
2454    
2455            case 'Y':
2456            options |= PCRE_NO_START_OPTIMIZE;
2457            continue;
2458    
2459          case 'Z':          case 'Z':
2460          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2461          continue;          continue;
# Line 1963  while (!done) Line 2476  while (!done)
2476        *q++ = c;        *q++ = c;
2477        }        }
2478      *q = 0;      *q = 0;
2479      len = q - dbuffer;      len = (int)(q - dbuffer);
2480    
2481        /* Move the data to the end of the buffer so that a read over the end of
2482        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2483        we are using the POSIX interface, we must include the terminating zero. */
2484    
2485    #if !defined NOPOSIX
2486        if (posix || do_posix)
2487          {
2488          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2489          bptr += buffer_size - len - 1;
2490          }
2491        else
2492    #endif
2493          {
2494          memmove(bptr + buffer_size - len, bptr, len);
2495          bptr += buffer_size - len;
2496          }
2497    
2498      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2499        {        {
# Line 1984  while (!done) Line 2514  while (!done)
2514          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2515        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2516        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2517          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2518    
2519        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2520    
# Line 2028  while (!done) Line 2559  while (!done)
2559    
2560      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2561        {        {
2562          markptr = NULL;
2563    
2564        if (timeitm > 0)        if (timeitm > 0)
2565          {          {
2566          register int i;          register int i;
# Line 2039  while (!done) Line 2572  while (!done)
2572            {            {
2573            int workspace[1000];            int workspace[1000];
2574            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2575              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2576                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2577                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2578            }            }
# Line 2102  while (!done) Line 2635  while (!done)
2635        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2636          {          {
2637          int workspace[1000];          int workspace[1000];
2638          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2640            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2641          if (count == 0)          if (count == 0)
# Line 2173  while (!done) Line 2706  while (!done)
2706              }              }
2707            }            }
2708    
2709            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2710    
2711          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2712            {            {
2713            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2256  while (!done) Line 2791  while (!done)
2791    
2792        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2793          {          {
2794          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2795  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
2796          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
2797            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
2798              bptr + use_offsets[0]);            fprintf(outfile, ": ");
2799  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2800                outfile);
2801              }
2802          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2803          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2804          }          }
# Line 2271  while (!done) Line 2808  while (!done)
2808        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2809        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2810    
2811        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
2812        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
2813        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
2814        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
2815        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
2816          newline setting in the pattern; if none was set, use pcre_config() to
2817          find the default.
2818    
2819        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2820        character, not one byte. */        character, not one byte. */
# Line 2291  while (!done) Line 2830  while (!done)
2830              {              {
2831              int d;              int d;
2832              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2833              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
2834                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
2835                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
2836                        (d == 10)? PCRE_NEWLINE_LF :
2837                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2838                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
2839                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2840              }              }
2841            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2842                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2843                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2844                &&                &&
2845                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2308  while (!done) Line 2850  while (!done)
2850              {              {
2851              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2852                {                {
2853                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2854                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
2855                }                }
2856              }              }
2857            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
# Line 2320  while (!done) Line 2860  while (!done)
2860            {            {
2861            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2862              {              {
2863              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2864                  {
2865                  if (markptr == NULL) fprintf(outfile, "No match\n");
2866                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2867                  }
2868              }              }
2869            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2870            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 2332  while (!done) Line 2876  while (!done)
2876        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2877    
2878        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2879        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2880        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2881        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2882        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2883        character. */        character. */
2884    
# Line 2343  while (!done) Line 2887  while (!done)
2887        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2888          {          {
2889          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2890          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2891          }          }
2892    
2893        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2370  while (!done) Line 2914  while (!done)
2914    
2915    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
2916    if (extra != NULL) new_free(extra);    if (extra != NULL) new_free(extra);
2917    if (tables != NULL)    if (locale_set)
2918      {      {
2919      new_free((void *)tables);      new_free((void *)tables);
2920      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");

Legend:
Removed from v.255  
changed lines
  Added in v.580

  ViewVC Help
Powered by ViewVC 1.1.5