/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 411 by ph10, Fri Apr 10 15:40:21 2009 UTC revision 580 by ph10, Fri Nov 26 11:16:43 2010 UTC
# Line 79  input mode under Windows. */ Line 79  input mode under Windows. */
79  #define fileno _fileno  #define fileno _fileno
80  #endif  #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
92  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 118  external symbols to prevent clashes. */ Line 126  external symbols to prevent clashes. */
126    
127  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
128  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
129  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
130    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We uses it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
131    
132    #define COMPILING_PCRETEST
133  #include "pcre_printint.src"  #include "pcre_printint.src"
134    
135  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
136    output character as-is or as a hex value when showing compiled patterns, is
137    contained in the printint.src file. We use it here also, in cases when the
138    locale has not been explicitly changed, so as to get consistent output from
139    systems that differ in their output from isprint() even in the "C" locale. */
140    
141    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142    
143  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
144  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 189  static uschar *dbuffer = NULL; Line 198  static uschar *dbuffer = NULL;
198  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
199    
200    
201    /*************************************************
202    *         Alternate character tables             *
203    *************************************************/
204    
205    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
206    using the default tables of the library. However, the T option can be used to
207    select alternate sets of tables, for different kinds of testing. Note also that
208    the L (locale) option also adjusts the tables. */
209    
210    /* This is the set of tables distributed as default with PCRE. It recognizes
211    only ASCII characters. */
212    
213    static const unsigned char tables0[] = {
214    
215    /* This table is a lower casing table. */
216    
217        0,  1,  2,  3,  4,  5,  6,  7,
218        8,  9, 10, 11, 12, 13, 14, 15,
219       16, 17, 18, 19, 20, 21, 22, 23,
220       24, 25, 26, 27, 28, 29, 30, 31,
221       32, 33, 34, 35, 36, 37, 38, 39,
222       40, 41, 42, 43, 44, 45, 46, 47,
223       48, 49, 50, 51, 52, 53, 54, 55,
224       56, 57, 58, 59, 60, 61, 62, 63,
225       64, 97, 98, 99,100,101,102,103,  /* entries 65-90 ('A'-'Z') hold 97-122 ('a'-'z') */
226      104,105,106,107,108,109,110,111,
227      112,113,114,115,116,117,118,119,
228      120,121,122, 91, 92, 93, 94, 95,
229       96, 97, 98, 99,100,101,102,103,
230      104,105,106,107,108,109,110,111,
231      112,113,114,115,116,117,118,119,
232      120,121,122,123,124,125,126,127,
233      128,129,130,131,132,133,134,135,  /* entries 128-255 are all the identity */
234      136,137,138,139,140,141,142,143,
235      144,145,146,147,148,149,150,151,
236      152,153,154,155,156,157,158,159,
237      160,161,162,163,164,165,166,167,
238      168,169,170,171,172,173,174,175,
239      176,177,178,179,180,181,182,183,
240      184,185,186,187,188,189,190,191,
241      192,193,194,195,196,197,198,199,
242      200,201,202,203,204,205,206,207,
243      208,209,210,211,212,213,214,215,
244      216,217,218,219,220,221,222,223,
245      224,225,226,227,228,229,230,231,
246      232,233,234,235,236,237,238,239,
247      240,241,242,243,244,245,246,247,
248      248,249,250,251,252,253,254,255,
249    
250    /* This table is a case flipping table. */
251    
252        0,  1,  2,  3,  4,  5,  6,  7,
253        8,  9, 10, 11, 12, 13, 14, 15,
254       16, 17, 18, 19, 20, 21, 22, 23,
255       24, 25, 26, 27, 28, 29, 30, 31,
256       32, 33, 34, 35, 36, 37, 38, 39,
257       40, 41, 42, 43, 44, 45, 46, 47,
258       48, 49, 50, 51, 52, 53, 54, 55,
259       56, 57, 58, 59, 60, 61, 62, 63,
260       64, 97, 98, 99,100,101,102,103,  /* entries 65-90 ('A'-'Z') hold 97-122 ('a'-'z') */
261      104,105,106,107,108,109,110,111,
262      112,113,114,115,116,117,118,119,
263      120,121,122, 91, 92, 93, 94, 95,
264       96, 65, 66, 67, 68, 69, 70, 71,  /* entries 97-122 ('a'-'z') hold 65-90 ('A'-'Z') */
265       72, 73, 74, 75, 76, 77, 78, 79,
266       80, 81, 82, 83, 84, 85, 86, 87,
267       88, 89, 90,123,124,125,126,127,
268      128,129,130,131,132,133,134,135,  /* entries 128-255 are all the identity */
269      136,137,138,139,140,141,142,143,
270      144,145,146,147,148,149,150,151,
271      152,153,154,155,156,157,158,159,
272      160,161,162,163,164,165,166,167,
273      168,169,170,171,172,173,174,175,
274      176,177,178,179,180,181,182,183,
275      184,185,186,187,188,189,190,191,
276      192,193,194,195,196,197,198,199,
277      200,201,202,203,204,205,206,207,
278      208,209,210,211,212,213,214,215,
279      216,217,218,219,220,221,222,223,
280      224,225,226,227,228,229,230,231,
281      232,233,234,235,236,237,238,239,
282      240,241,242,243,244,245,246,247,
283      248,249,250,251,252,253,254,255,
284    
285    /* This table contains bit maps for various character classes. Each map is 32
286    bytes long and the bits run from the least significant end of each byte. The
287    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
288    graph, print, punct, and cntrl. Other classes are built from combinations. */
289    
290      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space: 9-13 and 32 */
291      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294    
295      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit: 0-9, A-F, a-f */
296      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
299    
300      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit: 0-9 */
301      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
304    
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper: A-Z */
306      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
307      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
309    
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower: a-z */
311      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
312      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
314    
315      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word: 0-9, A-Z, '_', a-z */
316      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
317      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
319    
320      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph: 33-126 */
321      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
322      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324    
325      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print: 32-126 */
326      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
327      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
330      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct */
331      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
332      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
335      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl: 0-31 and 127 */
336      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
337      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
340    /* This table identifies various classes of character by individual bits:
341      0x01   white space character
342      0x02   letter
343      0x04   decimal digit
344      0x08   hexadecimal digit
345      0x10   alphanumeric or '_'
346      0x80   regular expression metacharacter or binary zero
347    */
348    
349      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
350      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
353      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
354      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
355      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
356      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
357      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
358      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
359      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
360      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
361      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
362      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
363      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
364      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
375      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
378      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
379      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
380      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
381    
382    /* This is a set of tables that came originally from a Windows user. It seems to
383    be at least an approximation of ISO 8859. In particular, there are characters
384    greater than 128 that are marked as spaces, letters, etc. */
385    
386    static const unsigned char tables1[] = {
387    0,1,2,3,4,5,6,7,  /* Lower casing table: 256 entries, eight per line */
388    8,9,10,11,12,13,14,15,
389    16,17,18,19,20,21,22,23,
390    24,25,26,27,28,29,30,31,
391    32,33,34,35,36,37,38,39,
392    40,41,42,43,44,45,46,47,
393    48,49,50,51,52,53,54,55,
394    56,57,58,59,60,61,62,63,
395    64,97,98,99,100,101,102,103,  /* entries 65-90 ('A'-'Z') hold 97-122 ('a'-'z') */
396    104,105,106,107,108,109,110,111,
397    112,113,114,115,116,117,118,119,
398    120,121,122,91,92,93,94,95,
399    96,97,98,99,100,101,102,103,
400    104,105,106,107,108,109,110,111,
401    112,113,114,115,116,117,118,119,
402    120,121,122,123,124,125,126,127,
403    128,129,130,131,132,133,134,135,
404    136,137,138,139,140,141,142,143,
405    144,145,146,147,148,149,150,151,
406    152,153,154,155,156,157,158,159,
407    160,161,162,163,164,165,166,167,
408    168,169,170,171,172,173,174,175,
409    176,177,178,179,180,181,182,183,
410    184,185,186,187,188,189,190,191,
411    224,225,226,227,228,229,230,231,  /* entries 192-222 hold value+32, except 215 which is unchanged */
412    232,233,234,235,236,237,238,239,
413    240,241,242,243,244,245,246,215,
414    248,249,250,251,252,253,254,223,  /* entry 223 is unchanged */
415    224,225,226,227,228,229,230,231,
416    232,233,234,235,236,237,238,239,
417    240,241,242,243,244,245,246,247,
418    248,249,250,251,252,253,254,255,
419    0,1,2,3,4,5,6,7,  /* Case flipping table: 256 entries, eight per line */
420    8,9,10,11,12,13,14,15,
421    16,17,18,19,20,21,22,23,
422    24,25,26,27,28,29,30,31,
423    32,33,34,35,36,37,38,39,
424    40,41,42,43,44,45,46,47,
425    48,49,50,51,52,53,54,55,
426    56,57,58,59,60,61,62,63,
427    64,97,98,99,100,101,102,103,  /* entries 65-90 ('A'-'Z') hold 97-122 ('a'-'z') */
428    104,105,106,107,108,109,110,111,
429    112,113,114,115,116,117,118,119,
430    120,121,122,91,92,93,94,95,
431    96,65,66,67,68,69,70,71,  /* entries 97-122 ('a'-'z') hold 65-90 ('A'-'Z') */
432    72,73,74,75,76,77,78,79,
433    80,81,82,83,84,85,86,87,
434    88,89,90,123,124,125,126,127,
435    128,129,130,131,132,133,134,135,
436    136,137,138,139,140,141,142,143,
437    144,145,146,147,148,149,150,151,
438    152,153,154,155,156,157,158,159,
439    160,161,162,163,164,165,166,167,
440    168,169,170,171,172,173,174,175,
441    176,177,178,179,180,181,182,183,
442    184,185,186,187,188,189,190,191,
443    224,225,226,227,228,229,230,231,  /* entries 192-222 hold value+32, except 215 */
444    232,233,234,235,236,237,238,239,
445    240,241,242,243,244,245,246,215,
446    248,249,250,251,252,253,254,223,
447    192,193,194,195,196,197,198,199,  /* entries 224-254 hold value-32, except 247; 255 is unchanged */
448    200,201,202,203,204,205,206,207,
449    208,209,210,211,212,213,214,247,
450    216,217,218,219,220,221,222,255,
451    0,62,0,0,1,0,0,0,  /* Class bit maps: ten maps of 32 bytes (four lines) each, in the order described for tables0. This one: space */
452    0,0,0,0,0,0,0,0,
453    32,0,0,0,1,0,0,0,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,255,3,  /* xdigit */
456    126,0,0,0,126,0,0,0,
457    0,0,0,0,0,0,0,0,
458    0,0,0,0,0,0,0,0,
459    0,0,0,0,0,0,255,3,  /* digit */
460    0,0,0,0,0,0,0,0,
461    0,0,0,0,0,0,12,2,
462    0,0,0,0,0,0,0,0,
463    0,0,0,0,0,0,0,0,  /* upper */
464    254,255,255,7,0,0,0,0,
465    0,0,0,0,0,0,0,0,
466    255,255,127,127,0,0,0,0,
467    0,0,0,0,0,0,0,0,  /* lower */
468    0,0,0,0,254,255,255,7,
469    0,0,0,0,0,4,32,4,
470    0,0,0,128,255,255,127,255,
471    0,0,0,0,0,0,255,3,  /* word */
472    254,255,255,135,254,255,255,7,
473    0,0,0,0,0,4,44,6,
474    255,255,127,255,255,255,127,255,
475    0,0,0,0,254,255,255,255,  /* graph */
476    255,255,255,255,255,255,255,127,
477    0,0,0,0,254,255,255,255,
478    255,255,255,255,255,255,255,255,
479    0,2,0,0,255,255,255,255,  /* print */
480    255,255,255,255,255,255,255,127,
481    0,0,0,0,255,255,255,255,
482    255,255,255,255,255,255,255,255,
483    0,0,0,0,254,255,0,252,  /* punct */
484    1,0,0,248,1,0,0,120,
485    0,0,0,0,254,255,255,255,
486    0,0,128,0,0,0,128,0,
487    255,255,255,255,0,0,0,0,  /* cntrl */
488    0,0,0,0,0,0,0,128,
489    255,255,255,255,0,0,0,0,
490    0,0,0,0,0,0,0,0,
491    128,0,0,0,0,0,0,0,  /* Character type table: 256 entries, using the same bit values as the tables0 type table (1 space, 2 letter, 4 digit, 8 hex digit, 16 word, 128 meta) */
492    0,1,1,0,1,1,0,0,
493    0,0,0,0,0,0,0,0,
494    0,0,0,0,0,0,0,0,
495    1,0,0,0,128,0,0,0,
496    128,128,128,128,0,0,128,0,
497    28,28,28,28,28,28,28,28,
498    28,28,0,0,0,0,0,128,
499    0,26,26,26,26,26,26,18,
500    18,18,18,18,18,18,18,18,
501    18,18,18,18,18,18,18,18,
502    18,18,18,128,128,0,128,16,
503    0,26,26,26,26,26,26,18,
504    18,18,18,18,18,18,18,18,
505    18,18,18,18,18,18,18,18,
506    18,18,18,128,128,0,0,0,
507    0,0,0,0,0,1,0,0,  /* entries 128-255: unlike tables0, some of these carry class bits */
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,0,0,
510    0,0,0,0,0,0,0,0,
511    1,0,0,0,0,0,0,0,
512    0,0,18,0,0,0,0,0,
513    0,0,20,20,0,18,0,0,
514    0,20,18,0,0,0,0,0,
515    18,18,18,18,18,18,18,18,
516    18,18,18,18,18,18,18,18,
517    18,18,18,18,18,18,18,0,
518    18,18,18,18,18,18,18,18,
519    18,18,18,18,18,18,18,18,
520    18,18,18,18,18,18,18,18,
521    18,18,18,18,18,18,18,0,
522    18,18,18,18,18,18,18,18
523    };
524    
525    
526    
527    
528    #ifndef HAVE_STRERROR
529    /*************************************************
530    *     Provide strerror() for non-ANSI libraries  *
531    *************************************************/
532    
533    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
534    in their libraries, but can provide the same facility by this simple
535    alternative function. */
536    
537    extern int   sys_nerr;
538    extern char *sys_errlist[];
539    
540    char *   /* Fallback strerror(): returns the message text for error number n */
541    strerror(int n)
542    {
543    if (n < 0 || n >= sys_nerr) return "unknown error number";   /* n is negative or not below sys_nerr: fixed fallback text */
544    return sys_errlist[n];   /* otherwise the message comes straight from the system's table */
545    }
546    #endif /* HAVE_STRERROR */
547    
548    
549    
550    
551  /*************************************************  /*************************************************
552  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 220  uschar *here = start; Line 578  uschar *here = start;
578    
579  for (;;)  for (;;)
580    {    {
581    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
582    
583    if (rlen > 1000)    if (rlen > 1000)
584      {      {
# Line 250  for (;;) Line 608  for (;;)
608      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
609    
610        {        {
611        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
612        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
613          return (here == start)? NULL : start;          return (here == start)? NULL : start;
614        }        }
# Line 740  return 0; Line 1098  return 0;
1098  *         Check newline indicator                *  *         Check newline indicator                *
1099  *************************************************/  *************************************************/
1100    
1101  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1102  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1103    
1104  Arguments:  Arguments:
1105    p           points after the leading '<'    p           points after the leading '<'
# Line 1039  while (!done) Line 1396  while (!done)
1396  #endif  #endif
1397    
1398    const char *error;    const char *error;
1399      unsigned char *markptr;
1400    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1401    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1402    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1403    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1404    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1405      int do_mark = 0;
1406    int do_study = 0;    int do_study = 0;
1407    int do_debug = debug;    int do_debug = debug;
1408    int do_G = 0;    int do_G = 0;
# Line 1161  while (!done) Line 1520  while (!done)
1520      }      }
1521    
1522    pp = p;    pp = p;
1523    poffset = p - buffer;    poffset = (int)(p - buffer);
1524    
1525    for(;;)    for(;;)
1526      {      {
# Line 1225  while (!done) Line 1584  while (!done)
1584        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1585        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1586        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1587          case 'K': do_mark = 1; break;
1588        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1589        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1590    
# Line 1234  while (!done) Line 1594  while (!done)
1594    
1595        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1596        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1597          case 'W': options |= PCRE_UCP; break;
1598        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1599          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1600        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1601        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1602        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1603    
1604          case 'T':
1605          switch (*pp++)
1606            {
1607            case '0': tables = tables0; break;
1608            case '1': tables = tables1; break;
1609    
1610            case '\r':
1611            case '\n':
1612            case ' ':
1613            case 0:
1614            fprintf(outfile, "** Missing table number after /T\n");
1615            goto SKIP_DATA;
1616    
1617            default:
1618            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1619            goto SKIP_DATA;
1620            }
1621          break;
1622    
1623        case 'L':        case 'L':
1624        ppp = pp;        ppp = pp;
1625        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1264  while (!done) Line 1645  while (!done)
1645    
1646        case '<':        case '<':
1647          {          {
1648          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1649            {            {
1650            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
1651            pp += 3;            pp += 3;
# Line 1305  while (!done) Line 1686  while (!done)
1686      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1687      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1688      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1689        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1690        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1691    
1692      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1693    
# Line 1325  while (!done) Line 1708  while (!done)
1708  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1709    
1710      {      {
1711        unsigned long int get_options;
1712    
1713      if (timeit > 0)      if (timeit > 0)
1714        {        {
1715        register int i;        register int i;
# Line 1368  while (!done) Line 1753  while (!done)
1753        goto CONTINUE;        goto CONTINUE;
1754        }        }
1755    
1756      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1757      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1758      returns only limited data. Check that it agrees with the newer one. */      lines. */
1759    
1760        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1761        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1762    
1763        /* Print information if required. There are now two info-returning
1764        functions. The old one has a limited interface and returns only limited
1765        data. Check that it agrees with the newer one. */
1766    
1767      if (log_store)      if (log_store)
1768        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1409  while (!done) Line 1801  while (!done)
1801          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1802        }        }
1803    
1804        /* If /K was present, we set up for handling MARK data. */
1805    
1806        if (do_mark)
1807          {
1808          if (extra == NULL)
1809            {
1810            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1811            extra->flags = 0;
1812            }
1813          extra->mark = &markptr;
1814          extra->flags |= PCRE_EXTRA_MARK;
1815          }
1816    
1817      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1818      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1819      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 1441  while (!done) Line 1846  while (!done)
1846          {          {
1847          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1848          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1849          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1850            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1851          }          }
1852        }        }
1853    
# Line 1455  while (!done) Line 1861  while (!done)
1861        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1862        }        }
1863    
1864        /* We already have the options in get_options (see above) */
1865    
1866      if (do_showinfo)      if (do_showinfo)
1867        {        {
1868        unsigned long int get_options, all_options;        unsigned long int all_options;
1869  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1870        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1871  #endif  #endif
# Line 1466  while (!done) Line 1874  while (!done)
1874        int nameentrysize, namecount;        int nameentrysize, namecount;
1875        const uschar *nametable;        const uschar *nametable;
1876    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1877        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1878        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1879        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1526  while (!done) Line 1933  while (!done)
1933        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1934    
1935        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1936          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1937            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1938            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1939            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1540  while (!done) Line 1947  while (!done)
1947            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1948            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1949            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1950              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1951            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1952              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1953            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1954    
1955        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1617  while (!done) Line 2026  while (!done)
2026          else          else
2027            {            {
2028            uschar *start_bits = NULL;            uschar *start_bits = NULL;
2029            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2030    
2031              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2032              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2033    
2034              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2035            if (start_bits == NULL)            if (start_bits == NULL)
2036              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2037            else            else
2038              {              {
2039              int i;              int i;
# Line 1702  while (!done) Line 2115  while (!done)
2115    
2116        new_free(re);        new_free(re);
2117        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2118        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2119            {
2120            new_free((void *)tables);
2121            setlocale(LC_CTYPE, "C");
2122            locale_set = 0;
2123            }
2124        continue;  /* With next regex */        continue;  /* With next regex */
2125        }        }
2126      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1724  while (!done) Line 2142  while (!done)
2142      int getlist = 0;      int getlist = 0;
2143      int gmatched = 0;      int gmatched = 0;
2144      int start_offset = 0;      int start_offset = 0;
2145        int start_offset_sign = 1;
2146      int g_notempty = 0;      int g_notempty = 0;
2147      int use_dfa = 0;      int use_dfa = 0;
2148    
# Line 1751  while (!done) Line 2170  while (!done)
2170        {        {
2171        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2172          {          {
2173          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2174              {
2175              fprintf(outfile, "\n");
2176              break;
2177              }
2178          done = 1;          done = 1;
2179          goto CONTINUE;          goto CONTINUE;
2180          }          }
# Line 1852  while (!done) Line 2275  while (!done)
2275          continue;          continue;
2276    
2277          case '>':          case '>':
2278            if (*p == '-')
2279              {
2280              start_offset_sign = -1;
2281              p++;
2282              }
2283          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2284            start_offset *= start_offset_sign;
2285          continue;          continue;
2286    
2287          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1925  while (!done) Line 2354  while (!done)
2354  #endif  #endif
2355            use_dfa = 1;            use_dfa = 1;
2356          continue;          continue;
2357    #endif
2358    
2359    #if !defined NODFA
2360          case 'F':          case 'F':
2361          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2362          continue;          continue;
# Line 1959  while (!done) Line 2390  while (!done)
2390          continue;          continue;
2391    
2392          case 'N':          case 'N':
2393          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2394              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2395            else
2396              options |= PCRE_NOTEMPTY;
2397          continue;          continue;
2398    
2399          case 'O':          case 'O':
# Line 1982  while (!done) Line 2416  while (!done)
2416          continue;          continue;
2417    
2418          case 'P':          case 'P':
2419          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2420              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2421          continue;          continue;
2422    
2423          case 'Q':          case 'Q':
# Line 2041  while (!done) Line 2476  while (!done)
2476        *q++ = c;        *q++ = c;
2477        }        }
2478      *q = 0;      *q = 0;
2479      len = q - dbuffer;      len = (int)(q - dbuffer);
2480    
2481      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
2482      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
# Line 2124  while (!done) Line 2559  while (!done)
2559    
2560      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2561        {        {
2562          markptr = NULL;
2563    
2564        if (timeitm > 0)        if (timeitm > 0)
2565          {          {
2566          register int i;          register int i;
# Line 2135  while (!done) Line 2572  while (!done)
2572            {            {
2573            int workspace[1000];            int workspace[1000];
2574            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2575              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2576                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2577                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2578            }            }
# Line 2198  while (!done) Line 2635  while (!done)
2635        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2636          {          {
2637          int workspace[1000];          int workspace[1000];
2638          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2640            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2641          if (count == 0)          if (count == 0)
# Line 2269  while (!done) Line 2706  while (!done)
2706              }              }
2707            }            }
2708    
2709            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2710    
2711          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2712            {            {
2713            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2352  while (!done) Line 2791  while (!done)
2791    
2792        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2793          {          {
2794          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2795  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
2796          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
2797            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
2798              bptr + use_offsets[0]);            fprintf(outfile, ": ");
2799  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2800                outfile);
2801              }
2802          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2803          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2804          }          }
# Line 2367  while (!done) Line 2808  while (!done)
2808        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2809        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2810    
2811        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
2812        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
2813        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
2814        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
2815        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
2816          newline setting in the pattern; if none was set, use pcre_config() to
2817          find the default.
2818    
2819        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2820        character, not one byte. */        character, not one byte. */
# Line 2396  while (!done) Line 2839  while (!done)
2839                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2840              }              }
2841            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2842                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2843                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2844                &&                &&
2845                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2406  while (!done) Line 2850  while (!done)
2850              {              {
2851              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2852                {                {
2853                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2854                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
2855                }                }
2856              }              }
2857            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
# Line 2418  while (!done) Line 2860  while (!done)
2860            {            {
2861            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2862              {              {
2863              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2864                  {
2865                  if (markptr == NULL) fprintf(outfile, "No match\n");
2866                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2867                  }
2868              }              }
2869            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2870            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 2430  while (!done) Line 2876  while (!done)
2876        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2877    
2878        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2879        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2880        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2881        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2882        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2883        character. */        character. */
2884    
# Line 2441  while (!done) Line 2887  while (!done)
2887        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2888          {          {
2889          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2890          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2891          }          }
2892    
2893        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2468  while (!done) Line 2914  while (!done)
2914    
2915    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
2916    if (extra != NULL) new_free(extra);    if (extra != NULL) new_free(extra);
2917    if (tables != NULL)    if (locale_set)
2918      {      {
2919      new_free((void *)tables);      new_free((void *)tables);
2920      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");

Legend:
Removed from v.411  
changed lines
  Added in v.580

  ViewVC Help
Powered by ViewVC 1.1.5