/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC revision 579 by ph10, Wed Nov 24 17:39:25 2010 UTC
# Line 118  external symbols to prevent clashes. */ Line 118  external symbols to prevent clashes. */
118    
119  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
120  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
121  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We use it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
123    
124    #define COMPILING_PCRETEST
125  #include "pcre_printint.src"  #include "pcre_printint.src"
126    
127  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 189  static uschar *dbuffer = NULL; Line 190  static uschar *dbuffer = NULL;
190  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
191    
192    
193    /*************************************************
194    *         Alternate character tables             *
195    *************************************************/
196    
197    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198    using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200    the L (locale) option also adjusts the tables. */
201    
202    /* This is the set of tables distributed as default with PCRE. It recognizes
203    only ASCII characters. */
204    
205    static const unsigned char tables0[] = {
206    
207    /* This table is a lower casing table. */
208    
209        0,  1,  2,  3,  4,  5,  6,  7,
210        8,  9, 10, 11, 12, 13, 14, 15,
211       16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515    };
516    
517    
518    
519    
520    #ifndef HAVE_STRERROR
521    /*************************************************
522    *     Provide strerror() for non-ANSI libraries  *
523    *************************************************/
524    
525    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
526    in their libraries, but can provide the same facility by this simple
527    alternative function. */
528    
529    extern int   sys_nerr;
530    extern char *sys_errlist[];
531    
532    char *
533    strerror(int n)
534    {
535    if (n < 0 || n >= sys_nerr) return "unknown error number";
536    return sys_errlist[n];
537    }
538    #endif /* HAVE_STRERROR */
539    
540    
541    
542    
543  /*************************************************  /*************************************************
544  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 220  uschar *here = start; Line 570  uschar *here = start;
570    
571  for (;;)  for (;;)
572    {    {
573    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
574    
575    if (rlen > 1000)    if (rlen > 1000)
576      {      {
# Line 250  for (;;) Line 600  for (;;)
600      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
601    
602        {        {
603        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
604        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
605          return (here == start)? NULL : start;          return (here == start)? NULL : start;
606        }        }
# Line 740  return 0; Line 1090  return 0;
1090  *         Check newline indicator                *  *         Check newline indicator                *
1091  *************************************************/  *************************************************/
1092    
1093  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1094  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1095    
1096  Arguments:  Arguments:
1097    p           points after the leading '<'    p           points after the leading '<'
# Line 1039  while (!done) Line 1388  while (!done)
1388  #endif  #endif
1389    
1390    const char *error;    const char *error;
1391      unsigned char *markptr;
1392    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1393    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1394    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1395    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1396    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1397      int do_mark = 0;
1398    int do_study = 0;    int do_study = 0;
1399    int do_debug = debug;    int do_debug = debug;
1400    int do_G = 0;    int do_G = 0;
# Line 1161  while (!done) Line 1512  while (!done)
1512      }      }
1513    
1514    pp = p;    pp = p;
1515    poffset = p - buffer;    poffset = (int)(p - buffer);
1516    
1517    for(;;)    for(;;)
1518      {      {
# Line 1225  while (!done) Line 1576  while (!done)
1576        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1577        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1578        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1579          case 'K': do_mark = 1; break;
1580        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1581        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1582    
# Line 1234  while (!done) Line 1586  while (!done)
1586    
1587        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1588        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1589          case 'W': options |= PCRE_UCP; break;
1590        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1591          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1592        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1593        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1594        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1595    
1596          case 'T':
1597          switch (*pp++)
1598            {
1599            case '0': tables = tables0; break;
1600            case '1': tables = tables1; break;
1601    
1602            case '\r':
1603            case '\n':
1604            case ' ':
1605            case 0:
1606            fprintf(outfile, "** Missing table number after /T\n");
1607            goto SKIP_DATA;
1608    
1609            default:
1610            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1611            goto SKIP_DATA;
1612            }
1613          break;
1614    
1615        case 'L':        case 'L':
1616        ppp = pp;        ppp = pp;
1617        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1264  while (!done) Line 1637  while (!done)
1637    
1638        case '<':        case '<':
1639          {          {
1640          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1641            {            {
1642            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
1643            pp += 3;            pp += 3;
# Line 1305  while (!done) Line 1678  while (!done)
1678      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1679      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1680      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1681        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1682        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1683    
1684      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1685    
# Line 1418  while (!done) Line 1793  while (!done)
1793          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1794        }        }
1795    
1796        /* If /K was present, we set up for handling MARK data. */
1797    
1798        if (do_mark)
1799          {
1800          if (extra == NULL)
1801            {
1802            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1803            extra->flags = 0;
1804            }
1805          extra->mark = &markptr;
1806          extra->flags |= PCRE_EXTRA_MARK;
1807          }
1808    
1809      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1810      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1811      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 1450  while (!done) Line 1838  while (!done)
1838          {          {
1839          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1840          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1841          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1842            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1843          }          }
1844        }        }
1845    
# Line 1536  while (!done) Line 1925  while (!done)
1925        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1926    
1927        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1928          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1929            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1930            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1931            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1550  while (!done) Line 1939  while (!done)
1939            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1940            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1941            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1942              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1943            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1944              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1945            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1946    
1947        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1627  while (!done) Line 2018  while (!done)
2018          else          else
2019            {            {
2020            uschar *start_bits = NULL;            uschar *start_bits = NULL;
2021            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2022    
2023              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2024              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2025    
2026              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2027            if (start_bits == NULL)            if (start_bits == NULL)
2028              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2029            else            else
2030              {              {
2031              int i;              int i;
# Line 1712  while (!done) Line 2107  while (!done)
2107    
2108        new_free(re);        new_free(re);
2109        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2110        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2111            {
2112            new_free((void *)tables);
2113            setlocale(LC_CTYPE, "C");
2114            locale_set = 0;
2115            }
2116        continue;  /* With next regex */        continue;  /* With next regex */
2117        }        }
2118      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1734  while (!done) Line 2134  while (!done)
2134      int getlist = 0;      int getlist = 0;
2135      int gmatched = 0;      int gmatched = 0;
2136      int start_offset = 0;      int start_offset = 0;
2137        int start_offset_sign = 1;
2138      int g_notempty = 0;      int g_notempty = 0;
2139      int use_dfa = 0;      int use_dfa = 0;
2140    
# Line 1761  while (!done) Line 2162  while (!done)
2162        {        {
2163        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2164          {          {
2165          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2166              {
2167              fprintf(outfile, "\n");
2168              break;
2169              }
2170          done = 1;          done = 1;
2171          goto CONTINUE;          goto CONTINUE;
2172          }          }
# Line 1862  while (!done) Line 2267  while (!done)
2267          continue;          continue;
2268    
2269          case '>':          case '>':
2270            if (*p == '-')
2271              {
2272              start_offset_sign = -1;
2273              p++;
2274              }
2275          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2276            start_offset *= start_offset_sign;
2277          continue;          continue;
2278    
2279          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1935  while (!done) Line 2346  while (!done)
2346  #endif  #endif
2347            use_dfa = 1;            use_dfa = 1;
2348          continue;          continue;
2349    #endif
2350    
2351    #if !defined NODFA
2352          case 'F':          case 'F':
2353          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2354          continue;          continue;
# Line 1969  while (!done) Line 2382  while (!done)
2382          continue;          continue;
2383    
2384          case 'N':          case 'N':
2385          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2386              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2387            else
2388              options |= PCRE_NOTEMPTY;
2389          continue;          continue;
2390    
2391          case 'O':          case 'O':
# Line 1992  while (!done) Line 2408  while (!done)
2408          continue;          continue;
2409    
2410          case 'P':          case 'P':
2411          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2412            PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;            PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2413          continue;          continue;
2414    
# Line 2052  while (!done) Line 2468  while (!done)
2468        *q++ = c;        *q++ = c;
2469        }        }
2470      *q = 0;      *q = 0;
2471      len = q - dbuffer;      len = (int)(q - dbuffer);
2472    
2473      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
2474      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
# Line 2135  while (!done) Line 2551  while (!done)
2551    
2552      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2553        {        {
2554          markptr = NULL;
2555    
2556        if (timeitm > 0)        if (timeitm > 0)
2557          {          {
2558          register int i;          register int i;
# Line 2146  while (!done) Line 2564  while (!done)
2564            {            {
2565            int workspace[1000];            int workspace[1000];
2566            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2567              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2568                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2569                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2570            }            }
# Line 2209  while (!done) Line 2627  while (!done)
2627        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2628          {          {
2629          int workspace[1000];          int workspace[1000];
2630          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2631            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2632            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2633          if (count == 0)          if (count == 0)
# Line 2280  while (!done) Line 2698  while (!done)
2698              }              }
2699            }            }
2700    
2701            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2702    
2703          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2704            {            {
2705            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2363  while (!done) Line 2783  while (!done)
2783    
2784        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2785          {          {
2786          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2787              else fprintf(outfile, "Partial match, mark=%s", markptr);
2788          if (use_size_offsets > 1)          if (use_size_offsets > 1)
2789            {            {
2790            fprintf(outfile, ": ");            fprintf(outfile, ": ");
2791            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2792              outfile);              outfile);
2793            }            }
2794          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2795          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2796          }          }
# Line 2379  while (!done) Line 2800  while (!done)
2800        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2801        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2802    
2803        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
2804        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
2805        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
2806        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
2807        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
2808          newline setting in the pattern; if none was set, use pcre_config() to
2809          find the default.
2810    
2811        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2812        character, not one byte. */        character, not one byte. */
# Line 2408  while (!done) Line 2831  while (!done)
2831                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2832              }              }
2833            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2834                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2835                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2836                &&                &&
2837                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2418  while (!done) Line 2842  while (!done)
2842              {              {
2843              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2844                {                {
2845                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2846                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
2847                }                }
2848              }              }
2849            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
# Line 2430  while (!done) Line 2852  while (!done)
2852            {            {
2853            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2854              {              {
2855              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2856                  {
2857                  if (markptr == NULL) fprintf(outfile, "No match\n");
2858                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2859                  }
2860              }              }
2861            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2862            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 2442  while (!done) Line 2868  while (!done)
2868        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2869    
2870        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2871        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2872        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
2873        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2874        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2875        character. */        character. */
2876    
# Line 2453  while (!done) Line 2879  while (!done)
2879        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2880          {          {
2881          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2882          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2883          }          }
2884    
2885        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2480  while (!done) Line 2906  while (!done)
2906    
2907    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
2908    if (extra != NULL) new_free(extra);    if (extra != NULL) new_free(extra);
2909    if (tables != NULL)    if (locale_set)
2910      {      {
2911      new_free((void *)tables);      new_free((void *)tables);
2912      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");

Legend:
Removed from v.427  
changed lines
  Added in v.579

  ViewVC Help
Powered by ViewVC 1.1.5