/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 55 by nigel, Sat Feb 24 21:39:46 2007 UTC revision 65 by nigel, Sat Feb 24 21:40:08 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 35  Makefile. */
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    
41  static FILE *outfile;  static FILE *outfile;
42  static int log_store = 0;  static int log_store = 0;
43    static int callout_count;
44    static int callout_extra;
45    static int callout_fail_count;
46    static int callout_fail_id;
47    static int first_callout;
48    static int utf8;
49  static size_t gotten_store;  static size_t gotten_store;
50    
51    
# Line 48  static int utf8_table3[] = { Line 60  static int utf8_table3[] = {
60    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
61    
62    
63    
64    /*************************************************
65    *         Print compiled regex                   *
66    *************************************************/
67    
68    /* The code for doing this is held in a separate file that is also included in
69    pcre.c when it is compiled with the debug switch. It defines a function called
70    print_internals(), which uses a table of opcode lengths. The table must be
71    named OP_lengths; its contents come from the macro OP_LENGTHS. */
72    
73    static uschar OP_lengths[] = { OP_LENGTHS };
74    
75    #include "printint.c"
76    
77    
78    
79    /*************************************************
80    *          Read number from string               *
81    *************************************************/
82    
83    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
84    around with conditional compilation, just do the job by hand. It is only used
85    for unpicking the -o argument, so just keep it simple.
86    
87    Arguments:
88      str           string to be converted
89      endptr        where to put the end pointer
90    
91    Returns:        the converted value, as an int
92    */
93    
94    static int
95    get_value(unsigned char *str, unsigned char **endptr)
96    {
97    int result = 0;
98    while(*str != 0 && isspace(*str)) str++;
99    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
100    *endptr = str;
101    return(result);
102    }
103    
104    
105    
106  /*************************************************  /*************************************************
107  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
108  *************************************************/  *************************************************/
# Line 73  for (i = 0; i < sizeof(utf8_table1)/size Line 128  for (i = 0; i < sizeof(utf8_table1)/size
128    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
129  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
130  if (cvalue < 0) return -1;  if (cvalue < 0) return -1;
131  *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
132  cvalue >>= 6 - i;  buffer += i;
133  for (j = 0; j < i; j++)  for (j = i; j > 0; j--)
134    {   {
135    *buffer++ = 0x80 | (cvalue & 0x3f);   *buffer-- = 0x80 | (cvalue & 0x3f);
136    cvalue >>= 6;   cvalue >>= 6;
137    }   }
138    *buffer = utf8_table2[i] | cvalue;
139  return i + 1;  return i + 1;
140  }  }
141    
# Line 117  if (i == 0 || i == 6) return 0;        / Line 173  if (i == 0 || i == 6) return 0;        /
173    
174  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
175    
176  d = c & utf8_table3[i];  s = 6*i;
177  s = 6 - i;  d = (c & utf8_table3[i]) << s;
178    
179  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
180    {    {
181    c = *buffer++;    c = *buffer++;
182    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
183      s -= 6;
184    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
185    }    }
186    
187  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
# Line 142  return i+1; Line 198  return i+1;
198    
199    
200    
201    /*************************************************
202    *             Print character string             *
203    *************************************************/
204    
205    /* Character string printing function. Must handle UTF-8 strings in utf8
206    mode. Yields number of characters printed. If handed a NULL file, just counts
207    chars without printing. */
208    
209    static int pchars(unsigned char *p, int length, FILE *f)
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
   
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
   
   
 static void print_internals(pcre *re)  
210  {  {
211  unsigned char *code = ((real_pcre *)re)->code;  int c;
212    int yield = 0;
 fprintf(outfile, "------------------------------------------------------------------\n");  
213    
214  for(;;)  while (length-- > 0)
215    {    {
216    int c;    if (utf8)
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     if (*code - OP_BRA > EXTRACT_BASIC_MAX)  
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
217      {      {
218      case OP_END:      int rc = utf82ord(p, &c);
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
219    
220      case OP_CLASS:      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
221        {        {
222        int i, min, max;        length -= rc - 1;
223        code++;        p += rc;
224        fprintf(outfile, "    [");        if (c < 256 && isprint(c))
225            {
226        for (i = 0; i < 256; i++)          if (f != NULL) fprintf(f, "%c", c);
227          {          yield++;
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
228          }          }
229        fprintf(outfile, "]");        else
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
230          {          {
231          case OP_CRSTAR:          int n;
232          case OP_CRMINSTAR:          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
233          case OP_CRPLUS:          yield += n;
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
   
         default:  
         code--;  
234          }          }
235          continue;
236        }        }
237      break;      }
238    
239      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
240    
241      default:    if (isprint(c = *(p++)))
242      fprintf(outfile, "    %s", OP_names[*code]);      {
243      break;      if (f != NULL) fprintf(f, "%c", c);
244        yield++;
245        }
246      else
247        {
248        if (f != NULL) fprintf(f, "\\x%02x", c);
249        yield += 4;
250      }      }
   
   code++;  
   fprintf(outfile, "\n");  
251    }    }
252    
253    return yield;
254  }  }
255    
256    
257    
258  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
259    *              Callout function                  *
260    *************************************************/
261    
262    /* Called from PCRE as a result of the (?C) item. We print out where we are in
263    the match. Yield the callout data if it is non-zero; otherwise yield 1 once the
264    callout with the fail id has been hit at least fail-count times, else zero. */
265    
266  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
267  {  {
268  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
269  while (length-- > 0)  int i, pre_start, post_start;
270    
271    if (callout_extra)
272    {    {
273    if (utf8)    int i;
274      fprintf(f, "Callout %d: last capture = %d\n",
275        cb->callout_number, cb->capture_last);
276    
277      for (i = 0; i < cb->capture_top * 2; i += 2)
278      {      {
279      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
280      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
281        else
282        {        {
283        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
284        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
285        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
286          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
287        }        }
288      }      }
289      }
290    
291     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
292    details. On subsequent calls in the same match, we use pchars just to find the
293    printed lengths of the substrings. */
294    
295    if (f != NULL) fprintf(f, "--->");
296    
297    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
298    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
299      cb->current_position - cb->start_match, f);
300    
301    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
302      cb->subject_length - cb->current_position, f);
303    
304    if (f != NULL) fprintf(f, "\n");
305    
306    /* Always print appropriate indicators, with callout number if not already
307    shown */
308    
309    if (callout_extra) fprintf(outfile, "    ");
310      else fprintf(outfile, "%3d ", cb->callout_number);
311    
312    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
313      else fprintf(outfile, "\\x%02x", c);  fprintf(outfile, "^");
314    
315    if (post_start > 0)
316      {
317      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
318      fprintf(outfile, "^");
319    }    }
320    
321    fprintf(outfile, "\n");
322    
323    first_callout = 0;
324    
325    if ((int)(cb->callout_data) != 0)
326      {
327      fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
328      return (int)(cb->callout_data);
329      }
330    
331    return (cb->callout_number != callout_fail_id)? 0 :
332           (++callout_count >= callout_fail_count)? 1 : 0;
333  }  }
334    
335    
336    /*************************************************
337    *            Local malloc function               *
338    *************************************************/
339    
340  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
341  compiled re. */  compiled re. */
# Line 399  compiled re. */ Line 343  compiled re. */
343  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
344  {  {
345  gotten_store = size;  gotten_store = size;
 if (log_store)  
   fprintf(outfile, "Memory allocation (code space): %d\n",  
     (int)((int)size - offsetof(real_pcre, code[0])));  
346  return malloc(size);  return malloc(size);
347  }  }
348    
349    
350    
351    /*************************************************
352    *          Call pcre_fullinfo()                  *
353    *************************************************/
354    
355  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
356    
# Line 419  if ((rc = pcre_fullinfo(re, study, optio Line 363  if ((rc = pcre_fullinfo(re, study, optio
363    
364    
365    
366    /*************************************************
367    *                Main Program                    *
368    *************************************************/
369    
370  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
371  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 452  outfile = stdout; Line 399  outfile = stdout;
399    
400  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
401    {    {
402    char *endptr;    unsigned char *endptr;
403    
404    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
405      showstore = 1;      showstore = 1;
# Line 460  while (argc > 1 && argv[op][0] == '-') Line 407  while (argc > 1 && argv[op][0] == '-')
407    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
408    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
409    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
410        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
411            *endptr == 0))
412      {      {
413      op++;      op++;
414      argc--;      argc--;
# Line 468  while (argc > 1 && argv[op][0] == '-') Line 416  while (argc > 1 && argv[op][0] == '-')
416  #if !defined NOPOSIX  #if !defined NOPOSIX
417    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
418  #endif  #endif
419      else if (strcmp(argv[op], "-C") == 0)
420        {
421        int rc;
422        printf("PCRE version %s\n", pcre_version());
423        printf("Compiled with\n");
424        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
425        printf("  %sUTF-8 support\n", rc? "" : "No ");
426        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
427        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
428        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
429        printf("  Internal link size = %d\n", rc);
430        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
431        printf("  POSIX malloc threshold = %d\n", rc);
432        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
433        printf("  Default match limit = %d\n", rc);
434        exit(0);
435        }
436    else    else
437      {      {
438      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
439      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
440        printf("  -C     show PCRE compile-time options and exit\n");
441      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
442             "  -i     show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
443             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
# Line 548  while (!done) Line 514  while (!done)
514    int do_g = 0;    int do_g = 0;
515    int do_showinfo = showinfo;    int do_showinfo = showinfo;
516    int do_showrest = 0;    int do_showrest = 0;
   int utf8 = 0;  
517    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
518    
519      utf8 = 0;
520    
521    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
522    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
523    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
524      fflush(outfile);
525    
526    p = buffer;    p = buffer;
527    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 632  while (!done) Line 600  while (!done)
600        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
601        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
602        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
603          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
604    
605  #if !defined NOPOSIX  #if !defined NOPOSIX
606        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 704  while (!done) Line 673  while (!done)
673          }          }
674        time_taken = clock() - start_time;        time_taken = clock() - start_time;
675        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
676          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
677          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
678        }        }
679    
680      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 739  while (!done) Line 708  while (!done)
708      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
709      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
710    
711        if (log_store)
712          fprintf(outfile, "Memory allocation (code space): %d\n",
713            (int)(gotten_store -
714                  sizeof(real_pcre) -
715                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
716    
717      if (do_showinfo)      if (do_showinfo)
718        {        {
719        unsigned long int get_options;        unsigned long int get_options;
720        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
721        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
722          int nameentrysize, namecount;
723          const uschar *nametable;
724        size_t size;        size_t size;
725    
726        if (do_debug) print_internals(re);        if (do_debug)
727            {
728            fprintf(outfile, "------------------------------------------------------------------\n");
729            print_internals(re, outfile);
730            }
731    
732        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
733        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
734        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
735        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
736        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
737        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
738          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
739          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
740          new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);
741    
742        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
743        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 780  while (!done) Line 764  while (!done)
764        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
765        if (backrefmax > 0)        if (backrefmax > 0)
766          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
767    
768          if (namecount > 0)
769            {
770            fprintf(outfile, "Named capturing subpatterns:\n");
771            while (namecount-- > 0)
772              {
773              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
774                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
775                GET2(nametable, 0));
776              nametable += nameentrysize;
777              }
778            }
779    
780        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
781          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
782            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
# Line 805  while (!done) Line 802  while (!done)
802          }          }
803        else        else
804          {          {
805          if (isprint(first_char))          int ch = first_char & 255;
806            fprintf(outfile, "First char = \'%c\'\n", first_char);          char *caseless = ((first_char & REQ_CASELESS) == 0)?
807              "" : " (caseless)";
808            if (isprint(ch))
809              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
810          else          else
811            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
812          }          }
813    
814        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 817  while (!done)
817          }          }
818        else        else
819          {          {
820          if (isprint(need_char))          int ch = need_char & 255;
821            fprintf(outfile, "Need char = \'%c\'\n", need_char);          char *caseless = ((need_char & REQ_CASELESS) == 0)?
822              "" : " (caseless)";
823            if (isprint(ch))
824              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
825          else          else
826            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
827          }          }
828        }        }
829    
# Line 839  while (!done) Line 842  while (!done)
842          time_taken = clock() - start_time;          time_taken = clock() - start_time;
843          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
844          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
845            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
846            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
847          }          }
848    
849        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 851  while (!done) Line 854  while (!done)
854    
855        else if (do_showinfo)        else if (do_showinfo)
856          {          {
857            size_t size;
858          uschar *start_bits = NULL;          uschar *start_bits = NULL;
859            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
860          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
861            fprintf(outfile, "Study size = %d\n", size);
862          if (start_bits == NULL)          if (start_bits == NULL)
863            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
864          else          else
# Line 893  while (!done) Line 899  while (!done)
899      {      {
900      unsigned char *q;      unsigned char *q;
901      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
902        int *use_offsets = offsets;
903      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
904        int callout_data = 0;
905        int callout_data_set = 0;
906      int count, c;      int count, c;
907      int copystrings = 0;      int copystrings = 0;
908        int find_match_limit = 0;
909      int getstrings = 0;      int getstrings = 0;
910      int getlist = 0;      int getlist = 0;
911      int gmatched = 0;      int gmatched = 0;
# Line 904  while (!done) Line 914  while (!done)
914    
915      options = 0;      options = 0;
916    
917        pcre_callout = callout;
918        first_callout = 1;
919        callout_extra = 0;
920        callout_count = 0;
921        callout_fail_count = 999999;
922        callout_fail_id = -1;
923    
924      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
925      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
926        {        {
# Line 925  while (!done) Line 942  while (!done)
942        {        {
943        int i = 0;        int i = 0;
944        int n = 0;        int n = 0;
945    
946        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
947          {          {
948          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 989  while (!done) Line 1007  while (!done)
1007          continue;          continue;
1008    
1009          case 'C':          case 'C':
1010          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1011          copystrings |= 1 << n;            {
1012              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013              copystrings |= 1 << n;
1014              }
1015            else if (isalnum(*p))
1016              {
1017              uschar name[256];
1018              uschar *pp = name;
1019              while (isalnum(*p)) *pp++ = *p++;
1020              *pp = 0;
1021              n = pcre_get_stringnumber(re, (char *)name);
1022              if (n < 0)
1023                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1024              else copystrings |= 1 << n;
1025              }
1026            else if (*p == '+')
1027              {
1028              callout_extra = 1;
1029              p++;
1030              }
1031            else if (*p == '-')
1032              {
1033              pcre_callout = NULL;
1034              p++;
1035              }
1036            else if (*p == '!')
1037              {
1038              callout_fail_id = 0;
1039              p++;
1040              while(isdigit(*p))
1041                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1042              callout_fail_count = 0;
1043              if (*p == '!')
1044                {
1045                p++;
1046                while(isdigit(*p))
1047                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1048                }
1049              }
1050            else if (*p == '*')
1051              {
1052              int sign = 1;
1053              callout_data = 0;
1054              if (*(++p) == '-') { sign = -1; p++; }
1055              while(isdigit(*p))
1056                callout_data = callout_data * 10 + *p++ - '0';
1057              callout_data *= sign;
1058              callout_data_set = 1;
1059              }
1060          continue;          continue;
1061    
1062          case 'G':          case 'G':
1063          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1064          getstrings |= 1 << n;            {
1065              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1066              getstrings |= 1 << n;
1067              }
1068            else if (isalnum(*p))
1069              {
1070              uschar name[256];
1071              uschar *pp = name;
1072              while (isalnum(*p)) *pp++ = *p++;
1073              *pp = 0;
1074              n = pcre_get_stringnumber(re, (char *)name);
1075              if (n < 0)
1076                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1077              else getstrings |= 1 << n;
1078              }
1079          continue;          continue;
1080    
1081          case 'L':          case 'L':
1082          getlist = 1;          getlist = 1;
1083          continue;          continue;
1084    
1085            case 'M':
1086            find_match_limit = 1;
1087            continue;
1088    
1089          case 'N':          case 'N':
1090          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1091          continue;          continue;
# Line 1010  while (!done) Line 1094  while (!done)
1094          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1095          if (n > size_offsets_max)          if (n > size_offsets_max)
1096            {            {
   
 if (offsets != NULL)  
   
           free(offsets);  
1097            size_offsets_max = n;            size_offsets_max = n;
1098            offsets = malloc(size_offsets_max * sizeof(int));            free(offsets);
1099              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1100            if (offsets == NULL)            if (offsets == NULL)
1101              {              {
1102              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
# Line 1024  if (offsets != NULL) Line 1105  if (offsets != NULL)
1105              }              }
1106            }            }
1107          use_size_offsets = n;          use_size_offsets = n;
1108            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
 if (n == 0)  
   {  
   free(offsets);  
   offsets = NULL;  
   size_offsets_max = 0;  
   }  
   
1109          continue;          continue;
1110    
1111          case 'Z':          case 'Z':
# Line 1044  if (n == 0) Line 1118  if (n == 0)
1118      len = q - dbuffer;      len = q - dbuffer;
1119    
1120      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1121      support timing. */      support timing or playing with the match limit or callout data. */
1122    
1123  #if !defined NOPOSIX  #if !defined NOPOSIX
1124      if (posix || do_posix)      if (posix || do_posix)
1125        {        {
1126        int rc;        int rc;
1127        int eflags = 0;        int eflags = 0;
1128        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1129          if (use_size_offsets > 0)
1130            pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1131        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1132        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1133    
# Line 1065  if (n == 0) Line 1141  if (n == 0)
1141        else        else
1142          {          {
1143          size_t i;          size_t i;
1144          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1145            {            {
1146            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1147              {              {
1148              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1149              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1150                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1151              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1152              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1153                {                {
1154                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1155                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1156                    outfile);
1157                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1158                }                }
1159              }              }
# Line 1099  if (n == 0) Line 1176  if (n == 0)
1176          clock_t start_time = clock();          clock_t start_time = clock();
1177          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1178            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1179              start_offset, options | g_notempty, offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1180          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1181          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1182            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1183            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1184          }          }
1185    
1186        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1187          start_offset, options | g_notempty, offsets, use_size_offsets);        varying limits in order to find the minimum value. */
1188    
1189          if (find_match_limit)
1190            {
1191            int min = 0;
1192            int mid = 64;
1193            int max = -1;
1194    
1195            if (extra == NULL)
1196              {
1197              extra = malloc(sizeof(pcre_extra));
1198              extra->flags = 0;
1199              }
1200            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1201    
1202            for (;;)
1203              {
1204              extra->match_limit = mid;
1205              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1206                options | g_notempty, use_offsets, use_size_offsets);
1207              if (count == PCRE_ERROR_MATCHLIMIT)
1208                {
1209                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1210                min = mid;
1211                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1212                }
1213              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1214                {
1215                if (mid == min + 1)
1216                  {
1217                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1218                  break;
1219                  }
1220                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1221                max = mid;
1222                mid = (min + mid)/2;
1223                }
1224              else break;    /* Some other error */
1225              }
1226    
1227            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1228            }
1229    
1230          /* If callout_data is set, use the interface with additional data */
1231    
1232          else if (callout_data_set)
1233            {
1234            if (extra == NULL)
1235              {
1236              extra = malloc(sizeof(pcre_extra));
1237              extra->flags = 0;
1238              }
1239            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1240            extra->callout_data = (void *)callout_data;
1241            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1242              options | g_notempty, use_offsets, use_size_offsets);
1243            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1244            }
1245    
1246          /* The normal case is just to do the match once, with the default
1247          value of match_limit. */
1248    
1249          else count = pcre_exec(re, extra, (char *)bptr, len,
1250            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1251    
1252        if (count == 0)        if (count == 0)
1253          {          {
# Line 1122  if (n == 0) Line 1262  if (n == 0)
1262          int i;          int i;
1263          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1264            {            {
1265            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1266              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1267            else            else
1268              {              {
1269              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1270              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1271                  use_offsets[i+1] - use_offsets[i], outfile);
1272              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1273              if (i == 0)              if (i == 0)
1274                {                {
1275                if (do_showrest)                if (do_showrest)
1276                  {                  {
1277                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1278                  pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1279                      outfile);
1280                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1281                  }                  }
1282                }                }
# Line 1146  if (n == 0) Line 1288  if (n == 0)
1288            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1289              {              {
1290              char copybuffer[16];              char copybuffer[16];
1291              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1292                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1293              if (rc < 0)              if (rc < 0)
1294                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 1160  if (n == 0) Line 1302  if (n == 0)
1302            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1303              {              {
1304              const char *substring;              const char *substring;
1305              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1306                i, &substring);                i, &substring);
1307              if (rc < 0)              if (rc < 0)
1308                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
# Line 1176  if (n == 0) Line 1318  if (n == 0)
1318          if (getlist)          if (getlist)
1319            {            {
1320            const char **stringlist;            const char **stringlist;
1321            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1322              &stringlist);              &stringlist);
1323            if (rc < 0)            if (rc < 0)
1324              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 1202  if (n == 0) Line 1344  if (n == 0)
1344          {          {
1345          if (g_notempty != 0)          if (g_notempty != 0)
1346            {            {
1347            offsets[0] = start_offset;            use_offsets[0] = start_offset;
1348            offsets[1] = start_offset + 1;            use_offsets[1] = start_offset + 1;
1349            }            }
1350          else          else
1351            {            {
# Line 1228  if (n == 0) Line 1370  if (n == 0)
1370        character. */        character. */
1371    
1372        g_notempty = 0;        g_notempty = 0;
1373        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1374          {          {
1375          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1376          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1377          }          }
1378    
1379        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1380    
1381        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1382    
1383        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1384    
1385        else        else
1386          {          {
1387          bptr += offsets[1];          bptr += use_offsets[1];
1388          len -= offsets[1];          len -= use_offsets[1];
1389          }          }
1390        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1391      }    /* End of loop for data lines */      }    /* End of loop for data lines */

Legend:
Removed from v.55  
changed lines
  Added in v.65

  ViewVC Help
Powered by ViewVC 1.1.5