/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 69 by nigel, Sat Feb 24 21:40:18 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 35  Makefile. */
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE 1024
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int use_utf8;
52  static size_t gotten_store;  static size_t gotten_store;
53    
54    
55    
56  /* Debugging function to print the internal form of the regex. This is the same  static const int utf8_table1[] = {
57  code as contained in pcre.c under the DEBUG macro. */    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
59    static const int utf8_table2[] = {
60      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    
62    static const int utf8_table3[] = {
63      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
64    
65    
66    
67    /*************************************************
68    *         Print compiled regex                   *
69    *************************************************/
70    
71    /* The code for doing this is held in a separate file that is also included in
72    pcre.c when it is compiled with the debug switch. It defines a function called
73    print_internals(), which uses a table of opcode lengths defined by the macro
74    OP_LENGTHS, whose name must be OP_lengths. */
75    
76    static uschar OP_lengths[] = { OP_LENGTHS };
77    
78    #include "printint.c"
79    
80    
81    
82    /*************************************************
83    *          Read number from string               *
84    *************************************************/
85    
86    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
87    around with conditional compilation, just do the job by hand. It is only used
88    for unpicking the -o argument, so just keep it simple.
89    
90    Arguments:
91      str           string to be converted
92      endptr        where to put the end pointer
93    
94    Returns:        the converted value, as an int
95    */
96    
97    static int
98    get_value(unsigned char *str, unsigned char **endptr)
99    {
100    int result = 0;
101    while(*str != 0 && isspace(*str)) str++;
102    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
103    *endptr = str;
104    return(result);
105    }
106    
107    
108    
109    /*************************************************
110    *       Convert character value to UTF-8         *
111    *************************************************/
112    
113    /* This function takes an integer value in the range 0 - 0x7fffffff
114    and encodes it as a UTF-8 character in 1 to 6 bytes.
115    
116    Arguments:
117      cvalue     the character value
118      buffer     pointer to buffer for result - at least 6 bytes long
119    
120    Returns:     number of bytes placed in the buffer
121                 -1 if input character is negative
122                 0 if input character is positive but too big (only when
123                 int is longer than 32 bits)
124    */
125    
126    static int
127    ord2utf8(int cvalue, unsigned char *buffer)
128    {
129    register int i, j;
130    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131      if (cvalue <= utf8_table1[i]) break;
132    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133    if (cvalue < 0) return -1;
134    
135    buffer += i;
136    for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141    *buffer = utf8_table2[i] | cvalue;
142    return i + 1;
143    }
144    
145    
146    /*************************************************
147    *            Convert UTF-8 string to value       *
148    *************************************************/
149    
150  static const char *OP_names[] = {  /* This function takes one or more bytes that represent a UTF-8 character,
151    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  and returns the value of the character.
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
152    
153    Arguments:
154      buffer   a pointer to the byte vector
155      vptr     a pointer to an int to receive the value
156    
157    Returns:   >  0 => the number of bytes consumed
158               -6 to 0 => malformed UTF-8 character at offset = (-return)
159    */
160    
161  static void print_internals(pcre *re)  static int
162    utf82ord(unsigned char *buffer, int *vptr)
163  {  {
164  unsigned char *code = ((real_pcre *)re)->code;  int c = *buffer++;
165    int d = c;
166    int i, j, s;
167    
168  fprintf(outfile, "------------------------------------------------------------------\n");  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
169      {
170      if ((d & 0x80) == 0) break;
171      d <<= 1;
172      }
173    
174    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
175    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
176    
177  for(;;)  /* i now has a value in the range 1-5 */
178    
179    s = 6*i;
180    d = (c & utf8_table3[i]) << s;
181    
182    for (j = 0; j < i; j++)
183    {    {
184    int c;    c = *buffer++;
185    int charlength;    if ((c & 0xc0) != 0x80) return -(j+1);
186      s -= 6;
187      d |= (c & 0x3f) << s;
188      }
189    
190    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /* Check that encoding was the correct unique one */
191    
192    if (*code >= OP_BRA)  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193      {    if (d <= utf8_table1[j]) break;
194      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  if (j != i) return -(i+1);
     code += 2;  
     }  
195    
196    else switch(*code)  /* Valid value */
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
197    
198      case OP_CLASS:  *vptr = d;
199        {  return i+1;
200        int i, min, max;  }
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
201    
       CLASS_REF_REPEAT:  
202    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
203    
204          case OP_CRRANGE:  /*************************************************
205          case OP_CRMINRANGE:  *             Print character string             *
206          min = (code[1] << 8) + code[2];  *************************************************/
207          max = (code[3] << 8) + code[4];  
208          if (max == 0) fprintf(outfile, "{%d,}", min);  /* Character string printing function. Must handle UTF-8 strings in utf8
209          else fprintf(outfile, "{%d,%d}", min, max);  mode. Yields number of characters printed. If handed a NULL file, just counts
210          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  chars without printing. */
         code += 4;  
         break;  
211    
212          default:  static int pchars(unsigned char *p, int length, FILE *f)
213          code--;  {
214    int c;
215    int yield = 0;
216    
217    while (length-- > 0)
218      {
219      if (use_utf8)
220        {
221        int rc = utf82ord(p, &c);
222    
223        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
224          {
225          length -= rc - 1;
226          p += rc;
227          if (c < 256 && isprint(c))
228            {
229            if (f != NULL) fprintf(f, "%c", c);
230            yield++;
231          }          }
232          else
233            {
234            int n;
235            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236            yield += n;
237            }
238          continue;
239        }        }
240      break;      }
241    
242      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
243    
244      default:    if (isprint(c = *(p++)))
245      fprintf(outfile, "    %s", OP_names[*code]);      {
246      break;      if (f != NULL) fprintf(f, "%c", c);
247        yield++;
248        }
249      else
250        {
251        if (f != NULL) fprintf(f, "\\x%02x", c);
252        yield += 4;
253      }      }
   
   code++;  
   fprintf(outfile, "\n");  
254    }    }
255    
256    return yield;
257  }  }
258    
259    
260    
261  /* Character string printing function. */  /*************************************************
262    *              Callout function                  *
263    *************************************************/
264    
265    /* Called from PCRE as a result of the (?C) item. We print out where we are in
266    the match. Yield zero unless more callouts than the fail count, or the callout
267    data is not zero. */
268    
269  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
270  {  {
271  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
272  while (length-- > 0)  int i, pre_start, post_start;
273    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
274      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
275      {
276      fprintf(f, "Callout %d: last capture = %d\n",
277        cb->callout_number, cb->capture_last);
278    
279      for (i = 0; i < cb->capture_top * 2; i += 2)
280        {
281        if (cb->offset_vector[i] < 0)
282          fprintf(f, "%2d: <unset>\n", i/2);
283        else
284          {
285          fprintf(f, "%2d: ", i/2);
286          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287            cb->offset_vector[i+1] - cb->offset_vector[i], f);
288          fprintf(f, "\n");
289          }
290        }
291      }
292    
293    /* Re-print the subject in canonical form, the first time or if giving full
294    details. On subsequent calls in the same match, we use pchars just to find the
295    printed lengths of the substrings. */
296    
297    if (f != NULL) fprintf(f, "--->");
298    
299    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301      cb->current_position - cb->start_match, f);
302    
303    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304      cb->subject_length - cb->current_position, f);
305    
306    if (f != NULL) fprintf(f, "\n");
307    
308    /* Always print appropriate indicators, with callout number if not already
309    shown */
310    
311    if (callout_extra) fprintf(outfile, "    ");
312      else fprintf(outfile, "%3d ", cb->callout_number);
313    
314    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315    fprintf(outfile, "^");
316    
317    if (post_start > 0)
318      {
319      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320      fprintf(outfile, "^");
321      }
322    
323    fprintf(outfile, "\n");
324    
325    first_callout = 0;
326    
327    if ((int)(cb->callout_data) != 0)
328      {
329      fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
330      return (int)(cb->callout_data);
331      }
332    
333    return (cb->callout_number != callout_fail_id)? 0 :
334           (++callout_count >= callout_fail_count)? 1 : 0;
335  }  }
336    
337    
338    /*************************************************
339    *            Local malloc function               *
340    *************************************************/
341    
342  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
343  compiled re. */  compiled re. */
# Line 283  compiled re. */ Line 345  compiled re. */
345  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
346  {  {
347  gotten_store = size;  gotten_store = size;
 if (log_store)  
   fprintf(outfile, "Memory allocation (code space): %d\n",  
     (int)((int)size - offsetof(real_pcre, code[0])));  
348  return malloc(size);  return malloc(size);
349  }  }
350    
351    
352    
353    /*************************************************
354    *          Call pcre_fullinfo()                  *
355    *************************************************/
356    
357  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
358    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 365  if ((rc = pcre_fullinfo(re, study, optio
365    
366    
367    
368    /*************************************************
369    *                Main Program                    *
370    *************************************************/
371    
372  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
373  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 317  int op = 1; Line 382  int op = 1;
382  int timeit = 0;  int timeit = 0;
383  int showinfo = 0;  int showinfo = 0;
384  int showstore = 0;  int showstore = 0;
385    int size_offsets = 45;
386    int size_offsets_max;
387    int *offsets;
388    #if !defined NOPOSIX
389  int posix = 0;  int posix = 0;
390    #endif
391  int debug = 0;  int debug = 0;
392  int done = 0;  int done = 0;
393  unsigned char buffer[30000];  
394  unsigned char dbuffer[1024];  unsigned char *buffer;
395    unsigned char *dbuffer;
396    
397    /* Get buffers from malloc() so that Electric Fence will check their misuse
398    when I am debugging. */
399    
400    buffer = malloc(BUFFER_SIZE);
401    dbuffer = malloc(DBUFFER_SIZE);
402    
403  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
404    
# Line 331  outfile = stdout; Line 408  outfile = stdout;
408    
409  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
410    {    {
411      unsigned char *endptr;
412    
413    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
414      showstore = 1;      showstore = 1;
415    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
416    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
417    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
418      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
419          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
420            *endptr == 0))
421        {
422        op++;
423        argc--;
424        }
425    #if !defined NOPOSIX
426    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
427    #endif
428      else if (strcmp(argv[op], "-C") == 0)
429        {
430        int rc;
431        printf("PCRE version %s\n", pcre_version());
432        printf("Compiled with\n");
433        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
434        printf("  %sUTF-8 support\n", rc? "" : "No ");
435        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
436        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
437        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
438        printf("  Internal link size = %d\n", rc);
439        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
440        printf("  POSIX malloc threshold = %d\n", rc);
441        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
442        printf("  Default match limit = %d\n", rc);
443        exit(0);
444        }
445    else    else
446      {      {
447      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
448      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
449      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
450             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
451             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
452             "  -s   output store information\n"             "  -o <n> set size of offsets vector to <n>\n");
453             "  -t   time compilation and execution\n");  #if !defined NOPOSIX
454        printf("  -p     use POSIX interface\n");
455    #endif
456        printf("  -s     output store information\n"
457               "  -t     time compilation and execution\n");
458      return 1;      return 1;
459      }      }
460    op++;    op++;
461    argc--;    argc--;
462    }    }
463    
464    /* Get the store for the offsets vector, and remember what it was */
465    
466    size_offsets_max = size_offsets;
467    offsets = malloc(size_offsets_max * sizeof(int));
468    if (offsets == NULL)
469      {
470      printf("** Failed to get %d bytes of memory for offsets vector\n",
471        size_offsets_max * sizeof(int));
472      return 1;
473      }
474    
475  /* Sort out the input and output files */  /* Sort out the input and output files */
476    
477  if (argc > 1)  if (argc > 1)
# Line 391  while (!done) Line 511  while (!done)
511    
512  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
513    regex_t preg;    regex_t preg;
514      int do_posix = 0;
515  #endif  #endif
516    
517    const char *error;    const char *error;
518    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
519    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
520    int do_study = 0;    int do_study = 0;
521    int do_debug = debug;    int do_debug = debug;
522    int do_G = 0;    int do_G = 0;
523    int do_g = 0;    int do_g = 0;
524    int do_showinfo = showinfo;    int do_showinfo = showinfo;
525    int do_showrest = 0;    int do_showrest = 0;
   int do_posix = 0;  
526    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
527    
528      use_utf8 = 0;
529    
530    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
531    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
532    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
533      fflush(outfile);
534    
535    p = buffer;    p = buffer;
536    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 436  while (!done) Line 559  while (!done)
559        }        }
560      if (*pp != 0) break;      if (*pp != 0) break;
561    
562      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
563      if (len < 256)      if (len < 256)
564        {        {
565        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 486  while (!done) Line 609  while (!done)
609        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
610        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
611        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
612          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
613    
614  #if !defined NOPOSIX  #if !defined NOPOSIX
615        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 494  while (!done) Line 618  while (!done)
618        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
619        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
620        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
621          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
622    
623        case 'L':        case 'L':
624        ppp = pp;        ppp = pp;
# Line 533  while (!done) Line 658  while (!done)
658    
659      if (rc != 0)      if (rc != 0)
660        {        {
661        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
662        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
663        goto SKIP_DATA;        goto SKIP_DATA;
664        }        }
# Line 557  while (!done) Line 682  while (!done)
682          }          }
683        time_taken = clock() - start_time;        time_taken = clock() - start_time;
684        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
685          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
686          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
687        }        }
688    
689      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 574  while (!done) Line 699  while (!done)
699          {          {
700          for (;;)          for (;;)
701            {            {
702            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
703              {              {
704              done = 1;              done = 1;
705              goto CONTINUE;              goto CONTINUE;
# Line 592  while (!done) Line 717  while (!done)
717      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
718      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
719    
720        if (log_store)
721          fprintf(outfile, "Memory allocation (code space): %d\n",
722            (int)(gotten_store -
723                  sizeof(real_pcre) -
724                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
725    
726      if (do_showinfo)      if (do_showinfo)
727        {        {
728          unsigned long int get_options;
729        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
730        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
731          int nameentrysize, namecount;
732          const uschar *nametable;
733        size_t size;        size_t size;
734    
735        if (do_debug) print_internals(re);        if (do_debug)
736            {
737            fprintf(outfile, "------------------------------------------------------------------\n");
738            print_internals(re, outfile);
739            }
740    
741        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
742        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
743        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
744        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
745        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
746        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
747          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
748          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
749          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
750    
751        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
752        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 620  while (!done) Line 761  while (!done)
761            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
762              first_char, old_first_char);              first_char, old_first_char);
763    
764          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
765            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
766              old_options);              get_options, old_options);
767          }          }
768    
769        if (size != gotten_store) fprintf(outfile,        if (size != gotten_store) fprintf(outfile,
# Line 632  while (!done) Line 773  while (!done)
773        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
774        if (backrefmax > 0)        if (backrefmax > 0)
775          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
776        if (options == 0) fprintf(outfile, "No options\n");  
777          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",        if (namecount > 0)
778            ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          {
779            ((options & PCRE_CASELESS) != 0)? " caseless" : "",          fprintf(outfile, "Named capturing subpatterns:\n");
780            ((options & PCRE_EXTENDED) != 0)? " extended" : "",          while (namecount-- > 0)
781            ((options & PCRE_MULTILINE) != 0)? " multiline" : "",            {
782            ((options & PCRE_DOTALL) != 0)? " dotall" : "",            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
783            ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
784            ((options & PCRE_EXTRA) != 0)? " extra" : "",              GET2(nametable, 0));
785            ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");            nametable += nameentrysize;
786              }
787            }
788    
789          if (get_options == 0) fprintf(outfile, "No options\n");
790            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
791              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
792              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
793              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
794              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
795              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
796              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
797              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
798              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
799              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
800    
801        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
802          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 656  while (!done) Line 811  while (!done)
811          }          }
812        else        else
813          {          {
814          if (isprint(first_char))          int ch = first_char & 255;
815            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
816              "" : " (caseless)";
817            if (isprint(ch))
818              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
819          else          else
820            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
821          }          }
822    
823        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 826  while (!done)
826          }          }
827        else        else
828          {          {
829          if (isprint(need_char))          int ch = need_char & 255;
830            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
831              "" : " (caseless)";
832            if (isprint(ch))
833              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
834          else          else
835            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
836          }          }
837        }        }
838    
# Line 690  while (!done) Line 851  while (!done)
851          time_taken = clock() - start_time;          time_taken = clock() - start_time;
852          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
853          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
854            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
855            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
856          }          }
857    
858        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 702  while (!done) Line 863  while (!done)
863    
864        else if (do_showinfo)        else if (do_showinfo)
865          {          {
866            size_t size;
867          uschar *start_bits = NULL;          uschar *start_bits = NULL;
868            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
869          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
870            fprintf(outfile, "Study size = %d\n", size);
871          if (start_bits == NULL)          if (start_bits == NULL)
872            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
873          else          else
# Line 744  while (!done) Line 908  while (!done)
908      {      {
909      unsigned char *q;      unsigned char *q;
910      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
911        int *use_offsets = offsets;
912        int use_size_offsets = size_offsets;
913        int callout_data = 0;
914        int callout_data_set = 0;
915      int count, c;      int count, c;
916      int copystrings = 0;      int copystrings = 0;
917        int find_match_limit = 0;
918      int getstrings = 0;      int getstrings = 0;
919      int getlist = 0;      int getlist = 0;
920      int gmatched = 0;      int gmatched = 0;
921      int start_offset = 0;      int start_offset = 0;
922      int g_notempty = 0;      int g_notempty = 0;
     int offsets[45];  
     int size_offsets = sizeof(offsets)/sizeof(int);  
923    
924      options = 0;      options = 0;
925    
926        pcre_callout = callout;
927        first_callout = 1;
928        callout_extra = 0;
929        callout_count = 0;
930        callout_fail_count = 999999;
931        callout_fail_id = -1;
932    
933      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
934      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
935        {        {
936        done = 1;        done = 1;
937        goto CONTINUE;        goto CONTINUE;
# Line 777  while (!done) Line 951  while (!done)
951        {        {
952        int i = 0;        int i = 0;
953        int n = 0;        int n = 0;
954    
955        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
956          {          {
957          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 796  while (!done) Line 971  while (!done)
971          break;          break;
972    
973          case 'x':          case 'x':
974    
975            /* Handle \x{..} specially - new Perl thing for utf8 */
976    
977            if (*p == '{')
978              {
979              unsigned char *pt = p;
980              c = 0;
981              while (isxdigit(*(++pt)))
982                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
983              if (*pt == '}')
984                {
985                unsigned char buff8[8];
986                int ii, utn;
987                utn = ord2utf8(c, buff8);
988                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
989                c = buff8[ii];   /* Last byte */
990                p = pt + 1;
991                break;
992                }
993              /* Not correct form; fall through */
994              }
995    
996            /* Ordinary \x */
997    
998          c = 0;          c = 0;
999          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1000            {            {
# Line 817  while (!done) Line 1016  while (!done)
1016          continue;          continue;
1017    
1018          case 'C':          case 'C':
1019          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1020          copystrings |= 1 << n;            {
1021              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1022              copystrings |= 1 << n;
1023              }
1024            else if (isalnum(*p))
1025              {
1026              uschar name[256];
1027              uschar *npp = name;
1028              while (isalnum(*p)) *npp++ = *p++;
1029              *npp = 0;
1030              n = pcre_get_stringnumber(re, (char *)name);
1031              if (n < 0)
1032                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1033              else copystrings |= 1 << n;
1034              }
1035            else if (*p == '+')
1036              {
1037              callout_extra = 1;
1038              p++;
1039              }
1040            else if (*p == '-')
1041              {
1042              pcre_callout = NULL;
1043              p++;
1044              }
1045            else if (*p == '!')
1046              {
1047              callout_fail_id = 0;
1048              p++;
1049              while(isdigit(*p))
1050                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1051              callout_fail_count = 0;
1052              if (*p == '!')
1053                {
1054                p++;
1055                while(isdigit(*p))
1056                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1057                }
1058              }
1059            else if (*p == '*')
1060              {
1061              int sign = 1;
1062              callout_data = 0;
1063              if (*(++p) == '-') { sign = -1; p++; }
1064              while(isdigit(*p))
1065                callout_data = callout_data * 10 + *p++ - '0';
1066              callout_data *= sign;
1067              callout_data_set = 1;
1068              }
1069          continue;          continue;
1070    
1071          case 'G':          case 'G':
1072          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1073          getstrings |= 1 << n;            {
1074              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1075              getstrings |= 1 << n;
1076              }
1077            else if (isalnum(*p))
1078              {
1079              uschar name[256];
1080              uschar *npp = name;
1081              while (isalnum(*p)) *npp++ = *p++;
1082              *npp = 0;
1083              n = pcre_get_stringnumber(re, (char *)name);
1084              if (n < 0)
1085                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1086              else getstrings |= 1 << n;
1087              }
1088          continue;          continue;
1089    
1090          case 'L':          case 'L':
1091          getlist = 1;          getlist = 1;
1092          continue;          continue;
1093    
1094            case 'M':
1095            find_match_limit = 1;
1096            continue;
1097    
1098          case 'N':          case 'N':
1099          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1100          continue;          continue;
1101    
1102          case 'O':          case 'O':
1103          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1104          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1105              {
1106              size_offsets_max = n;
1107              free(offsets);
1108              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1109              if (offsets == NULL)
1110                {
1111                printf("** Failed to get %d bytes of memory for offsets vector\n",
1112                  size_offsets_max * sizeof(int));
1113                return 1;
1114                }
1115              }
1116            use_size_offsets = n;
1117            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1118          continue;          continue;
1119    
1120          case 'Z':          case 'Z':
# Line 849  while (!done) Line 1127  while (!done)
1127      len = q - dbuffer;      len = q - dbuffer;
1128    
1129      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1130      support timing. */      support timing or playing with the match limit or callout data. */
1131    
1132  #if !defined NOPOSIX  #if !defined NOPOSIX
1133      if (posix || do_posix)      if (posix || do_posix)
1134        {        {
1135        int rc;        int rc;
1136        int eflags = 0;        int eflags = 0;
1137        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
1138          if (use_size_offsets > 0)
1139            pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1140        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1141        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1142    
1143        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1144    
1145        if (rc != 0)        if (rc != 0)
1146          {          {
1147          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1148          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1149          }          }
1150        else        else
1151          {          {
1152          size_t i;          size_t i;
1153          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1154            {            {
1155            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1156              {              {
1157              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1158              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1159                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1160              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1161              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1162                {                {
1163                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1164                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1165                    outfile);
1166                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1167                }                }
1168              }              }
1169            }            }
1170          }          }
1171          free(pmatch);
1172        }        }
1173    
1174      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 903  while (!done) Line 1185  while (!done)
1185          clock_t start_time = clock();          clock_t start_time = clock();
1186          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1187            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1188              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1189          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1190          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1191            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1192            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1193          }          }
1194    
1195        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1196          start_offset, options | g_notempty, offsets, size_offsets);        varying limits in order to find the minimum value. */
1197    
1198          if (find_match_limit)
1199            {
1200            int min = 0;
1201            int mid = 64;
1202            int max = -1;
1203    
1204            if (extra == NULL)
1205              {
1206              extra = malloc(sizeof(pcre_extra));
1207              extra->flags = 0;
1208              }
1209            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1210    
1211            for (;;)
1212              {
1213              extra->match_limit = mid;
1214              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1215                options | g_notempty, use_offsets, use_size_offsets);
1216              if (count == PCRE_ERROR_MATCHLIMIT)
1217                {
1218                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1219                min = mid;
1220                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1221                }
1222              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1223                {
1224                if (mid == min + 1)
1225                  {
1226                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1227                  break;
1228                  }
1229                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1230                max = mid;
1231                mid = (min + mid)/2;
1232                }
1233              else break;    /* Some other error */
1234              }
1235    
1236            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1237            }
1238    
1239          /* If callout_data is set, use the interface with additional data */
1240    
1241          else if (callout_data_set)
1242            {
1243            if (extra == NULL)
1244              {
1245              extra = malloc(sizeof(pcre_extra));
1246              extra->flags = 0;
1247              }
1248            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1249            extra->callout_data = (void *)callout_data;
1250            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1251              options | g_notempty, use_offsets, use_size_offsets);
1252            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1253            }
1254    
1255          /* The normal case is just to do the match once, with the default
1256          value of match_limit. */
1257    
1258          else count = pcre_exec(re, extra, (char *)bptr, len,
1259            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1260    
1261        if (count == 0)        if (count == 0)
1262          {          {
1263          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1264          count = size_offsets/3;          count = use_size_offsets/3;
1265          }          }
1266    
1267        /* Matched */        /* Matched */
# Line 926  while (!done) Line 1271  while (!done)
1271          int i;          int i;
1272          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1273            {            {
1274            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1275              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1276            else            else
1277              {              {
1278              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1279              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1280                  use_offsets[i+1] - use_offsets[i], outfile);
1281              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1282              if (i == 0)              if (i == 0)
1283                {                {
1284                if (do_showrest)                if (do_showrest)
1285                  {                  {
1286                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1287                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1288                      outfile);
1289                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1290                  }                  }
1291                }                }
# Line 950  while (!done) Line 1297  while (!done)
1297            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1298              {              {
1299              char copybuffer[16];              char copybuffer[16];
1300              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1301                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1302              if (rc < 0)              if (rc < 0)
1303                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 964  while (!done) Line 1311  while (!done)
1311            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1312              {              {
1313              const char *substring;              const char *substring;
1314              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1315                i, &substring);                i, &substring);
1316              if (rc < 0)              if (rc < 0)
1317                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1318              else              else
1319                {                {
1320                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1321                free((void *)substring);                /* free((void *)substring); */
1322                  pcre_free_substring(substring);
1323                }                }
1324              }              }
1325            }            }
# Line 979  while (!done) Line 1327  while (!done)
1327          if (getlist)          if (getlist)
1328            {            {
1329            const char **stringlist;            const char **stringlist;
1330            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1331              &stringlist);              &stringlist);
1332            if (rc < 0)            if (rc < 0)
1333              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 1337  while (!done)
1337                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1338              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1339                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1340              free((void *)stringlist);              /* free((void *)stringlist); */
1341                pcre_free_substring_list(stringlist);
1342              }              }
1343            }            }
1344          }          }
1345    
1346        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1347        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1348        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. Fudge the offset
1349        values to achieve this. We won't be at the end of the string - that        values to achieve this. We won't be at the end of the string - that
1350        was checked before setting PCRE_NOTEMPTY. */        was checked before setting g_notempty. */
1351    
1352        else        else
1353          {          {
1354          if (g_notempty != 0)          if (g_notempty != 0)
1355            {            {
1356            offsets[0] = start_offset;            use_offsets[0] = start_offset;
1357            offsets[1] = start_offset + 1;            use_offsets[1] = start_offset + 1;
1358            }            }
1359          else          else
1360            {            {
# Line 1025  while (!done) Line 1374  while (!done)
1374        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
1375        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
1376        what Perl's /g options does. This turns out to be rather cunning. First        what Perl's /g options does. This turns out to be rather cunning. First
1377        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1378        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
1379          character. */
1380    
1381        g_notempty = 0;        g_notempty = 0;
1382        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1383          {          {
1384          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1385          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1386          }          }
1387    
1388        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1389    
1390        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1391    
1392        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1393    
1394        else        else
1395          {          {
1396          bptr += offsets[1];          bptr += use_offsets[1];
1397          len -= offsets[1];          len -= use_offsets[1];
1398          }          }
1399        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1400      }    /* End of loop for data lines */      }    /* End of loop for data lines */

Legend:
Removed from v.43  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.5