/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 71 by nigel, Sat Feb 24 21:40:24 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 35  Makefile. */
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE 1024
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int use_utf8;
52  static size_t gotten_store;  static size_t gotten_store;
53    
54    
55    static const int utf8_table1[] = {
 static int utf8_table1[] = {  
56    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
57    
58  static int utf8_table2[] = {  static const int utf8_table2[] = {
59    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
60    
61  static int utf8_table3[] = {  static const int utf8_table3[] = {
62    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
63    
64    
65    
66    /*************************************************
67    *         Print compiled regex                   *
68    *************************************************/
69    
70    /* The code for doing this is held in a separate file that is also included in
71    pcre.c when it is compiled with the debug switch. It defines a function called
72    print_internals(), which uses a table of opcode lengths defined by the macro
73    OP_LENGTHS, whose name must be OP_lengths. */
74    
75    static uschar OP_lengths[] = { OP_LENGTHS };
76    
77    #include "printint.c"
78    
79    
80    
81    /*************************************************
82    *          Read number from string               *
83    *************************************************/
84    
85    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
86    around with conditional compilation, just do the job by hand. It is only used
87    for unpicking the -o argument, so just keep it simple.
88    
89    Arguments:
90      str           string to be converted
91      endptr        where to put the end pointer
92    
93    Returns:        the converted value, as an int
94    */
95    
96    static int
97    get_value(unsigned char *str, unsigned char **endptr)
98    {
99    int result = 0;
100    while(*str != 0 && isspace(*str)) str++;
101    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
102    *endptr = str;
103    return(result);
104    }
105    
106    
107    
108  /*************************************************  /*************************************************
109  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
110  *************************************************/  *************************************************/
# Line 73  for (i = 0; i < sizeof(utf8_table1)/size Line 130  for (i = 0; i < sizeof(utf8_table1)/size
130    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
131  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
132  if (cvalue < 0) return -1;  if (cvalue < 0) return -1;
133  *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
134  cvalue >>= 6 - i;  buffer += i;
135  for (j = 0; j < i; j++)  for (j = i; j > 0; j--)
136    {   {
137    *buffer++ = 0x80 | (cvalue & 0x3f);   *buffer-- = 0x80 | (cvalue & 0x3f);
138    cvalue >>= 6;   cvalue >>= 6;
139    }   }
140    *buffer = utf8_table2[i] | cvalue;
141  return i + 1;  return i + 1;
142  }  }
143    
# Line 99  Returns:   >  0 => the number of bytes c Line 157  Returns:   >  0 => the number of bytes c
157             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
158  */  */
159    
160  int  static int
161  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
162  {  {
163  int c = *buffer++;  int c = *buffer++;
# Line 117  if (i == 0 || i == 6) return 0;        / Line 175  if (i == 0 || i == 6) return 0;        /
175    
176  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
177    
178  d = c & utf8_table3[i];  s = 6*i;
179  s = 6 - i;  d = (c & utf8_table3[i]) << s;
180    
181  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
182    {    {
183    c = *buffer++;    c = *buffer++;
184    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
185      s -= 6;
186    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
187    }    }
188    
189  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
# Line 142  return i+1; Line 200  return i+1;
200    
201    
202    
203    /*************************************************
204    *             Print character string             *
205    *************************************************/
206    
207    /* Character string printing function. Must handle UTF-8 strings in utf8
208    mode. Yields number of characters printed. If handed a NULL file, just counts
209    chars without printing. */
210    
211    static int pchars(unsigned char *p, int length, FILE *f)
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
   
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
   
   
 static void print_internals(pcre *re)  
212  {  {
213  unsigned char *code = ((real_pcre *)re)->code;  int c;
214    int yield = 0;
 fprintf(outfile, "------------------------------------------------------------------\n");  
215    
216  for(;;)  while (length-- > 0)
217    {    {
218    int c;    if (use_utf8)
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
219      {      {
220      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      int rc = utf82ord(p, &c);
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
221    
222      case OP_CLASS:      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
223        {        {
224        int i, min, max;        length -= rc - 1;
225        code++;        p += rc;
226        fprintf(outfile, "    [");        if (c < 256 && isprint(c))
227            {
228        for (i = 0; i < 256; i++)          if (f != NULL) fprintf(f, "%c", c);
229          {          yield++;
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
230          }          }
231        fprintf(outfile, "]");        else
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
232          {          {
233          case OP_CRSTAR:          int n;
234          case OP_CRMINSTAR:          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
235          case OP_CRPLUS:          yield += n;
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
   
         default:  
         code--;  
236          }          }
237          continue;
238        }        }
239      break;      }
240    
241      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
242    
243      default:    if (isprint(c = *(p++)))
244      fprintf(outfile, "    %s", OP_names[*code]);      {
245      break;      if (f != NULL) fprintf(f, "%c", c);
246        yield++;
247        }
248      else
249        {
250        if (f != NULL) fprintf(f, "\\x%02x", c);
251        yield += 4;
252      }      }
   
   code++;  
   fprintf(outfile, "\n");  
253    }    }
254    
255    return yield;
256  }  }
257    
258    
259    
260  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
261    *              Callout function                  *
262    *************************************************/
263    
264    /* Called from PCRE as a result of the (?C) item. We print out where we are in
265    the match. Yield zero unless more callouts than the fail count, or the callout
266    data is not zero. */
267    
268  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
269  {  {
270  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
271  while (length-- > 0)  int i, pre_start, post_start;
272    
273    if (callout_extra)
274    {    {
275    if (utf8)    fprintf(f, "Callout %d: last capture = %d\n",
276        cb->callout_number, cb->capture_last);
277    
278      for (i = 0; i < cb->capture_top * 2; i += 2)
279      {      {
280      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
281      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
282        else
283        {        {
284        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
285        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
286        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
287          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
288        }        }
289      }      }
290      }
291    
292     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
293    details. On subsequent calls in the same match, we use pchars just to find the
294    printed lengths of the substrings. */
295    
296    if (f != NULL) fprintf(f, "--->");
297    
298    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
299    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
300      cb->current_position - cb->start_match, f);
301    
302    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
303      else fprintf(outfile, "\\x%02x", c);    cb->subject_length - cb->current_position, f);
304    
305    if (f != NULL) fprintf(f, "\n");
306    
307    /* Always print appropriate indicators, with callout number if not already
308    shown */
309    
310    if (callout_extra) fprintf(outfile, "    ");
311      else fprintf(outfile, "%3d ", cb->callout_number);
312    
313    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314    fprintf(outfile, "^");
315    
316    if (post_start > 0)
317      {
318      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
319      fprintf(outfile, "^");
320      }
321    
322    fprintf(outfile, "\n");
323    first_callout = 0;
324    
325    if (cb->callout_data != NULL)
326      {
327      int callout_data = *((int *)(cb->callout_data));
328      if (callout_data != 0)
329        {
330        fprintf(outfile, "Callout data = %d\n", callout_data);
331        return callout_data;
332        }
333    }    }
334    
335    return (cb->callout_number != callout_fail_id)? 0 :
336           (++callout_count >= callout_fail_count)? 1 : 0;
337  }  }
338    
339    
340    /*************************************************
341    *            Local malloc function               *
342    *************************************************/
343    
344  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
345  compiled re. */  compiled re. */
# Line 407  compiled re. */ Line 347  compiled re. */
347  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
348  {  {
349  gotten_store = size;  gotten_store = size;
 if (log_store)  
   fprintf(outfile, "Memory allocation (code space): %d\n",  
     (int)((int)size - offsetof(real_pcre, code[0])));  
350  return malloc(size);  return malloc(size);
351  }  }
352    
353    
354    
355    /*************************************************
356    *          Call pcre_fullinfo()                  *
357    *************************************************/
358    
359  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
360    
# Line 427  if ((rc = pcre_fullinfo(re, study, optio Line 367  if ((rc = pcre_fullinfo(re, study, optio
367    
368    
369    
370    /*************************************************
371    *                Main Program                    *
372    *************************************************/
373    
374  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
375  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 441  int op = 1; Line 384  int op = 1;
384  int timeit = 0;  int timeit = 0;
385  int showinfo = 0;  int showinfo = 0;
386  int showstore = 0;  int showstore = 0;
387    int size_offsets = 45;
388    int size_offsets_max;
389    int *offsets;
390    #if !defined NOPOSIX
391  int posix = 0;  int posix = 0;
392    #endif
393  int debug = 0;  int debug = 0;
394  int done = 0;  int done = 0;
395  unsigned char buffer[30000];  
396  unsigned char dbuffer[1024];  unsigned char *buffer;
397    unsigned char *dbuffer;
398    
399    /* Get buffers from malloc() so that Electric Fence will check their misuse
400    when I am debugging. */
401    
402    buffer = (unsigned char *)malloc(BUFFER_SIZE);
403    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
404    
405  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
406    
# Line 455  outfile = stdout; Line 410  outfile = stdout;
410    
411  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
412    {    {
413      unsigned char *endptr;
414    
415    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
416      showstore = 1;      showstore = 1;
417    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
418    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
419    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
420      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
421          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
422            *endptr == 0))
423        {
424        op++;
425        argc--;
426        }
427    #if !defined NOPOSIX
428    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
429    #endif
430      else if (strcmp(argv[op], "-C") == 0)
431        {
432        int rc;
433        printf("PCRE version %s\n", pcre_version());
434        printf("Compiled with\n");
435        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
436        printf("  %sUTF-8 support\n", rc? "" : "No ");
437        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
438        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
439        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
440        printf("  Internal link size = %d\n", rc);
441        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
442        printf("  POSIX malloc threshold = %d\n", rc);
443        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
444        printf("  Default match limit = %d\n", rc);
445        exit(0);
446        }
447    else    else
448      {      {
449      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
450      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
451      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
452             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
453             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
454             "  -s   output store information\n"             "  -o <n> set size of offsets vector to <n>\n");
455             "  -t   time compilation and execution\n");  #if !defined NOPOSIX
456        printf("  -p     use POSIX interface\n");
457    #endif
458        printf("  -s     output store information\n"
459               "  -t     time compilation and execution\n");
460      return 1;      return 1;
461      }      }
462    op++;    op++;
463    argc--;    argc--;
464    }    }
465    
466    /* Get the store for the offsets vector, and remember what it was */
467    
468    size_offsets_max = size_offsets;
469    offsets = (int *)malloc(size_offsets_max * sizeof(int));
470    if (offsets == NULL)
471      {
472      printf("** Failed to get %d bytes of memory for offsets vector\n",
473        size_offsets_max * sizeof(int));
474      return 1;
475      }
476    
477  /* Sort out the input and output files */  /* Sort out the input and output files */
478    
479  if (argc > 1)  if (argc > 1)
# Line 520  while (!done) Line 518  while (!done)
518    
519    const char *error;    const char *error;
520    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
521    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
522    int do_study = 0;    int do_study = 0;
523    int do_debug = debug;    int do_debug = debug;
524    int do_G = 0;    int do_G = 0;
525    int do_g = 0;    int do_g = 0;
526    int do_showinfo = showinfo;    int do_showinfo = showinfo;
527    int do_showrest = 0;    int do_showrest = 0;
   int utf8 = 0;  
528    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
529    
530      use_utf8 = 0;
531    
532    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
533    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
534    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
535      fflush(outfile);
536    
537    p = buffer;    p = buffer;
538    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 561  while (!done) Line 561  while (!done)
561        }        }
562      if (*pp != 0) break;      if (*pp != 0) break;
563    
564      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
565      if (len < 256)      if (len < 256)
566        {        {
567        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 611  while (!done) Line 611  while (!done)
611        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
612        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
613        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
614          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
615    
616  #if !defined NOPOSIX  #if !defined NOPOSIX
617        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 619  while (!done) Line 620  while (!done)
620        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
621        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
622        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
623        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
624          case '?': options |= PCRE_NO_UTF8_CHECK; break;
625    
626        case 'L':        case 'L':
627        ppp = pp;        ppp = pp;
# Line 659  while (!done) Line 661  while (!done)
661    
662      if (rc != 0)      if (rc != 0)
663        {        {
664        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
665        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
666        goto SKIP_DATA;        goto SKIP_DATA;
667        }        }
# Line 683  while (!done) Line 685  while (!done)
685          }          }
686        time_taken = clock() - start_time;        time_taken = clock() - start_time;
687        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
688          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
689          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
690        }        }
691    
692      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 700  while (!done) Line 702  while (!done)
702          {          {
703          for (;;)          for (;;)
704            {            {
705            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
706              {              {
707              done = 1;              done = 1;
708              goto CONTINUE;              goto CONTINUE;
# Line 718  while (!done) Line 720  while (!done)
720      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
721      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
722    
723        if (log_store)
724          fprintf(outfile, "Memory allocation (code space): %d\n",
725            (int)(gotten_store -
726                  sizeof(real_pcre) -
727                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
728    
729      if (do_showinfo)      if (do_showinfo)
730        {        {
731          unsigned long int get_options;
732        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
733        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
734          int nameentrysize, namecount;
735          const uschar *nametable;
736        size_t size;        size_t size;
737    
738        if (do_debug) print_internals(re);        if (do_debug)
739            {
740            fprintf(outfile, "------------------------------------------------------------------\n");
741            print_internals(re, outfile);
742            }
743    
744        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
745        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
746        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
747        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
748        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
749        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
750          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
751          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
752          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
753    
754        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
755        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 746  while (!done) Line 764  while (!done)
764            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
765              first_char, old_first_char);              first_char, old_first_char);
766    
767          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
768            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
769              old_options);              get_options, old_options);
770          }          }
771    
772        if (size != gotten_store) fprintf(outfile,        if (size != gotten_store) fprintf(outfile,
# Line 758  while (!done) Line 776  while (!done)
776        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
777        if (backrefmax > 0)        if (backrefmax > 0)
778          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
779        if (options == 0) fprintf(outfile, "No options\n");  
780          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",        if (namecount > 0)
781            ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          {
782            ((options & PCRE_CASELESS) != 0)? " caseless" : "",          fprintf(outfile, "Named capturing subpatterns:\n");
783            ((options & PCRE_EXTENDED) != 0)? " extended" : "",          while (namecount-- > 0)
784            ((options & PCRE_MULTILINE) != 0)? " multiline" : "",            {
785            ((options & PCRE_DOTALL) != 0)? " dotall" : "",            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
786            ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
787            ((options & PCRE_EXTRA) != 0)? " extra" : "",              GET2(nametable, 0));
788            ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            nametable += nameentrysize;
789            ((options & PCRE_UTF8) != 0)? " utf8" : "");            }
790            }
791    
792          if (get_options == 0) fprintf(outfile, "No options\n");
793            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
794              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
795              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
796              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
797              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
798              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
799              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
800              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
801              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
802              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
803              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
804    
805        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
806          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 783  while (!done) Line 815  while (!done)
815          }          }
816        else        else
817          {          {
818          if (isprint(first_char))          int ch = first_char & 255;
819            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
820              "" : " (caseless)";
821            if (isprint(ch))
822              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
823          else          else
824            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
825          }          }
826    
827        if (need_char < 0)        if (need_char < 0)
# Line 795  while (!done) Line 830  while (!done)
830          }          }
831        else        else
832          {          {
833          if (isprint(need_char))          int ch = need_char & 255;
834            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
835              "" : " (caseless)";
836            if (isprint(ch))
837              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
838          else          else
839            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
840          }          }
841        }        }
842    
# Line 817  while (!done) Line 855  while (!done)
855          time_taken = clock() - start_time;          time_taken = clock() - start_time;
856          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
857          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
858            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
859            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
860          }          }
861    
862        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 827  while (!done) Line 865  while (!done)
865        else if (extra == NULL)        else if (extra == NULL)
866          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
867    
868          /* Don't output study size; at present it is in any case a fixed
869          value, but it varies, depending on the computer architecture, and
870          so messes up the test suite. */
871    
872        else if (do_showinfo)        else if (do_showinfo)
873          {          {
874            size_t size;
875          uschar *start_bits = NULL;          uschar *start_bits = NULL;
876            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
877          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
878            /* fprintf(outfile, "Study size = %d\n", size); */
879          if (start_bits == NULL)          if (start_bits == NULL)
880            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
881          else          else
# Line 871  while (!done) Line 916  while (!done)
916      {      {
917      unsigned char *q;      unsigned char *q;
918      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
919        int *use_offsets = offsets;
920        int use_size_offsets = size_offsets;
921        int callout_data = 0;
922        int callout_data_set = 0;
923      int count, c;      int count, c;
924      int copystrings = 0;      int copystrings = 0;
925        int find_match_limit = 0;
926      int getstrings = 0;      int getstrings = 0;
927      int getlist = 0;      int getlist = 0;
928      int gmatched = 0;      int gmatched = 0;
929      int start_offset = 0;      int start_offset = 0;
930      int g_notempty = 0;      int g_notempty = 0;
     int offsets[45];  
     int size_offsets = sizeof(offsets)/sizeof(int);  
931    
932      options = 0;      options = 0;
933    
934        pcre_callout = callout;
935        first_callout = 1;
936        callout_extra = 0;
937        callout_count = 0;
938        callout_fail_count = 999999;
939        callout_fail_id = -1;
940    
941      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
942      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
943        {        {
944        done = 1;        done = 1;
945        goto CONTINUE;        goto CONTINUE;
# Line 904  while (!done) Line 959  while (!done)
959        {        {
960        int i = 0;        int i = 0;
961        int n = 0;        int n = 0;
962    
963        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
964          {          {
965          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 934  while (!done) Line 990  while (!done)
990              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
991            if (*pt == '}')            if (*pt == '}')
992              {              {
993              unsigned char buffer[8];              unsigned char buff8[8];
994              int ii, utn;              int ii, utn;
995              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
996              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
997              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
998              p = pt + 1;              p = pt + 1;
999              break;              break;
1000              }              }
# Line 968  while (!done) Line 1024  while (!done)
1024          continue;          continue;
1025    
1026          case 'C':          case 'C':
1027          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1028          copystrings |= 1 << n;            {
1029              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1030              copystrings |= 1 << n;
1031              }
1032            else if (isalnum(*p))
1033              {
1034              uschar name[256];
1035              uschar *npp = name;
1036              while (isalnum(*p)) *npp++ = *p++;
1037              *npp = 0;
1038              n = pcre_get_stringnumber(re, (char *)name);
1039              if (n < 0)
1040                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1041              else copystrings |= 1 << n;
1042              }
1043            else if (*p == '+')
1044              {
1045              callout_extra = 1;
1046              p++;
1047              }
1048            else if (*p == '-')
1049              {
1050              pcre_callout = NULL;
1051              p++;
1052              }
1053            else if (*p == '!')
1054              {
1055              callout_fail_id = 0;
1056              p++;
1057              while(isdigit(*p))
1058                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1059              callout_fail_count = 0;
1060              if (*p == '!')
1061                {
1062                p++;
1063                while(isdigit(*p))
1064                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1065                }
1066              }
1067            else if (*p == '*')
1068              {
1069              int sign = 1;
1070              callout_data = 0;
1071              if (*(++p) == '-') { sign = -1; p++; }
1072              while(isdigit(*p))
1073                callout_data = callout_data * 10 + *p++ - '0';
1074              callout_data *= sign;
1075              callout_data_set = 1;
1076              }
1077          continue;          continue;
1078    
1079          case 'G':          case 'G':
1080          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1081          getstrings |= 1 << n;            {
1082              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1083              getstrings |= 1 << n;
1084              }
1085            else if (isalnum(*p))
1086              {
1087              uschar name[256];
1088              uschar *npp = name;
1089              while (isalnum(*p)) *npp++ = *p++;
1090              *npp = 0;
1091              n = pcre_get_stringnumber(re, (char *)name);
1092              if (n < 0)
1093                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1094              else getstrings |= 1 << n;
1095              }
1096          continue;          continue;
1097    
1098          case 'L':          case 'L':
1099          getlist = 1;          getlist = 1;
1100          continue;          continue;
1101    
1102            case 'M':
1103            find_match_limit = 1;
1104            continue;
1105    
1106          case 'N':          case 'N':
1107          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1108          continue;          continue;
1109    
1110          case 'O':          case 'O':
1111          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1112          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1113              {
1114              size_offsets_max = n;
1115              free(offsets);
1116              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1117              if (offsets == NULL)
1118                {
1119                printf("** Failed to get %d bytes of memory for offsets vector\n",
1120                  size_offsets_max * sizeof(int));
1121                return 1;
1122                }
1123              }
1124            use_size_offsets = n;
1125            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1126          continue;          continue;
1127    
1128          case 'Z':          case 'Z':
1129          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1130          continue;          continue;
1131    
1132            case '?':
1133            options |= PCRE_NO_UTF8_CHECK;
1134            continue;
1135          }          }
1136        *q++ = c;        *q++ = c;
1137        }        }
# Line 1000  while (!done) Line 1139  while (!done)
1139      len = q - dbuffer;      len = q - dbuffer;
1140    
1141      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1142      support timing. */      support timing or playing with the match limit or callout data. */
1143    
1144  #if !defined NOPOSIX  #if !defined NOPOSIX
1145      if (posix || do_posix)      if (posix || do_posix)
1146        {        {
1147        int rc;        int rc;
1148        int eflags = 0;        int eflags = 0;
1149        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
1150          if (use_size_offsets > 0)
1151            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1152        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1153        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1154    
1155        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1156    
1157        if (rc != 0)        if (rc != 0)
1158          {          {
1159          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1160          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1161          }          }
1162        else        else
1163          {          {
1164          size_t i;          size_t i;
1165          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1166            {            {
1167            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1168              {              {
1169              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1170              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1171                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1172              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1173              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1174                {                {
1175                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1176                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1177                    outfile);
1178                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1179                }                }
1180              }              }
1181            }            }
1182          }          }
1183          free(pmatch);
1184        }        }
1185    
1186      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 1054  while (!done) Line 1197  while (!done)
1197          clock_t start_time = clock();          clock_t start_time = clock();
1198          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1199            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1200              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1201          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1202          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1203            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1204            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1205          }          }
1206    
1207        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1208          start_offset, options | g_notempty, offsets, size_offsets);        varying limits in order to find the minimum value. */
1209    
1210          if (find_match_limit)
1211            {
1212            int min = 0;
1213            int mid = 64;
1214            int max = -1;
1215    
1216            if (extra == NULL)
1217              {
1218              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1219              extra->flags = 0;
1220              }
1221            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1222    
1223            for (;;)
1224              {
1225              extra->match_limit = mid;
1226              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1227                options | g_notempty, use_offsets, use_size_offsets);
1228              if (count == PCRE_ERROR_MATCHLIMIT)
1229                {
1230                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1231                min = mid;
1232                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1233                }
1234              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1235                {
1236                if (mid == min + 1)
1237                  {
1238                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1239                  break;
1240                  }
1241                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1242                max = mid;
1243                mid = (min + mid)/2;
1244                }
1245              else break;    /* Some other error */
1246              }
1247    
1248            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1249            }
1250    
1251          /* If callout_data is set, use the interface with additional data */
1252    
1253          else if (callout_data_set)
1254            {
1255            if (extra == NULL)
1256              {
1257              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1258              extra->flags = 0;
1259              }
1260            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1261            extra->callout_data = &callout_data;
1262            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1263              options | g_notempty, use_offsets, use_size_offsets);
1264            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1265            }
1266    
1267          /* The normal case is just to do the match once, with the default
1268          value of match_limit. */
1269    
1270          else count = pcre_exec(re, extra, (char *)bptr, len,
1271            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1272    
1273        if (count == 0)        if (count == 0)
1274          {          {
1275          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1276          count = size_offsets/3;          count = use_size_offsets/3;
1277          }          }
1278    
1279        /* Matched */        /* Matched */
# Line 1077  while (!done) Line 1283  while (!done)
1283          int i;          int i;
1284          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1285            {            {
1286            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1287              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1288            else            else
1289              {              {
1290              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1291              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1292                  use_offsets[i+1] - use_offsets[i], outfile);
1293              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1294              if (i == 0)              if (i == 0)
1295                {                {
1296                if (do_showrest)                if (do_showrest)
1297                  {                  {
1298                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1299                  pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1300                      outfile);
1301                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1302                  }                  }
1303                }                }
# Line 1101  while (!done) Line 1309  while (!done)
1309            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1310              {              {
1311              char copybuffer[16];              char copybuffer[16];
1312              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1313                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1314              if (rc < 0)              if (rc < 0)
1315                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 1115  while (!done) Line 1323  while (!done)
1323            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1324              {              {
1325              const char *substring;              const char *substring;
1326              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1327                i, &substring);                i, &substring);
1328              if (rc < 0)              if (rc < 0)
1329                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
# Line 1131  while (!done) Line 1339  while (!done)
1339          if (getlist)          if (getlist)
1340            {            {
1341            const char **stringlist;            const char **stringlist;
1342            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1343              &stringlist);              &stringlist);
1344            if (rc < 0)            if (rc < 0)
1345              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 1157  while (!done) Line 1365  while (!done)
1365          {          {
1366          if (g_notempty != 0)          if (g_notempty != 0)
1367            {            {
1368            offsets[0] = start_offset;            use_offsets[0] = start_offset;
1369            offsets[1] = start_offset + 1;            use_offsets[1] = start_offset + 1;
1370            }            }
1371          else          else
1372            {            {
# Line 1183  while (!done) Line 1391  while (!done)
1391        character. */        character. */
1392    
1393        g_notempty = 0;        g_notempty = 0;
1394        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1395          {          {
1396          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1397          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1398          }          }
1399    
1400        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1401    
1402        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1403    
1404        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1405    
1406        else        else
1407          {          {
1408          bptr += offsets[1];          bptr += use_offsets[1];
1409          len -= offsets[1];          len -= use_offsets[1];
1410          }          }
1411        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1412      }    /* End of loop for data lines */      }    /* End of loop for data lines */

Legend:
Removed from v.49  
changed lines
  Added in v.71

  ViewVC Help
Powered by ViewVC 1.1.5