/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 25 by nigel, Sat Feb 24 21:38:45 2007 UTC revision 65 by nigel, Sat Feb 24 21:40:08 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22    /* It is possible to compile this test program without including support for
23    testing the POSIX interface, though this is not available via the standard
24    Makefile. */
25    
26    #if !defined NOPOSIX
27  #include "pcreposix.h"  #include "pcreposix.h"
28    #endif
29    
30  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
31  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 35 
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 10000  #define LOOPREPEAT 50000
39    
40    
/* Stream that every result and diagnostic is written to. */

static FILE *outfile;

/* Non-zero when the code-space size of a compiled pattern is to be shown. */

static int log_store = 0;

/* State used by callout(): the running count of callouts whose number equals
callout_fail_id, the flag that asks for capture details on every callout, the
count at which callout() starts to yield 1, the callout number that is being
counted, and a flag that is cleared by callout() once it has been entered. */

static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int first_callout;

/* Non-zero while strings are to be treated as UTF-8 by pchars(). */

static int utf8;

/* Size of the most recent request passed to new_malloc(). */

static size_t gotten_store;
50    
51    
52    
/* Upper bound of the values that fit in a UTF-8 sequence of 1, 2, ... 6
bytes. The index of the first entry that is >= a value is the number of
additional bytes needed to encode it. */

static int utf8_table1[] = {
  0x0000007f,    /* 1 byte  */
  0x000007ff,    /* 2 bytes */
  0x0000ffff,    /* 3 bytes */
  0x001fffff,    /* 4 bytes */
  0x03ffffff,    /* 5 bytes */
  0x7fffffff };  /* 6 bytes */

/* Marker bits that are ORed into the first byte of a sequence, indexed by the
number of additional bytes. */

static int utf8_table2[] = {
  0,
  0xc0,
  0xe0,
  0xf0,
  0xf8,
  0xfc };

/* Mask that selects the data bits of the first byte of a sequence, indexed by
the number of additional bytes. */

static int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01 };
61    
62    
63    
64    /*************************************************
65    *         Print compiled regex                   *
66    *************************************************/
67    
68  static const char *OP_names[] = {  /* The code for doing this is held in a separate file that is also included in
69    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  pcre.c when it is compiled with the debug switch. It defines a function called
70    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  print_internals(), which uses a table of opcode lengths defined by the macro
71    "Opt", "^", "$", "Any", "chars", "not",  OP_LENGTHS, whose name must be OP_lengths. */
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
72    
73    static uschar OP_lengths[] = { OP_LENGTHS };
74    
75  static void print_internals(pcre *re, FILE *outfile)  #include "printint.c"
76    
77    
78    
79    /*************************************************
80    *          Read number from string               *
81    *************************************************/
82    
/* strtoul() is not used because SunOS4 does not have it. Instead of messing
about with conditional compilation, the conversion is done by hand. It is only
needed for decoding the -o argument, so it is kept very simple: leading white
space is skipped and then decimal digits are accumulated up to the first
character that is not a digit. There is no sign handling and no overflow
check.

Arguments:
  str           string to be converted
  endptr        where to put a pointer to the first unconsumed character

Returns:        the value that was read, as an int (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int value = 0;
unsigned char *s = str;

while (*s != 0 && isspace(*s)) s++;
for (; isdigit(*s); s++) value = value * 10 + (int)(*s - '0');

*endptr = s;
return(value);
}
103    
104    
 fprintf(outfile, "------------------------------------------------------------------\n");  
105    
106  for(;;)  /*************************************************
107    *       Convert character value to UTF-8         *
108    *************************************************/
109    
110    /* This function takes an integer value in the range 0 - 0x7fffffff
111    and encodes it as a UTF-8 character in 0 to 6 bytes.
112    
113    Arguments:
114      cvalue     the character value
115      buffer     pointer to buffer for result - at least 6 bytes long
116    
117    Returns:     number of characters placed in the buffer
118                 -1 if input character is negative
119                 0 if input character is positive but too big (only when
120                 int is longer than 32 bits)
121    */
122    
123    static int
124    ord2utf8(int cvalue, unsigned char *buffer)
125    {
126    register int i, j;
127    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
128      if (cvalue <= utf8_table1[i]) break;
129    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
130    if (cvalue < 0) return -1;
131    
132    buffer += i;
133    for (j = i; j > 0; j--)
134     {
135     *buffer-- = 0x80 | (cvalue & 0x3f);
136     cvalue >>= 6;
137     }
138    *buffer = utf8_table2[i] | cvalue;
139    return i + 1;
140    }
141    
142    
143    /*************************************************
144    *            Convert UTF-8 string to value       *
145    *************************************************/
146    
147    /* This function takes one or more bytes that represents a UTF-8 character,
148    and returns the value of the character.
149    
150    Argument:
151      buffer   a pointer to the byte vector
152      vptr     a pointer to an int to receive the value
153    
154    Returns:   >  0 => the number of bytes consumed
155               -6 to 0 => malformed UTF-8 character at offset = (-return)
156    */
157    
158    int
159    utf82ord(unsigned char *buffer, int *vptr)
160    {
161    int c = *buffer++;
162    int d = c;
163    int i, j, s;
164    
165    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
166    {    {
167    int c;    if ((d & 0x80) == 0) break;
168    int charlength;    d <<= 1;
169      }
170    
171    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
172    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
173    
174          case OP_CRRANGE:  /* i now has a value in the range 1-5 */
175          case OP_CRMINRANGE:  
176          min = (code[1] << 8) + code[2];  s = 6*i;
177          max = (code[3] << 8) + code[4];  d = (c & utf8_table3[i]) << s;
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
178    
179          default:  for (j = 0; j < i; j++)
180          code--;    {
181      c = *buffer++;
182      if ((c & 0xc0) != 0x80) return -(j+1);
183      s -= 6;
184      d |= (c & 0x3f) << s;
185      }
186    
187    /* Check that encoding was the correct unique one */
188    
189    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
190      if (d <= utf8_table1[j]) break;
191    if (j != i) return -(i+1);
192    
193    /* Valid value */
194    
195    *vptr = d;
196    return i+1;
197    }
198    
199    
200    
201    /*************************************************
202    *             Print character string             *
203    *************************************************/
204    
205    /* Character string printing function. Must handle UTF-8 strings in utf8
206    mode. Yields number of characters printed. If handed a NULL file, just counts
207    chars without printing. */
208    
209    static int pchars(unsigned char *p, int length, FILE *f)
210    {
211    int c;
212    int yield = 0;
213    
214    while (length-- > 0)
215      {
216      if (utf8)
217        {
218        int rc = utf82ord(p, &c);
219    
220        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
221          {
222          length -= rc - 1;
223          p += rc;
224          if (c < 256 && isprint(c))
225            {
226            if (f != NULL) fprintf(f, "%c", c);
227            yield++;
228          }          }
229          else
230            {
231            int n;
232            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
233            yield += n;
234            }
235          continue;
236        }        }
237      break;      }
238    
239      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
240    
241      default:    if (isprint(c = *(p++)))
242      fprintf(outfile, "    %s", OP_names[*code]);      {
243      break;      if (f != NULL) fprintf(f, "%c", c);
244        yield++;
245        }
246      else
247        {
248        if (f != NULL) fprintf(f, "\\x%02x", c);
249        yield += 4;
250      }      }
   
   code++;  
   fprintf(outfile, "\n");  
251    }    }
252    
253    return yield;
254  }  }
255    
256    
257    
258  /* Character string printing function. */  /*************************************************
259    *              Callout function                  *
260    *************************************************/
261    
262    /* Called from PCRE as a result of the (?C) item. We print out where we are in
263    the match. Yield zero unless more callouts than the fail count, or the callout
264    data is not zero. */
265    
266  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
267  {  {
268  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
269  while (length-- > 0)  int i, pre_start, post_start;
270    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
271      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
272      {
273      int i;
274      fprintf(f, "Callout %d: last capture = %d\n",
275        cb->callout_number, cb->capture_last);
276    
277      for (i = 0; i < cb->capture_top * 2; i += 2)
278        {
279        if (cb->offset_vector[i] < 0)
280          fprintf(f, "%2d: <unset>\n", i/2);
281        else
282          {
283          fprintf(f, "%2d: ", i/2);
284          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
285            cb->offset_vector[i+1] - cb->offset_vector[i], f);
286          fprintf(f, "\n");
287          }
288        }
289      }
290    
291    /* Re-print the subject in canonical form, the first time or if giving full
292    datails. On subsequent calls in the same match, we use pchars just to find the
293    printed lengths of the substrings. */
294    
295    if (f != NULL) fprintf(f, "--->");
296    
297    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
298    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
299      cb->current_position - cb->start_match, f);
300    
301    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
302      cb->subject_length - cb->current_position, f);
303    
304    if (f != NULL) fprintf(f, "\n");
305    
306    /* Always print appropriate indicators, with callout number if not already
307    shown */
308    
309    if (callout_extra) fprintf(outfile, "    ");
310      else fprintf(outfile, "%3d ", cb->callout_number);
311    
312    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
313    fprintf(outfile, "^");
314    
315    if (post_start > 0)
316      {
317      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
318      fprintf(outfile, "^");
319      }
320    
321    fprintf(outfile, "\n");
322    
323    first_callout = 0;
324    
325    if ((int)(cb->callout_data) != 0)
326      {
327      fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
328      return (int)(cb->callout_data);
329      }
330    
331    return (cb->callout_number != callout_fail_id)? 0 :
332           (++callout_count >= callout_fail_count)? 1 : 0;
333  }  }
334    
335    
336    /*************************************************
337    *            Local malloc function               *
338    *************************************************/
339    
340  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
341  compiled re. */  compiled re. */
342    
343  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
344  {  {
345  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
346  return malloc(size);  return malloc(size);
347  }  }
348    
349    
350    
351    /*************************************************
352    *          Call pcre_fullinfo()                  *
353    *************************************************/
354    
355    /* Get one piece of information from the pcre_fullinfo() function */
356    
357    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
358    {
359    int rc;
360    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
361      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
362    }
363    
364    
365    
366    /*************************************************
367    *                Main Program                    *
368    *************************************************/
369    
370  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
371  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
372  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 292  int study_options = 0; Line 379  int study_options = 0;
379  int op = 1;  int op = 1;
380  int timeit = 0;  int timeit = 0;
381  int showinfo = 0;  int showinfo = 0;
382    int showstore = 0;
383    int size_offsets = 45;
384    int size_offsets_max;
385    int *offsets;
386    #if !defined NOPOSIX
387  int posix = 0;  int posix = 0;
388    #endif
389  int debug = 0;  int debug = 0;
390  int done = 0;  int done = 0;
391  unsigned char buffer[30000];  unsigned char buffer[30000];
# Line 306  outfile = stdout; Line 399  outfile = stdout;
399    
400  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
401    {    {
402    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
403    
404      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
405        showstore = 1;
406    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
407    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
408    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
409      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
410          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
411            *endptr == 0))
412        {
413        op++;
414        argc--;
415        }
416    #if !defined NOPOSIX
417    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
418    #endif
419      else if (strcmp(argv[op], "-C") == 0)
420        {
421        int rc;
422        printf("PCRE version %s\n", pcre_version());
423        printf("Compiled with\n");
424        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
425        printf("  %sUTF-8 support\n", rc? "" : "No ");
426        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
427        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
428        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
429        printf("  Internal link size = %d\n", rc);
430        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
431        printf("  POSIX malloc threshold = %d\n", rc);
432        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
433        printf("  Default match limit = %d\n", rc);
434        exit(0);
435        }
436    else    else
437      {      {
438      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
439      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
440      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
441             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
442             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
443             "  -s   output store information\n"             "  -o <n> set size of offsets vector to <n>\n");
444             "  -t   time compilation and execution\n");  #if !defined NOPOSIX
445        printf("  -p     use POSIX interface\n");
446    #endif
447        printf("  -s     output store information\n"
448               "  -t     time compilation and execution\n");
449      return 1;      return 1;
450      }      }
451    op++;    op++;
452    argc--;    argc--;
453    }    }
454    
455    /* Get the store for the offsets vector, and remember what it was */
456    
457    size_offsets_max = size_offsets;
458    offsets = malloc(size_offsets_max * sizeof(int));
459    if (offsets == NULL)
460      {
461      printf("** Failed to get %d bytes of memory for offsets vector\n",
462        size_offsets_max * sizeof(int));
463      return 1;
464      }
465    
466  /* Sort out the input and output files */  /* Sort out the input and output files */
467    
468  if (argc > 1)  if (argc > 1)
# Line 362  while (!done) Line 499  while (!done)
499    {    {
500    pcre *re = NULL;    pcre *re = NULL;
501    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
502    
503    #if !defined NOPOSIX  /* There are still compilers that require no indent */
504    regex_t preg;    regex_t preg;
505      int do_posix = 0;
506    #endif
507    
508    const char *error;    const char *error;
509    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
510    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
511    int do_study = 0;    int do_study = 0;
512    int do_debug = debug;    int do_debug = debug;
513      int do_G = 0;
514      int do_g = 0;
515    int do_showinfo = showinfo;    int do_showinfo = showinfo;
516    int do_posix = 0;    int do_showrest = 0;
517    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
518    
519      utf8 = 0;
520    
521    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
522    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
523    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
524      fflush(outfile);
525    
526    p = buffer;    p = buffer;
527    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 385  while (!done) Line 532  while (!done)
532    
533    delimiter = *p++;    delimiter = *p++;
534    
535    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
536      {      {
537      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
538      goto SKIP_DATA;      goto SKIP_DATA;
539      }      }
540    
# Line 395  while (!done) Line 542  while (!done)
542    
543    for(;;)    for(;;)
544      {      {
545      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
546          {
547          if (*pp == '\\' && pp[1] != 0) pp++;
548            else if (*pp == delimiter) break;
549          pp++;
550          }
551      if (*pp != 0) break;      if (*pp != 0) break;
552    
553      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 415  while (!done) Line 567  while (!done)
567      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
568      }      }
569    
570      /* If the first character after the delimiter is backslash, make
571      the pattern end with backslash. This is purely to provide a way
572      of testing for the error message when a pattern ends with backslash. */
573    
574      if (pp[1] == '\\') *pp++ = '\\';
575    
576    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
577    
578    *pp++ = 0;    *pp++ = 0;
# Line 423  while (!done) Line 581  while (!done)
581    
582    options = 0;    options = 0;
583    study_options = 0;    study_options = 0;
584      log_store = showstore;  /* default from command line */
585    
586    while (*pp != 0)    while (*pp != 0)
587      {      {
588      switch (*pp++)      switch (*pp++)
589        {        {
590          case 'g': do_g = 1; break;
591        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
592        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
593        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
594        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
595    
596          case '+': do_showrest = 1; break;
597        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
598        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
599        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
600          case 'G': do_G = 1; break;
601        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
602          case 'M': log_store = 1; break;
603          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
604    
605    #if !defined NOPOSIX
606        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
607    #endif
608    
609        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
610        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
611        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
612          case '8': options |= PCRE_UTF8; utf8 = 1; break;
613    
614        case 'L':        case 'L':
615        ppp = pp;        ppp = pp;
# Line 465  while (!done) Line 635  while (!done)
635    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
636    local character tables. */    local character tables. */
637    
638    #if !defined NOPOSIX
639    if (posix || do_posix)    if (posix || do_posix)
640      {      {
641      int rc;      int rc;
# Line 487  while (!done) Line 658  while (!done)
658    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
659    
660    else    else
661    #endif  /* !defined NOPOSIX */
662    
663      {      {
664      if (timeit)      if (timeit)
665        {        {
# Line 499  while (!done) Line 672  while (!done)
672          if (re != NULL) free(re);          if (re != NULL) free(re);
673          }          }
674        time_taken = clock() - start_time;        time_taken = clock() - start_time;
675        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
676          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
677              (double)CLOCKS_PER_SEC);
678        }        }
679    
680      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 530  while (!done) Line 704  while (!done)
704        goto CONTINUE;        goto CONTINUE;
705        }        }
706    
707      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
708        info-returning functions. The old one has a limited interface and
709        returns only limited data. Check that it agrees with the newer one. */
710    
711        if (log_store)
712          fprintf(outfile, "Memory allocation (code space): %d\n",
713            (int)(gotten_store -
714                  sizeof(real_pcre) -
715                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
716    
717      if (do_showinfo)      if (do_showinfo)
718        {        {
719        int first_char, count;        unsigned long int get_options;
720          int old_first_char, old_options, old_count;
721          int count, backrefmax, first_char, need_char;
722          int nameentrysize, namecount;
723          const uschar *nametable;
724          size_t size;
725    
726        if (do_debug) print_internals(re, outfile);        if (do_debug)
727            {
728            fprintf(outfile, "------------------------------------------------------------------\n");
729            print_internals(re, outfile);
730            }
731    
732        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
733          new_info(re, NULL, PCRE_INFO_SIZE, &size);
734          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
735          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
736          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
737          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
738          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
739          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
740          new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);
741    
742          old_count = pcre_info(re, &old_options, &old_first_char);
743        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
744          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
745        else        else
746          {          {
747          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
748          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
749            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
750              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
751              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
752              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
753              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
754              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
755              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
756              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
757              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
758          if (first_char == -1)          }
759            {  
760            fprintf(outfile, "First char at start or follows \\n\n");        if (size != gotten_store) fprintf(outfile,
761            }          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
762          else if (first_char < 0)          size, gotten_store);
763    
764          fprintf(outfile, "Capturing subpattern count = %d\n", count);
765          if (backrefmax > 0)
766            fprintf(outfile, "Max back reference = %d\n", backrefmax);
767    
768          if (namecount > 0)
769            {
770            fprintf(outfile, "Named capturing subpatterns:\n");
771            while (namecount-- > 0)
772            {            {
773            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
774                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
775                GET2(nametable, 0));
776              nametable += nameentrysize;
777            }            }
778            }
779    
780          if (get_options == 0) fprintf(outfile, "No options\n");
781            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
782              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
783              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
784              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
785              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
786              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
787              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
788              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
789              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
790              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
791    
792          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
793            fprintf(outfile, "Case state changes\n");
794    
795          if (first_char == -1)
796            {
797            fprintf(outfile, "First char at start or follows \\n\n");
798            }
799          else if (first_char < 0)
800            {
801            fprintf(outfile, "No first char\n");
802            }
803          else
804            {
805            int ch = first_char & 255;
806            char *caseless = ((first_char & REQ_CASELESS) == 0)?
807              "" : " (caseless)";
808            if (isprint(ch))
809              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
810          else          else
811            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
812            if (isprint(first_char))          }
813              fprintf(outfile, "First char = \'%c\'\n", first_char);  
814            else        if (need_char < 0)
815              fprintf(outfile, "First char = %d\n", first_char);          {
816            }          fprintf(outfile, "No need char\n");
817            }
818          else
819            {
820            int ch = need_char & 255;
821            char *caseless = ((need_char & REQ_CASELESS) == 0)?
822              "" : " (caseless)";
823            if (isprint(ch))
824              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
825            else
826              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
827          }          }
828        }        }
829    
# Line 586  while (!done) Line 841  while (!done)
841            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
842          time_taken = clock() - start_time;          time_taken = clock() - start_time;
843          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
844          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
845            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
846                (double)CLOCKS_PER_SEC);
847          }          }
848    
849        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 596  while (!done) Line 852  while (!done)
852        else if (extra == NULL)        else if (extra == NULL)
853          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
854    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
855        else if (do_showinfo)        else if (do_showinfo)
856          {          {
857          real_pcre_extra *xx = (real_pcre_extra *)extra;          size_t size;
858          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          uschar *start_bits = NULL;
859            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
860            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
861            fprintf(outfile, "Study size = %d\n", size);
862            if (start_bits == NULL)
863            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
864          else          else
865            {            {
# Line 611  while (!done) Line 868  while (!done)
868            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
869            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
870              {              {
871              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
872                {                {
873                if (c > 75)                if (c > 75)
874                  {                  {
# Line 641  while (!done) Line 898  while (!done)
898    for (;;)    for (;;)
899      {      {
900      unsigned char *q;      unsigned char *q;
901        unsigned char *bptr = dbuffer;
902        int *use_offsets = offsets;
903        int use_size_offsets = size_offsets;
904        int callout_data = 0;
905        int callout_data_set = 0;
906      int count, c;      int count, c;
907      int offsets[45];      int copystrings = 0;
908      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
909        int getstrings = 0;
910        int getlist = 0;
911        int gmatched = 0;
912        int start_offset = 0;
913        int g_notempty = 0;
914    
915      options = 0;      options = 0;
916    
917      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
918        first_callout = 1;
919        callout_extra = 0;
920        callout_count = 0;
921        callout_fail_count = 999999;
922        callout_fail_id = -1;
923    
924        if (infile == stdin) printf("data> ");
925      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
926        {        {
927        done = 1;        done = 1;
# Line 668  while (!done) Line 942  while (!done)
942        {        {
943        int i = 0;        int i = 0;
944        int n = 0;        int n = 0;
945    
946        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
947          {          {
948          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 687  while (!done) Line 962  while (!done)
962          break;          break;
963    
964          case 'x':          case 'x':
965    
966            /* Handle \x{..} specially - new Perl thing for utf8 */
967    
968            if (*p == '{')
969              {
970              unsigned char *pt = p;
971              c = 0;
972              while (isxdigit(*(++pt)))
973                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
974              if (*pt == '}')
975                {
976                unsigned char buffer[8];
977                int ii, utn;
978                utn = ord2utf8(c, buffer);
979                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
980                c = buffer[ii];   /* Last byte */
981                p = pt + 1;
982                break;
983                }
984              /* Not correct form; fall through */
985              }
986    
987            /* Ordinary \x */
988    
989          c = 0;          c = 0;
990          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
991            {            {
# Line 707  while (!done) Line 1006  while (!done)
1006          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1007          continue;          continue;
1008    
1009            case 'C':
1010            if (isdigit(*p))    /* Set copy string */
1011              {
1012              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013              copystrings |= 1 << n;
1014              }
1015            else if (isalnum(*p))
1016              {
1017              uschar name[256];
1018              uschar *pp = name;
1019              while (isalnum(*p)) *pp++ = *p++;
1020              *pp = 0;
1021              n = pcre_get_stringnumber(re, (char *)name);
1022              if (n < 0)
1023                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1024              else copystrings |= 1 << n;
1025              }
1026            else if (*p == '+')
1027              {
1028              callout_extra = 1;
1029              p++;
1030              }
1031            else if (*p == '-')
1032              {
1033              pcre_callout = NULL;
1034              p++;
1035              }
1036            else if (*p == '!')
1037              {
1038              callout_fail_id = 0;
1039              p++;
1040              while(isdigit(*p))
1041                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1042              callout_fail_count = 0;
1043              if (*p == '!')
1044                {
1045                p++;
1046                while(isdigit(*p))
1047                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1048                }
1049              }
1050            else if (*p == '*')
1051              {
1052              int sign = 1;
1053              callout_data = 0;
1054              if (*(++p) == '-') { sign = -1; p++; }
1055              while(isdigit(*p))
1056                callout_data = callout_data * 10 + *p++ - '0';
1057              callout_data *= sign;
1058              callout_data_set = 1;
1059              }
1060            continue;
1061    
1062            case 'G':
1063            if (isdigit(*p))
1064              {
1065              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1066              getstrings |= 1 << n;
1067              }
1068            else if (isalnum(*p))
1069              {
1070              uschar name[256];
1071              uschar *pp = name;
1072              while (isalnum(*p)) *pp++ = *p++;
1073              *pp = 0;
1074              n = pcre_get_stringnumber(re, (char *)name);
1075              if (n < 0)
1076                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1077              else getstrings |= 1 << n;
1078              }
1079            continue;
1080    
1081            case 'L':
1082            getlist = 1;
1083            continue;
1084    
1085            case 'M':
1086            find_match_limit = 1;
1087            continue;
1088    
1089            case 'N':
1090            options |= PCRE_NOTEMPTY;
1091            continue;
1092    
1093          case 'O':          case 'O':
1094          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1095          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1096              {
1097              size_offsets_max = n;
1098              free(offsets);
1099              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1100              if (offsets == NULL)
1101                {
1102                printf("** Failed to get %d bytes of memory for offsets vector\n",
1103                  size_offsets_max * sizeof(int));
1104                return 1;
1105                }
1106              }
1107            use_size_offsets = n;
1108            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1109          continue;          continue;
1110    
1111          case 'Z':          case 'Z':
# Line 722  while (!done) Line 1118  while (!done)
1118      len = q - dbuffer;      len = q - dbuffer;
1119    
1120      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1121      support timing. */      support timing or playing with the match limit or callout data. */
1122    
1123    #if !defined NOPOSIX
1124      if (posix || do_posix)      if (posix || do_posix)
1125        {        {
1126        int rc;        int rc;
1127        int eflags = 0;        int eflags = 0;
1128        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1129          if (use_size_offsets > 0)
1130            pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1131        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1132        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1133    
1134        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1135    
1136        if (rc != 0)        if (rc != 0)
1137          {          {
# Line 743  while (!done) Line 1141  while (!done)
1141        else        else
1142          {          {
1143          size_t i;          size_t i;
1144          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1145            {            {
1146            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1147              {              {
1148              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1149              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1150                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1151              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1152                if (i == 0 && do_showrest)
1153                  {
1154                  fprintf(outfile, " 0+ ");
1155                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1156                    outfile);
1157                  fprintf(outfile, "\n");
1158                  }
1159              }              }
1160            }            }
1161          }          }
1162          free(pmatch);
1163        }        }
1164    
1165      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1166    
1167      else      else
1168    #endif  /* !defined NOPOSIX */
1169    
1170        for (;; gmatched++)    /* Loop for /g or /G */
1171        {        {
1172        if (timeit)        if (timeit)
1173          {          {
1174          register int i;          register int i;
1175          clock_t time_taken;          clock_t time_taken;
1176          clock_t start_time = clock();          clock_t start_time = clock();
1177          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1178            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1179              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1180          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1181          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1182            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1183                (double)CLOCKS_PER_SEC);
1184            }
1185    
1186          /* If find_match_limit is set, we want to do repeated matches with
1187          varying limits in order to find the minimum value. */
1188    
1189          if (find_match_limit)
1190            {
1191            int min = 0;
1192            int mid = 64;
1193            int max = -1;
1194    
1195            if (extra == NULL)
1196              {
1197              extra = malloc(sizeof(pcre_extra));
1198              extra->flags = 0;
1199              }
1200            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1201    
1202            for (;;)
1203              {
1204              extra->match_limit = mid;
1205              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1206                options | g_notempty, use_offsets, use_size_offsets);
1207              if (count == PCRE_ERROR_MATCHLIMIT)
1208                {
1209                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1210                min = mid;
1211                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1212                }
1213              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1214                {
1215                if (mid == min + 1)
1216                  {
1217                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1218                  break;
1219                  }
1220                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1221                max = mid;
1222                mid = (min + mid)/2;
1223                }
1224              else break;    /* Some other error */
1225              }
1226    
1227            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1228          }          }
1229    
1230        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If callout_data is set, use the interface with additional data */
1231          size_offsets);  
1232          else if (callout_data_set)
1233            {
1234            if (extra == NULL)
1235              {
1236              extra = malloc(sizeof(pcre_extra));
1237              extra->flags = 0;
1238              }
1239            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1240            extra->callout_data = (void *)callout_data;
1241            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1242              options | g_notempty, use_offsets, use_size_offsets);
1243            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1244            }
1245    
1246          /* The normal case is just to do the match once, with the default
1247          value of match_limit. */
1248    
1249          else count = pcre_exec(re, extra, (char *)bptr, len,
1250            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1251    
1252        if (count == 0)        if (count == 0)
1253          {          {
1254          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1255          count = size_offsets/3;          count = use_size_offsets/3;
1256          }          }
1257    
1258          /* Matched */
1259    
1260        if (count >= 0)        if (count >= 0)
1261          {          {
1262          int i;          int i;
1263          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1264            {            {
1265            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1266              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1267            else            else
1268              {              {
1269              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1270              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1271                  use_offsets[i+1] - use_offsets[i], outfile);
1272              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1273                if (i == 0)
1274                  {
1275                  if (do_showrest)
1276                    {
1277                    fprintf(outfile, " 0+ ");
1278                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1279                      outfile);
1280                    fprintf(outfile, "\n");
1281                    }
1282                  }
1283              }              }
1284            }            }
1285    
1286            for (i = 0; i < 32; i++)
1287              {
1288              if ((copystrings & (1 << i)) != 0)
1289                {
1290                char copybuffer[16];
1291                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1292                  i, copybuffer, sizeof(copybuffer));
1293                if (rc < 0)
1294                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1295                else
1296                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1297                }
1298              }
1299    
1300            for (i = 0; i < 32; i++)
1301              {
1302              if ((getstrings & (1 << i)) != 0)
1303                {
1304                const char *substring;
1305                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1306                  i, &substring);
1307                if (rc < 0)
1308                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1309                else
1310                  {
1311                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1312                  /* free((void *)substring); */
1313                  pcre_free_substring(substring);
1314                  }
1315                }
1316              }
1317    
1318            if (getlist)
1319              {
1320              const char **stringlist;
1321              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1322                &stringlist);
1323              if (rc < 0)
1324                fprintf(outfile, "get substring list failed %d\n", rc);
1325              else
1326                {
1327                for (i = 0; i < count; i++)
1328                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1329                if (stringlist[i] != NULL)
1330                  fprintf(outfile, "string list not terminated by NULL\n");
1331                /* free((void *)stringlist); */
1332                pcre_free_substring_list(stringlist);
1333                }
1334              }
1335            }
1336    
1337          /* Failed to match. If this is a /g or /G loop and we previously set
1338          g_notempty after a null match, this is not necessarily the end.
1339          We want to advance the start offset, and continue. Fudge the offset
1340          values to achieve this. We won't be at the end of the string - that
1341          was checked before setting g_notempty. */
1342    
1343          else
1344            {
1345            if (g_notempty != 0)
1346              {
1347              use_offsets[0] = start_offset;
1348              use_offsets[1] = start_offset + 1;
1349              }
1350            else
1351              {
1352              if (gmatched == 0)   /* Error if no previous matches */
1353                {
1354                if (count == -1) fprintf(outfile, "No match\n");
1355                  else fprintf(outfile, "Error %d\n", count);
1356                }
1357              break;  /* Out of the /g loop */
1358              }
1359            }
1360    
1361          /* If not /g or /G we are done */
1362    
1363          if (!do_g && !do_G) break;
1364    
1365          /* If we have matched an empty string, first check to see if we are at
1366          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1367          what Perl's /g options does. This turns out to be rather cunning. First
1368          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1369          same point. If this fails (picked up above) we advance to the next
1370          character. */
1371    
1372          g_notempty = 0;
1373          if (use_offsets[0] == use_offsets[1])
1374            {
1375            if (use_offsets[0] == len) break;
1376            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1377          }          }
1378    
1379          /* For /g, update the start offset, leaving the rest alone */
1380    
1381          if (do_g) start_offset = use_offsets[1];
1382    
1383          /* For /G, update the pointer and length */
1384    
1385        else        else
1386          {          {
1387          if (count == -1) fprintf(outfile, "No match\n");          bptr += use_offsets[1];
1388            else fprintf(outfile, "Error %d\n", count);          len -= use_offsets[1];
1389          }          }
1390        }        }  /* End of loop for /g and /G */
1391      }      }    /* End of loop for data lines */
1392    
1393    CONTINUE:    CONTINUE:
1394    
1395    #if !defined NOPOSIX
1396    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1397    #endif
1398    
1399    if (re != NULL) free(re);    if (re != NULL) free(re);
1400    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1401    if (tables != NULL)    if (tables != NULL)

Legend:
Removed from v.25  
changed lines
  Added in v.65

  ViewVC Help
Powered by ViewVC 1.1.5