/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 25 by nigel, Sat Feb 24 21:38:45 2007 UTC revision 69 by nigel, Sat Feb 24 21:40:18 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22    /* It is possible to compile this test program without including support for
23    testing the POSIX interface, though this is not available via the standard
24    Makefile. */
25    
26    #if !defined NOPOSIX
27  #include "pcreposix.h"  #include "pcreposix.h"
28    #endif
29    
30  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
31  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 35 
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 10000  #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE 1024
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int use_utf8;
52    static size_t gotten_store;
53    
54    
55    
56  /* Debugging function to print the internal form of the regex. This is the same  static const int utf8_table1[] = {
57  code as contained in pcre.c under the DEBUG macro. */    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
59  static const char *OP_names[] = {  static const int utf8_table2[] = {
60    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
62    "Opt", "^", "$", "Any", "chars", "not",  static const int utf8_table3[] = {
63    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
64    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
65    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
66    "*", "*?", "+", "+?", "?", "??", "{", "{",  
67    "class", "Ref",  /*************************************************
68    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  *         Print compiled regex                   *
69    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  *************************************************/
   "Brazero", "Braminzero", "Bra"  
 };  
70    
71    /* The code for doing this is held in a separate file that is also included in
72    pcre.c when it is compiled with the debug switch. It defines a function called
73    print_internals(), which uses a table of opcode lengths defined by the macro
74    OP_LENGTHS, whose name must be OP_lengths. */
75    
76  static void print_internals(pcre *re, FILE *outfile)  static uschar OP_lengths[] = { OP_LENGTHS };
77    
78    #include "printint.c"
79    
80    
81    
82    /*************************************************
83    *          Read number from string               *
84    *************************************************/
85    
86    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
87    around with conditional compilation, just do the job by hand. It is only used
88    for unpicking the -o argument, so just keep it simple.
89    
90    Arguments:
91      str           string to be converted
92      endptr        where to put the end pointer
93    
94    Returns:        the converted value, as an int
95    */
96    
97    static int
98    get_value(unsigned char *str, unsigned char **endptr)
99  {  {
100  unsigned char *code = ((real_pcre *)re)->code;  int result = 0;
101    while(*str != 0 && isspace(*str)) str++;
102    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
103    *endptr = str;
104    return(result);
105    }
106    
 fprintf(outfile, "------------------------------------------------------------------\n");  
107    
108  for(;;)  
109    /*************************************************
110    *       Convert character value to UTF-8         *
111    *************************************************/
112    
113    /* This function takes an integer value in the range 0 - 0x7fffffff
114    and encodes it as a UTF-8 character in 0 to 6 bytes.
115    
116    Arguments:
117      cvalue     the character value
118      buffer     pointer to buffer for result - at least 6 bytes long
119    
120    Returns:     number of bytes placed in the buffer
121                 -1 if input character is negative
122                 0 if input character is positive but too big (only when
123                 int is longer than 32 bits)
124    */
125    
126    static int
127    ord2utf8(int cvalue, unsigned char *buffer)
128    {
129    register int i, j;
130    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131      if (cvalue <= utf8_table1[i]) break;
132    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133    if (cvalue < 0) return -1;
134    
135    buffer += i;
136    for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141    *buffer = utf8_table2[i] | cvalue;
142    return i + 1;
143    }
144    
145    
146    /*************************************************
147    *            Convert UTF-8 string to value       *
148    *************************************************/
149    
150    /* This function takes one or more bytes that represents a UTF-8 character,
151    and returns the value of the character.
152    
153    Argument:
154      buffer   a pointer to the byte vector
155      vptr     a pointer to an int to receive the value
156    
157    Returns:   >  0 => the number of bytes consumed
158               -6 to 0 => malformed UTF-8 character at offset = (-return)
159    */
160    
161    static int
162    utf82ord(unsigned char *buffer, int *vptr)
163    {
164    int c = *buffer++;
165    int d = c;
166    int i, j, s;
167    
168    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
169    {    {
170    int c;    if ((d & 0x80) == 0) break;
171    int charlength;    d <<= 1;
172      }
173    
174    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
175    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
176    
177          case OP_CRRANGE:  /* i now has a value in the range 1-5 */
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
178    
179          default:  s = 6*i;
180          code--;  d = (c & utf8_table3[i]) << s;
181    
182    for (j = 0; j < i; j++)
183      {
184      c = *buffer++;
185      if ((c & 0xc0) != 0x80) return -(j+1);
186      s -= 6;
187      d |= (c & 0x3f) << s;
188      }
189    
190    /* Check that encoding was the correct unique one */
191    
192    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193      if (d <= utf8_table1[j]) break;
194    if (j != i) return -(i+1);
195    
196    /* Valid value */
197    
198    *vptr = d;
199    return i+1;
200    }
201    
202    
203    
204    /*************************************************
205    *             Print character string             *
206    *************************************************/
207    
208    /* Character string printing function. Must handle UTF-8 strings in utf8
209    mode. Yields number of characters printed. If handed a NULL file, just counts
210    chars without printing. */
211    
212    static int pchars(unsigned char *p, int length, FILE *f)
213    {
214    int c;
215    int yield = 0;
216    
217    while (length-- > 0)
218      {
219      if (use_utf8)
220        {
221        int rc = utf82ord(p, &c);
222    
223        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
224          {
225          length -= rc - 1;
226          p += rc;
227          if (c < 256 && isprint(c))
228            {
229            if (f != NULL) fprintf(f, "%c", c);
230            yield++;
231          }          }
232          else
233            {
234            int n;
235            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236            yield += n;
237            }
238          continue;
239        }        }
240      break;      }
241    
242      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
243    
244      default:    if (isprint(c = *(p++)))
245      fprintf(outfile, "    %s", OP_names[*code]);      {
246      break;      if (f != NULL) fprintf(f, "%c", c);
247        yield++;
248        }
249      else
250        {
251        if (f != NULL) fprintf(f, "\\x%02x", c);
252        yield += 4;
253      }      }
   
   code++;  
   fprintf(outfile, "\n");  
254    }    }
255    
256    return yield;
257  }  }
258    
259    
260    
261  /* Character string printing function. */  /*************************************************
262    *              Callout function                  *
263    *************************************************/
264    
265    /* Called from PCRE as a result of the (?C) item. We print out where we are in
266    the match. Yield zero unless more callouts than the fail count, or the callout
267    data is not zero. */
268    
269  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
270  {  {
271  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
272  while (length-- > 0)  int i, pre_start, post_start;
273    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
274      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
275      {
276      fprintf(f, "Callout %d: last capture = %d\n",
277        cb->callout_number, cb->capture_last);
278    
279      for (i = 0; i < cb->capture_top * 2; i += 2)
280        {
281        if (cb->offset_vector[i] < 0)
282          fprintf(f, "%2d: <unset>\n", i/2);
283        else
284          {
285          fprintf(f, "%2d: ", i/2);
286          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287            cb->offset_vector[i+1] - cb->offset_vector[i], f);
288          fprintf(f, "\n");
289          }
290        }
291      }
292    
293    /* Re-print the subject in canonical form, the first time or if giving full
294    details. On subsequent calls in the same match, we use pchars just to find the
295    printed lengths of the substrings. */
296    
297    if (f != NULL) fprintf(f, "--->");
298    
299    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301      cb->current_position - cb->start_match, f);
302    
303    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304      cb->subject_length - cb->current_position, f);
305    
306    if (f != NULL) fprintf(f, "\n");
307    
308    /* Always print appropriate indicators, with callout number if not already
309    shown */
310    
311    if (callout_extra) fprintf(outfile, "    ");
312      else fprintf(outfile, "%3d ", cb->callout_number);
313    
314    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315    fprintf(outfile, "^");
316    
317    if (post_start > 0)
318      {
319      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320      fprintf(outfile, "^");
321      }
322    
323    fprintf(outfile, "\n");
324    
325    first_callout = 0;
326    
327    if ((int)(cb->callout_data) != 0)
328      {
329      fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
330      return (int)(cb->callout_data);
331      }
332    
333    return (cb->callout_number != callout_fail_id)? 0 :
334           (++callout_count >= callout_fail_count)? 1 : 0;
335  }  }
336    
337    
338    /*************************************************
339    *            Local malloc function               *
340    *************************************************/
341    
342  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
343  compiled re. */  compiled re. */
344    
345  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
346  {  {
347  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
348  return malloc(size);  return malloc(size);
349  }  }
350    
351    
352    
353    /*************************************************
354    *          Call pcre_fullinfo()                  *
355    *************************************************/
356    
357    /* Get one piece of information from the pcre_fullinfo() function */
358    
359    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
360    {
361    int rc;
362    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
363      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
364    }
365    
366    
367    
368    /*************************************************
369    *                Main Program                    *
370    *************************************************/
371    
372  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
373  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
374  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 292  int study_options = 0; Line 381  int study_options = 0;
381  int op = 1;  int op = 1;
382  int timeit = 0;  int timeit = 0;
383  int showinfo = 0;  int showinfo = 0;
384    int showstore = 0;
385    int size_offsets = 45;
386    int size_offsets_max;
387    int *offsets;
388    #if !defined NOPOSIX
389  int posix = 0;  int posix = 0;
390    #endif
391  int debug = 0;  int debug = 0;
392  int done = 0;  int done = 0;
393  unsigned char buffer[30000];  
394  unsigned char dbuffer[1024];  unsigned char *buffer;
395    unsigned char *dbuffer;
396    
397    /* Get buffers from malloc() so that Electric Fence will check their misuse
398    when I am debugging. */
399    
400    buffer = malloc(BUFFER_SIZE);
401    dbuffer = malloc(DBUFFER_SIZE);
402    
403  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
404    
# Line 306  outfile = stdout; Line 408  outfile = stdout;
408    
409  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
410    {    {
411    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
412    
413      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
414        showstore = 1;
415    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
416    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
417    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
418      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
419          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
420            *endptr == 0))
421        {
422        op++;
423        argc--;
424        }
425    #if !defined NOPOSIX
426    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
427    #endif
428      else if (strcmp(argv[op], "-C") == 0)
429        {
430        int rc;
431        printf("PCRE version %s\n", pcre_version());
432        printf("Compiled with\n");
433        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
434        printf("  %sUTF-8 support\n", rc? "" : "No ");
435        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
436        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
437        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
438        printf("  Internal link size = %d\n", rc);
439        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
440        printf("  POSIX malloc threshold = %d\n", rc);
441        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
442        printf("  Default match limit = %d\n", rc);
443        exit(0);
444        }
445    else    else
446      {      {
447      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
448      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
449      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
450             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
451             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
452             "  -s   output store information\n"             "  -o <n> set size of offsets vector to <n>\n");
453             "  -t   time compilation and execution\n");  #if !defined NOPOSIX
454        printf("  -p     use POSIX interface\n");
455    #endif
456        printf("  -s     output store information\n"
457               "  -t     time compilation and execution\n");
458      return 1;      return 1;
459      }      }
460    op++;    op++;
461    argc--;    argc--;
462    }    }
463    
464    /* Get the store for the offsets vector, and remember what it was */
465    
466    size_offsets_max = size_offsets;
467    offsets = malloc(size_offsets_max * sizeof(int));
468    if (offsets == NULL)
469      {
470      printf("** Failed to get %d bytes of memory for offsets vector\n",
471        size_offsets_max * sizeof(int));
472      return 1;
473      }
474    
475  /* Sort out the input and output files */  /* Sort out the input and output files */
476    
477  if (argc > 1)  if (argc > 1)
# Line 362  while (!done) Line 508  while (!done)
508    {    {
509    pcre *re = NULL;    pcre *re = NULL;
510    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
511    
512    #if !defined NOPOSIX  /* There are still compilers that require no indent */
513    regex_t preg;    regex_t preg;
514      int do_posix = 0;
515    #endif
516    
517    const char *error;    const char *error;
518    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
519    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
520    int do_study = 0;    int do_study = 0;
521    int do_debug = debug;    int do_debug = debug;
522      int do_G = 0;
523      int do_g = 0;
524    int do_showinfo = showinfo;    int do_showinfo = showinfo;
525    int do_posix = 0;    int do_showrest = 0;
526    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
527    
528      use_utf8 = 0;
529    
530    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
531    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
532    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
533      fflush(outfile);
534    
535    p = buffer;    p = buffer;
536    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 385  while (!done) Line 541  while (!done)
541    
542    delimiter = *p++;    delimiter = *p++;
543    
544    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
545      {      {
546      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
547      goto SKIP_DATA;      goto SKIP_DATA;
548      }      }
549    
# Line 395  while (!done) Line 551  while (!done)
551    
552    for(;;)    for(;;)
553      {      {
554      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
555          {
556          if (*pp == '\\' && pp[1] != 0) pp++;
557            else if (*pp == delimiter) break;
558          pp++;
559          }
560      if (*pp != 0) break;      if (*pp != 0) break;
561    
562      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
563      if (len < 256)      if (len < 256)
564        {        {
565        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 415  while (!done) Line 576  while (!done)
576      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
577      }      }
578    
579      /* If the first character after the delimiter is backslash, make
580      the pattern end with backslash. This is purely to provide a way
581      of testing for the error message when a pattern ends with backslash. */
582    
583      if (pp[1] == '\\') *pp++ = '\\';
584    
585    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
586    
587    *pp++ = 0;    *pp++ = 0;
# Line 423  while (!done) Line 590  while (!done)
590    
591    options = 0;    options = 0;
592    study_options = 0;    study_options = 0;
593      log_store = showstore;  /* default from command line */
594    
595    while (*pp != 0)    while (*pp != 0)
596      {      {
597      switch (*pp++)      switch (*pp++)
598        {        {
599          case 'g': do_g = 1; break;
600        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
601        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
602        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
603        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
604    
605          case '+': do_showrest = 1; break;
606        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
607        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
608        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
609          case 'G': do_G = 1; break;
610        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
611          case 'M': log_store = 1; break;
612          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
613    
614    #if !defined NOPOSIX
615        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
616    #endif
617    
618        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
619        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
620        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
621          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
622    
623        case 'L':        case 'L':
624        ppp = pp;        ppp = pp;
# Line 465  while (!done) Line 644  while (!done)
644    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
645    local character tables. */    local character tables. */
646    
647    #if !defined NOPOSIX
648    if (posix || do_posix)    if (posix || do_posix)
649      {      {
650      int rc;      int rc;
# Line 478  while (!done) Line 658  while (!done)
658    
659      if (rc != 0)      if (rc != 0)
660        {        {
661        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
662        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
663        goto SKIP_DATA;        goto SKIP_DATA;
664        }        }
# Line 487  while (!done) Line 667  while (!done)
667    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
668    
669    else    else
670    #endif  /* !defined NOPOSIX */
671    
672      {      {
673      if (timeit)      if (timeit)
674        {        {
# Line 499  while (!done) Line 681  while (!done)
681          if (re != NULL) free(re);          if (re != NULL) free(re);
682          }          }
683        time_taken = clock() - start_time;        time_taken = clock() - start_time;
684        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
685          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
686              (double)CLOCKS_PER_SEC);
687        }        }
688    
689      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 516  while (!done) Line 699  while (!done)
699          {          {
700          for (;;)          for (;;)
701            {            {
702            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
703              {              {
704              done = 1;              done = 1;
705              goto CONTINUE;              goto CONTINUE;
# Line 530  while (!done) Line 713  while (!done)
713        goto CONTINUE;        goto CONTINUE;
714        }        }
715    
716      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
717        info-returning functions. The old one has a limited interface and
718        returns only limited data. Check that it agrees with the newer one. */
719    
720        if (log_store)
721          fprintf(outfile, "Memory allocation (code space): %d\n",
722            (int)(gotten_store -
723                  sizeof(real_pcre) -
724                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
725    
726      if (do_showinfo)      if (do_showinfo)
727        {        {
728        int first_char, count;        unsigned long int get_options;
729          int old_first_char, old_options, old_count;
730          int count, backrefmax, first_char, need_char;
731          int nameentrysize, namecount;
732          const uschar *nametable;
733          size_t size;
734    
735        if (do_debug) print_internals(re, outfile);        if (do_debug)
736            {
737            fprintf(outfile, "------------------------------------------------------------------\n");
738            print_internals(re, outfile);
739            }
740    
741        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
742          new_info(re, NULL, PCRE_INFO_SIZE, &size);
743          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
744          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
745          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
746          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
747          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
748          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
749          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
750    
751          old_count = pcre_info(re, &old_options, &old_first_char);
752        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
753          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
754        else        else
755          {          {
756          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
757          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
758            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
759              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
760              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
761              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
762              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
763              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
764              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
765              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
766              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
767          if (first_char == -1)          }
768            {  
769            fprintf(outfile, "First char at start or follows \\n\n");        if (size != gotten_store) fprintf(outfile,
770            }          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
771          else if (first_char < 0)          size, gotten_store);
772    
773          fprintf(outfile, "Capturing subpattern count = %d\n", count);
774          if (backrefmax > 0)
775            fprintf(outfile, "Max back reference = %d\n", backrefmax);
776    
777          if (namecount > 0)
778            {
779            fprintf(outfile, "Named capturing subpatterns:\n");
780            while (namecount-- > 0)
781            {            {
782            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
783                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
784                GET2(nametable, 0));
785              nametable += nameentrysize;
786            }            }
787            }
788    
789          if (get_options == 0) fprintf(outfile, "No options\n");
790            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
791              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
792              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
793              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
794              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
795              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
796              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
797              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
798              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
799              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
800    
801          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
802            fprintf(outfile, "Case state changes\n");
803    
804          if (first_char == -1)
805            {
806            fprintf(outfile, "First char at start or follows \\n\n");
807            }
808          else if (first_char < 0)
809            {
810            fprintf(outfile, "No first char\n");
811            }
812          else
813            {
814            int ch = first_char & 255;
815            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
816              "" : " (caseless)";
817            if (isprint(ch))
818              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
819          else          else
820            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
821            if (isprint(first_char))          }
822              fprintf(outfile, "First char = \'%c\'\n", first_char);  
823            else        if (need_char < 0)
824              fprintf(outfile, "First char = %d\n", first_char);          {
825            }          fprintf(outfile, "No need char\n");
826            }
827          else
828            {
829            int ch = need_char & 255;
830            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
831              "" : " (caseless)";
832            if (isprint(ch))
833              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
834            else
835              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
836          }          }
837        }        }
838    
# Line 586  while (!done) Line 850  while (!done)
850            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
851          time_taken = clock() - start_time;          time_taken = clock() - start_time;
852          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
853          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
854            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
855                (double)CLOCKS_PER_SEC);
856          }          }
857    
858        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 596  while (!done) Line 861  while (!done)
861        else if (extra == NULL)        else if (extra == NULL)
862          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
863    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
864        else if (do_showinfo)        else if (do_showinfo)
865          {          {
866          real_pcre_extra *xx = (real_pcre_extra *)extra;          size_t size;
867          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          uschar *start_bits = NULL;
868            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
869            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
870            fprintf(outfile, "Study size = %d\n", size);
871            if (start_bits == NULL)
872            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
873          else          else
874            {            {
# Line 611  while (!done) Line 877  while (!done)
877            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
878            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
879              {              {
880              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
881                {                {
882                if (c > 75)                if (c > 75)
883                  {                  {
# Line 641  while (!done) Line 907  while (!done)
907    for (;;)    for (;;)
908      {      {
909      unsigned char *q;      unsigned char *q;
910        unsigned char *bptr = dbuffer;
911        int *use_offsets = offsets;
912        int use_size_offsets = size_offsets;
913        int callout_data = 0;
914        int callout_data_set = 0;
915      int count, c;      int count, c;
916      int offsets[45];      int copystrings = 0;
917      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
918        int getstrings = 0;
919        int getlist = 0;
920        int gmatched = 0;
921        int start_offset = 0;
922        int g_notempty = 0;
923    
924      options = 0;      options = 0;
925    
926      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
927      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      first_callout = 1;
928        callout_extra = 0;
929        callout_count = 0;
930        callout_fail_count = 999999;
931        callout_fail_id = -1;
932    
933        if (infile == stdin) printf("data> ");
934        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
935        {        {
936        done = 1;        done = 1;
937        goto CONTINUE;        goto CONTINUE;
# Line 668  while (!done) Line 951  while (!done)
951        {        {
952        int i = 0;        int i = 0;
953        int n = 0;        int n = 0;
954    
955        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
956          {          {
957          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 687  while (!done) Line 971  while (!done)
971          break;          break;
972    
973          case 'x':          case 'x':
974    
975            /* Handle \x{..} specially - new Perl thing for utf8 */
976    
977            if (*p == '{')
978              {
979              unsigned char *pt = p;
980              c = 0;
981              while (isxdigit(*(++pt)))
982                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
983              if (*pt == '}')
984                {
985                unsigned char buff8[8];
986                int ii, utn;
987                utn = ord2utf8(c, buff8);
988                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
989                c = buff8[ii];   /* Last byte */
990                p = pt + 1;
991                break;
992                }
993              /* Not correct form; fall through */
994              }
995    
996            /* Ordinary \x */
997    
998          c = 0;          c = 0;
999          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1000            {            {
# Line 707  while (!done) Line 1015  while (!done)
1015          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1016          continue;          continue;
1017    
1018            case 'C':
1019            if (isdigit(*p))    /* Set copy string */
1020              {
1021              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1022              copystrings |= 1 << n;
1023              }
1024            else if (isalnum(*p))
1025              {
1026              uschar name[256];
1027              uschar *npp = name;
1028              while (isalnum(*p)) *npp++ = *p++;
1029              *npp = 0;
1030              n = pcre_get_stringnumber(re, (char *)name);
1031              if (n < 0)
1032                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1033              else copystrings |= 1 << n;
1034              }
1035            else if (*p == '+')
1036              {
1037              callout_extra = 1;
1038              p++;
1039              }
1040            else if (*p == '-')
1041              {
1042              pcre_callout = NULL;
1043              p++;
1044              }
1045            else if (*p == '!')
1046              {
1047              callout_fail_id = 0;
1048              p++;
1049              while(isdigit(*p))
1050                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1051              callout_fail_count = 0;
1052              if (*p == '!')
1053                {
1054                p++;
1055                while(isdigit(*p))
1056                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1057                }
1058              }
1059            else if (*p == '*')
1060              {
1061              int sign = 1;
1062              callout_data = 0;
1063              if (*(++p) == '-') { sign = -1; p++; }
1064              while(isdigit(*p))
1065                callout_data = callout_data * 10 + *p++ - '0';
1066              callout_data *= sign;
1067              callout_data_set = 1;
1068              }
1069            continue;
1070    
1071            case 'G':
1072            if (isdigit(*p))
1073              {
1074              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1075              getstrings |= 1 << n;
1076              }
1077            else if (isalnum(*p))
1078              {
1079              uschar name[256];
1080              uschar *npp = name;
1081              while (isalnum(*p)) *npp++ = *p++;
1082              *npp = 0;
1083              n = pcre_get_stringnumber(re, (char *)name);
1084              if (n < 0)
1085                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1086              else getstrings |= 1 << n;
1087              }
1088            continue;
1089    
1090            case 'L':
1091            getlist = 1;
1092            continue;
1093    
1094            case 'M':
1095            find_match_limit = 1;
1096            continue;
1097    
1098            case 'N':
1099            options |= PCRE_NOTEMPTY;
1100            continue;
1101    
1102          case 'O':          case 'O':
1103          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1104          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1105              {
1106              size_offsets_max = n;
1107              free(offsets);
1108              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1109              if (offsets == NULL)
1110                {
1111                printf("** Failed to get %d bytes of memory for offsets vector\n",
1112                  size_offsets_max * sizeof(int));
1113                return 1;
1114                }
1115              }
1116            use_size_offsets = n;
1117            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1118          continue;          continue;
1119    
1120          case 'Z':          case 'Z':
# Line 722  while (!done) Line 1127  while (!done)
1127      len = q - dbuffer;      len = q - dbuffer;
1128    
1129      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1130      support timing. */      support timing or playing with the match limit or callout data. */
1131    
1132    #if !defined NOPOSIX
1133      if (posix || do_posix)      if (posix || do_posix)
1134        {        {
1135        int rc;        int rc;
1136        int eflags = 0;        int eflags = 0;
1137        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1138          if (use_size_offsets > 0)
1139            pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1140        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1141        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1142    
1143        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1144    
1145        if (rc != 0)        if (rc != 0)
1146          {          {
1147          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1148          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1149          }          }
1150        else        else
1151          {          {
1152          size_t i;          size_t i;
1153          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1154            {            {
1155            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1156              {              {
1157              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1158              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1159                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1160              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1161                if (i == 0 && do_showrest)
1162                  {
1163                  fprintf(outfile, " 0+ ");
1164                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1165                    outfile);
1166                  fprintf(outfile, "\n");
1167                  }
1168              }              }
1169            }            }
1170          }          }
1171          free(pmatch);
1172        }        }
1173    
1174      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1175    
1176      else      else
1177    #endif  /* !defined NOPOSIX */
1178    
1179        for (;; gmatched++)    /* Loop for /g or /G */
1180        {        {
1181        if (timeit)        if (timeit)
1182          {          {
1183          register int i;          register int i;
1184          clock_t time_taken;          clock_t time_taken;
1185          clock_t start_time = clock();          clock_t start_time = clock();
1186          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1187            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1188              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1189          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1190          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1191            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1192                (double)CLOCKS_PER_SEC);
1193            }
1194    
1195          /* If find_match_limit is set, we want to do repeated matches with
1196          varying limits in order to find the minimum value. */
1197    
1198          if (find_match_limit)
1199            {
1200            int min = 0;
1201            int mid = 64;
1202            int max = -1;
1203    
1204            if (extra == NULL)
1205              {
1206              extra = malloc(sizeof(pcre_extra));
1207              extra->flags = 0;
1208              }
1209            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1210    
1211            for (;;)
1212              {
1213              extra->match_limit = mid;
1214              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1215                options | g_notempty, use_offsets, use_size_offsets);
1216              if (count == PCRE_ERROR_MATCHLIMIT)
1217                {
1218                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1219                min = mid;
1220                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1221                }
1222              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1223                {
1224                if (mid == min + 1)
1225                  {
1226                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1227                  break;
1228                  }
1229                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1230                max = mid;
1231                mid = (min + mid)/2;
1232                }
1233              else break;    /* Some other error */
1234              }
1235    
1236            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1237          }          }
1238    
1239        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If callout_data is set, use the interface with additional data */
1240          size_offsets);  
1241          else if (callout_data_set)
1242            {
1243            if (extra == NULL)
1244              {
1245              extra = malloc(sizeof(pcre_extra));
1246              extra->flags = 0;
1247              }
1248            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1249            extra->callout_data = (void *)callout_data;
1250            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1251              options | g_notempty, use_offsets, use_size_offsets);
1252            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1253            }
1254    
1255          /* The normal case is just to do the match once, with the default
1256          value of match_limit. */
1257    
1258          else count = pcre_exec(re, extra, (char *)bptr, len,
1259            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1260    
1261        if (count == 0)        if (count == 0)
1262          {          {
1263          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1264          count = size_offsets/3;          count = use_size_offsets/3;
1265          }          }
1266    
1267          /* Matched */
1268    
1269        if (count >= 0)        if (count >= 0)
1270          {          {
1271          int i;          int i;
1272          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1273            {            {
1274            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1275              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1276            else            else
1277              {              {
1278              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1279              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1280                  use_offsets[i+1] - use_offsets[i], outfile);
1281              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1282                if (i == 0)
1283                  {
1284                  if (do_showrest)
1285                    {
1286                    fprintf(outfile, " 0+ ");
1287                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1288                      outfile);
1289                    fprintf(outfile, "\n");
1290                    }
1291                  }
1292                }
1293              }
1294    
1295            for (i = 0; i < 32; i++)
1296              {
1297              if ((copystrings & (1 << i)) != 0)
1298                {
1299                char copybuffer[16];
1300                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1301                  i, copybuffer, sizeof(copybuffer));
1302                if (rc < 0)
1303                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1304                else
1305                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1306                }
1307              }
1308    
1309            for (i = 0; i < 32; i++)
1310              {
1311              if ((getstrings & (1 << i)) != 0)
1312                {
1313                const char *substring;
1314                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1315                  i, &substring);
1316                if (rc < 0)
1317                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1318                else
1319                  {
1320                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1321                  /* free((void *)substring); */
1322                  pcre_free_substring(substring);
1323                  }
1324                }
1325              }
1326    
1327            if (getlist)
1328              {
1329              const char **stringlist;
1330              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1331                &stringlist);
1332              if (rc < 0)
1333                fprintf(outfile, "get substring list failed %d\n", rc);
1334              else
1335                {
1336                for (i = 0; i < count; i++)
1337                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1338                if (stringlist[i] != NULL)
1339                  fprintf(outfile, "string list not terminated by NULL\n");
1340                /* free((void *)stringlist); */
1341                pcre_free_substring_list(stringlist);
1342              }              }
1343            }            }
1344          }          }
1345    
1346          /* Failed to match. If this is a /g or /G loop and we previously set
1347          g_notempty after a null match, this is not necessarily the end.
1348          We want to advance the start offset, and continue. Fudge the offset
1349          values to achieve this. We won't be at the end of the string - that
1350          was checked before setting g_notempty. */
1351    
1352        else        else
1353          {          {
1354          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1355            else fprintf(outfile, "Error %d\n", count);            {
1356              use_offsets[0] = start_offset;
1357              use_offsets[1] = start_offset + 1;
1358              }
1359            else
1360              {
1361              if (gmatched == 0)   /* Error if no previous matches */
1362                {
1363                if (count == -1) fprintf(outfile, "No match\n");
1364                  else fprintf(outfile, "Error %d\n", count);
1365                }
1366              break;  /* Out of the /g loop */
1367              }
1368          }          }
1369        }  
1370      }        /* If not /g or /G we are done */
1371    
1372          if (!do_g && !do_G) break;
1373    
1374          /* If we have matched an empty string, first check to see if we are at
1375          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1376          what Perl's /g option does. This turns out to be rather cunning. First
1377          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1378          same point. If this fails (picked up above) we advance to the next
1379          character. */
1380    
1381          g_notempty = 0;
1382          if (use_offsets[0] == use_offsets[1])
1383            {
1384            if (use_offsets[0] == len) break;
1385            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1386            }
1387    
1388          /* For /g, update the start offset, leaving the rest alone */
1389    
1390          if (do_g) start_offset = use_offsets[1];
1391    
1392          /* For /G, update the pointer and length */
1393    
1394          else
1395            {
1396            bptr += use_offsets[1];
1397            len -= use_offsets[1];
1398            }
1399          }  /* End of loop for /g and /G */
1400        }    /* End of loop for data lines */
1401    
1402    CONTINUE:    CONTINUE:
1403    
1404    #if !defined NOPOSIX
1405    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1406    #endif
1407    
1408    if (re != NULL) free(re);    if (re != NULL) free(re);
1409    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1410    if (tables != NULL)    if (tables != NULL)

Legend:
Removed from v.25  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.5