/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 31 by nigel, Sat Feb 24 21:38:57 2007 UTC revision 71 by nigel, Sat Feb 24 21:40:24 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22    /* It is possible to compile this test program without including support for
23    testing the POSIX interface, though this is not available via the standard
24    Makefile. */
25    
26    #if !defined NOPOSIX
27  #include "pcreposix.h"  #include "pcreposix.h"
28    #endif
29    
30  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
31  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 35 
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE 1024
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int use_utf8;
52    static size_t gotten_store;
53    
54    
55    static const int utf8_table1[] = {
56      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
57    
58    static const int utf8_table2[] = {
59      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
60    
61    static const int utf8_table3[] = {
62      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
63    
64    
65    
66    /*************************************************
67    *         Print compiled regex                   *
68    *************************************************/
69    
70    /* The code for doing this is held in a separate file that is also included in
71    pcre.c when it is compiled with the debug switch. It defines a function called
72    print_internals(), which uses a table of opcode lengths defined by the macro
73    OP_LENGTHS, whose name must be OP_lengths. */
74    
75    static uschar OP_lengths[] = { OP_LENGTHS };
76    
77    #include "printint.c"
78    
79    
80    
81    /*************************************************
82    *          Read number from string               *
83    *************************************************/
84    
85    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
86    around with conditional compilation, just do the job by hand. It is only used
87    for unpicking the -o argument, so just keep it simple.
88    
89    Arguments:
90      str           string to be converted
91      endptr        where to put the end pointer
92    
93    Returns:        the converted value, as an int
94    */
95    
96  /* Debugging function to print the internal form of the regex. This is the same  static int
97  code as contained in pcre.c under the DEBUG macro. */  get_value(unsigned char *str, unsigned char **endptr)
98    {
99    int result = 0;
100    while(*str != 0 && isspace(*str)) str++;
101    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
102    *endptr = str;
103    return(result);
104    }
105    
106    
107    
108    /*************************************************
109    *       Convert character value to UTF-8         *
110    *************************************************/
111    
112  static const char *OP_names[] = {  /* This function takes an integer value in the range 0 - 0x7fffffff
113    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  and encodes it as a UTF-8 character in 0 to 6 bytes.
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
114    
115    Arguments:
116      cvalue     the character value
117      buffer     pointer to buffer for result - at least 6 bytes long
118    
119    Returns:     number of characters placed in the buffer
120                 -1 if input character is negative
121                 0 if input character is positive but too big (only when
122                 int is longer than 32 bits)
123    */
124    
125  static void print_internals(pcre *re, FILE *outfile)  static int
126    ord2utf8(int cvalue, unsigned char *buffer)
127  {  {
128  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
129    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
130      if (cvalue <= utf8_table1[i]) break;
131    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
132    if (cvalue < 0) return -1;
133    
134    buffer += i;
135    for (j = i; j > 0; j--)
136     {
137     *buffer-- = 0x80 | (cvalue & 0x3f);
138     cvalue >>= 6;
139     }
140    *buffer = utf8_table2[i] | cvalue;
141    return i + 1;
142    }
143    
144    
145    /*************************************************
146    *            Convert UTF-8 string to value       *
147    *************************************************/
148    
149  fprintf(outfile, "------------------------------------------------------------------\n");  /* This function takes one or more bytes that represents a UTF-8 character,
150    and returns the value of the character.
151    
152  for(;;)  Argument:
153      buffer   a pointer to the byte vector
154      vptr     a pointer to an int to receive the value
155    
156    Returns:   >  0 => the number of bytes consumed
157               -6 to 0 => malformed UTF-8 character at offset = (-return)
158    */
159    
160    static int
161    utf82ord(unsigned char *buffer, int *vptr)
162    {
163    int c = *buffer++;
164    int d = c;
165    int i, j, s;
166    
167    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
168    {    {
169    int c;    if ((d & 0x80) == 0) break;
170    int charlength;    d <<= 1;
171      }
172    
173    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
174    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
175    
176          case OP_CRRANGE:  /* i now has a value in the range 1-5 */
177          case OP_CRMINRANGE:  
178          min = (code[1] << 8) + code[2];  s = 6*i;
179          max = (code[3] << 8) + code[4];  d = (c & utf8_table3[i]) << s;
180          if (max == 0) fprintf(outfile, "{%d,}", min);  
181          else fprintf(outfile, "{%d,%d}", min, max);  for (j = 0; j < i; j++)
182          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");    {
183          code += 4;    c = *buffer++;
184          break;    if ((c & 0xc0) != 0x80) return -(j+1);
185      s -= 6;
186      d |= (c & 0x3f) << s;
187      }
188    
189    /* Check that encoding was the correct unique one */
190    
191    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
192      if (d <= utf8_table1[j]) break;
193    if (j != i) return -(i+1);
194    
195    /* Valid value */
196    
197          default:  *vptr = d;
198          code--;  return i+1;
199    }
200    
201    
202    
203    /*************************************************
204    *             Print character string             *
205    *************************************************/
206    
207    /* Character string printing function. Must handle UTF-8 strings in utf8
208    mode. Yields number of characters printed. If handed a NULL file, just counts
209    chars without printing. */
210    
211    static int pchars(unsigned char *p, int length, FILE *f)
212    {
213    int c;
214    int yield = 0;
215    
216    while (length-- > 0)
217      {
218      if (use_utf8)
219        {
220        int rc = utf82ord(p, &c);
221    
222        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
223          {
224          length -= rc - 1;
225          p += rc;
226          if (c < 256 && isprint(c))
227            {
228            if (f != NULL) fprintf(f, "%c", c);
229            yield++;
230          }          }
231          else
232            {
233            int n;
234            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
235            yield += n;
236            }
237          continue;
238        }        }
239      break;      }
240    
241      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
242    
243      default:    if (isprint(c = *(p++)))
244      fprintf(outfile, "    %s", OP_names[*code]);      {
245      break;      if (f != NULL) fprintf(f, "%c", c);
246        yield++;
247        }
248      else
249        {
250        if (f != NULL) fprintf(f, "\\x%02x", c);
251        yield += 4;
252      }      }
   
   code++;  
   fprintf(outfile, "\n");  
253    }    }
254    
255    return yield;
256  }  }
257    
258    
259    
260  /* Character string printing function. */  /*************************************************
261    *              Callout function                  *
262    *************************************************/
263    
264  static void pchars(unsigned char *p, int length)  /* Called from PCRE as a result of the (?C) item. We print out where we are in
265    the match. Yield zero unless more callouts than the fail count, or the callout
266    data is not zero. */
267    
268    static int callout(pcre_callout_block *cb)
269  {  {
270  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
271  while (length-- > 0)  int i, pre_start, post_start;
272    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
273      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
274      {
275      fprintf(f, "Callout %d: last capture = %d\n",
276        cb->callout_number, cb->capture_last);
277    
278      for (i = 0; i < cb->capture_top * 2; i += 2)
279        {
280        if (cb->offset_vector[i] < 0)
281          fprintf(f, "%2d: <unset>\n", i/2);
282        else
283          {
284          fprintf(f, "%2d: ", i/2);
285          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
286            cb->offset_vector[i+1] - cb->offset_vector[i], f);
287          fprintf(f, "\n");
288          }
289        }
290      }
291    
292    /* Re-print the subject in canonical form, the first time or if giving full
293    details. On subsequent calls in the same match, we use pchars just to find the
294    printed lengths of the substrings. */
295    
296    if (f != NULL) fprintf(f, "--->");
297    
298    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
299    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
300      cb->current_position - cb->start_match, f);
301    
302    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
303      cb->subject_length - cb->current_position, f);
304    
305    if (f != NULL) fprintf(f, "\n");
306    
307    /* Always print appropriate indicators, with callout number if not already
308    shown */
309    
310    if (callout_extra) fprintf(outfile, "    ");
311      else fprintf(outfile, "%3d ", cb->callout_number);
312    
313    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314    fprintf(outfile, "^");
315    
316    if (post_start > 0)
317      {
318      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
319      fprintf(outfile, "^");
320      }
321    
322    fprintf(outfile, "\n");
323    first_callout = 0;
324    
325    if (cb->callout_data != NULL)
326      {
327      int callout_data = *((int *)(cb->callout_data));
328      if (callout_data != 0)
329        {
330        fprintf(outfile, "Callout data = %d\n", callout_data);
331        return callout_data;
332        }
333      }
334    
335    return (cb->callout_number != callout_fail_id)? 0 :
336           (++callout_count >= callout_fail_count)? 1 : 0;
337  }  }
338    
339    
340    /*************************************************
341    *            Local malloc function               *
342    *************************************************/
343    
344  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
345  compiled re. */  compiled re. */
346    
347  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
348  {  {
349  if (log_store)  gotten_store = size;
   fprintf(outfile, "Memory allocation request: %d (code space %d)\n",  
     (int)size, (int)size - offsetof(real_pcre, code[0]));  
350  return malloc(size);  return malloc(size);
351  }  }
352    
353    
354    
355    /*************************************************
356    *          Call pcre_fullinfo()                  *
357    *************************************************/
358    
359    /* Get one piece of information from the pcre_fullinfo() function */
360    
361    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
362    {
363    int rc;
364    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
365      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
366    }
367    
368    
369    
370    /*************************************************
371    *                Main Program                    *
372    *************************************************/
373    
374  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
375  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
376  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 295  int op = 1; Line 384  int op = 1;
384  int timeit = 0;  int timeit = 0;
385  int showinfo = 0;  int showinfo = 0;
386  int showstore = 0;  int showstore = 0;
387    int size_offsets = 45;
388    int size_offsets_max;
389    int *offsets;
390    #if !defined NOPOSIX
391  int posix = 0;  int posix = 0;
392    #endif
393  int debug = 0;  int debug = 0;
394  int done = 0;  int done = 0;
395  unsigned char buffer[30000];  
396  unsigned char dbuffer[1024];  unsigned char *buffer;
397    unsigned char *dbuffer;
398    
399    /* Get buffers from malloc() so that Electric Fence will check their misuse
400    when I am debugging. */
401    
402    buffer = (unsigned char *)malloc(BUFFER_SIZE);
403    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
404    
405  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
406    
# Line 309  outfile = stdout; Line 410  outfile = stdout;
410    
411  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
412    {    {
413      unsigned char *endptr;
414    
415    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
416      showstore = 1;      showstore = 1;
417    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
418    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
419    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
420      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
421          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
422            *endptr == 0))
423        {
424        op++;
425        argc--;
426        }
427    #if !defined NOPOSIX
428    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
429    #endif
430      else if (strcmp(argv[op], "-C") == 0)
431        {
432        int rc;
433        printf("PCRE version %s\n", pcre_version());
434        printf("Compiled with\n");
435        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
436        printf("  %sUTF-8 support\n", rc? "" : "No ");
437        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
438        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
439        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
440        printf("  Internal link size = %d\n", rc);
441        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
442        printf("  POSIX malloc threshold = %d\n", rc);
443        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
444        printf("  Default match limit = %d\n", rc);
445        exit(0);
446        }
447    else    else
448      {      {
449      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
450      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
451      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
452             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
453             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
454             "  -s   output store information\n"             "  -o <n> set size of offsets vector to <n>\n");
455             "  -t   time compilation and execution\n");  #if !defined NOPOSIX
456        printf("  -p     use POSIX interface\n");
457    #endif
458        printf("  -s     output store information\n"
459               "  -t     time compilation and execution\n");
460      return 1;      return 1;
461      }      }
462    op++;    op++;
463    argc--;    argc--;
464    }    }
465    
466    /* Get the store for the offsets vector, and remember what it was */
467    
468    size_offsets_max = size_offsets;
469    offsets = (int *)malloc(size_offsets_max * sizeof(int));
470    if (offsets == NULL)
471      {
472      printf("** Failed to get %d bytes of memory for offsets vector\n",
473        size_offsets_max * sizeof(int));
474      return 1;
475      }
476    
477  /* Sort out the input and output files */  /* Sort out the input and output files */
478    
479  if (argc > 1)  if (argc > 1)
# Line 366  while (!done) Line 510  while (!done)
510    {    {
511    pcre *re = NULL;    pcre *re = NULL;
512    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
513    
514    #if !defined NOPOSIX  /* There are still compilers that require no indent */
515    regex_t preg;    regex_t preg;
516      int do_posix = 0;
517    #endif
518    
519    const char *error;    const char *error;
520    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
521    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
522    int do_study = 0;    int do_study = 0;
523    int do_debug = debug;    int do_debug = debug;
524      int do_G = 0;
525      int do_g = 0;
526    int do_showinfo = showinfo;    int do_showinfo = showinfo;
527    int do_posix = 0;    int do_showrest = 0;
528    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
529    
530      use_utf8 = 0;
531    
532    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
533    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
534    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
535      fflush(outfile);
536    
537    p = buffer;    p = buffer;
538    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 407  while (!done) Line 561  while (!done)
561        }        }
562      if (*pp != 0) break;      if (*pp != 0) break;
563    
564      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
565      if (len < 256)      if (len < 256)
566        {        {
567        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 444  while (!done) Line 598  while (!done)
598      {      {
599      switch (*pp++)      switch (*pp++)
600        {        {
601          case 'g': do_g = 1; break;
602        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
603        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
604        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
605        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
606    
607          case '+': do_showrest = 1; break;
608        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
609        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
610        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611          case 'G': do_G = 1; break;
612        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
613        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
614          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
615    
616    #if !defined NOPOSIX
617        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
618    #endif
619    
620        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
621        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
622        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
623          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
624          case '?': options |= PCRE_NO_UTF8_CHECK; break;
625    
626        case 'L':        case 'L':
627        ppp = pp;        ppp = pp;
# Line 483  while (!done) Line 647  while (!done)
647    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
648    local character tables. */    local character tables. */
649    
650    #if !defined NOPOSIX
651    if (posix || do_posix)    if (posix || do_posix)
652      {      {
653      int rc;      int rc;
# Line 496  while (!done) Line 661  while (!done)
661    
662      if (rc != 0)      if (rc != 0)
663        {        {
664        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
665        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
666        goto SKIP_DATA;        goto SKIP_DATA;
667        }        }
# Line 505  while (!done) Line 670  while (!done)
670    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
671    
672    else    else
673    #endif  /* !defined NOPOSIX */
674    
675      {      {
676      if (timeit)      if (timeit)
677        {        {
# Line 518  while (!done) Line 685  while (!done)
685          }          }
686        time_taken = clock() - start_time;        time_taken = clock() - start_time;
687        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
688          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
689          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
690        }        }
691    
692      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 535  while (!done) Line 702  while (!done)
702          {          {
703          for (;;)          for (;;)
704            {            {
705            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
706              {              {
707              done = 1;              done = 1;
708              goto CONTINUE;              goto CONTINUE;
# Line 549  while (!done) Line 716  while (!done)
716        goto CONTINUE;        goto CONTINUE;
717        }        }
718    
719      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
720        info-returning functions. The old one has a limited interface and
721        returns only limited data. Check that it agrees with the newer one. */
722    
723        if (log_store)
724          fprintf(outfile, "Memory allocation (code space): %d\n",
725            (int)(gotten_store -
726                  sizeof(real_pcre) -
727                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
728    
729      if (do_showinfo)      if (do_showinfo)
730        {        {
731        int first_char, count;        unsigned long int get_options;
732          int old_first_char, old_options, old_count;
733          int count, backrefmax, first_char, need_char;
734          int nameentrysize, namecount;
735          const uschar *nametable;
736          size_t size;
737    
738        if (do_debug) print_internals(re, outfile);        if (do_debug)
739            {
740            fprintf(outfile, "------------------------------------------------------------------\n");
741            print_internals(re, outfile);
742            }
743    
744        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
745          new_info(re, NULL, PCRE_INFO_SIZE, &size);
746          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
747          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
748          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
749          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
750          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
751          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
752          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
753    
754          old_count = pcre_info(re, &old_options, &old_first_char);
755        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
756          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
757        else        else
758          {          {
759          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
760          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
761            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
762              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
763              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
764              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
765              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
766              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
767              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
768              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
769              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
770          if (first_char == -1)          }
771            {  
772            fprintf(outfile, "First char at start or follows \\n\n");        if (size != gotten_store) fprintf(outfile,
773            }          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
774          else if (first_char < 0)          size, gotten_store);
775    
776          fprintf(outfile, "Capturing subpattern count = %d\n", count);
777          if (backrefmax > 0)
778            fprintf(outfile, "Max back reference = %d\n", backrefmax);
779    
780          if (namecount > 0)
781            {
782            fprintf(outfile, "Named capturing subpatterns:\n");
783            while (namecount-- > 0)
784            {            {
785            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
786                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
787                GET2(nametable, 0));
788              nametable += nameentrysize;
789            }            }
790            }
791    
792          if (get_options == 0) fprintf(outfile, "No options\n");
793            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
794              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
795              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
796              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
797              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
798              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
799              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
800              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
801              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
802              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
803              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
804    
805          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
806            fprintf(outfile, "Case state changes\n");
807    
808          if (first_char == -1)
809            {
810            fprintf(outfile, "First char at start or follows \\n\n");
811            }
812          else if (first_char < 0)
813            {
814            fprintf(outfile, "No first char\n");
815            }
816          else
817            {
818            int ch = first_char & 255;
819            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
820              "" : " (caseless)";
821            if (isprint(ch))
822              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
823          else          else
824            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
825            if (isprint(first_char))          }
826              fprintf(outfile, "First char = \'%c\'\n", first_char);  
827            else        if (need_char < 0)
828              fprintf(outfile, "First char = %d\n", first_char);          {
829            }          fprintf(outfile, "No need char\n");
830            }
831          else
832            {
833            int ch = need_char & 255;
834            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
835              "" : " (caseless)";
836            if (isprint(ch))
837              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
838            else
839              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
840          }          }
841        }        }
842    
# Line 606  while (!done) Line 855  while (!done)
855          time_taken = clock() - start_time;          time_taken = clock() - start_time;
856          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
857          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
858            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
859            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
860          }          }
861    
862        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 616  while (!done) Line 865  while (!done)
865        else if (extra == NULL)        else if (extra == NULL)
866          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
867    
868        /* This looks at internal information. A bit kludgy to do it this        /* Don't output study size; at present it is in any case a fixed
869        way, but it is useful for testing. */        value, but it varies, depending on the computer architecture, and
870          so messes up the test suite. */
871    
872        else if (do_showinfo)        else if (do_showinfo)
873          {          {
874          real_pcre_extra *xx = (real_pcre_extra *)extra;          size_t size;
875          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          uschar *start_bits = NULL;
876            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
877            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
878            /* fprintf(outfile, "Study size = %d\n", size); */
879            if (start_bits == NULL)
880            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
881          else          else
882            {            {
# Line 631  while (!done) Line 885  while (!done)
885            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
886            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
887              {              {
888              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
889                {                {
890                if (c > 75)                if (c > 75)
891                  {                  {
# Line 661  while (!done) Line 915  while (!done)
915    for (;;)    for (;;)
916      {      {
917      unsigned char *q;      unsigned char *q;
918        unsigned char *bptr = dbuffer;
919        int *use_offsets = offsets;
920        int use_size_offsets = size_offsets;
921        int callout_data = 0;
922        int callout_data_set = 0;
923      int count, c;      int count, c;
924      int copystrings = 0;      int copystrings = 0;
925        int find_match_limit = 0;
926      int getstrings = 0;      int getstrings = 0;
927      int getlist = 0;      int getlist = 0;
928      int offsets[45];      int gmatched = 0;
929      int size_offsets = sizeof(offsets)/sizeof(int);      int start_offset = 0;
930        int g_notempty = 0;
931    
932      options = 0;      options = 0;
933    
934      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
935      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      first_callout = 1;
936        callout_extra = 0;
937        callout_count = 0;
938        callout_fail_count = 999999;
939        callout_fail_id = -1;
940    
941        if (infile == stdin) printf("data> ");
942        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
943        {        {
944        done = 1;        done = 1;
945        goto CONTINUE;        goto CONTINUE;
# Line 691  while (!done) Line 959  while (!done)
959        {        {
960        int i = 0;        int i = 0;
961        int n = 0;        int n = 0;
962    
963        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
964          {          {
965          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 710  while (!done) Line 979  while (!done)
979          break;          break;
980    
981          case 'x':          case 'x':
982    
983            /* Handle \x{..} specially - new Perl thing for utf8 */
984    
985            if (*p == '{')
986              {
987              unsigned char *pt = p;
988              c = 0;
989              while (isxdigit(*(++pt)))
990                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
991              if (*pt == '}')
992                {
993                unsigned char buff8[8];
994                int ii, utn;
995                utn = ord2utf8(c, buff8);
996                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
997                c = buff8[ii];   /* Last byte */
998                p = pt + 1;
999                break;
1000                }
1001              /* Not correct form; fall through */
1002              }
1003    
1004            /* Ordinary \x */
1005    
1006          c = 0;          c = 0;
1007          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1008            {            {
# Line 731  while (!done) Line 1024  while (!done)
1024          continue;          continue;
1025    
1026          case 'C':          case 'C':
1027          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1028          copystrings |= 1 << n;            {
1029              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1030              copystrings |= 1 << n;
1031              }
1032            else if (isalnum(*p))
1033              {
1034              uschar name[256];
1035              uschar *npp = name;
1036              while (isalnum(*p)) *npp++ = *p++;
1037              *npp = 0;
1038              n = pcre_get_stringnumber(re, (char *)name);
1039              if (n < 0)
1040                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1041              else copystrings |= 1 << n;
1042              }
1043            else if (*p == '+')
1044              {
1045              callout_extra = 1;
1046              p++;
1047              }
1048            else if (*p == '-')
1049              {
1050              pcre_callout = NULL;
1051              p++;
1052              }
1053            else if (*p == '!')
1054              {
1055              callout_fail_id = 0;
1056              p++;
1057              while(isdigit(*p))
1058                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1059              callout_fail_count = 0;
1060              if (*p == '!')
1061                {
1062                p++;
1063                while(isdigit(*p))
1064                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1065                }
1066              }
1067            else if (*p == '*')
1068              {
1069              int sign = 1;
1070              callout_data = 0;
1071              if (*(++p) == '-') { sign = -1; p++; }
1072              while(isdigit(*p))
1073                callout_data = callout_data * 10 + *p++ - '0';
1074              callout_data *= sign;
1075              callout_data_set = 1;
1076              }
1077          continue;          continue;
1078    
1079          case 'G':          case 'G':
1080          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1081          getstrings |= 1 << n;            {
1082              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1083              getstrings |= 1 << n;
1084              }
1085            else if (isalnum(*p))
1086              {
1087              uschar name[256];
1088              uschar *npp = name;
1089              while (isalnum(*p)) *npp++ = *p++;
1090              *npp = 0;
1091              n = pcre_get_stringnumber(re, (char *)name);
1092              if (n < 0)
1093                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1094              else getstrings |= 1 << n;
1095              }
1096          continue;          continue;
1097    
1098          case 'L':          case 'L':
1099          getlist = 1;          getlist = 1;
1100          continue;          continue;
1101    
1102            case 'M':
1103            find_match_limit = 1;
1104            continue;
1105    
1106            case 'N':
1107            options |= PCRE_NOTEMPTY;
1108            continue;
1109    
1110          case 'O':          case 'O':
1111          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1112          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1113              {
1114              size_offsets_max = n;
1115              free(offsets);
1116              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1117              if (offsets == NULL)
1118                {
1119                printf("** Failed to get %d bytes of memory for offsets vector\n",
1120                  size_offsets_max * sizeof(int));
1121                return 1;
1122                }
1123              }
1124            use_size_offsets = n;
1125            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1126          continue;          continue;
1127    
1128          case 'Z':          case 'Z':
1129          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1130          continue;          continue;
1131    
1132            case '?':
1133            options |= PCRE_NO_UTF8_CHECK;
1134            continue;
1135          }          }
1136        *q++ = c;        *q++ = c;
1137        }        }
# Line 759  while (!done) Line 1139  while (!done)
1139      len = q - dbuffer;      len = q - dbuffer;
1140    
1141      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1142      support timing. */      support timing or playing with the match limit or callout data. */
1143    
1144    #if !defined NOPOSIX
1145      if (posix || do_posix)      if (posix || do_posix)
1146        {        {
1147        int rc;        int rc;
1148        int eflags = 0;        int eflags = 0;
1149        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1150          if (use_size_offsets > 0)
1151            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1152        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1153        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1154    
1155        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1156    
1157        if (rc != 0)        if (rc != 0)
1158          {          {
1159          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1160          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1161          }          }
1162        else        else
1163          {          {
1164          size_t i;          size_t i;
1165          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1166            {            {
1167            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1168              {              {
1169              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1170              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1171                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1172              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1173                if (i == 0 && do_showrest)
1174                  {
1175                  fprintf(outfile, " 0+ ");
1176                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1177                    outfile);
1178                  fprintf(outfile, "\n");
1179                  }
1180              }              }
1181            }            }
1182          }          }
1183          free(pmatch);
1184        }        }
1185    
1186      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1187    
1188      else      else
1189    #endif  /* !defined NOPOSIX */
1190    
1191        for (;; gmatched++)    /* Loop for /g or /G */
1192        {        {
1193        if (timeit)        if (timeit)
1194          {          {
# Line 803  while (!done) Line 1196  while (!done)
1196          clock_t time_taken;          clock_t time_taken;
1197          clock_t start_time = clock();          clock_t start_time = clock();
1198          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1199            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1200              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1201          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1202          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1203            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1204            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1205            }
1206    
1207          /* If find_match_limit is set, we want to do repeated matches with
1208          varying limits in order to find the minimum value. */
1209    
1210          if (find_match_limit)
1211            {
1212            int min = 0;
1213            int mid = 64;
1214            int max = -1;
1215    
1216            if (extra == NULL)
1217              {
1218              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1219              extra->flags = 0;
1220              }
1221            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1222    
1223            for (;;)
1224              {
1225              extra->match_limit = mid;
1226              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1227                options | g_notempty, use_offsets, use_size_offsets);
1228              if (count == PCRE_ERROR_MATCHLIMIT)
1229                {
1230                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1231                min = mid;
1232                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1233                }
1234              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1235                {
1236                if (mid == min + 1)
1237                  {
1238                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1239                  break;
1240                  }
1241                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1242                max = mid;
1243                mid = (min + mid)/2;
1244                }
1245              else break;    /* Some other error */
1246              }
1247    
1248            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1249          }          }
1250    
1251        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If callout_data is set, use the interface with additional data */
1252          size_offsets);  
1253          else if (callout_data_set)
1254            {
1255            if (extra == NULL)
1256              {
1257              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1258              extra->flags = 0;
1259              }
1260            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1261            extra->callout_data = &callout_data;
1262            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1263              options | g_notempty, use_offsets, use_size_offsets);
1264            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1265            }
1266    
1267          /* The normal case is just to do the match once, with the default
1268          value of match_limit. */
1269    
1270          else count = pcre_exec(re, extra, (char *)bptr, len,
1271            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1272    
1273        if (count == 0)        if (count == 0)
1274          {          {
1275          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1276          count = size_offsets/3;          count = use_size_offsets/3;
1277          }          }
1278    
1279          /* Matched */
1280    
1281        if (count >= 0)        if (count >= 0)
1282          {          {
1283          int i;          int i;
1284          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1285            {            {
1286            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1287              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1288            else            else
1289              {              {
1290              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1291              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1292                  use_offsets[i+1] - use_offsets[i], outfile);
1293              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1294                if (i == 0)
1295                  {
1296                  if (do_showrest)
1297                    {
1298                    fprintf(outfile, " 0+ ");
1299                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1300                      outfile);
1301                    fprintf(outfile, "\n");
1302                    }
1303                  }
1304              }              }
1305            }            }
1306    
# Line 839  while (!done) Line 1308  while (!done)
1308            {            {
1309            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1310              {              {
1311              char buffer[16];              char copybuffer[16];
1312              int rc = pcre_copy_substring((char *)dbuffer, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1313                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
1314              if (rc < 0)              if (rc < 0)
1315                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1316              else              else
1317                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1318              }              }
1319            }            }
1320    
# Line 854  while (!done) Line 1323  while (!done)
1323            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1324              {              {
1325              const char *substring;              const char *substring;
1326              int rc = pcre_get_substring((char *)dbuffer, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1327                i, &substring);                i, &substring);
1328              if (rc < 0)              if (rc < 0)
1329                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1330              else              else
1331                {                {
1332                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1333                free((void *)substring);                /* free((void *)substring); */
1334                  pcre_free_substring(substring);
1335                }                }
1336              }              }
1337            }            }
# Line 869  while (!done) Line 1339  while (!done)
1339          if (getlist)          if (getlist)
1340            {            {
1341            const char **stringlist;            const char **stringlist;
1342            int rc = pcre_get_substring_list((char *)dbuffer, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1343              &stringlist);              &stringlist);
1344            if (rc < 0)            if (rc < 0)
1345              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 879  while (!done) Line 1349  while (!done)
1349                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1350              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1351                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1352              free((void *)stringlist);              /* free((void *)stringlist); */
1353                pcre_free_substring_list(stringlist);
1354              }              }
1355            }            }
1356            }
1357    
1358          /* Failed to match. If this is a /g or /G loop and we previously set
1359          g_notempty after a null match, this is not necessarily the end.
1360          We want to advance the start offset, and continue. Fudge the offset
1361          values to achieve this. We won't be at the end of the string - that
1362          was checked before setting g_notempty. */
1363    
1364          else
1365            {
1366            if (g_notempty != 0)
1367              {
1368              use_offsets[0] = start_offset;
1369              use_offsets[1] = start_offset + 1;
1370              }
1371            else
1372              {
1373              if (gmatched == 0)   /* Error if no previous matches */
1374                {
1375                if (count == -1) fprintf(outfile, "No match\n");
1376                  else fprintf(outfile, "Error %d\n", count);
1377                }
1378              break;  /* Out of the /g loop */
1379              }
1380            }
1381    
1382          /* If not /g or /G we are done */
1383    
1384          if (!do_g && !do_G) break;
1385    
1386          /* If we have matched an empty string, first check to see if we are at
1387          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1388          what Perl's /g option does. This turns out to be rather cunning. First
1389          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1390          same point. If this fails (picked up above) we advance to the next
1391          character. */
1392    
1393          g_notempty = 0;
1394          if (use_offsets[0] == use_offsets[1])
1395            {
1396            if (use_offsets[0] == len) break;
1397            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1398          }          }
1399    
1400          /* For /g, update the start offset, leaving the rest alone */
1401    
1402          if (do_g) start_offset = use_offsets[1];
1403    
1404          /* For /G, update the pointer and length */
1405    
1406        else        else
1407          {          {
1408          if (count == -1) fprintf(outfile, "No match\n");          bptr += use_offsets[1];
1409            else fprintf(outfile, "Error %d\n", count);          len -= use_offsets[1];
1410          }          }
1411        }        }  /* End of loop for /g and /G */
1412      }      }    /* End of loop for data lines */
1413    
1414    CONTINUE:    CONTINUE:
1415    
1416    #if !defined NOPOSIX
1417    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1418    #endif
1419    
1420    if (re != NULL) free(re);    if (re != NULL) free(re);
1421    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1422    if (tables != NULL)    if (tables != NULL)

Legend:
Removed from v.31  
changed lines
  Added in v.71

  ViewVC Help
Powered by ViewVC 1.1.5