/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC revision 69 by nigel, Sat Feb 24 21:40:18 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
12  #include <stdlib.h>  #include <stdlib.h>
13  #include <time.h>  #include <time.h>
14    #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22    /* It is possible to compile this test program without including support for
23    testing the POSIX interface, though this is not available via the standard
24    Makefile. */
25    
26    #if !defined NOPOSIX
27  #include "pcreposix.h"  #include "pcreposix.h"
28    #endif
29    
30  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
31  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 35 
35  #endif  #endif
36  #endif  #endif
37    
38    #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE 1024
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int use_utf8;
52    static size_t gotten_store;
53    
54    
55    
56    static const int utf8_table1[] = {
57      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
59    static const int utf8_table2[] = {
60      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    
62    static const int utf8_table3[] = {
63      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
64    
65    
66    
67    /*************************************************
68    *         Print compiled regex                   *
69    *************************************************/
70    
71    /* The code for doing this is held in a separate file that is also included in
72    pcre.c when it is compiled with the debug switch. It defines a function called
73    print_internals(), which uses a table of opcode lengths defined by the macro
74    OP_LENGTHS, whose name must be OP_lengths. */
75    
76    static uschar OP_lengths[] = { OP_LENGTHS };
77    
78    #include "printint.c"
79    
80    
81    
82    /*************************************************
83    *          Read number from string               *
84    *************************************************/
85    
86    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
87    around with conditional compilation, just do the job by hand. It is only used
88    for unpicking the -o argument, so just keep it simple.
89    
90    Arguments:
91      str           string to be converted
92      endptr        where to put the end pointer
93    
94    Returns:        the converted value, as an int
95    */
96    
97    static int
98    get_value(unsigned char *str, unsigned char **endptr)
99    {
100    int result = 0;
101    while(*str != 0 && isspace(*str)) str++;
102    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
103    *endptr = str;
104    return(result);
105    }
106    
107    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
108    
109  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  /*************************************************
110    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  *       Convert character value to UTF-8         *
111    "not",  *************************************************/
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  
   "Brazero", "Braminzero", "Bra"  
 };  
112    
113    /* This function takes an integer value in the range 0 - 0x7fffffff
114    and encodes it as a UTF-8 character in 0 to 6 bytes.
115    
116  static void print_internals(pcre *re)  Arguments:
117      cvalue     the character value
118      buffer     pointer to buffer for result - at least 6 bytes long
119    
120    Returns:     number of characters placed in the buffer
121                 -1 if input character is negative
122                 0 if input character is positive but too big (only when
123                 int is longer than 32 bits)
124    */
125    
126    static int
127    ord2utf8(int cvalue, unsigned char *buffer)
128  {  {
129  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
130    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131      if (cvalue <= utf8_table1[i]) break;
132    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133    if (cvalue < 0) return -1;
134    
135    buffer += i;
136    for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141    *buffer = utf8_table2[i] | cvalue;
142    return i + 1;
143    }
144    
 printf("------------------------------------------------------------------\n");  
145    
146  for(;;)  /*************************************************
147    *            Convert UTF-8 string to value       *
148    *************************************************/
149    
150    /* This function takes one or more bytes that represent a UTF-8 character,
151    and returns the value of the character.
152    
153    Argument:
154      buffer   a pointer to the byte vector
155      vptr     a pointer to an int to receive the value
156    
157    Returns:   >  0 => the number of bytes consumed
158               -6 to 0 => malformed UTF-8 character at offset = (-return)
159    */
160    
161    static int
162    utf82ord(unsigned char *buffer, int *vptr)
163    {
164    int c = *buffer++;
165    int d = c;
166    int i, j, s;
167    
168    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
169    {    {
170    int c;    if ((d & 0x80) == 0) break;
171    int charlength;    d <<= 1;
172      }
173    
174    printf("%3d ", code - ((real_pcre *)re)->code);  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
175    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     break;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
   
       code++;  
       printf("    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
176    
177          case OP_CRRANGE:  /* i now has a value in the range 1-5 */
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
178    
179          default:  s = 6*i;
180          code--;  d = (c & utf8_table3[i]) << s;
181    
182    for (j = 0; j < i; j++)
183      {
184      c = *buffer++;
185      if ((c & 0xc0) != 0x80) return -(j+1);
186      s -= 6;
187      d |= (c & 0x3f) << s;
188      }
189    
190    /* Check that encoding was the correct unique one */
191    
192    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193      if (d <= utf8_table1[j]) break;
194    if (j != i) return -(i+1);
195    
196    /* Valid value */
197    
198    *vptr = d;
199    return i+1;
200    }
201    
202    
203    
204    /*************************************************
205    *             Print character string             *
206    *************************************************/
207    
208    /* Character string printing function. Must handle UTF-8 strings in utf8
209    mode. Yields number of characters printed. If handed a NULL file, just counts
210    chars without printing. */
211    
212    static int pchars(unsigned char *p, int length, FILE *f)
213    {
214    int c;
215    int yield = 0;
216    
217    while (length-- > 0)
218      {
219      if (use_utf8)
220        {
221        int rc = utf82ord(p, &c);
222    
223        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
224          {
225          length -= rc - 1;
226          p += rc;
227          if (c < 256 && isprint(c))
228            {
229            if (f != NULL) fprintf(f, "%c", c);
230            yield++;
231            }
232          else
233            {
234            int n;
235            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236            yield += n;
237          }          }
238          continue;
239        }        }
240      break;      }
241    
242      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
243    
244      default:    if (isprint(c = *(p++)))
245      printf("    %s", OP_names[*code]);      {
246      break;      if (f != NULL) fprintf(f, "%c", c);
247        yield++;
248        }
249      else
250        {
251        if (f != NULL) fprintf(f, "\\x%02x", c);
252        yield += 4;
253      }      }
   
   code++;  
   printf("\n");  
254    }    }
255    
256    return yield;
257  }  }
258    
259    
260    
261  /* Character string printing function. */  /*************************************************
262    *              Callout function                  *
263    *************************************************/
264    
265    /* Called from PCRE as a result of the (?C) item. We print out where we are in
266    the match. Yield zero unless more callouts than the fail count, or the callout
267    data is not zero. */
268    
269  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
270  {  {
271  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
272  while (length-- > 0)  int i, pre_start, post_start;
273    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
274      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
275      {
276      fprintf(f, "Callout %d: last capture = %d\n",
277        cb->callout_number, cb->capture_last);
278    
279      for (i = 0; i < cb->capture_top * 2; i += 2)
280        {
281        if (cb->offset_vector[i] < 0)
282          fprintf(f, "%2d: <unset>\n", i/2);
283        else
284          {
285          fprintf(f, "%2d: ", i/2);
286          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287            cb->offset_vector[i+1] - cb->offset_vector[i], f);
288          fprintf(f, "\n");
289          }
290        }
291      }
292    
293    /* Re-print the subject in canonical form, the first time or if giving full
294    details. On subsequent calls in the same match, we use pchars just to find the
295    printed lengths of the substrings. */
296    
297    if (f != NULL) fprintf(f, "--->");
298    
299    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301      cb->current_position - cb->start_match, f);
302    
303    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304      cb->subject_length - cb->current_position, f);
305    
306    if (f != NULL) fprintf(f, "\n");
307    
308    /* Always print appropriate indicators, with callout number if not already
309    shown */
310    
311    if (callout_extra) fprintf(outfile, "    ");
312      else fprintf(outfile, "%3d ", cb->callout_number);
313    
314    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315    fprintf(outfile, "^");
316    
317    if (post_start > 0)
318      {
319      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320      fprintf(outfile, "^");
321      }
322    
323    fprintf(outfile, "\n");
324    
325    first_callout = 0;
326    
327    if ((int)(cb->callout_data) != 0)
328      {
329      fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
330      return (int)(cb->callout_data);
331      }
332    
333    return (cb->callout_number != callout_fail_id)? 0 :
334           (++callout_count >= callout_fail_count)? 1 : 0;
335  }  }
336    
337    
338    /*************************************************
339    *            Local malloc function               *
340    *************************************************/
341    
342  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
343  compiled re. */  compiled re. */
344    
345  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
346  {  {
347  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
348  return malloc(size);  return malloc(size);
349  }  }
350    
351    
352    
353    /*************************************************
354    *          Call pcre_fullinfo()                  *
355    *************************************************/
356    
357    /* Get one piece of information from the pcre_fullinfo() function */
358    
359    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
360    {
361    int rc;
362    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
363      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
364    }
365    
366    
367    
368    /*************************************************
369    *                Main Program                    *
370    *************************************************/
371    
372  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
373  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
374  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 262  int study_options = 0; Line 381  int study_options = 0;
381  int op = 1;  int op = 1;
382  int timeit = 0;  int timeit = 0;
383  int showinfo = 0;  int showinfo = 0;
384    int showstore = 0;
385    int size_offsets = 45;
386    int size_offsets_max;
387    int *offsets;
388    #if !defined NOPOSIX
389  int posix = 0;  int posix = 0;
390    #endif
391  int debug = 0;  int debug = 0;
392  unsigned char buffer[30000];  int done = 0;
393  unsigned char dbuffer[1024];  
394    unsigned char *buffer;
395    unsigned char *dbuffer;
396    
397    /* Get buffers from malloc() so that Electric Fence will check their misuse
398    when I am debugging. */
399    
400    buffer = malloc(BUFFER_SIZE);
401    dbuffer = malloc(DBUFFER_SIZE);
402    
403  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
404    
# Line 275  outfile = stdout; Line 408  outfile = stdout;
408    
409  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
410    {    {
411    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
412    
413      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
414        showstore = 1;
415    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
416    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
417    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
418      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
419          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
420            *endptr == 0))
421        {
422        op++;
423        argc--;
424        }
425    #if !defined NOPOSIX
426    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
427    #endif
428      else if (strcmp(argv[op], "-C") == 0)
429        {
430        int rc;
431        printf("PCRE version %s\n", pcre_version());
432        printf("Compiled with\n");
433        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
434        printf("  %sUTF-8 support\n", rc? "" : "No ");
435        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
436        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
437        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
438        printf("  Internal link size = %d\n", rc);
439        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
440        printf("  POSIX malloc threshold = %d\n", rc);
441        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
442        printf("  Default match limit = %d\n", rc);
443        exit(0);
444        }
445    else    else
446      {      {
447      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
448        printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
449        printf("  -C     show PCRE compile-time options and exit\n");
450        printf("  -d     debug: show compiled code; implies -i\n"
451               "  -i     show information about compiled pattern\n"
452               "  -o <n> set size of offsets vector to <n>\n");
453    #if !defined NOPOSIX
454        printf("  -p     use POSIX interface\n");
455    #endif
456        printf("  -s     output store information\n"
457               "  -t     time compilation and execution\n");
458      return 1;      return 1;
459      }      }
460    op++;    op++;
461    argc--;    argc--;
462    }    }
463    
464    /* Get the store for the offsets vector, and remember what it was */
465    
466    size_offsets_max = size_offsets;
467    offsets = malloc(size_offsets_max * sizeof(int));
468    if (offsets == NULL)
469      {
470      printf("** Failed to get %d bytes of memory for offsets vector\n",
471        size_offsets_max * sizeof(int));
472      return 1;
473      }
474    
475  /* Sort out the input and output files */  /* Sort out the input and output files */
476    
477  if (argc > 1)  if (argc > 1)
# Line 315  if (argc > 2) Line 498  if (argc > 2)
498    
499  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
500    
501  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
502    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
503  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
504    
505  /* Main loop */  /* Main loop */
506    
507  for (;;)  while (!done)
508    {    {
509    pcre *re = NULL;    pcre *re = NULL;
510    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
511    
512    #if !defined NOPOSIX  /* There are still compilers that require no indent */
513    regex_t preg;    regex_t preg;
   char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
514    int do_posix = 0;    int do_posix = 0;
515    #endif
516    
517      const char *error;
518      unsigned char *p, *pp, *ppp;
519      const unsigned char *tables = NULL;
520      int do_study = 0;
521      int do_debug = debug;
522      int do_G = 0;
523      int do_g = 0;
524      int do_showinfo = showinfo;
525      int do_showrest = 0;
526    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
527    
528      use_utf8 = 0;
529    
530    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
531    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
532    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
533      fflush(outfile);
534    
535    p = buffer;    p = buffer;
536    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 347  for (;;) Line 541  for (;;)
541    
542    delimiter = *p++;    delimiter = *p++;
543    
544    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
545      {      {
546      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
547      goto SKIP_DATA;      goto SKIP_DATA;
548      }      }
549    
# Line 357  for (;;) Line 551  for (;;)
551    
552    for(;;)    for(;;)
553      {      {
554      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
555          {
556          if (*pp == '\\' && pp[1] != 0) pp++;
557            else if (*pp == delimiter) break;
558          pp++;
559          }
560      if (*pp != 0) break;      if (*pp != 0) break;
561    
562      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
563      if (len < 256)      if (len < 256)
564        {        {
565        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 371  for (;;) Line 570  for (;;)
570      if (fgets((char *)pp, len, infile) == NULL)      if (fgets((char *)pp, len, infile) == NULL)
571        {        {
572        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
573        goto END_OFF;        done = 1;
574          goto CONTINUE;
575        }        }
576      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
577      }      }
578    
579      /* If the first character after the delimiter is backslash, make
580      the pattern end with backslash. This is purely to provide a way
581      of testing for the error message when a pattern ends with backslash. */
582    
583      if (pp[1] == '\\') *pp++ = '\\';
584    
585    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
586    
587    *pp++ = 0;    *pp++ = 0;
# Line 384  for (;;) Line 590  for (;;)
590    
591    options = 0;    options = 0;
592    study_options = 0;    study_options = 0;
593      log_store = showstore;  /* default from command line */
594    
595    while (*pp != 0)    while (*pp != 0)
596      {      {
597      switch (*pp++)      switch (*pp++)
598        {        {
599          case 'g': do_g = 1; break;
600        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
601        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
602        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
603        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
604    
605          case '+': do_showrest = 1; break;
606        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
607        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
608        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
609          case 'G': do_G = 1; break;
610          case 'I': do_showinfo = 1; break;
611          case 'M': log_store = 1; break;
612          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
613    
614    #if !defined NOPOSIX
615        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
616    #endif
617    
618        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
619        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
620        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
621          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
622    
623          case 'L':
624          ppp = pp;
625          while (*ppp != '\n' && *ppp != ' ') ppp++;
626          *ppp = 0;
627          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
628            {
629            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
630            goto SKIP_DATA;
631            }
632          tables = pcre_maketables();
633          pp = ppp;
634          break;
635    
636        case '\n': case ' ': break;        case '\n': case ' ': break;
637        default:        default:
638        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 406  for (;;) Line 640  for (;;)
640        }        }
641      }      }
642    
643    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
644    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
645      local character tables. */
646    
647    #if !defined NOPOSIX
648    if (posix || do_posix)    if (posix || do_posix)
649      {      {
650      int rc;      int rc;
# Line 422  for (;;) Line 658  for (;;)
658    
659      if (rc != 0)      if (rc != 0)
660        {        {
661        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
662        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
663        goto SKIP_DATA;        goto SKIP_DATA;
664        }        }
# Line 431  for (;;) Line 667  for (;;)
667    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
668    
669    else    else
670    #endif  /* !defined NOPOSIX */
671    
672      {      {
673      if (timeit)      if (timeit)
674        {        {
675        register int i;        register int i;
676        clock_t time_taken;        clock_t time_taken;
677        clock_t start_time = clock();        clock_t start_time = clock();
678        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
679          {          {
680          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
681          if (re != NULL) free(re);          if (re != NULL) free(re);
682          }          }
683        time_taken = clock() - start_time;        time_taken = clock() - start_time;
684        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
685          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
686              (double)CLOCKS_PER_SEC);
687        }        }
688    
689      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
690    
691      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
692      if non-interactive. */      if non-interactive. */
# Line 460  for (;;) Line 699  for (;;)
699          {          {
700          for (;;)          for (;;)
701            {            {
702            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
703              goto END_OFF;              {
704                done = 1;
705                goto CONTINUE;
706                }
707            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
708            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
709            if (len == 0) break;            if (len == 0) break;
710            }            }
711          fprintf(outfile, "\n");          fprintf(outfile, "\n");
712          }          }
713        continue;        goto CONTINUE;
714        }        }
715    
716      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
717        info-returning functions. The old one has a limited interface and
718        returns only limited data. Check that it agrees with the newer one. */
719    
720        if (log_store)
721          fprintf(outfile, "Memory allocation (code space): %d\n",
722            (int)(gotten_store -
723                  sizeof(real_pcre) -
724                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
725    
726      if (showinfo || do_debug)      if (do_showinfo)
727        {        {
728        int first_char, count;        unsigned long int get_options;
729          int old_first_char, old_options, old_count;
730          int count, backrefmax, first_char, need_char;
731          int nameentrysize, namecount;
732          const uschar *nametable;
733          size_t size;
734    
735        if (debug || do_debug) print_internals(re);        if (do_debug)
736            {
737            fprintf(outfile, "------------------------------------------------------------------\n");
738            print_internals(re, outfile);
739            }
740    
741        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
742          new_info(re, NULL, PCRE_INFO_SIZE, &size);
743          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
744          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
745          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
746          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
747          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
748          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
749          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
750    
751          old_count = pcre_info(re, &old_options, &old_first_char);
752        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
753          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
754        else        else
755          {          {
756          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
757          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
758            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
759              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
760              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
761              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
762              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
763              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
764              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
765              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
766          if (first_char == -1)              get_options, old_options);
767            {          }
768            fprintf(outfile, "First char at start or follows \\n\n");  
769            }        if (size != gotten_store) fprintf(outfile,
770          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
771            size, gotten_store);
772    
773          fprintf(outfile, "Capturing subpattern count = %d\n", count);
774          if (backrefmax > 0)
775            fprintf(outfile, "Max back reference = %d\n", backrefmax);
776    
777          if (namecount > 0)
778            {
779            fprintf(outfile, "Named capturing subpatterns:\n");
780            while (namecount-- > 0)
781            {            {
782            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
783                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
784                GET2(nametable, 0));
785              nametable += nameentrysize;
786            }            }
787            }
788    
789          if (get_options == 0) fprintf(outfile, "No options\n");
790            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
791              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
792              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
793              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
794              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
795              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
796              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
797              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
798              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
799              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
800    
801          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
802            fprintf(outfile, "Case state changes\n");
803    
804          if (first_char == -1)
805            {
806            fprintf(outfile, "First char at start or follows \\n\n");
807            }
808          else if (first_char < 0)
809            {
810            fprintf(outfile, "No first char\n");
811            }
812          else
813            {
814            int ch = first_char & 255;
815            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
816              "" : " (caseless)";
817            if (isprint(ch))
818              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
819          else          else
820            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
821            if (isprint(first_char))          }
822              fprintf(outfile, "First char = \'%c\'\n", first_char);  
823            else        if (need_char < 0)
824              fprintf(outfile, "First char = %d\n", first_char);          {
825            }          fprintf(outfile, "No need char\n");
826            }
827          else
828            {
829            int ch = need_char & 255;
830            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
831              "" : " (caseless)";
832            if (isprint(ch))
833              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
834            else
835              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
836          }          }
837        }        }
838    
# Line 522  for (;;) Line 846  for (;;)
846          register int i;          register int i;
847          clock_t time_taken;          clock_t time_taken;
848          clock_t start_time = clock();          clock_t start_time = clock();
849          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
850            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
851          time_taken = clock() - start_time;          time_taken = clock() - start_time;
852          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
853          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
854            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
855                (double)CLOCKS_PER_SEC);
856          }          }
857    
858        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 536  for (;;) Line 861  for (;;)
861        else if (extra == NULL)        else if (extra == NULL)
862          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
863    
864        /* This looks at internal information. A bit kludgy to do it this        else if (do_showinfo)
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
865          {          {
866          real_pcre_extra *xx = (real_pcre_extra *)extra;          size_t size;
867          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          uschar *start_bits = NULL;
868            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
869            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
870            fprintf(outfile, "Study size = %d\n", size);
871            if (start_bits == NULL)
872            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
873          else          else
874            {            {
# Line 551  for (;;) Line 877  for (;;)
877            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
878            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
879              {              {
880              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
881                {                {
882                if (c > 75)                if (c > 75)
883                  {                  {
# Line 580  for (;;) Line 906  for (;;)
906    
907    for (;;)    for (;;)
908      {      {
909      unsigned char *pp;      unsigned char *q;
910        unsigned char *bptr = dbuffer;
911        int *use_offsets = offsets;
912        int use_size_offsets = size_offsets;
913        int callout_data = 0;
914        int callout_data_set = 0;
915      int count, c;      int count, c;
916      int offsets[30];      int copystrings = 0;
917      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
918        int getstrings = 0;
919        int getlist = 0;
920        int gmatched = 0;
921        int start_offset = 0;
922        int g_notempty = 0;
923    
924      options = 0;      options = 0;
925    
926      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
927      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      first_callout = 1;
928      if (infile != stdin) fprintf(outfile, (char *)buffer);      callout_extra = 0;
929        callout_count = 0;
930        callout_fail_count = 999999;
931        callout_fail_id = -1;
932    
933        if (infile == stdin) printf("data> ");
934        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
935          {
936          done = 1;
937          goto CONTINUE;
938          }
939        if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
940    
941      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
942      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 599  for (;;) Line 946  for (;;)
946      p = buffer;      p = buffer;
947      while (isspace(*p)) p++;      while (isspace(*p)) p++;
948    
949      pp = dbuffer;      q = dbuffer;
950      while ((c = *p++) != 0)      while ((c = *p++) != 0)
951        {        {
952        int i = 0;        int i = 0;
953        int n = 0;        int n = 0;
954    
955        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
956          {          {
957          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 623  for (;;) Line 971  for (;;)
971          break;          break;
972    
973          case 'x':          case 'x':
974    
975            /* Handle \x{..} specially - new Perl thing for utf8 */
976    
977            if (*p == '{')
978              {
979              unsigned char *pt = p;
980              c = 0;
981              while (isxdigit(*(++pt)))
982                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
983              if (*pt == '}')
984                {
985                unsigned char buff8[8];
986                int ii, utn;
987                utn = ord2utf8(c, buff8);
988                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
989                c = buff8[ii];   /* Last byte */
990                p = pt + 1;
991                break;
992                }
993              /* Not correct form; fall through */
994              }
995    
996            /* Ordinary \x */
997    
998          c = 0;          c = 0;
999          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1000            {            {
# Line 643  for (;;) Line 1015  for (;;)
1015          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1016          continue;          continue;
1017    
1018          case 'E':          case 'C':
1019          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1020              {
1021              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1022              copystrings |= 1 << n;
1023              }
1024            else if (isalnum(*p))
1025              {
1026              uschar name[256];
1027              uschar *npp = name;
1028              while (isalnum(*p)) *npp++ = *p++;
1029              *npp = 0;
1030              n = pcre_get_stringnumber(re, (char *)name);
1031              if (n < 0)
1032                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1033              else copystrings |= 1 << n;
1034              }
1035            else if (*p == '+')
1036              {
1037              callout_extra = 1;
1038              p++;
1039              }
1040            else if (*p == '-')
1041              {
1042              pcre_callout = NULL;
1043              p++;
1044              }
1045            else if (*p == '!')
1046              {
1047              callout_fail_id = 0;
1048              p++;
1049              while(isdigit(*p))
1050                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1051              callout_fail_count = 0;
1052              if (*p == '!')
1053                {
1054                p++;
1055                while(isdigit(*p))
1056                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1057                }
1058              }
1059            else if (*p == '*')
1060              {
1061              int sign = 1;
1062              callout_data = 0;
1063              if (*(++p) == '-') { sign = -1; p++; }
1064              while(isdigit(*p))
1065                callout_data = callout_data * 10 + *p++ - '0';
1066              callout_data *= sign;
1067              callout_data_set = 1;
1068              }
1069            continue;
1070    
1071            case 'G':
1072            if (isdigit(*p))
1073              {
1074              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1075              getstrings |= 1 << n;
1076              }
1077            else if (isalnum(*p))
1078              {
1079              uschar name[256];
1080              uschar *npp = name;
1081              while (isalnum(*p)) *npp++ = *p++;
1082              *npp = 0;
1083              n = pcre_get_stringnumber(re, (char *)name);
1084              if (n < 0)
1085                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1086              else getstrings |= 1 << n;
1087              }
1088          continue;          continue;
1089    
1090          case 'I':          case 'L':
1091          options |= PCRE_CASELESS;          getlist = 1;
1092          continue;          continue;
1093    
1094          case 'M':          case 'M':
1095          options |= PCRE_MULTILINE;          find_match_limit = 1;
1096          continue;          continue;
1097    
1098          case 'S':          case 'N':
1099          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1100          continue;          continue;
1101    
1102          case 'O':          case 'O':
1103          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1104          if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n > size_offsets_max)
1105              {
1106              size_offsets_max = n;
1107              free(offsets);
1108              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1109              if (offsets == NULL)
1110                {
1111                printf("** Failed to get %d bytes of memory for offsets vector\n",
1112                  size_offsets_max * sizeof(int));
1113                return 1;
1114                }
1115              }
1116            use_size_offsets = n;
1117            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1118          continue;          continue;
1119    
1120          case 'Z':          case 'Z':
1121          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1122          continue;          continue;
1123          }          }
1124        *pp++ = c;        *q++ = c;
1125        }        }
1126      *pp = 0;      *q = 0;
1127      len = pp - dbuffer;      len = q - dbuffer;
1128    
1129      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1130      support timing. */      support timing or playing with the match limit or callout data. */
1131    
1132    #if !defined NOPOSIX
1133      if (posix || do_posix)      if (posix || do_posix)
1134        {        {
1135        int rc;        int rc;
1136        int eflags = 0;        int eflags = 0;
1137        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1138          if (use_size_offsets > 0)
1139            pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1140        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1141        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1142    
1143        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1144    
1145        if (rc != 0)        if (rc != 0)
1146          {          {
1147          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1148          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1149          }          }
1150        else        else
1151          {          {
1152          int i;          size_t i;
1153          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1154            {            {
1155            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1156              {              {
1157              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1158              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1159                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1160              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1161                if (i == 0 && do_showrest)
1162                  {
1163                  fprintf(outfile, " 0+ ");
1164                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1165                    outfile);
1166                  fprintf(outfile, "\n");
1167                  }
1168              }              }
1169            }            }
1170          }          }
1171          free(pmatch);
1172        }        }
1173    
1174      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1175    
1176      else      else
1177    #endif  /* !defined NOPOSIX */
1178    
1179        for (;; gmatched++)    /* Loop for /g or /G */
1180        {        {
1181        if (timeit)        if (timeit)
1182          {          {
1183          register int i;          register int i;
1184          clock_t time_taken;          clock_t time_taken;
1185          clock_t start_time = clock();          clock_t start_time = clock();
1186          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1187            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1188              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1189          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1190          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1191            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1192                (double)CLOCKS_PER_SEC);
1193          }          }
1194    
1195        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If find_match_limit is set, we want to do repeated matches with
1196          size_offsets);        varying limits in order to find the minimum value. */
1197    
1198          if (find_match_limit)
1199            {
1200            int min = 0;
1201            int mid = 64;
1202            int max = -1;
1203    
1204            if (extra == NULL)
1205              {
1206              extra = malloc(sizeof(pcre_extra));
1207              extra->flags = 0;
1208              }
1209            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1210    
1211            for (;;)
1212              {
1213              extra->match_limit = mid;
1214              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1215                options | g_notempty, use_offsets, use_size_offsets);
1216              if (count == PCRE_ERROR_MATCHLIMIT)
1217                {
1218                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1219                min = mid;
1220                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1221                }
1222              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1223                {
1224                if (mid == min + 1)
1225                  {
1226                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1227                  break;
1228                  }
1229                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1230                max = mid;
1231                mid = (min + mid)/2;
1232                }
1233              else break;    /* Some other error */
1234              }
1235    
1236            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1237            }
1238    
1239          /* If callout_data is set, use the interface with additional data */
1240    
1241          else if (callout_data_set)
1242            {
1243            if (extra == NULL)
1244              {
1245              extra = malloc(sizeof(pcre_extra));
1246              extra->flags = 0;
1247              }
1248            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1249            extra->callout_data = (void *)callout_data;
1250            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1251              options | g_notempty, use_offsets, use_size_offsets);
1252            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1253            }
1254    
1255          /* The normal case is just to do the match once, with the default
1256          value of match_limit. */
1257    
1258          else count = pcre_exec(re, extra, (char *)bptr, len,
1259            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1260    
1261        if (count == 0)        if (count == 0)
1262          {          {
1263          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1264          count = size_offsets/2;          count = use_size_offsets/3;
1265          }          }
1266    
1267          /* Matched */
1268    
1269        if (count >= 0)        if (count >= 0)
1270          {          {
1271          int i;          int i;
1272          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1273            {            {
1274            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1275              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1276            else            else
1277              {              {
1278              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1279              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1280                  use_offsets[i+1] - use_offsets[i], outfile);
1281              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1282                if (i == 0)
1283                  {
1284                  if (do_showrest)
1285                    {
1286                    fprintf(outfile, " 0+ ");
1287                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1288                      outfile);
1289                    fprintf(outfile, "\n");
1290                    }
1291                  }
1292                }
1293              }
1294    
1295            for (i = 0; i < 32; i++)
1296              {
1297              if ((copystrings & (1 << i)) != 0)
1298                {
1299                char copybuffer[16];
1300                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1301                  i, copybuffer, sizeof(copybuffer));
1302                if (rc < 0)
1303                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1304                else
1305                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1306                }
1307              }
1308    
1309            for (i = 0; i < 32; i++)
1310              {
1311              if ((getstrings & (1 << i)) != 0)
1312                {
1313                const char *substring;
1314                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1315                  i, &substring);
1316                if (rc < 0)
1317                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1318                else
1319                  {
1320                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1321                  /* free((void *)substring); */
1322                  pcre_free_substring(substring);
1323                  }
1324                }
1325              }
1326    
1327            if (getlist)
1328              {
1329              const char **stringlist;
1330              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1331                &stringlist);
1332              if (rc < 0)
1333                fprintf(outfile, "get substring list failed %d\n", rc);
1334              else
1335                {
1336                for (i = 0; i < count; i++)
1337                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1338                if (stringlist[i] != NULL)
1339                  fprintf(outfile, "string list not terminated by NULL\n");
1340                /* free((void *)stringlist); */
1341                pcre_free_substring_list(stringlist);
1342              }              }
1343            }            }
1344          }          }
1345    
1346          /* Failed to match. If this is a /g or /G loop and we previously set
1347          g_notempty after a null match, this is not necessarily the end.
1348          We want to advance the start offset, and continue. Fudge the offset
1349          values to achieve this. We won't be at the end of the string - that
1350          was checked before setting g_notempty. */
1351    
1352        else        else
1353          {          {
1354          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1355            else fprintf(outfile, "Error %d\n", count);            {
1356              use_offsets[0] = start_offset;
1357              use_offsets[1] = start_offset + 1;
1358              }
1359            else
1360              {
1361              if (gmatched == 0)   /* Error if no previous matches */
1362                {
1363                if (count == -1) fprintf(outfile, "No match\n");
1364                  else fprintf(outfile, "Error %d\n", count);
1365                }
1366              break;  /* Out of the /g loop */
1367              }
1368          }          }
       }  
     }  
1369    
1370          /* If not /g or /G we are done */
1371    
1372          if (!do_g && !do_G) break;
1373    
1374          /* If we have matched an empty string, first check to see if we are at
1375          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1376          what Perl's /g options does. This turns out to be rather cunning. First
1377          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1378          same point. If this fails (picked up above) we advance to the next
1379          character. */
1380    
1381          g_notempty = 0;
1382          if (use_offsets[0] == use_offsets[1])
1383            {
1384            if (use_offsets[0] == len) break;
1385            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1386            }
1387    
1388          /* For /g, update the start offset, leaving the rest alone */
1389    
1390          if (do_g) start_offset = use_offsets[1];
1391    
1392          /* For /G, update the pointer and length */
1393    
1394          else
1395            {
1396            bptr += use_offsets[1];
1397            len -= use_offsets[1];
1398            }
1399          }  /* End of loop for /g and /G */
1400        }    /* End of loop for data lines */
1401    
1402      CONTINUE:
1403    
1404    #if !defined NOPOSIX
1405    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1406    #endif
1407    
1408    if (re != NULL) free(re);    if (re != NULL) free(re);
1409    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1410      if (tables != NULL)
1411        {
1412        free((void *)tables);
1413        setlocale(LC_CTYPE, "C");
1414        }
1415    }    }
1416    
 END_OFF:  
1417  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1418  return 0;  return 0;
1419  }  }

Legend:
Removed from v.3  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.5