/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 11 by nigel, Sat Feb 24 21:38:17 2007 UTC revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
12  #include <stdlib.h>  #include <stdlib.h>
13  #include <time.h>  #include <time.h>
14    #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22    /* It is possible to compile this test program without including support for
23    testing the POSIX interface, though this is not available via the standard
24    Makefile. */
25    
26    #if !defined NOPOSIX
27  #include "pcreposix.h"  #include "pcreposix.h"
28    #endif
29    
30  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
31  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 35 
35  #endif  #endif
36  #endif  #endif
37    
38    #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE BUFFER_SIZE
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int show_malloc;
52    static int use_utf8;
53    static size_t gotten_store;
54    
55    
56    static const int utf8_table1[] = {
57      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
59  /* Debugging function to print the internal form of the regex. This is the same  static const int utf8_table2[] = {
60  code as contained in pcre.c under the DEBUG macro. */    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    
62  static const char *OP_names[] = {  static const int utf8_table3[] = {
63    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
   "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  
   "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  
   "Brazero", "Braminzero", "Bra"  
 };  
   
   
 static void print_internals(pcre *re)  
 {  
 unsigned char *code = ((real_pcre *)re)->code;  
   
 printf("------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   printf("%3d ", code - ((real_pcre *)re)->code);  
   
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf("0,");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
   
       code++;  
       printf("    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
64    
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
65    
         default:  
         code--;  
         }  
       }  
     break;  
66    
67      /* Anything else is just a one-node item */  /*************************************************
68    *         Print compiled regex                   *
69    *************************************************/
70    
71      default:  /* The code for doing this is held in a separate file that is also included in
72      printf("    %s", OP_names[*code]);  pcre.c when it is compiled with the debug switch. It defines a function called
73      break;  print_internals(), which uses a table of opcode lengths defined by the macro
74      }  OP_LENGTHS, whose name must be OP_lengths. */
75    
76    static uschar OP_lengths[] = { OP_LENGTHS };
77    
78    #include "printint.c"
79    
80    
81    
82    /*************************************************
83    *          Read number from string               *
84    *************************************************/
85    
86    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
87    around with conditional compilation, just do the job by hand. It is only used
88    for unpicking the -o argument, so just keep it simple.
89    
90    Arguments:
91      str           string to be converted
92      endptr        where to put the end pointer
93    
94    Returns:        the converted value, as an int
95    */
96    
97    static int
98    get_value(unsigned char *str, unsigned char **endptr)
99    {
100    int result = 0;
101    while(*str != 0 && isspace(*str)) str++;
102    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
103    *endptr = str;
104    return(result);
105    }
106    
107    
108    
109    /*************************************************
110    *       Convert character value to UTF-8         *
111    *************************************************/
112    
113    /* This function takes an integer value in the range 0 - 0x7fffffff
114    and encodes it as a UTF-8 character in 1 to 6 bytes.
115    
116    Arguments:
117      cvalue     the character value
118      buffer     pointer to buffer for result - at least 6 bytes long
119    
120    Returns:     number of characters placed in the buffer
121                 -1 if input character is negative
122                 0 if input character is positive but too big (only when
123                 int is longer than 32 bits)
124    */
125    
126    static int
127    ord2utf8(int cvalue, unsigned char *buffer)
128    {
129    register int i, j;
130    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131      if (cvalue <= utf8_table1[i]) break;
132    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133    if (cvalue < 0) return -1;
134    
135    buffer += i;
136    for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141    *buffer = utf8_table2[i] | cvalue;
142    return i + 1;
143    }
144    
145    
146    /*************************************************
147    *            Convert UTF-8 string to value       *
148    *************************************************/
149    
150    /* This function takes one or more bytes that represent a UTF-8 character,
151    and returns the value of the character.
152    
153    Argument:
154      buffer   a pointer to the byte vector
155      vptr     a pointer to an int to receive the value
156    
157    Returns:   >  0 => the number of bytes consumed
158               -6 to 0 => malformed UTF-8 character at offset = (-return)
159    */
160    
161    static int
162    utf82ord(unsigned char *buffer, int *vptr)
163    {
164    int c = *buffer++;
165    int d = c;
166    int i, j, s;
167    
168    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
169      {
170      if ((d & 0x80) == 0) break;
171      d <<= 1;
172      }
173    
174    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
175    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
176    
177    /* i now has a value in the range 1-5 */
178    
179    code++;  s = 6*i;
180    printf("\n");  d = (c & utf8_table3[i]) << s;
181    
182    for (j = 0; j < i; j++)
183      {
184      c = *buffer++;
185      if ((c & 0xc0) != 0x80) return -(j+1);
186      s -= 6;
187      d |= (c & 0x3f) << s;
188    }    }
189    
190    /* Check that encoding was the correct unique one */
191    
192    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193      if (d <= utf8_table1[j]) break;
194    if (j != i) return -(i+1);
195    
196    /* Valid value */
197    
198    *vptr = d;
199    return i+1;
200  }  }
201    
202    
203    
204  /* Character string printing function. */  /*************************************************
205    *             Print character string             *
206    *************************************************/
207    
208    /* Character string printing function. Must handle UTF-8 strings in utf8
209    mode. Yields number of characters printed. If handed a NULL file, just counts
210    chars without printing. */
211    
212  static void pchars(unsigned char *p, int length)  static int pchars(unsigned char *p, int length, FILE *f)
213  {  {
214  int c;  int c;
215    int yield = 0;
216    
217  while (length-- > 0)  while (length-- > 0)
218    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
219      else fprintf(outfile, "\\x%02x", c);    if (use_utf8)
220        {
221        int rc = utf82ord(p, &c);
222    
223        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
224          {
225          length -= rc - 1;
226          p += rc;
227          if (c < 256 && isprint(c))
228            {
229            if (f != NULL) fprintf(f, "%c", c);
230            yield++;
231            }
232          else
233            {
234            int n;
235            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236            yield += n;
237            }
238          continue;
239          }
240        }
241    
242       /* Not UTF-8, or malformed UTF-8  */
243    
244      if (isprint(c = *(p++)))
245        {
246        if (f != NULL) fprintf(f, "%c", c);
247        yield++;
248        }
249      else
250        {
251        if (f != NULL) fprintf(f, "\\x%02x", c);
252        yield += 4;
253        }
254      }
255    
256    return yield;
257  }  }
258    
259    
260    
261    /*************************************************
262    *              Callout function                  *
263    *************************************************/
264    
265    /* Called from PCRE as a result of the (?C) item. We print out where we are in
266    the match. Yield zero unless more callouts than the fail count, or the callout
267    data is not zero. */
268    
269    static int callout(pcre_callout_block *cb)
270    {
271    FILE *f = (first_callout | callout_extra)? outfile : NULL;
272    int i, pre_start, post_start;
273    
274    if (callout_extra)
275      {
276      fprintf(f, "Callout %d: last capture = %d\n",
277        cb->callout_number, cb->capture_last);
278    
279      for (i = 0; i < cb->capture_top * 2; i += 2)
280        {
281        if (cb->offset_vector[i] < 0)
282          fprintf(f, "%2d: <unset>\n", i/2);
283        else
284          {
285          fprintf(f, "%2d: ", i/2);
286          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287            cb->offset_vector[i+1] - cb->offset_vector[i], f);
288          fprintf(f, "\n");
289          }
290        }
291      }
292    
293    /* Re-print the subject in canonical form, the first time or if giving full
294    details. On subsequent calls in the same match, we use pchars just to find the
295    printed lengths of the substrings. */
296    
297    if (f != NULL) fprintf(f, "--->");
298    
299    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301      cb->current_position - cb->start_match, f);
302    
303    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304      cb->subject_length - cb->current_position, f);
305    
306    if (f != NULL) fprintf(f, "\n");
307    
308    /* Always print appropriate indicators, with callout number if not already
309    shown */
310    
311    if (callout_extra) fprintf(outfile, "    ");
312      else fprintf(outfile, "%3d ", cb->callout_number);
313    
314    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315    fprintf(outfile, "^");
316    
317    if (post_start > 0)
318      {
319      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320      fprintf(outfile, "^");
321      }
322    
323    fprintf(outfile, "\n");
324    first_callout = 0;
325    
326    if (cb->callout_data != NULL)
327      {
328      int callout_data = *((int *)(cb->callout_data));
329      if (callout_data != 0)
330        {
331        fprintf(outfile, "Callout data = %d\n", callout_data);
332        return callout_data;
333        }
334      }
335    
336    return (cb->callout_number != callout_fail_id)? 0 :
337           (++callout_count >= callout_fail_count)? 1 : 0;
338    }
339    
340    
341    /*************************************************
342    *            Local malloc functions              *
343    *************************************************/
344    
345  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
346  compiled re. */  compiled re. */
347    
348  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
349  {  {
350  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
351  return malloc(size);  gotten_store = size;
352    if (show_malloc)
353      fprintf(outfile, "malloc       %3d %p\n", size, block);
354    return block;
355    }
356    
357    static void new_free(void *block)
358    {
359    if (show_malloc)
360      fprintf(outfile, "free             %p\n", block);
361    free(block);
362    }
363    
364    
365    /* For recursion malloc/free, to test stacking calls */
366    
367    static void *stack_malloc(size_t size)
368    {
369    void *block = malloc(size);
370    if (show_malloc)
371      fprintf(outfile, "stack_malloc %3d %p\n", size, block);
372    return block;
373  }  }
374    
375    static void stack_free(void *block)
376    {
377    if (show_malloc)
378      fprintf(outfile, "stack_free       %p\n", block);
379    free(block);
380    }
381    
382    
383    /*************************************************
384    *          Call pcre_fullinfo()                  *
385    *************************************************/
386    
387    /* Get one piece of information from the pcre_fullinfo() function */
388    
389    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
390    {
391    int rc;
392    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
393      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
394    }
395    
396    
397    
398    /*************************************************
399    *                Main Program                    *
400    *************************************************/
401    
402  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
403  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
404  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 266  int study_options = 0; Line 411  int study_options = 0;
411  int op = 1;  int op = 1;
412  int timeit = 0;  int timeit = 0;
413  int showinfo = 0;  int showinfo = 0;
414    int showstore = 0;
415    int size_offsets = 45;
416    int size_offsets_max;
417    int *offsets;
418    #if !defined NOPOSIX
419  int posix = 0;  int posix = 0;
420    #endif
421  int debug = 0;  int debug = 0;
422  int done = 0;  int done = 0;
423  unsigned char buffer[30000];  
424  unsigned char dbuffer[1024];  unsigned char *buffer;
425    unsigned char *dbuffer;
426    
427    /* Get buffers from malloc() so that Electric Fence will check their misuse
428    when I am debugging. */
429    
430    buffer = (unsigned char *)malloc(BUFFER_SIZE);
431    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
432    
433  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
434    
# Line 280  outfile = stdout; Line 438  outfile = stdout;
438    
439  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
440    {    {
441    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
442    
443      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
444        showstore = 1;
445    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
446    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
447    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
448      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
449          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
450            *endptr == 0))
451        {
452        op++;
453        argc--;
454        }
455    #if !defined NOPOSIX
456    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
457    #endif
458      else if (strcmp(argv[op], "-C") == 0)
459        {
460        int rc;
461        printf("PCRE version %s\n", pcre_version());
462        printf("Compiled with\n");
463        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
464        printf("  %sUTF-8 support\n", rc? "" : "No ");
465        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
466        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
467        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
468        printf("  Internal link size = %d\n", rc);
469        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
470        printf("  POSIX malloc threshold = %d\n", rc);
471        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
472        printf("  Default match limit = %d\n", rc);
473        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
474        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
475        exit(0);
476        }
477    else    else
478      {      {
479      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
480        printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
481        printf("  -C     show PCRE compile-time options and exit\n");
482        printf("  -d     debug: show compiled code; implies -i\n"
483               "  -i     show information about compiled pattern\n"
484               "  -o <n> set size of offsets vector to <n>\n");
485    #if !defined NOPOSIX
486        printf("  -p     use POSIX interface\n");
487    #endif
488        printf("  -s     output store information\n"
489               "  -t     time compilation and execution\n");
490      return 1;      return 1;
491      }      }
492    op++;    op++;
493    argc--;    argc--;
494    }    }
495    
496    /* Get the store for the offsets vector, and remember what it was */
497    
498    size_offsets_max = size_offsets;
499    offsets = (int *)malloc(size_offsets_max * sizeof(int));
500    if (offsets == NULL)
501      {
502      printf("** Failed to get %d bytes of memory for offsets vector\n",
503        size_offsets_max * sizeof(int));
504      return 1;
505      }
506    
507  /* Sort out the input and output files */  /* Sort out the input and output files */
508    
509  if (argc > 1)  if (argc > 1)
# Line 319  if (argc > 2) Line 529  if (argc > 2)
529  /* Set alternative malloc function */  /* Set alternative malloc function */
530    
531  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
532    pcre_free = new_free;
533    pcre_stack_malloc = stack_malloc;
534    pcre_stack_free = stack_free;
535    
536  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
537    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
538  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
539    
540  /* Main loop */  /* Main loop */
# Line 331  while (!done) Line 543  while (!done)
543    {    {
544    pcre *re = NULL;    pcre *re = NULL;
545    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
546    
547    #if !defined NOPOSIX  /* There are still compilers that require no indent */
548    regex_t preg;    regex_t preg;
549      int do_posix = 0;
550    #endif
551    
552    const char *error;    const char *error;
553    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
554      const unsigned char *tables = NULL;
555    int do_study = 0;    int do_study = 0;
556    int do_debug = 0;    int do_debug = debug;
557    int do_posix = 0;    int do_G = 0;
558      int do_g = 0;
559      int do_showinfo = showinfo;
560      int do_showrest = 0;
561    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
562    
563      use_utf8 = 0;
564    
565    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
566    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
567    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
568      fflush(outfile);
569    
570    p = buffer;    p = buffer;
571    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 352  while (!done) Line 576  while (!done)
576    
577    delimiter = *p++;    delimiter = *p++;
578    
579    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
580      {      {
581      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
582      goto SKIP_DATA;      goto SKIP_DATA;
583      }      }
584    
# Line 362  while (!done) Line 586  while (!done)
586    
587    for(;;)    for(;;)
588      {      {
589      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
590          {
591          if (*pp == '\\' && pp[1] != 0) pp++;
592            else if (*pp == delimiter) break;
593          pp++;
594          }
595      if (*pp != 0) break;      if (*pp != 0) break;
596    
597      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
598      if (len < 256)      if (len < 256)
599        {        {
600        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 379  while (!done) Line 608  while (!done)
608        done = 1;        done = 1;
609        goto CONTINUE;        goto CONTINUE;
610        }        }
611      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
612      }      }
613    
614      /* If the first character after the delimiter is backslash, make
615      the pattern end with backslash. This is purely to provide a way
616      of testing for the error message when a pattern ends with backslash. */
617    
618      if (pp[1] == '\\') *pp++ = '\\';
619    
620    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
621    
622    *pp++ = 0;    *pp++ = 0;
# Line 390  while (!done) Line 625  while (!done)
625    
626    options = 0;    options = 0;
627    study_options = 0;    study_options = 0;
628      log_store = showstore;  /* default from command line */
629    
630    while (*pp != 0)    while (*pp != 0)
631      {      {
632      switch (*pp++)      switch (*pp++)
633        {        {
634          case 'g': do_g = 1; break;
635        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
636        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
637        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
638        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
639    
640          case '+': do_showrest = 1; break;
641        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
642        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
643        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
644          case 'G': do_G = 1; break;
645          case 'I': do_showinfo = 1; break;
646          case 'M': log_store = 1; break;
647          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
648    
649    #if !defined NOPOSIX
650        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
651    #endif
652    
653        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
654        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
655        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
656          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
657          case '?': options |= PCRE_NO_UTF8_CHECK; break;
658    
659          case 'L':
660          ppp = pp;
661          while (*ppp != '\n' && *ppp != ' ') ppp++;
662          *ppp = 0;
663          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
664            {
665            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
666            goto SKIP_DATA;
667            }
668          tables = pcre_maketables();
669          pp = ppp;
670          break;
671    
672        case '\n': case ' ': break;        case '\n': case ' ': break;
673        default:        default:
674        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 413  while (!done) Line 677  while (!done)
677      }      }
678    
679    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
680    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
681      local character tables. */
682    
683    #if !defined NOPOSIX
684    if (posix || do_posix)    if (posix || do_posix)
685      {      {
686      int rc;      int rc;
# Line 428  while (!done) Line 694  while (!done)
694    
695      if (rc != 0)      if (rc != 0)
696        {        {
697        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
698        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
699        goto SKIP_DATA;        goto SKIP_DATA;
700        }        }
# Line 437  while (!done) Line 703  while (!done)
703    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
704    
705    else    else
706    #endif  /* !defined NOPOSIX */
707    
708      {      {
709      if (timeit)      if (timeit)
710        {        {
711        register int i;        register int i;
712        clock_t time_taken;        clock_t time_taken;
713        clock_t start_time = clock();        clock_t start_time = clock();
714        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
715          {          {
716          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
717          if (re != NULL) free(re);          if (re != NULL) free(re);
718          }          }
719        time_taken = clock() - start_time;        time_taken = clock() - start_time;
720        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
721          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
722              (double)CLOCKS_PER_SEC);
723        }        }
724    
725      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
726    
727      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
728      if non-interactive. */      if non-interactive. */
# Line 466  while (!done) Line 735  while (!done)
735          {          {
736          for (;;)          for (;;)
737            {            {
738            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
739              {              {
740              done = 1;              done = 1;
741              goto CONTINUE;              goto CONTINUE;
# Line 477  while (!done) Line 746  while (!done)
746            }            }
747          fprintf(outfile, "\n");          fprintf(outfile, "\n");
748          }          }
749        continue;        goto CONTINUE;
750        }        }
751    
752      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
753        info-returning functions. The old one has a limited interface and
754        returns only limited data. Check that it agrees with the newer one. */
755    
756        if (log_store)
757          fprintf(outfile, "Memory allocation (code space): %d\n",
758            (int)(gotten_store -
759                  sizeof(real_pcre) -
760                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
761    
762      if (showinfo || do_debug)      if (do_showinfo)
763        {        {
764        int first_char, count;        unsigned long int get_options;
765          int old_first_char, old_options, old_count;
766          int count, backrefmax, first_char, need_char;
767          int nameentrysize, namecount;
768          const uschar *nametable;
769          size_t size;
770    
771        if (debug || do_debug) print_internals(re);        if (do_debug)
772            {
773            fprintf(outfile, "------------------------------------------------------------------\n");
774            print_internals(re, outfile);
775            }
776    
777          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
778          new_info(re, NULL, PCRE_INFO_SIZE, &size);
779          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
780          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
781          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
782          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
783          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
784          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
785          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
786    
787        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
788        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
789          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
790        else        else
791          {          {
792          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
793          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
794            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
795              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
796              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
797              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
798              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
799              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
800              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
801              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
802          if (first_char == -1)              get_options, old_options);
803            {          }
804            fprintf(outfile, "First char at start or follows \\n\n");  
805            }        if (size != gotten_store) fprintf(outfile,
806          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
807            size, gotten_store);
808    
809          fprintf(outfile, "Capturing subpattern count = %d\n", count);
810          if (backrefmax > 0)
811            fprintf(outfile, "Max back reference = %d\n", backrefmax);
812    
813          if (namecount > 0)
814            {
815            fprintf(outfile, "Named capturing subpatterns:\n");
816            while (namecount-- > 0)
817            {            {
818            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
819                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
820                GET2(nametable, 0));
821              nametable += nameentrysize;
822            }            }
823            }
824    
825          if (get_options == 0) fprintf(outfile, "No options\n");
826            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
827              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
828              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
829              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
830              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
831              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
832              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
833              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
834              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
835              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
836              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
837    
838          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
839            fprintf(outfile, "Case state changes\n");
840    
841          if (first_char == -1)
842            {
843            fprintf(outfile, "First char at start or follows \\n\n");
844            }
845          else if (first_char < 0)
846            {
847            fprintf(outfile, "No first char\n");
848            }
849          else
850            {
851            int ch = first_char & 255;
852            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
853              "" : " (caseless)";
854            if (isprint(ch))
855              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
856          else          else
857            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
858            if (isprint(first_char))          }
859              fprintf(outfile, "First char = \'%c\'\n", first_char);  
860            else        if (need_char < 0)
861              fprintf(outfile, "First char = %d\n", first_char);          {
862            }          fprintf(outfile, "No need char\n");
863            }
864          else
865            {
866            int ch = need_char & 255;
867            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
868              "" : " (caseless)";
869            if (isprint(ch))
870              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
871            else
872              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
873          }          }
874        }        }
875    
# Line 531  while (!done) Line 883  while (!done)
883          register int i;          register int i;
884          clock_t time_taken;          clock_t time_taken;
885          clock_t start_time = clock();          clock_t start_time = clock();
886          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
887            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
888          time_taken = clock() - start_time;          time_taken = clock() - start_time;
889          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
890          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
891            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
892                (double)CLOCKS_PER_SEC);
893          }          }
894    
895        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 545  while (!done) Line 898  while (!done)
898        else if (extra == NULL)        else if (extra == NULL)
899          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
900    
901        /* This looks at internal information. A bit kludgy to do it this        /* Don't output study size; at present it is in any case a fixed
902        way, but it is useful for testing. */        value, but it varies, depending on the computer architecture, and
903          so messes up the test suite. */
904    
905        else if (showinfo || do_debug)        else if (do_showinfo)
906          {          {
907          real_pcre_extra *xx = (real_pcre_extra *)extra;          size_t size;
908          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          uschar *start_bits = NULL;
909            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
910            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
911            /* fprintf(outfile, "Study size = %d\n", size); */
912            if (start_bits == NULL)
913            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
914          else          else
915            {            {
# Line 560  while (!done) Line 918  while (!done)
918            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
919            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
920              {              {
921              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
922                {                {
923                if (c > 75)                if (c > 75)
924                  {                  {
# Line 590  while (!done) Line 948  while (!done)
948    for (;;)    for (;;)
949      {      {
950      unsigned char *q;      unsigned char *q;
951        unsigned char *bptr = dbuffer;
952        int *use_offsets = offsets;
953        int use_size_offsets = size_offsets;
954        int callout_data = 0;
955        int callout_data_set = 0;
956      int count, c;      int count, c;
957      int offsets[30];      int copystrings = 0;
958      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
959        int getstrings = 0;
960        int getlist = 0;
961        int gmatched = 0;
962        int start_offset = 0;
963        int g_notempty = 0;
964    
965      options = 0;      options = 0;
966    
967      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
968      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      first_callout = 1;
969        callout_extra = 0;
970        callout_count = 0;
971        callout_fail_count = 999999;
972        callout_fail_id = -1;
973        show_malloc = 0;
974    
975        if (infile == stdin) printf("data> ");
976        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
977        {        {
978        done = 1;        done = 1;
979        goto CONTINUE;        goto CONTINUE;
980        }        }
981      if (infile != stdin) fprintf(outfile, (char *)buffer);      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
982    
983      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
984      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 617  while (!done) Line 993  while (!done)
993        {        {
994        int i = 0;        int i = 0;
995        int n = 0;        int n = 0;
996    
997        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
998          {          {
999          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 636  while (!done) Line 1013  while (!done)
1013          break;          break;
1014    
1015          case 'x':          case 'x':
1016    
1017            /* Handle \x{..} specially - new Perl thing for utf8 */
1018    
1019            if (*p == '{')
1020              {
1021              unsigned char *pt = p;
1022              c = 0;
1023              while (isxdigit(*(++pt)))
1024                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1025              if (*pt == '}')
1026                {
1027                unsigned char buff8[8];
1028                int ii, utn;
1029                utn = ord2utf8(c, buff8);
1030                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1031                c = buff8[ii];   /* Last byte */
1032                p = pt + 1;
1033                break;
1034                }
1035              /* Not correct form; fall through */
1036              }
1037    
1038            /* Ordinary \x */
1039    
1040          c = 0;          c = 0;
1041          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1042            {            {
# Line 656  while (!done) Line 1057  while (!done)
1057          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1058          continue;          continue;
1059    
1060          case 'E':          case 'C':
1061          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1062              {
1063              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1064              copystrings |= 1 << n;
1065              }
1066            else if (isalnum(*p))
1067              {
1068              uschar name[256];
1069              uschar *npp = name;
1070              while (isalnum(*p)) *npp++ = *p++;
1071              *npp = 0;
1072              n = pcre_get_stringnumber(re, (char *)name);
1073              if (n < 0)
1074                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1075              else copystrings |= 1 << n;
1076              }
1077            else if (*p == '+')
1078              {
1079              callout_extra = 1;
1080              p++;
1081              }
1082            else if (*p == '-')
1083              {
1084              pcre_callout = NULL;
1085              p++;
1086              }
1087            else if (*p == '!')
1088              {
1089              callout_fail_id = 0;
1090              p++;
1091              while(isdigit(*p))
1092                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1093              callout_fail_count = 0;
1094              if (*p == '!')
1095                {
1096                p++;
1097                while(isdigit(*p))
1098                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1099                }
1100              }
1101            else if (*p == '*')
1102              {
1103              int sign = 1;
1104              callout_data = 0;
1105              if (*(++p) == '-') { sign = -1; p++; }
1106              while(isdigit(*p))
1107                callout_data = callout_data * 10 + *p++ - '0';
1108              callout_data *= sign;
1109              callout_data_set = 1;
1110              }
1111          continue;          continue;
1112    
1113          case 'I':          case 'G':
1114          options |= PCRE_CASELESS;          if (isdigit(*p))
1115              {
1116              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1117              getstrings |= 1 << n;
1118              }
1119            else if (isalnum(*p))
1120              {
1121              uschar name[256];
1122              uschar *npp = name;
1123              while (isalnum(*p)) *npp++ = *p++;
1124              *npp = 0;
1125              n = pcre_get_stringnumber(re, (char *)name);
1126              if (n < 0)
1127                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1128              else getstrings |= 1 << n;
1129              }
1130            continue;
1131    
1132            case 'L':
1133            getlist = 1;
1134          continue;          continue;
1135    
1136          case 'M':          case 'M':
1137          options |= PCRE_MULTILINE;          find_match_limit = 1;
1138          continue;          continue;
1139    
1140          case 'S':          case 'N':
1141          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1142          continue;          continue;
1143    
1144          case 'O':          case 'O':
1145          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1146          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1147              {
1148              size_offsets_max = n;
1149              free(offsets);
1150              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1151              if (offsets == NULL)
1152                {
1153                printf("** Failed to get %d bytes of memory for offsets vector\n",
1154                  size_offsets_max * sizeof(int));
1155                return 1;
1156                }
1157              }
1158            use_size_offsets = n;
1159            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1160            continue;
1161    
1162            case 'S':
1163            show_malloc = 1;
1164          continue;          continue;
1165    
1166          case 'Z':          case 'Z':
1167          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1168          continue;          continue;
1169    
1170            case '?':
1171            options |= PCRE_NO_UTF8_CHECK;
1172            continue;
1173          }          }
1174        *q++ = c;        *q++ = c;
1175        }        }
# Line 687  while (!done) Line 1177  while (!done)
1177      len = q - dbuffer;      len = q - dbuffer;
1178    
1179      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1180      support timing. */      support timing or playing with the match limit or callout data. */
1181    
1182    #if !defined NOPOSIX
1183      if (posix || do_posix)      if (posix || do_posix)
1184        {        {
1185        int rc;        int rc;
1186        int eflags = 0;        int eflags = 0;
1187        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1188          if (use_size_offsets > 0)
1189            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1190        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1191        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1192    
1193        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1194    
1195        if (rc != 0)        if (rc != 0)
1196          {          {
1197          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1198          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1199          }          }
1200        else        else
1201          {          {
1202          size_t i;          size_t i;
1203          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1204            {            {
1205            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1206              {              {
1207              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1208              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1209                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1210              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1211                if (i == 0 && do_showrest)
1212                  {
1213                  fprintf(outfile, " 0+ ");
1214                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1215                    outfile);
1216                  fprintf(outfile, "\n");
1217                  }
1218              }              }
1219            }            }
1220          }          }
1221          free(pmatch);
1222        }        }
1223    
1224      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1225    
1226      else      else
1227    #endif  /* !defined NOPOSIX */
1228    
1229        for (;; gmatched++)    /* Loop for /g or /G */
1230        {        {
1231        if (timeit)        if (timeit)
1232          {          {
1233          register int i;          register int i;
1234          clock_t time_taken;          clock_t time_taken;
1235          clock_t start_time = clock();          clock_t start_time = clock();
1236          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1237            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1238              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1239          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1240          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1241            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1242                (double)CLOCKS_PER_SEC);
1243          }          }
1244    
1245        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If find_match_limit is set, we want to do repeated matches with
1246          size_offsets);        varying limits in order to find the minimum value. */
1247    
1248          if (find_match_limit)
1249            {
1250            int min = 0;
1251            int mid = 64;
1252            int max = -1;
1253    
1254            if (extra == NULL)
1255              {
1256              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1257              extra->flags = 0;
1258              }
1259            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1260    
1261            for (;;)
1262              {
1263              extra->match_limit = mid;
1264              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1265                options | g_notempty, use_offsets, use_size_offsets);
1266              if (count == PCRE_ERROR_MATCHLIMIT)
1267                {
1268                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1269                min = mid;
1270                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1271                }
1272              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1273                {
1274                if (mid == min + 1)
1275                  {
1276                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1277                  break;
1278                  }
1279                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1280                max = mid;
1281                mid = (min + mid)/2;
1282                }
1283              else break;    /* Some other error */
1284              }
1285    
1286            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1287            }
1288    
1289          /* If callout_data is set, use the interface with additional data */
1290    
1291          else if (callout_data_set)
1292            {
1293            if (extra == NULL)
1294              {
1295              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1296              extra->flags = 0;
1297              }
1298            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1299            extra->callout_data = &callout_data;
1300            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1301              options | g_notempty, use_offsets, use_size_offsets);
1302            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1303            }
1304    
1305          /* The normal case is just to do the match once, with the default
1306          value of match_limit. */
1307    
1308          else count = pcre_exec(re, extra, (char *)bptr, len,
1309            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1310    
1311        if (count == 0)        if (count == 0)
1312          {          {
1313          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1314          count = size_offsets/2;          count = use_size_offsets/3;
1315          }          }
1316    
1317          /* Matched */
1318    
1319        if (count >= 0)        if (count >= 0)
1320          {          {
1321          int i;          int i;
1322          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1323            {            {
1324            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1325              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1326            else            else
1327              {              {
1328              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1329              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1330                  use_offsets[i+1] - use_offsets[i], outfile);
1331              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1332                if (i == 0)
1333                  {
1334                  if (do_showrest)
1335                    {
1336                    fprintf(outfile, " 0+ ");
1337                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1338                      outfile);
1339                    fprintf(outfile, "\n");
1340                    }
1341                  }
1342                }
1343              }
1344    
1345            for (i = 0; i < 32; i++)
1346              {
1347              if ((copystrings & (1 << i)) != 0)
1348                {
1349                char copybuffer[16];
1350                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1351                  i, copybuffer, sizeof(copybuffer));
1352                if (rc < 0)
1353                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1354                else
1355                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1356                }
1357              }
1358    
1359            for (i = 0; i < 32; i++)
1360              {
1361              if ((getstrings & (1 << i)) != 0)
1362                {
1363                const char *substring;
1364                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1365                  i, &substring);
1366                if (rc < 0)
1367                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1368                else
1369                  {
1370                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1371                  /* free((void *)substring); */
1372                  pcre_free_substring(substring);
1373                  }
1374                }
1375              }
1376    
1377            if (getlist)
1378              {
1379              const char **stringlist;
1380              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1381                &stringlist);
1382              if (rc < 0)
1383                fprintf(outfile, "get substring list failed %d\n", rc);
1384              else
1385                {
1386                for (i = 0; i < count; i++)
1387                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1388                if (stringlist[i] != NULL)
1389                  fprintf(outfile, "string list not terminated by NULL\n");
1390                /* free((void *)stringlist); */
1391                pcre_free_substring_list(stringlist);
1392              }              }
1393            }            }
1394          }          }
1395    
1396          /* Failed to match. If this is a /g or /G loop and we previously set
1397          g_notempty after a null match, this is not necessarily the end.
1398          We want to advance the start offset, and continue. In the case of UTF-8
1399          matching, the advance must be one character, not one byte. Fudge the
1400          offset values to achieve this. We won't be at the end of the string -
1401          that was checked before setting g_notempty. */
1402    
1403        else        else
1404          {          {
1405          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1406              {
1407              int onechar = 1;
1408              use_offsets[0] = start_offset;
1409              if (use_utf8)
1410                {
1411                while (start_offset + onechar < len)
1412                  {
1413                  int tb = bptr[start_offset+onechar];
1414                  if (tb <= 127) break;
1415                  tb &= 0xc0;
1416                  if (tb != 0 && tb != 0xc0) onechar++;
1417                  }
1418                }
1419              use_offsets[1] = start_offset + onechar;
1420              }
1421            else
1422              {
1423              if (count == PCRE_ERROR_NOMATCH)
1424                {
1425                if (gmatched == 0) fprintf(outfile, "No match\n");
1426                }
1427            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
1428              break;  /* Out of the /g loop */
1429              }
1430          }          }
1431        }  
1432      }        /* If not /g or /G we are done */
1433    
1434          if (!do_g && !do_G) break;
1435    
1436          /* If we have matched an empty string, first check to see if we are at
1437          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1438          what Perl's /g option does. This turns out to be rather cunning. First
1439          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1440          same point. If this fails (picked up above) we advance to the next
1441          character. */
1442    
1443          g_notempty = 0;
1444          if (use_offsets[0] == use_offsets[1])
1445            {
1446            if (use_offsets[0] == len) break;
1447            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1448            }
1449    
1450          /* For /g, update the start offset, leaving the rest alone */
1451    
1452          if (do_g) start_offset = use_offsets[1];
1453    
1454          /* For /G, update the pointer and length */
1455    
1456          else
1457            {
1458            bptr += use_offsets[1];
1459            len -= use_offsets[1];
1460            }
1461          }  /* End of loop for /g and /G */
1462        }    /* End of loop for data lines */
1463    
1464    CONTINUE:    CONTINUE:
1465    
1466    #if !defined NOPOSIX
1467    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1468    #endif
1469    
1470    if (re != NULL) free(re);    if (re != NULL) free(re);
1471    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1472      if (tables != NULL)
1473        {
1474        free((void *)tables);
1475        setlocale(LC_CTYPE, "C");
1476        }
1477    }    }
1478    
1479  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1480  return 0;  return 0;
1481  }  }
1482    

Legend:
Removed from v.11  
changed lines
  Added in v.73

  ViewVC Help
Powered by ViewVC 1.1.5