/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 31 by nigel, Sat Feb 24 21:38:57 2007 UTC revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22    /* It is possible to compile this test program without including support for
23    testing the POSIX interface, though this is not available via the standard
24    Makefile. */
25    
26    #if !defined NOPOSIX
27  #include "pcreposix.h"  #include "pcreposix.h"
28    #endif
29    
30  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
31  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 35 
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE BUFFER_SIZE
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int show_malloc;
52    static int use_utf8;
53    static size_t gotten_store;
54    
55    
56    static const int utf8_table1[] = {
57      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
59  /* Debugging function to print the internal form of the regex. This is the same  static const int utf8_table2[] = {
60  code as contained in pcre.c under the DEBUG macro. */    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    
62  static const char *OP_names[] = {  static const int utf8_table3[] = {
63    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
   
   
 static void print_internals(pcre *re, FILE *outfile)  
 {  
 unsigned char *code = ((real_pcre *)re)->code;  
   
 fprintf(outfile, "------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
64    
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
65    
         default:  
         code--;  
         }  
       }  
     break;  
66    
67      /* Anything else is just a one-node item */  /*************************************************
68    *         Print compiled regex                   *
69    *************************************************/
70    
71      default:  /* The code for doing this is held in a separate file that is also included in
72      fprintf(outfile, "    %s", OP_names[*code]);  pcre.c when it is compiled with the debug switch. It defines a function called
73      break;  print_internals(), which uses a table of opcode lengths defined by the macro
74      }  OP_LENGTHS, whose name must be OP_lengths. */
75    
76    static uschar OP_lengths[] = { OP_LENGTHS };
77    
78    #include "printint.c"
79    
80    
81    
82    /*************************************************
83    *          Read number from string               *
84    *************************************************/
85    
86    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
87    around with conditional compilation, just do the job by hand. It is only used
88    for unpicking the -o argument, so just keep it simple.
89    
90    Arguments:
91      str           string to be converted
92      endptr        where to put the end pointer
93    
94    Returns:        the value as an int
95    */
96    
97    static int
98    get_value(unsigned char *str, unsigned char **endptr)
99    {
100    int result = 0;
101    while(*str != 0 && isspace(*str)) str++;
102    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
103    *endptr = str;
104    return(result);
105    }
106    
107    
108    
109    /*************************************************
110    *       Convert character value to UTF-8         *
111    *************************************************/
112    
113    /* This function takes an integer value in the range 0 - 0x7fffffff
114    and encodes it as a UTF-8 character in 0 to 6 bytes.
115    
116    Arguments:
117      cvalue     the character value
118      buffer     pointer to buffer for result - at least 6 bytes long
119    
120    Returns:     number of characters placed in the buffer
121                 -1 if input character is negative
122                 0 if input character is positive but too big (only when
123                 int is longer than 32 bits)
124    */
125    
126    static int
127    ord2utf8(int cvalue, unsigned char *buffer)
128    {
129    register int i, j;
130    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131      if (cvalue <= utf8_table1[i]) break;
132    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133    if (cvalue < 0) return -1;
134    
135    buffer += i;
136    for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141    *buffer = utf8_table2[i] | cvalue;
142    return i + 1;
143    }
144    
145    
146    /*************************************************
147    *            Convert UTF-8 string to value       *
148    *************************************************/
149    
150    /* This function takes one or more bytes that represents a UTF-8 character,
151    and returns the value of the character.
152    
153    Argument:
154      buffer   a pointer to the byte vector
155      vptr     a pointer to an int to receive the value
156    
157    Returns:   >  0 => the number of bytes consumed
158               -6 to 0 => malformed UTF-8 character at offset = (-return)
159    */
160    
161    static int
162    utf82ord(unsigned char *buffer, int *vptr)
163    {
164    int c = *buffer++;
165    int d = c;
166    int i, j, s;
167    
168    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
169      {
170      if ((d & 0x80) == 0) break;
171      d <<= 1;
172      }
173    
174    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
175    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
176    
177    code++;  /* i now has a value in the range 1-5 */
178    fprintf(outfile, "\n");  
179    s = 6*i;
180    d = (c & utf8_table3[i]) << s;
181    
182    for (j = 0; j < i; j++)
183      {
184      c = *buffer++;
185      if ((c & 0xc0) != 0x80) return -(j+1);
186      s -= 6;
187      d |= (c & 0x3f) << s;
188    }    }
189    
190    /* Check that encoding was the correct unique one */
191    
192    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193      if (d <= utf8_table1[j]) break;
194    if (j != i) return -(i+1);
195    
196    /* Valid value */
197    
198    *vptr = d;
199    return i+1;
200  }  }
201    
202    
203    
204  /* Character string printing function. */  /*************************************************
205    *             Print character string             *
206    *************************************************/
207    
208  static void pchars(unsigned char *p, int length)  /* Character string printing function. Must handle UTF-8 strings in utf8
209    mode. Yields number of characters printed. If handed a NULL file, just counts
210    chars without printing. */
211    
212    static int pchars(unsigned char *p, int length, FILE *f)
213  {  {
214  int c;  int c;
215    int yield = 0;
216    
217  while (length-- > 0)  while (length-- > 0)
218    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
219      else fprintf(outfile, "\\x%02x", c);    if (use_utf8)
220        {
221        int rc = utf82ord(p, &c);
222    
223        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
224          {
225          length -= rc - 1;
226          p += rc;
227          if (c < 256 && isprint(c))
228            {
229            if (f != NULL) fprintf(f, "%c", c);
230            yield++;
231            }
232          else
233            {
234            int n;
235            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236            yield += n;
237            }
238          continue;
239          }
240        }
241    
242       /* Not UTF-8, or malformed UTF-8  */
243    
244      if (isprint(c = *(p++)))
245        {
246        if (f != NULL) fprintf(f, "%c", c);
247        yield++;
248        }
249      else
250        {
251        if (f != NULL) fprintf(f, "\\x%02x", c);
252        yield += 4;
253        }
254      }
255    
256    return yield;
257    }
258    
259    
260    
261    /*************************************************
262    *              Callout function                  *
263    *************************************************/
264    
265    /* Called from PCRE as a result of the (?C) item. We print out where we are in
266    the match. Yield zero unless more callouts than the fail count, or the callout
267    data is not zero. */
268    
269    static int callout(pcre_callout_block *cb)
270    {
271    FILE *f = (first_callout | callout_extra)? outfile : NULL;
272    int i, pre_start, post_start;
273    
274    if (callout_extra)
275      {
276      fprintf(f, "Callout %d: last capture = %d\n",
277        cb->callout_number, cb->capture_last);
278    
279      for (i = 0; i < cb->capture_top * 2; i += 2)
280        {
281        if (cb->offset_vector[i] < 0)
282          fprintf(f, "%2d: <unset>\n", i/2);
283        else
284          {
285          fprintf(f, "%2d: ", i/2);
286          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287            cb->offset_vector[i+1] - cb->offset_vector[i], f);
288          fprintf(f, "\n");
289          }
290        }
291      }
292    
293    /* Re-print the subject in canonical form, the first time or if giving full
294    details. On subsequent calls in the same match, we use pchars just to find the
295    printed lengths of the substrings. */
296    
297    if (f != NULL) fprintf(f, "--->");
298    
299    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301      cb->current_position - cb->start_match, f);
302    
303    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304      cb->subject_length - cb->current_position, f);
305    
306    if (f != NULL) fprintf(f, "\n");
307    
308    /* Always print appropriate indicators, with callout number if not already
309    shown */
310    
311    if (callout_extra) fprintf(outfile, "    ");
312      else fprintf(outfile, "%3d ", cb->callout_number);
313    
314    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315    fprintf(outfile, "^");
316    
317    if (post_start > 0)
318      {
319      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320      fprintf(outfile, "^");
321      }
322    
323    fprintf(outfile, "\n");
324    first_callout = 0;
325    
326    if (cb->callout_data != NULL)
327      {
328      int callout_data = *((int *)(cb->callout_data));
329      if (callout_data != 0)
330        {
331        fprintf(outfile, "Callout data = %d\n", callout_data);
332        return callout_data;
333        }
334      }
335    
336    return (cb->callout_number != callout_fail_id)? 0 :
337           (++callout_count >= callout_fail_count)? 1 : 0;
338  }  }
339    
340    
341    /*************************************************
342    *            Local malloc functions              *
343    *************************************************/
344    
345  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
346  compiled re. */  compiled re. */
347    
348  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
349  {  {
350  if (log_store)  void *block = malloc(size);
351    fprintf(outfile, "Memory allocation request: %d (code space %d)\n",  gotten_store = size;
352      (int)size, (int)size - offsetof(real_pcre, code[0]));  if (show_malloc)
353  return malloc(size);    fprintf(outfile, "malloc       %3d %p\n", size, block);
354    return block;
355    }
356    
357    static void new_free(void *block)
358    {
359    if (show_malloc)
360      fprintf(outfile, "free             %p\n", block);
361    free(block);
362    }
363    
364    
365    /* For recursion malloc/free, to test stacking calls */
366    
367    static void *stack_malloc(size_t size)
368    {
369    void *block = malloc(size);
370    if (show_malloc)
371      fprintf(outfile, "stack_malloc %3d %p\n", size, block);
372    return block;
373  }  }
374    
375    static void stack_free(void *block)
376    {
377    if (show_malloc)
378      fprintf(outfile, "stack_free       %p\n", block);
379    free(block);
380    }
381    
382    
383    /*************************************************
384    *          Call pcre_fullinfo()                  *
385    *************************************************/
386    
387    /* Get one piece of information from the pcre_fullinfo() function */
388    
389    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
390    {
391    int rc;
392    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
393      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
394    }
395    
396    
397    
398    /*************************************************
399    *                Main Program                    *
400    *************************************************/
401    
402  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
403  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
404  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 295  int op = 1; Line 412  int op = 1;
412  int timeit = 0;  int timeit = 0;
413  int showinfo = 0;  int showinfo = 0;
414  int showstore = 0;  int showstore = 0;
415    int size_offsets = 45;
416    int size_offsets_max;
417    int *offsets;
418    #if !defined NOPOSIX
419  int posix = 0;  int posix = 0;
420    #endif
421  int debug = 0;  int debug = 0;
422  int done = 0;  int done = 0;
423  unsigned char buffer[30000];  
424  unsigned char dbuffer[1024];  unsigned char *buffer;
425    unsigned char *dbuffer;
426    
427    /* Get buffers from malloc() so that Electric Fence will check their misuse
428    when I am debugging. */
429    
430    buffer = (unsigned char *)malloc(BUFFER_SIZE);
431    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
432    
433  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
434    
# Line 309  outfile = stdout; Line 438  outfile = stdout;
438    
439  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
440    {    {
441      unsigned char *endptr;
442    
443    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
444      showstore = 1;      showstore = 1;
445    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
446    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
447    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
448      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
449          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
450            *endptr == 0))
451        {
452        op++;
453        argc--;
454        }
455    #if !defined NOPOSIX
456    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
457    #endif
458      else if (strcmp(argv[op], "-C") == 0)
459        {
460        int rc;
461        printf("PCRE version %s\n", pcre_version());
462        printf("Compiled with\n");
463        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
464        printf("  %sUTF-8 support\n", rc? "" : "No ");
465        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
466        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
467        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
468        printf("  Internal link size = %d\n", rc);
469        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
470        printf("  POSIX malloc threshold = %d\n", rc);
471        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
472        printf("  Default match limit = %d\n", rc);
473        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
474        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
475        exit(0);
476        }
477    else    else
478      {      {
479      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
480      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
481      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
482             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
483             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
484             "  -s   output store information\n"             "  -o <n> set size of offsets vector to <n>\n");
485             "  -t   time compilation and execution\n");  #if !defined NOPOSIX
486        printf("  -p     use POSIX interface\n");
487    #endif
488        printf("  -s     output store information\n"
489               "  -t     time compilation and execution\n");
490      return 1;      return 1;
491      }      }
492    op++;    op++;
493    argc--;    argc--;
494    }    }
495    
496    /* Get the store for the offsets vector, and remember what it was */
497    
498    size_offsets_max = size_offsets;
499    offsets = (int *)malloc(size_offsets_max * sizeof(int));
500    if (offsets == NULL)
501      {
502      printf("** Failed to get %d bytes of memory for offsets vector\n",
503        size_offsets_max * sizeof(int));
504      return 1;
505      }
506    
507  /* Sort out the input and output files */  /* Sort out the input and output files */
508    
509  if (argc > 1)  if (argc > 1)
# Line 355  if (argc > 2) Line 529  if (argc > 2)
529  /* Set alternative malloc function */  /* Set alternative malloc function */
530    
531  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
532    pcre_free = new_free;
533    pcre_stack_malloc = stack_malloc;
534    pcre_stack_free = stack_free;
535    
536  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
537    
# Line 366  while (!done) Line 543  while (!done)
543    {    {
544    pcre *re = NULL;    pcre *re = NULL;
545    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
546    
547    #if !defined NOPOSIX  /* There are still compilers that require no indent */
548    regex_t preg;    regex_t preg;
549      int do_posix = 0;
550    #endif
551    
552    const char *error;    const char *error;
553    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
554    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
555    int do_study = 0;    int do_study = 0;
556    int do_debug = debug;    int do_debug = debug;
557      int do_G = 0;
558      int do_g = 0;
559    int do_showinfo = showinfo;    int do_showinfo = showinfo;
560    int do_posix = 0;    int do_showrest = 0;
561    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
562    
563      use_utf8 = 0;
564    
565    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
566    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
567    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
568      fflush(outfile);
569    
570    p = buffer;    p = buffer;
571    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 407  while (!done) Line 594  while (!done)
594        }        }
595      if (*pp != 0) break;      if (*pp != 0) break;
596    
597      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
598      if (len < 256)      if (len < 256)
599        {        {
600        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 444  while (!done) Line 631  while (!done)
631      {      {
632      switch (*pp++)      switch (*pp++)
633        {        {
634          case 'g': do_g = 1; break;
635        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
636        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
637        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
638        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
639    
640          case '+': do_showrest = 1; break;
641        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
642        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
643        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
644          case 'G': do_G = 1; break;
645        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
646        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
647          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
648    
649    #if !defined NOPOSIX
650        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
651    #endif
652    
653        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
654        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
655        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
656          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
657          case '?': options |= PCRE_NO_UTF8_CHECK; break;
658    
659        case 'L':        case 'L':
660        ppp = pp;        ppp = pp;
# Line 483  while (!done) Line 680  while (!done)
680    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
681    local character tables. */    local character tables. */
682    
683    #if !defined NOPOSIX
684    if (posix || do_posix)    if (posix || do_posix)
685      {      {
686      int rc;      int rc;
# Line 496  while (!done) Line 694  while (!done)
694    
695      if (rc != 0)      if (rc != 0)
696        {        {
697        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
698        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
699        goto SKIP_DATA;        goto SKIP_DATA;
700        }        }
# Line 505  while (!done) Line 703  while (!done)
703    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
704    
705    else    else
706    #endif  /* !defined NOPOSIX */
707    
708      {      {
709      if (timeit)      if (timeit)
710        {        {
# Line 518  while (!done) Line 718  while (!done)
718          }          }
719        time_taken = clock() - start_time;        time_taken = clock() - start_time;
720        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
721          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
722          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
723        }        }
724    
725      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 535  while (!done) Line 735  while (!done)
735          {          {
736          for (;;)          for (;;)
737            {            {
738            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
739              {              {
740              done = 1;              done = 1;
741              goto CONTINUE;              goto CONTINUE;
# Line 549  while (!done) Line 749  while (!done)
749        goto CONTINUE;        goto CONTINUE;
750        }        }
751    
752      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
753        info-returning functions. The old one has a limited interface and
754        returns only limited data. Check that it agrees with the newer one. */
755    
756        if (log_store)
757          fprintf(outfile, "Memory allocation (code space): %d\n",
758            (int)(gotten_store -
759                  sizeof(real_pcre) -
760                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
761    
762      if (do_showinfo)      if (do_showinfo)
763        {        {
764        int first_char, count;        unsigned long int get_options;
765          int old_first_char, old_options, old_count;
766          int count, backrefmax, first_char, need_char;
767          int nameentrysize, namecount;
768          const uschar *nametable;
769          size_t size;
770    
771          if (do_debug)
772            {
773            fprintf(outfile, "------------------------------------------------------------------\n");
774            print_internals(re, outfile);
775            }
776    
777        if (do_debug) print_internals(re, outfile);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
778          new_info(re, NULL, PCRE_INFO_SIZE, &size);
779          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
780          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
781          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
782          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
783          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
784          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
785          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
786    
787        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
788        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
789          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
790        else        else
791          {          {
792          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
793          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
794            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
795              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
796              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
797              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
798              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
799              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
800              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
801              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
802              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
803          if (first_char == -1)          }
804            {  
805            fprintf(outfile, "First char at start or follows \\n\n");        if (size != gotten_store) fprintf(outfile,
806            }          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
807          else if (first_char < 0)          size, gotten_store);
808    
809          fprintf(outfile, "Capturing subpattern count = %d\n", count);
810          if (backrefmax > 0)
811            fprintf(outfile, "Max back reference = %d\n", backrefmax);
812    
813          if (namecount > 0)
814            {
815            fprintf(outfile, "Named capturing subpatterns:\n");
816            while (namecount-- > 0)
817            {            {
818            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
819                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
820                GET2(nametable, 0));
821              nametable += nameentrysize;
822            }            }
823            }
824    
825          if (get_options == 0) fprintf(outfile, "No options\n");
826            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
827              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
828              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
829              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
830              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
831              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
832              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
833              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
834              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
835              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
836              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
837    
838          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
839            fprintf(outfile, "Case state changes\n");
840    
841          if (first_char == -1)
842            {
843            fprintf(outfile, "First char at start or follows \\n\n");
844            }
845          else if (first_char < 0)
846            {
847            fprintf(outfile, "No first char\n");
848            }
849          else
850            {
851            int ch = first_char & 255;
852            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
853              "" : " (caseless)";
854            if (isprint(ch))
855              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
856          else          else
857            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
858            if (isprint(first_char))          }
859              fprintf(outfile, "First char = \'%c\'\n", first_char);  
860            else        if (need_char < 0)
861              fprintf(outfile, "First char = %d\n", first_char);          {
862            }          fprintf(outfile, "No need char\n");
863            }
864          else
865            {
866            int ch = need_char & 255;
867            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
868              "" : " (caseless)";
869            if (isprint(ch))
870              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
871            else
872              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
873          }          }
874        }        }
875    
# Line 606  while (!done) Line 888  while (!done)
888          time_taken = clock() - start_time;          time_taken = clock() - start_time;
889          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
890          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
891            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
892            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
893          }          }
894    
895        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 616  while (!done) Line 898  while (!done)
898        else if (extra == NULL)        else if (extra == NULL)
899          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
900    
901        /* This looks at internal information. A bit kludgy to do it this        /* Don't output study size; at present it is in any case a fixed
902        way, but it is useful for testing. */        value, but it varies, depending on the computer architecture, and
903          so messes up the test suite. */
904    
905        else if (do_showinfo)        else if (do_showinfo)
906          {          {
907          real_pcre_extra *xx = (real_pcre_extra *)extra;          size_t size;
908          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          uschar *start_bits = NULL;
909            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
910            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
911            /* fprintf(outfile, "Study size = %d\n", size); */
912            if (start_bits == NULL)
913            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
914          else          else
915            {            {
# Line 631  while (!done) Line 918  while (!done)
918            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
919            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
920              {              {
921              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
922                {                {
923                if (c > 75)                if (c > 75)
924                  {                  {
# Line 661  while (!done) Line 948  while (!done)
948    for (;;)    for (;;)
949      {      {
950      unsigned char *q;      unsigned char *q;
951        unsigned char *bptr = dbuffer;
952        int *use_offsets = offsets;
953        int use_size_offsets = size_offsets;
954        int callout_data = 0;
955        int callout_data_set = 0;
956      int count, c;      int count, c;
957      int copystrings = 0;      int copystrings = 0;
958        int find_match_limit = 0;
959      int getstrings = 0;      int getstrings = 0;
960      int getlist = 0;      int getlist = 0;
961      int offsets[45];      int gmatched = 0;
962      int size_offsets = sizeof(offsets)/sizeof(int);      int start_offset = 0;
963        int g_notempty = 0;
964    
965      options = 0;      options = 0;
966    
967      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
968      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      first_callout = 1;
969        callout_extra = 0;
970        callout_count = 0;
971        callout_fail_count = 999999;
972        callout_fail_id = -1;
973        show_malloc = 0;
974    
975        if (infile == stdin) printf("data> ");
976        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
977        {        {
978        done = 1;        done = 1;
979        goto CONTINUE;        goto CONTINUE;
# Line 691  while (!done) Line 993  while (!done)
993        {        {
994        int i = 0;        int i = 0;
995        int n = 0;        int n = 0;
996    
997        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
998          {          {
999          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 710  while (!done) Line 1013  while (!done)
1013          break;          break;
1014    
1015          case 'x':          case 'x':
1016    
1017            /* Handle \x{..} specially - new Perl thing for utf8 */
1018    
1019            if (*p == '{')
1020              {
1021              unsigned char *pt = p;
1022              c = 0;
1023              while (isxdigit(*(++pt)))
1024                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1025              if (*pt == '}')
1026                {
1027                unsigned char buff8[8];
1028                int ii, utn;
1029                utn = ord2utf8(c, buff8);
1030                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1031                c = buff8[ii];   /* Last byte */
1032                p = pt + 1;
1033                break;
1034                }
1035              /* Not correct form; fall through */
1036              }
1037    
1038            /* Ordinary \x */
1039    
1040          c = 0;          c = 0;
1041          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1042            {            {
# Line 731  while (!done) Line 1058  while (!done)
1058          continue;          continue;
1059    
1060          case 'C':          case 'C':
1061          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1062          copystrings |= 1 << n;            {
1063              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1064              copystrings |= 1 << n;
1065              }
1066            else if (isalnum(*p))
1067              {
1068              uschar name[256];
1069              uschar *npp = name;
1070              while (isalnum(*p)) *npp++ = *p++;
1071              *npp = 0;
1072              n = pcre_get_stringnumber(re, (char *)name);
1073              if (n < 0)
1074                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1075              else copystrings |= 1 << n;
1076              }
1077            else if (*p == '+')
1078              {
1079              callout_extra = 1;
1080              p++;
1081              }
1082            else if (*p == '-')
1083              {
1084              pcre_callout = NULL;
1085              p++;
1086              }
1087            else if (*p == '!')
1088              {
1089              callout_fail_id = 0;
1090              p++;
1091              while(isdigit(*p))
1092                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1093              callout_fail_count = 0;
1094              if (*p == '!')
1095                {
1096                p++;
1097                while(isdigit(*p))
1098                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1099                }
1100              }
1101            else if (*p == '*')
1102              {
1103              int sign = 1;
1104              callout_data = 0;
1105              if (*(++p) == '-') { sign = -1; p++; }
1106              while(isdigit(*p))
1107                callout_data = callout_data * 10 + *p++ - '0';
1108              callout_data *= sign;
1109              callout_data_set = 1;
1110              }
1111          continue;          continue;
1112    
1113          case 'G':          case 'G':
1114          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1115          getstrings |= 1 << n;            {
1116              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1117              getstrings |= 1 << n;
1118              }
1119            else if (isalnum(*p))
1120              {
1121              uschar name[256];
1122              uschar *npp = name;
1123              while (isalnum(*p)) *npp++ = *p++;
1124              *npp = 0;
1125              n = pcre_get_stringnumber(re, (char *)name);
1126              if (n < 0)
1127                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1128              else getstrings |= 1 << n;
1129              }
1130          continue;          continue;
1131    
1132          case 'L':          case 'L':
1133          getlist = 1;          getlist = 1;
1134          continue;          continue;
1135    
1136            case 'M':
1137            find_match_limit = 1;
1138            continue;
1139    
1140            case 'N':
1141            options |= PCRE_NOTEMPTY;
1142            continue;
1143    
1144          case 'O':          case 'O':
1145          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1146          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1147              {
1148              size_offsets_max = n;
1149              free(offsets);
1150              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1151              if (offsets == NULL)
1152                {
1153                printf("** Failed to get %d bytes of memory for offsets vector\n",
1154                  size_offsets_max * sizeof(int));
1155                return 1;
1156                }
1157              }
1158            use_size_offsets = n;
1159            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1160            continue;
1161    
1162            case 'S':
1163            show_malloc = 1;
1164          continue;          continue;
1165    
1166          case 'Z':          case 'Z':
1167          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1168          continue;          continue;
1169    
1170            case '?':
1171            options |= PCRE_NO_UTF8_CHECK;
1172            continue;
1173          }          }
1174        *q++ = c;        *q++ = c;
1175        }        }
# Line 759  while (!done) Line 1177  while (!done)
1177      len = q - dbuffer;      len = q - dbuffer;
1178    
1179      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1180      support timing. */      support timing or playing with the match limit or callout data. */
1181    
1182    #if !defined NOPOSIX
1183      if (posix || do_posix)      if (posix || do_posix)
1184        {        {
1185        int rc;        int rc;
1186        int eflags = 0;        int eflags = 0;
1187        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1188          if (use_size_offsets > 0)
1189            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1190        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1191        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1192    
1193        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1194    
1195        if (rc != 0)        if (rc != 0)
1196          {          {
1197          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1198          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1199          }          }
1200        else        else
1201          {          {
1202          size_t i;          size_t i;
1203          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1204            {            {
1205            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1206              {              {
1207              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1208              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1209                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1210              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1211                if (i == 0 && do_showrest)
1212                  {
1213                  fprintf(outfile, " 0+ ");
1214                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1215                    outfile);
1216                  fprintf(outfile, "\n");
1217                  }
1218              }              }
1219            }            }
1220          }          }
1221          free(pmatch);
1222        }        }
1223    
1224      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1225    
1226      else      else
1227    #endif  /* !defined NOPOSIX */
1228    
1229        for (;; gmatched++)    /* Loop for /g or /G */
1230        {        {
1231        if (timeit)        if (timeit)
1232          {          {
# Line 803  while (!done) Line 1234  while (!done)
1234          clock_t time_taken;          clock_t time_taken;
1235          clock_t start_time = clock();          clock_t start_time = clock();
1236          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1237            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1238              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1239          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1240          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1241            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1242            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1243          }          }
1244    
1245        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If find_match_limit is set, we want to do repeated matches with
1246          size_offsets);        varying limits in order to find the minimum value. */
1247    
1248          if (find_match_limit)
1249            {
1250            int min = 0;
1251            int mid = 64;
1252            int max = -1;
1253    
1254            if (extra == NULL)
1255              {
1256              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1257              extra->flags = 0;
1258              }
1259            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1260    
1261            for (;;)
1262              {
1263              extra->match_limit = mid;
1264              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1265                options | g_notempty, use_offsets, use_size_offsets);
1266              if (count == PCRE_ERROR_MATCHLIMIT)
1267                {
1268                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1269                min = mid;
1270                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1271                }
1272              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1273                {
1274                if (mid == min + 1)
1275                  {
1276                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1277                  break;
1278                  }
1279                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1280                max = mid;
1281                mid = (min + mid)/2;
1282                }
1283              else break;    /* Some other error */
1284              }
1285    
1286            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1287            }
1288    
1289          /* If callout_data is set, use the interface with additional data */
1290    
1291          else if (callout_data_set)
1292            {
1293            if (extra == NULL)
1294              {
1295              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1296              extra->flags = 0;
1297              }
1298            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1299            extra->callout_data = &callout_data;
1300            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1301              options | g_notempty, use_offsets, use_size_offsets);
1302            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1303            }
1304    
1305          /* The normal case is just to do the match once, with the default
1306          value of match_limit. */
1307    
1308          else count = pcre_exec(re, extra, (char *)bptr, len,
1309            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1310    
1311        if (count == 0)        if (count == 0)
1312          {          {
1313          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1314          count = size_offsets/3;          count = use_size_offsets/3;
1315          }          }
1316    
1317          /* Matched */
1318    
1319        if (count >= 0)        if (count >= 0)
1320          {          {
1321          int i;          int i;
1322          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1323            {            {
1324            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1325              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1326            else            else
1327              {              {
1328              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1329              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1330                  use_offsets[i+1] - use_offsets[i], outfile);
1331              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1332                if (i == 0)
1333                  {
1334                  if (do_showrest)
1335                    {
1336                    fprintf(outfile, " 0+ ");
1337                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1338                      outfile);
1339                    fprintf(outfile, "\n");
1340                    }
1341                  }
1342              }              }
1343            }            }
1344    
# Line 839  while (!done) Line 1346  while (!done)
1346            {            {
1347            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1348              {              {
1349              char buffer[16];              char copybuffer[16];
1350              int rc = pcre_copy_substring((char *)dbuffer, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1351                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
1352              if (rc < 0)              if (rc < 0)
1353                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1354              else              else
1355                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1356              }              }
1357            }            }
1358    
# Line 854  while (!done) Line 1361  while (!done)
1361            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1362              {              {
1363              const char *substring;              const char *substring;
1364              int rc = pcre_get_substring((char *)dbuffer, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1365                i, &substring);                i, &substring);
1366              if (rc < 0)              if (rc < 0)
1367                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1368              else              else
1369                {                {
1370                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1371                free((void *)substring);                /* free((void *)substring); */
1372                  pcre_free_substring(substring);
1373                }                }
1374              }              }
1375            }            }
# Line 869  while (!done) Line 1377  while (!done)
1377          if (getlist)          if (getlist)
1378            {            {
1379            const char **stringlist;            const char **stringlist;
1380            int rc = pcre_get_substring_list((char *)dbuffer, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1381              &stringlist);              &stringlist);
1382            if (rc < 0)            if (rc < 0)
1383              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 879  while (!done) Line 1387  while (!done)
1387                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1388              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1389                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1390              free((void *)stringlist);              /* free((void *)stringlist); */
1391                pcre_free_substring_list(stringlist);
1392              }              }
1393            }            }
   
1394          }          }
1395    
1396          /* Failed to match. If this is a /g or /G loop and we previously set
1397          g_notempty after a null match, this is not necessarily the end.
1398          We want to advance the start offset, and continue. In the case of UTF-8
1399          matching, the advance must be one character, not one byte. Fudge the
1400          offset values to achieve this. We won't be at the end of the string -
1401          that was checked before setting g_notempty. */
1402    
1403        else        else
1404          {          {
1405          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1406              {
1407              int onechar = 1;
1408              use_offsets[0] = start_offset;
1409              if (use_utf8)
1410                {
1411                while (start_offset + onechar < len)
1412                  {
1413                  int tb = bptr[start_offset+onechar];
1414                  if (tb <= 127) break;
1415                  tb &= 0xc0;
1416                  if (tb != 0 && tb != 0xc0) onechar++;
1417                  }
1418                }
1419              use_offsets[1] = start_offset + onechar;
1420              }
1421            else
1422              {
1423              if (count == PCRE_ERROR_NOMATCH)
1424                {
1425                if (gmatched == 0) fprintf(outfile, "No match\n");
1426                }
1427            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
1428              break;  /* Out of the /g loop */
1429              }
1430          }          }
1431        }  
1432      }        /* If not /g or /G we are done */
1433    
1434          if (!do_g && !do_G) break;
1435    
1436          /* If we have matched an empty string, first check to see if we are at
1437          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1438          what Perl's /g options does. This turns out to be rather cunning. First
1439          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1440          same point. If this fails (picked up above) we advance to the next
1441          character. */
1442    
1443          g_notempty = 0;
1444          if (use_offsets[0] == use_offsets[1])
1445            {
1446            if (use_offsets[0] == len) break;
1447            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1448            }
1449    
1450          /* For /g, update the start offset, leaving the rest alone */
1451    
1452          if (do_g) start_offset = use_offsets[1];
1453    
1454          /* For /G, update the pointer and length */
1455    
1456          else
1457            {
1458            bptr += use_offsets[1];
1459            len -= use_offsets[1];
1460            }
1461          }  /* End of loop for /g and /G */
1462        }    /* End of loop for data lines */
1463    
1464    CONTINUE:    CONTINUE:
1465    
1466    #if !defined NOPOSIX
1467    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1468    #endif
1469    
1470    if (re != NULL) free(re);    if (re != NULL) free(re);
1471    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1472    if (tables != NULL)    if (tables != NULL)
# Line 903  while (!done) Line 1476  while (!done)
1476      }      }
1477    }    }
1478    
1479  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1480  return 0;  return 0;
1481  }  }
1482    

Legend:
Removed from v.31  
changed lines
  Added in v.73

  ViewVC Help
Powered by ViewVC 1.1.5