/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 57 by nigel, Sat Feb 24 21:39:50 2007 UTC revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 35  Makefile. */
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE BUFFER_SIZE
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int show_malloc;
52    static int use_utf8;
53  static size_t gotten_store;  static size_t gotten_store;
54    
55    
56    static const int utf8_table1[] = {
 static int utf8_table1[] = {  
57    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
59  static int utf8_table2[] = {  static const int utf8_table2[] = {
60    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    
62  static int utf8_table3[] = {  static const int utf8_table3[] = {
63    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
64    
65    
66    
67    /*************************************************
68    *         Print compiled regex                   *
69    *************************************************/
70    
71    /* The code for doing this is held in a separate file that is also included in
72    pcre.c when it is compiled with the debug switch. It defines a function called
73    print_internals(), which uses a table of opcode lengths. That table must be
74    named OP_lengths; its contents are defined by the macro OP_LENGTHS. */
75    
76    static uschar OP_lengths[] = { OP_LENGTHS };
77    
78    #include "printint.c"
79    
80    
81    
82    /*************************************************
83    *          Read number from string               *
84    *************************************************/
85    
86    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
87    around with conditional compilation, just do the job by hand. It is only used
88    for unpicking the -o argument, so just keep it simple.
89    
90    Arguments:
91      str           string to be converted
92      endptr        where to put the end pointer
93    
94    Returns:        the converted value, as an int
95    */
96    
97    static int
98    get_value(unsigned char *str, unsigned char **endptr)
99    {
100    int result = 0;
101    while(*str != 0 && isspace(*str)) str++;
102    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
103    *endptr = str;
104    return(result);
105    }
106    
107    
108    
109  /*************************************************  /*************************************************
110  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
111  *************************************************/  *************************************************/
# Line 73  for (i = 0; i < sizeof(utf8_table1)/size Line 131  for (i = 0; i < sizeof(utf8_table1)/size
131    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
132  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133  if (cvalue < 0) return -1;  if (cvalue < 0) return -1;
134  *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
135  cvalue >>= 6 - i;  buffer += i;
136  for (j = 0; j < i; j++)  for (j = i; j > 0; j--)
137    {   {
138    *buffer++ = 0x80 | (cvalue & 0x3f);   *buffer-- = 0x80 | (cvalue & 0x3f);
139    cvalue >>= 6;   cvalue >>= 6;
140    }   }
141    *buffer = utf8_table2[i] | cvalue;
142  return i + 1;  return i + 1;
143  }  }
144    
# Line 99  Returns:   >  0 => the number of bytes c Line 158  Returns:   >  0 => the number of bytes c
158             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
159  */  */
160    
161  int  static int
162  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
163  {  {
164  int c = *buffer++;  int c = *buffer++;
# Line 117  if (i == 0 || i == 6) return 0;        / Line 176  if (i == 0 || i == 6) return 0;        /
176    
177  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
178    
179  d = c & utf8_table3[i];  s = 6*i;
180  s = 6 - i;  d = (c & utf8_table3[i]) << s;
181    
182  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
183    {    {
184    c = *buffer++;    c = *buffer++;
185    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
186      s -= 6;
187    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
188    }    }
189    
190  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
# Line 142  return i+1; Line 201  return i+1;
201    
202    
203    
204    /*************************************************
205    *             Print character string             *
206    *************************************************/
207    
208    /* Character string printing function. Must handle UTF-8 strings in utf8
209    mode. Yields number of characters printed. If handed a NULL file, just counts
210    chars without printing. */
211    
212    static int pchars(unsigned char *p, int length, FILE *f)
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
   
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
   
   
 static void print_internals(pcre *re)  
213  {  {
214  unsigned char *code = ((real_pcre *)re)->code;  int c;
215    int yield = 0;
 fprintf(outfile, "------------------------------------------------------------------\n");  
216    
217  for(;;)  while (length-- > 0)
218    {    {
219    int c;    if (use_utf8)
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     if (*code - OP_BRA > EXTRACT_BASIC_MAX)  
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
220      {      {
221      case OP_END:      int rc = utf82ord(p, &c);
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
222    
223      case OP_CLASS:      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
224        {        {
225        int i, min, max;        length -= rc - 1;
226        code++;        p += rc;
227        fprintf(outfile, "    [");        if (c < 256 && isprint(c))
228            {
229        for (i = 0; i < 256; i++)          if (f != NULL) fprintf(f, "%c", c);
230          {          yield++;
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
231          }          }
232        fprintf(outfile, "]");        else
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
233          {          {
234          case OP_CRSTAR:          int n;
235          case OP_CRMINSTAR:          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236          case OP_CRPLUS:          yield += n;
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
   
         default:  
         code--;  
237          }          }
238          continue;
239        }        }
240      break;      }
241    
242      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
243    
244      default:    if (isprint(c = *(p++)))
245      fprintf(outfile, "    %s", OP_names[*code]);      {
246      break;      if (f != NULL) fprintf(f, "%c", c);
247        yield++;
248        }
249      else
250        {
251        if (f != NULL) fprintf(f, "\\x%02x", c);
252        yield += 4;
253      }      }
   
   code++;  
   fprintf(outfile, "\n");  
254    }    }
255    
256    return yield;
257  }  }
258    
259    
260    
261  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
262    *              Callout function                  *
263    *************************************************/
264    
265    /* Called from PCRE as a result of the (?C) item. We print out where we are in
266    the match. Yield zero unless more callouts than the fail count, or the callout
267    data is not zero. */
268    
269  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
270  {  {
271  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
272  while (length-- > 0)  int i, pre_start, post_start;
273    
274    if (callout_extra)
275    {    {
276    if (utf8)    fprintf(f, "Callout %d: last capture = %d\n",
277        cb->callout_number, cb->capture_last);
278    
279      for (i = 0; i < cb->capture_top * 2; i += 2)
280      {      {
281      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
282      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
283        else
284        {        {
285        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
286        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
288          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
289        }        }
290      }      }
291      }
292    
293     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
294    details. On subsequent calls in the same match, we use pchars just to find the
295    printed lengths of the substrings. */
296    
297    if (f != NULL) fprintf(f, "--->");
298    
299    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301      cb->current_position - cb->start_match, f);
302    
303    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304      else fprintf(outfile, "\\x%02x", c);    cb->subject_length - cb->current_position, f);
305    
306    if (f != NULL) fprintf(f, "\n");
307    
308    /* Always print appropriate indicators, with callout number if not already
309    shown */
310    
311    if (callout_extra) fprintf(outfile, "    ");
312      else fprintf(outfile, "%3d ", cb->callout_number);
313    
314    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315    fprintf(outfile, "^");
316    
317    if (post_start > 0)
318      {
319      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320      fprintf(outfile, "^");
321    }    }
322    
323    fprintf(outfile, "\n");
324    first_callout = 0;
325    
326    if (cb->callout_data != NULL)
327      {
328      int callout_data = *((int *)(cb->callout_data));
329      if (callout_data != 0)
330        {
331        fprintf(outfile, "Callout data = %d\n", callout_data);
332        return callout_data;
333        }
334      }
335    
336    return (cb->callout_number != callout_fail_id)? 0 :
337           (++callout_count >= callout_fail_count)? 1 : 0;
338  }  }
339    
340    
341    /*************************************************
342    *            Local malloc functions              *
343    *************************************************/
344    
345  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
346  compiled re. */  compiled re. */
347    
348  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
349  {  {
350    void *block = malloc(size);
351  gotten_store = size;  gotten_store = size;
352  if (log_store)  if (show_malloc)
353    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", size, block);
354      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
355  return malloc(size);  }
356    
357    static void new_free(void *block)
358    {
359    if (show_malloc)
360      fprintf(outfile, "free             %p\n", block);
361    free(block);
362    }
363    
364    
365    /* For recursion malloc/free, to test stacking calls */
366    
367    static void *stack_malloc(size_t size)
368    {
369    void *block = malloc(size);
370    if (show_malloc)
371      fprintf(outfile, "stack_malloc %3d %p\n", size, block);
372    return block;
373  }  }
374    
375    static void stack_free(void *block)
376    {
377    if (show_malloc)
378      fprintf(outfile, "stack_free       %p\n", block);
379    free(block);
380    }
381    
382    
383    /*************************************************
384    *          Call pcre_fullinfo()                  *
385    *************************************************/
386    
387  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
388    
# Line 419  if ((rc = pcre_fullinfo(re, study, optio Line 395  if ((rc = pcre_fullinfo(re, study, optio
395    
396    
397    
398    /*************************************************
399    *                Main Program                    *
400    *************************************************/
401    
402  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
403  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 441  int posix = 0; Line 420  int posix = 0;
420  #endif  #endif
421  int debug = 0;  int debug = 0;
422  int done = 0;  int done = 0;
423  unsigned char buffer[30000];  
424  unsigned char dbuffer[1024];  unsigned char *buffer;
425    unsigned char *dbuffer;
426    
427    /* Get buffers from malloc() so that Electric Fence will check their misuse
428    when I am debugging. */
429    
430    buffer = (unsigned char *)malloc(BUFFER_SIZE);
431    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
432    
433  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
434    
# Line 452  outfile = stdout; Line 438  outfile = stdout;
438    
439  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
440    {    {
441    char *endptr;    unsigned char *endptr;
442    
443    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
444      showstore = 1;      showstore = 1;
# Line 460  while (argc > 1 && argv[op][0] == '-') Line 446  while (argc > 1 && argv[op][0] == '-')
446    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
447    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
448    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
449        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
450            *endptr == 0))
451      {      {
452      op++;      op++;
453      argc--;      argc--;
# Line 468  while (argc > 1 && argv[op][0] == '-') Line 455  while (argc > 1 && argv[op][0] == '-')
455  #if !defined NOPOSIX  #if !defined NOPOSIX
456    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
457  #endif  #endif
458      else if (strcmp(argv[op], "-C") == 0)
459        {
460        int rc;
461        printf("PCRE version %s\n", pcre_version());
462        printf("Compiled with\n");
463        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
464        printf("  %sUTF-8 support\n", rc? "" : "No ");
465        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
466        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
467        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
468        printf("  Internal link size = %d\n", rc);
469        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
470        printf("  POSIX malloc threshold = %d\n", rc);
471        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
472        printf("  Default match limit = %d\n", rc);
473        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
474        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
475        exit(0);
476        }
477    else    else
478      {      {
479      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
480      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
481        printf("  -C     show PCRE compile-time options and exit\n");
482      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
483             "  -i     show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
484             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 496  while (argc > 1 && argv[op][0] == '-')
496  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
497    
498  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
499  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
500  if (offsets == NULL)  if (offsets == NULL)
501    {    {
502    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
# Line 522  if (argc > 2) Line 529  if (argc > 2)
529  /* Set alternative malloc function */  /* Set alternative malloc function */
530    
531  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
532    pcre_free = new_free;
533    pcre_stack_malloc = stack_malloc;
534    pcre_stack_free = stack_free;
535    
536  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
537    
# Line 548  while (!done) Line 558  while (!done)
558    int do_g = 0;    int do_g = 0;
559    int do_showinfo = showinfo;    int do_showinfo = showinfo;
560    int do_showrest = 0;    int do_showrest = 0;
   int utf8 = 0;  
561    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
562    
563      use_utf8 = 0;
564    
565    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
566    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
567    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
568      fflush(outfile);
569    
570    p = buffer;    p = buffer;
571    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 582  while (!done) Line 594  while (!done)
594        }        }
595      if (*pp != 0) break;      if (*pp != 0) break;
596    
597      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
598      if (len < 256)      if (len < 256)
599        {        {
600        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 632  while (!done) Line 644  while (!done)
644        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
645        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
646        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
647          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
648    
649  #if !defined NOPOSIX  #if !defined NOPOSIX
650        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 640  while (!done) Line 653  while (!done)
653        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
654        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
655        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
656        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
657          case '?': options |= PCRE_NO_UTF8_CHECK; break;
658    
659        case 'L':        case 'L':
660        ppp = pp;        ppp = pp;
# Line 680  while (!done) Line 694  while (!done)
694    
695      if (rc != 0)      if (rc != 0)
696        {        {
697        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
698        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
699        goto SKIP_DATA;        goto SKIP_DATA;
700        }        }
# Line 704  while (!done) Line 718  while (!done)
718          }          }
719        time_taken = clock() - start_time;        time_taken = clock() - start_time;
720        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
721          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
722          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
723        }        }
724    
725      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 721  while (!done) Line 735  while (!done)
735          {          {
736          for (;;)          for (;;)
737            {            {
738            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
739              {              {
740              done = 1;              done = 1;
741              goto CONTINUE;              goto CONTINUE;
# Line 739  while (!done) Line 753  while (!done)
753      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
754      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
755    
756        if (log_store)
757          fprintf(outfile, "Memory allocation (code space): %d\n",
758            (int)(gotten_store -
759                  sizeof(real_pcre) -
760                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
761    
762      if (do_showinfo)      if (do_showinfo)
763        {        {
764        unsigned long int get_options;        unsigned long int get_options;
765        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
766        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
767          int nameentrysize, namecount;
768          const uschar *nametable;
769        size_t size;        size_t size;
770    
771        if (do_debug) print_internals(re);        if (do_debug)
772            {
773            fprintf(outfile, "------------------------------------------------------------------\n");
774            print_internals(re, outfile);
775            }
776    
777        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
778        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
779        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
780        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
781        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
782        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
783          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
784          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
785          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
786    
787        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
788        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 780  while (!done) Line 809  while (!done)
809        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
810        if (backrefmax > 0)        if (backrefmax > 0)
811          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
812    
813          if (namecount > 0)
814            {
815            fprintf(outfile, "Named capturing subpatterns:\n");
816            while (namecount-- > 0)
817              {
818              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
819                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
820                GET2(nametable, 0));
821              nametable += nameentrysize;
822              }
823            }
824    
825        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
826          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
827            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
828            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
829            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 790  while (!done) Line 832  while (!done)
832            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
833            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
834            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
835            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
836              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
837    
838        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
839          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 805  while (!done) Line 848  while (!done)
848          }          }
849        else        else
850          {          {
851          if (isprint(first_char))          int ch = first_char & 255;
852            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
853              "" : " (caseless)";
854            if (isprint(ch))
855              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
856          else          else
857            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
858          }          }
859    
860        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 863  while (!done)
863          }          }
864        else        else
865          {          {
866          if (isprint(need_char))          int ch = need_char & 255;
867            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
868              "" : " (caseless)";
869            if (isprint(ch))
870              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
871          else          else
872            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
873          }          }
874        }        }
875    
# Line 839  while (!done) Line 888  while (!done)
888          time_taken = clock() - start_time;          time_taken = clock() - start_time;
889          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
890          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
891            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
892            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
893          }          }
894    
895        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 849  while (!done) Line 898  while (!done)
898        else if (extra == NULL)        else if (extra == NULL)
899          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
900    
901          /* Don't output study size; at present it is in any case a fixed
902          value, but it varies, depending on the computer architecture, and
903          so messes up the test suite. */
904    
905        else if (do_showinfo)        else if (do_showinfo)
906          {          {
907            size_t size;
908          uschar *start_bits = NULL;          uschar *start_bits = NULL;
909            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
910          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
911            /* fprintf(outfile, "Study size = %d\n", size); */
912          if (start_bits == NULL)          if (start_bits == NULL)
913            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
914          else          else
# Line 895  while (!done) Line 951  while (!done)
951      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
952      int *use_offsets = offsets;      int *use_offsets = offsets;
953      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
954        int callout_data = 0;
955        int callout_data_set = 0;
956      int count, c;      int count, c;
957      int copystrings = 0;      int copystrings = 0;
958        int find_match_limit = 0;
959      int getstrings = 0;      int getstrings = 0;
960      int getlist = 0;      int getlist = 0;
961      int gmatched = 0;      int gmatched = 0;
# Line 905  while (!done) Line 964  while (!done)
964    
965      options = 0;      options = 0;
966    
967        pcre_callout = callout;
968        first_callout = 1;
969        callout_extra = 0;
970        callout_count = 0;
971        callout_fail_count = 999999;
972        callout_fail_id = -1;
973        show_malloc = 0;
974    
975      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
976      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
977        {        {
978        done = 1;        done = 1;
979        goto CONTINUE;        goto CONTINUE;
# Line 926  while (!done) Line 993  while (!done)
993        {        {
994        int i = 0;        int i = 0;
995        int n = 0;        int n = 0;
996    
997        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
998          {          {
999          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 956  while (!done) Line 1024  while (!done)
1024              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1025            if (*pt == '}')            if (*pt == '}')
1026              {              {
1027              unsigned char buffer[8];              unsigned char buff8[8];
1028              int ii, utn;              int ii, utn;
1029              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1030              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1031              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1032              p = pt + 1;              p = pt + 1;
1033              break;              break;
1034              }              }
# Line 990  while (!done) Line 1058  while (!done)
1058          continue;          continue;
1059    
1060          case 'C':          case 'C':
1061          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1062          copystrings |= 1 << n;            {
1063              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1064              copystrings |= 1 << n;
1065              }
1066            else if (isalnum(*p))
1067              {
1068              uschar name[256];
1069              uschar *npp = name;
1070              while (isalnum(*p)) *npp++ = *p++;
1071              *npp = 0;
1072              n = pcre_get_stringnumber(re, (char *)name);
1073              if (n < 0)
1074                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1075              else copystrings |= 1 << n;
1076              }
1077            else if (*p == '+')
1078              {
1079              callout_extra = 1;
1080              p++;
1081              }
1082            else if (*p == '-')
1083              {
1084              pcre_callout = NULL;
1085              p++;
1086              }
1087            else if (*p == '!')
1088              {
1089              callout_fail_id = 0;
1090              p++;
1091              while(isdigit(*p))
1092                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1093              callout_fail_count = 0;
1094              if (*p == '!')
1095                {
1096                p++;
1097                while(isdigit(*p))
1098                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1099                }
1100              }
1101            else if (*p == '*')
1102              {
1103              int sign = 1;
1104              callout_data = 0;
1105              if (*(++p) == '-') { sign = -1; p++; }
1106              while(isdigit(*p))
1107                callout_data = callout_data * 10 + *p++ - '0';
1108              callout_data *= sign;
1109              callout_data_set = 1;
1110              }
1111          continue;          continue;
1112    
1113          case 'G':          case 'G':
1114          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1115          getstrings |= 1 << n;            {
1116              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1117              getstrings |= 1 << n;
1118              }
1119            else if (isalnum(*p))
1120              {
1121              uschar name[256];
1122              uschar *npp = name;
1123              while (isalnum(*p)) *npp++ = *p++;
1124              *npp = 0;
1125              n = pcre_get_stringnumber(re, (char *)name);
1126              if (n < 0)
1127                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1128              else getstrings |= 1 << n;
1129              }
1130          continue;          continue;
1131    
1132          case 'L':          case 'L':
1133          getlist = 1;          getlist = 1;
1134          continue;          continue;
1135    
1136            case 'M':
1137            find_match_limit = 1;
1138            continue;
1139    
1140          case 'N':          case 'N':
1141          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1142          continue;          continue;
# Line 1013  while (!done) Line 1147  while (!done)
1147            {            {
1148            size_offsets_max = n;            size_offsets_max = n;
1149            free(offsets);            free(offsets);
1150            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1151            if (offsets == NULL)            if (offsets == NULL)
1152              {              {
1153              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
# Line 1022  while (!done) Line 1156  while (!done)
1156              }              }
1157            }            }
1158          use_size_offsets = n;          use_size_offsets = n;
1159          if (n == 0) use_offsets = NULL;          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1160            continue;
1161    
1162            case 'S':
1163            show_malloc = 1;
1164          continue;          continue;
1165    
1166          case 'Z':          case 'Z':
1167          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1168          continue;          continue;
1169    
1170            case '?':
1171            options |= PCRE_NO_UTF8_CHECK;
1172            continue;
1173          }          }
1174        *q++ = c;        *q++ = c;
1175        }        }
# Line 1035  while (!done) Line 1177  while (!done)
1177      len = q - dbuffer;      len = q - dbuffer;
1178    
1179      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1180      support timing. */      support timing or playing with the match limit or callout data. */
1181    
1182  #if !defined NOPOSIX  #if !defined NOPOSIX
1183      if (posix || do_posix)      if (posix || do_posix)
1184        {        {
1185        int rc;        int rc;
1186        int eflags = 0;        int eflags = 0;
1187        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1188          if (use_size_offsets > 0)
1189            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1190        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1191        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1192    
# Line 1050  while (!done) Line 1194  while (!done)
1194    
1195        if (rc != 0)        if (rc != 0)
1196          {          {
1197          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1198          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1199          }          }
1200        else        else
1201          {          {
1202          size_t i;          size_t i;
1203          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1204            {            {
1205            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1206              {              {
1207              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1208              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1209                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1210              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1211              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1212                {                {
1213                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1214                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1215                    outfile);
1216                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1217                }                }
1218              }              }
# Line 1093  while (!done) Line 1238  while (!done)
1238              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1239          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1240          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1241            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1242            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1243            }
1244    
1245          /* If find_match_limit is set, we want to do repeated matches with
1246          varying limits in order to find the minimum value. */
1247    
1248          if (find_match_limit)
1249            {
1250            int min = 0;
1251            int mid = 64;
1252            int max = -1;
1253    
1254            if (extra == NULL)
1255              {
1256              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1257              extra->flags = 0;
1258              }
1259            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1260    
1261            for (;;)
1262              {
1263              extra->match_limit = mid;
1264              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1265                options | g_notempty, use_offsets, use_size_offsets);
1266              if (count == PCRE_ERROR_MATCHLIMIT)
1267                {
1268                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1269                min = mid;
1270                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1271                }
1272              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1273                {
1274                if (mid == min + 1)
1275                  {
1276                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1277                  break;
1278                  }
1279                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1280                max = mid;
1281                mid = (min + mid)/2;
1282                }
1283              else break;    /* Some other error */
1284              }
1285    
1286            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1287          }          }
1288    
1289        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
1290    
1291          else if (callout_data_set)
1292            {
1293            if (extra == NULL)
1294              {
1295              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1296              extra->flags = 0;
1297              }
1298            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1299            extra->callout_data = &callout_data;
1300            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1301              options | g_notempty, use_offsets, use_size_offsets);
1302            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1303            }
1304    
1305          /* The normal case is just to do the match once, with the default
1306          value of match_limit. */
1307    
1308          else count = pcre_exec(re, extra, (char *)bptr, len,
1309          start_offset, options | g_notempty, use_offsets, use_size_offsets);          start_offset, options | g_notempty, use_offsets, use_size_offsets);
1310    
1311        if (count == 0)        if (count == 0)
# Line 1118  while (!done) Line 1326  while (!done)
1326            else            else
1327              {              {
1328              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1329              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1330                  use_offsets[i+1] - use_offsets[i], outfile);
1331              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1332              if (i == 0)              if (i == 0)
1333                {                {
1334                if (do_showrest)                if (do_showrest)
1335                  {                  {
1336                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1337                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1338                      outfile);
1339                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1340                  }                  }
1341                }                }
# Line 1185  while (!done) Line 1395  while (!done)
1395    
1396        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1397        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1398        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1399        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1400        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1401          that was checked before setting g_notempty. */
1402    
1403        else        else
1404          {          {
1405          if (g_notempty != 0)          if (g_notempty != 0)
1406            {            {
1407              int onechar = 1;
1408            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
1409            use_offsets[1] = start_offset + 1;            if (use_utf8)
1410                {
1411                while (start_offset + onechar < len)
1412                  {
1413                  int tb = bptr[start_offset+onechar];
1414                  if (tb <= 127) break;
1415                  tb &= 0xc0;
1416                  if (tb != 0 && tb != 0xc0) onechar++;
1417                  }
1418                }
1419              use_offsets[1] = start_offset + onechar;
1420            }            }
1421          else          else
1422            {            {
1423            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1424              {              {
1425              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1426              }              }
1427              else fprintf(outfile, "Error %d\n", count);
1428            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1429            }            }
1430          }          }
# Line 1254  while (!done) Line 1476  while (!done)
1476      }      }
1477    }    }
1478    
1479  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1480  return 0;  return 0;
1481  }  }
1482    

Legend:
Removed from v.57  
changed lines
  Added in v.73

  ViewVC Help
Powered by ViewVC 1.1.5