/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 57 by nigel, Sat Feb 24 21:39:50 2007 UTC revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47  /* Use the internal info for displaying the results of pcre_study(). */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49    /* We need the internal info for displaying the results of pcre_study() and
50    other internal data; pcretest also uses some of the fixed tables, and generally
51    has "inside information" compared to a program that strictly follows the PCRE
52    API. */
53    
54    #include "pcre_internal.h"
55    
 #include "internal.h"  
56    
57  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
58  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 29  Makefile. */ Line 70  Makefile. */
70  #endif  #endif
71  #endif  #endif
72    
73  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
74    
75    #define BUFFER_SIZE 30000
76    #define PBUFFER_SIZE BUFFER_SIZE
77    #define DBUFFER_SIZE BUFFER_SIZE
78    
79    
80  static FILE *outfile;  static FILE *outfile;
81  static int log_store = 0;  static int log_store = 0;
82    static int callout_count;
83    static int callout_extra;
84    static int callout_fail_count;
85    static int callout_fail_id;
86    static int first_callout;
87    static int show_malloc;
88    static int use_utf8;
89  static size_t gotten_store;  static size_t gotten_store;
90    
91    static uschar *pbuffer = NULL;
92    
93    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
94    
95  /*************************************************  /*************************************************
96  *       Convert character value to UTF-8         *  *          Read number from string               *
97  *************************************************/  *************************************************/
98    
99  /* This function takes an integer value in the range 0 - 0x7fffffff  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
100  and encodes it as a UTF-8 character in 0 to 6 bytes.  around with conditional compilation, just do the job by hand. It is only used
101    for unpicking the -o argument, so just keep it simple.
102    
103  Arguments:  Arguments:
104    cvalue     the character value    str           string to be converted
105    buffer     pointer to buffer for result - at least 6 bytes long    endptr        where to put the end pointer
106    
107  Returns:     number of characters placed in the buffer  Returns:        the unsigned long
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
108  */  */
109    
110  static int  static int
111  ord2utf8(int cvalue, unsigned char *buffer)  get_value(unsigned char *str, unsigned char **endptr)
112  {  {
113  register int i, j;  int result = 0;
114  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  while(*str != 0 && isspace(*str)) str++;
115    if (cvalue <= utf8_table1[i]) break;  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
116  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  *endptr = str;
117  if (cvalue < 0) return -1;  return(result);
 *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
 cvalue >>= 6 - i;  
 for (j = 0; j < i; j++)  
   {  
   *buffer++ = 0x80 | (cvalue & 0x3f);  
   cvalue >>= 6;  
   }  
 return i + 1;  
118  }  }
119    
120    
121    
122    
123  /*************************************************  /*************************************************
124  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
125  *************************************************/  *************************************************/
# Line 99  Returns:   >  0 => the number of bytes c Line 135  Returns:   >  0 => the number of bytes c
135             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
136  */  */
137    
138  int  static int
139  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
140  {  {
141  int c = *buffer++;  int c = *buffer++;
# Line 117  if (i == 0 || i == 6) return 0;        / Line 153  if (i == 0 || i == 6) return 0;        /
153    
154  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
155    
156  d = c & utf8_table3[i];  s = 6*i;
157  s = 6 - i;  d = (c & _pcre_utf8_table3[i]) << s;
158    
159  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
160    {    {
161    c = *buffer++;    c = *buffer++;
162    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
163      s -= 6;
164    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
165    }    }
166    
167  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
168    
169  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < _pcre_utf8_table1_size; j++)
170    if (d <= utf8_table1[j]) break;    if (d <= _pcre_utf8_table1[j]) break;
171  if (j != i) return -(i+1);  if (j != i) return -(i+1);
172    
173  /* Valid value */  /* Valid value */
# Line 142  return i+1; Line 178  return i+1;
178    
179    
180    
181    /*************************************************
182    *             Print character string             *
183    *************************************************/
184    
185    /* Character string printing function. Must handle UTF-8 strings in utf8
186    mode. Yields number of characters printed. If handed a NULL file, just counts
187    chars without printing. */
188    
189    static int pchars(unsigned char *p, int length, FILE *f)
190    {
191    int c;
192    int yield = 0;
193    
194  /* Debugging function to print the internal form of the regex. This is the same  while (length-- > 0)
195  code as contained in pcre.c under the DEBUG macro. */    {
196      if (use_utf8)
197        {
198        int rc = utf82ord(p, &c);
199    
200  static const char *OP_names[] = {      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
201    "End", "\\A", "\\B", "\\b", "\\D", "\\d",        {
202    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",        length -= rc - 1;
203    "Opt", "^", "$", "Any", "chars", "not",        p += rc;
204    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",        if (c < 256 && isprint(c))
205    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",          {
206    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",          if (f != NULL) fprintf(f, "%c", c);
207    "*", "*?", "+", "+?", "?", "??", "{", "{",          yield++;
208    "class", "Ref", "Recurse",          }
209    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",        else
210    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",          {
211    "Brazero", "Braminzero", "Branumber", "Bra"          int n;
212  };          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
213            yield += n;
214            }
215          continue;
216          }
217        }
218    
219       /* Not UTF-8, or malformed UTF-8  */
220    
221  static void print_internals(pcre *re)    if (isprint(c = *(p++)))
222  {      {
223  unsigned char *code = ((real_pcre *)re)->code;      if (f != NULL) fprintf(f, "%c", c);
224        yield++;
225        }
226      else
227        {
228        if (f != NULL) fprintf(f, "\\x%02x", c);
229        yield += 4;
230        }
231      }
232    
233    return yield;
234    }
235    
 fprintf(outfile, "------------------------------------------------------------------\n");  
236    
 for(;;)  
   {  
   int c;  
   int charlength;  
237    
238    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /*************************************************
239    *              Callout function                  *
240    *************************************************/
241    
242    /* Called from PCRE as a result of the (?C) item. We print out where we are in
243    the match. Yield zero unless more callouts than the fail count, or the callout
244    data is not zero. */
245    
246    if (*code >= OP_BRA)  static int callout(pcre_callout_block *cb)
247    {
248    FILE *f = (first_callout | callout_extra)? outfile : NULL;
249    int i, pre_start, post_start, subject_length;
250    
251    if (callout_extra)
252      {
253      fprintf(f, "Callout %d: last capture = %d\n",
254        cb->callout_number, cb->capture_last);
255    
256      for (i = 0; i < cb->capture_top * 2; i += 2)
257      {      {
258      if (*code - OP_BRA > EXTRACT_BASIC_MAX)      if (cb->offset_vector[i] < 0)
259        fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);        fprintf(f, "%2d: <unset>\n", i/2);
260      else      else
261        fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);        {
262      code += 2;        fprintf(f, "%2d: ", i/2);
263          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
264            cb->offset_vector[i+1] - cb->offset_vector[i], f);
265          fprintf(f, "\n");
266          }
267      }      }
268      }
269    
270    else switch(*code)  /* Re-print the subject in canonical form, the first time or if giving full
271      {  details. On subsequent calls in the same match, we use pchars just to find the
272      case OP_END:  printed lengths of the substrings. */
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
273    
274        CLASS_REF_REPEAT:  if (f != NULL) fprintf(f, "--->");
275    
276        switch(*code)  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
277          {  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
278          case OP_CRSTAR:    cb->current_position - cb->start_match, f);
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
279    
280          case OP_CRRANGE:  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
281    
282          default:  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
283          code--;    cb->subject_length - cb->current_position, f);
         }  
       }  
     break;  
284    
285      /* Anything else is just a one-node item */  if (f != NULL) fprintf(f, "\n");
286    
287      default:  /* Always print appropriate indicators, with callout number if not already
288      fprintf(outfile, "    %s", OP_names[*code]);  shown. For automatic callouts, show the pattern offset. */
     break;  
     }  
289    
290    code++;  if (cb->callout_number == 255)
291    fprintf(outfile, "\n");    {
292      fprintf(outfile, "%+3d ", cb->pattern_position);
293      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
294      }
295    else
296      {
297      if (callout_extra) fprintf(outfile, "    ");
298        else fprintf(outfile, "%3d ", cb->callout_number);
299    }    }
 }  
300    
301    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
302    fprintf(outfile, "^");
303    
304    if (post_start > 0)
305      {
306      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
307      fprintf(outfile, "^");
308      }
309    
310  /* Character string printing function. A "normal" and a UTF-8 version. */  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
311      fprintf(outfile, " ");
312    
313  static void pchars(unsigned char *p, int length, int utf8)  fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
314  {    pbuffer + cb->pattern_position);
315  int c;  
316  while (length-- > 0)  fprintf(outfile, "\n");
317    first_callout = 0;
318    
319    if (cb->callout_data != NULL)
320    {    {
321    if (utf8)    int callout_data = *((int *)(cb->callout_data));
322      if (callout_data != 0)
323      {      {
324      int rc = utf82ord(p, &c);      fprintf(outfile, "Callout data = %d\n", callout_data);
325      if (rc > 0)      return callout_data;
       {  
       length -= rc - 1;  
       p += rc;  
       if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x{%02x}", c);  
       continue;  
       }  
326      }      }
   
    /* Not UTF-8, or malformed UTF-8  */  
   
   if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
     else fprintf(outfile, "\\x%02x", c);  
327    }    }
328    
329    return (cb->callout_number != callout_fail_id)? 0 :
330           (++callout_count >= callout_fail_count)? 1 : 0;
331  }  }
332    
333    
334    /*************************************************
335    *            Local malloc functions              *
336    *************************************************/
337    
338  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
339  compiled re. */  compiled re. */
340    
341  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
342  {  {
343    void *block = malloc(size);
344  gotten_store = size;  gotten_store = size;
345  if (log_store)  if (show_malloc)
346    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
347      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
348  return malloc(size);  }
349    
350    static void new_free(void *block)
351    {
352    if (show_malloc)
353      fprintf(outfile, "free             %p\n", block);
354    free(block);
355    }
356    
357    
358    /* For recursion malloc/free, to test stacking calls */
359    
360    static void *stack_malloc(size_t size)
361    {
362    void *block = malloc(size);
363    if (show_malloc)
364      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
365    return block;
366  }  }
367    
368    static void stack_free(void *block)
369    {
370    if (show_malloc)
371      fprintf(outfile, "stack_free       %p\n", block);
372    free(block);
373    }
374    
375    
376    /*************************************************
377    *          Call pcre_fullinfo()                  *
378    *************************************************/
379    
380  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
381    
# Line 419  if ((rc = pcre_fullinfo(re, study, optio Line 388  if ((rc = pcre_fullinfo(re, study, optio
388    
389    
390    
391    /*************************************************
392    *         Byte flipping function                 *
393    *************************************************/
394    
395    static long int
396    byteflip(long int value, int n)
397    {
398    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
399    return ((value & 0x000000ff) << 24) |
400           ((value & 0x0000ff00) <<  8) |
401           ((value & 0x00ff0000) >>  8) |
402           ((value & 0xff000000) >> 24);
403    }
404    
405    
406    
407    
408    /*************************************************
409    *                Main Program                    *
410    *************************************************/
411    
412  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
413  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 435  int showinfo = 0; Line 424  int showinfo = 0;
424  int showstore = 0;  int showstore = 0;
425  int size_offsets = 45;  int size_offsets = 45;
426  int size_offsets_max;  int size_offsets_max;
427  int *offsets;  int *offsets = NULL;
428  #if !defined NOPOSIX  #if !defined NOPOSIX
429  int posix = 0;  int posix = 0;
430  #endif  #endif
431  int debug = 0;  int debug = 0;
432  int done = 0;  int done = 0;
433  unsigned char buffer[30000];  int all_use_dfa = 0;
434  unsigned char dbuffer[1024];  int yield = 0;
435    
436    unsigned char *buffer;
437    unsigned char *dbuffer;
438    
439  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
440    when I am debugging. */
441    
442    buffer = (unsigned char *)malloc(BUFFER_SIZE);
443    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
444    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
445    
446    /* The outfile variable is static so that new_malloc can use it. The _setmode()
447    stuff is some magic that I don't understand, but which apparently does good
448    things in Windows. It's related to line terminations.  */
449    
450    #if defined(_WIN32) || defined(WIN32)
451    _setmode( _fileno( stdout ), 0x8000 );
452    #endif  /* defined(_WIN32) || defined(WIN32) */
453    
454  outfile = stdout;  outfile = stdout;
455    
# Line 452  outfile = stdout; Line 457  outfile = stdout;
457    
458  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
459    {    {
460    char *endptr;    unsigned char *endptr;
461    
462    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
463      showstore = 1;      showstore = 1;
464    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
465    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
466    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
467      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
468    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
469        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
470            *endptr == 0))
471      {      {
472      op++;      op++;
473      argc--;      argc--;
# Line 468  while (argc > 1 && argv[op][0] == '-') Line 475  while (argc > 1 && argv[op][0] == '-')
475  #if !defined NOPOSIX  #if !defined NOPOSIX
476    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
477  #endif  #endif
478      else if (strcmp(argv[op], "-C") == 0)
479        {
480        int rc;
481        printf("PCRE version %s\n", pcre_version());
482        printf("Compiled with\n");
483        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
484        printf("  %sUTF-8 support\n", rc? "" : "No ");
485        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
486        printf("  %sUnicode properties support\n", rc? "" : "No ");
487        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
488        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
489        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
490        printf("  Internal link size = %d\n", rc);
491        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
492        printf("  POSIX malloc threshold = %d\n", rc);
493        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
494        printf("  Default match limit = %d\n", rc);
495        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
496        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
497        exit(0);
498        }
499    else    else
500      {      {
501      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
502      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
503      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
504             "  -i     show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n");
505        printf("  -dfa   force DFA matching for all subjects\n");
506        printf("  -i     show information about compiled pattern\n"
507               "  -m     output memory used information\n"
508             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
509  #if !defined NOPOSIX  #if !defined NOPOSIX
510      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
511  #endif  #endif
512      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
513             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
514      return 1;      yield = 1;
515        goto EXIT;
516      }      }
517    op++;    op++;
518    argc--;    argc--;
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 521  while (argc > 1 && argv[op][0] == '-')
521  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
522    
523  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
524  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
525  if (offsets == NULL)  if (offsets == NULL)
526    {    {
527    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
528      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
529    return 1;    yield = 1;
530      goto EXIT;
531    }    }
532    
533  /* Sort out the input and output files */  /* Sort out the input and output files */
534    
535  if (argc > 1)  if (argc > 1)
536    {    {
537    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
538    if (infile == NULL)    if (infile == NULL)
539      {      {
540      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
541      return 1;      yield = 1;
542        goto EXIT;
543      }      }
544    }    }
545    
546  if (argc > 2)  if (argc > 2)
547    {    {
548    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
549    if (outfile == NULL)    if (outfile == NULL)
550      {      {
551      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
552      return 1;      yield = 1;
553        goto EXIT;
554      }      }
555    }    }
556    
557  /* Set alternative malloc function */  /* Set alternative malloc function */
558    
559  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
560    pcre_free = new_free;
561    pcre_stack_malloc = stack_malloc;
562    pcre_stack_free = stack_free;
563    
564  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
565    
# Line 541  while (!done) Line 579  while (!done)
579    
580    const char *error;    const char *error;
581    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
582      unsigned char *to_file = NULL;
583    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
584      unsigned long int true_size, true_study_size = 0;
585      size_t size, regex_gotten_store;
586    int do_study = 0;    int do_study = 0;
587    int do_debug = debug;    int do_debug = debug;
588    int do_G = 0;    int do_G = 0;
589    int do_g = 0;    int do_g = 0;
590    int do_showinfo = showinfo;    int do_showinfo = showinfo;
591    int do_showrest = 0;    int do_showrest = 0;
592    int utf8 = 0;    int do_flip = 0;
593    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
594    
595      use_utf8 = 0;
596    
597    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
598    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
599    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
600      fflush(outfile);
601    
602    p = buffer;    p = buffer;
603    while (isspace(*p)) p++;    while (isspace(*p)) p++;
604    if (*p == 0) continue;    if (*p == 0) continue;
605    
606    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
607    complete, read more. */  
608      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
609        {
610        unsigned long int magic;
611        uschar sbuf[8];
612        FILE *f;
613    
614        p++;
615        pp = p + (int)strlen((char *)p);
616        while (isspace(pp[-1])) pp--;
617        *pp = 0;
618    
619        f = fopen((char *)p, "rb");
620        if (f == NULL)
621          {
622          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
623          continue;
624          }
625    
626        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
627    
628        true_size =
629          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
630        true_study_size =
631          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
632    
633        re = (real_pcre *)new_malloc(true_size);
634        regex_gotten_store = gotten_store;
635    
636        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
637    
638        magic = ((real_pcre *)re)->magic_number;
639        if (magic != MAGIC_NUMBER)
640          {
641          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
642            {
643            do_flip = 1;
644            }
645          else
646            {
647            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
648            fclose(f);
649            continue;
650            }
651          }
652    
653        fprintf(outfile, "Compiled regex%s loaded from %s\n",
654          do_flip? " (byte-inverted)" : "", p);
655    
656        /* Need to know if UTF-8 for printing data strings */
657    
658        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
659        use_utf8 = (options & PCRE_UTF8) != 0;
660    
661        /* Now see if there is any following study data */
662    
663        if (true_study_size != 0)
664          {
665          pcre_study_data *psd;
666    
667          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
668          extra->flags = PCRE_EXTRA_STUDY_DATA;
669    
670          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
671          extra->study_data = psd;
672    
673          if (fread(psd, 1, true_study_size, f) != true_study_size)
674            {
675            FAIL_READ:
676            fprintf(outfile, "Failed to read data from %s\n", p);
677            if (extra != NULL) new_free(extra);
678            if (re != NULL) new_free(re);
679            fclose(f);
680            continue;
681            }
682          fprintf(outfile, "Study data loaded from %s\n", p);
683          do_study = 1;     /* To get the data output if requested */
684          }
685        else fprintf(outfile, "No study data\n");
686    
687        fclose(f);
688        goto SHOW_INFO;
689        }
690    
691      /* In-line pattern (the usual case). Get the delimiter and seek the end of
692      the pattern; if it isn't complete, read more. */
693    
694    delimiter = *p++;    delimiter = *p++;
695    
# Line 582  while (!done) Line 711  while (!done)
711        }        }
712      if (*pp != 0) break;      if (*pp != 0) break;
713    
714      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
715      if (len < 256)      if (len < 256)
716        {        {
717        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 605  while (!done) Line 734  while (!done)
734    
735    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
736    
737    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
738      for callouts. */
739    
740    *pp++ = 0;    *pp++ = 0;
741      strcpy((char *)pbuffer, (char *)p);
742    
743    /* Look for options after final delimiter */    /* Look for options after final delimiter */
744    
# Line 619  while (!done) Line 750  while (!done)
750      {      {
751      switch (*pp++)      switch (*pp++)
752        {        {
753          case 'f': options |= PCRE_FIRSTLINE; break;
754        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
755        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
756        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 627  while (!done) Line 759  while (!done)
759    
760        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
761        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
762          case 'C': options |= PCRE_AUTO_CALLOUT; break;
763        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
764        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
765          case 'F': do_flip = 1; break;
766        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
767        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
768        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
769          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
770    
771  #if !defined NOPOSIX  #if !defined NOPOSIX
772        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 640  while (!done) Line 775  while (!done)
775        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
776        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
777        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
778        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
779          case '?': options |= PCRE_NO_UTF8_CHECK; break;
780    
781        case 'L':        case 'L':
782        ppp = pp;        ppp = pp;
783        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
784          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
785        *ppp = 0;        *ppp = 0;
786        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
787          {          {
# Line 655  while (!done) Line 792  while (!done)
792        pp = ppp;        pp = ppp;
793        break;        break;
794    
795        case '\n': case ' ': break;        case '>':
796          to_file = pp;
797          while (*pp != 0) pp++;
798          while (isspace(pp[-1])) pp--;
799          *pp = 0;
800          break;
801    
802          case '\r':                      /* So that it works in Windows */
803          case '\n':
804          case ' ':
805          break;
806    
807        default:        default:
808        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
809        goto SKIP_DATA;        goto SKIP_DATA;
# Line 671  while (!done) Line 819  while (!done)
819      {      {
820      int rc;      int rc;
821      int cflags = 0;      int cflags = 0;
822    
823      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
824      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
825        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
826      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
827    
828      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 830  while (!done)
830    
831      if (rc != 0)      if (rc != 0)
832        {        {
833        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
834        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
835        goto SKIP_DATA;        goto SKIP_DATA;
836        }        }
# Line 704  while (!done) Line 854  while (!done)
854          }          }
855        time_taken = clock() - start_time;        time_taken = clock() - start_time;
856        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
857          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
858          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
859        }        }
860    
861      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 721  while (!done) Line 871  while (!done)
871          {          {
872          for (;;)          for (;;)
873            {            {
874            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
875              {              {
876              done = 1;              done = 1;
877              goto CONTINUE;              goto CONTINUE;
# Line 739  while (!done) Line 889  while (!done)
889      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
890      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
891    
892        if (log_store)
893          fprintf(outfile, "Memory allocation (code space): %d\n",
894            (int)(gotten_store -
895                  sizeof(real_pcre) -
896                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
897    
898        /* Extract the size for possible writing before possibly flipping it,
899        and remember the store that was got. */
900    
901        true_size = ((real_pcre *)re)->size;
902        regex_gotten_store = gotten_store;
903    
904        /* If /S was present, study the regexp to generate additional info to
905        help with the matching. */
906    
907        if (do_study)
908          {
909          if (timeit)
910            {
911            register int i;
912            clock_t time_taken;
913            clock_t start_time = clock();
914            for (i = 0; i < LOOPREPEAT; i++)
915              extra = pcre_study(re, study_options, &error);
916            time_taken = clock() - start_time;
917            if (extra != NULL) free(extra);
918            fprintf(outfile, "  Study time %.3f milliseconds\n",
919              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
920                (double)CLOCKS_PER_SEC);
921            }
922          extra = pcre_study(re, study_options, &error);
923          if (error != NULL)
924            fprintf(outfile, "Failed to study: %s\n", error);
925          else if (extra != NULL)
926            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
927          }
928    
929        /* If the 'F' option was present, we flip the bytes of all the integer
930        fields in the regex data block and the study block. This is to make it
931        possible to test PCRE's handling of byte-flipped patterns, e.g. those
932        compiled on a different architecture. */
933    
934        if (do_flip)
935          {
936          real_pcre *rre = (real_pcre *)re;
937          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
938          rre->size = byteflip(rre->size, sizeof(rre->size));
939          rre->options = byteflip(rre->options, sizeof(rre->options));
940          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
941          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
942          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
943          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
944          rre->name_table_offset = byteflip(rre->name_table_offset,
945            sizeof(rre->name_table_offset));
946          rre->name_entry_size = byteflip(rre->name_entry_size,
947            sizeof(rre->name_entry_size));
948          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
949    
950          if (extra != NULL)
951            {
952            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
953            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
954            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
955            }
956          }
957    
958        /* Extract information from the compiled data if required */
959    
960        SHOW_INFO:
961    
962      if (do_showinfo)      if (do_showinfo)
963        {        {
964        unsigned long int get_options;        unsigned long int get_options, all_options;
965        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
966        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
967        size_t size;        int nameentrysize, namecount;
968          const uschar *nametable;
969    
970        if (do_debug) print_internals(re);        if (do_debug)
971            {
972            fprintf(outfile, "------------------------------------------------------------------\n");
973            _pcre_printint(re, outfile);
974            }
975    
976        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
977        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
978        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
979        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
980        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
981        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
982          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
983          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
984          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
985    
986        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
987        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 773  while (!done) Line 1001  while (!done)
1001              get_options, old_options);              get_options, old_options);
1002          }          }
1003    
1004        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1005          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1006          size, gotten_store);          (int)size, (int)regex_gotten_store);
1007    
1008        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1009        if (backrefmax > 0)        if (backrefmax > 0)
1010          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1011    
1012          if (namecount > 0)
1013            {
1014            fprintf(outfile, "Named capturing subpatterns:\n");
1015            while (namecount-- > 0)
1016              {
1017              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1018                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1019                GET2(nametable, 0));
1020              nametable += nameentrysize;
1021              }
1022            }
1023    
1024          /* The NOPARTIAL bit is a private bit in the options, so we have
1025          to fish it out via our back door */
1026    
1027          all_options = ((real_pcre *)re)->options;
1028          if (do_flip)
1029            {
1030            all_options = byteflip(all_options, sizeof(all_options));
1031            }
1032    
1033          if ((all_options & PCRE_NOPARTIAL) != 0)
1034            fprintf(outfile, "Partial matching not supported\n");
1035    
1036        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1037          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1038            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1039            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1040            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1041            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1042              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1043            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1044            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1045            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1046            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1047            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1048              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1049    
1050        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1051          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 805  while (!done) Line 1060  while (!done)
1060          }          }
1061        else        else
1062          {          {
1063          if (isprint(first_char))          int ch = first_char & 255;
1064            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1065              "" : " (caseless)";
1066            if (isprint(ch))
1067              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1068          else          else
1069            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1070          }          }
1071    
1072        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 1075  while (!done)
1075          }          }
1076        else        else
1077          {          {
1078          if (isprint(need_char))          int ch = need_char & 255;
1079            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1080              "" : " (caseless)";
1081            if (isprint(ch))
1082              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1083          else          else
1084            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1085          }          }
       }  
1086    
1087      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1088      help with the matching. */        value, but it varies, depending on the computer architecture, and
1089          so messes up the test suite. (And with the /F option, it might be
1090          flipped.) */
1091    
1092      if (do_study)        if (do_study)
       {  
       if (timeit)  
1093          {          {
1094          register int i;          if (extra == NULL)
1095          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1096          clock_t start_time = clock();          else
1097          for (i = 0; i < LOOPREPEAT; i++)            {
1098            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1099          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1100          if (extra != NULL) free(extra);  
1101          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1102            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1103            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1104                {
1105                int i;
1106                int c = 24;
1107                fprintf(outfile, "Starting byte set: ");
1108                for (i = 0; i < 256; i++)
1109                  {
1110                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1111                    {
1112                    if (c > 75)
1113                      {
1114                      fprintf(outfile, "\n  ");
1115                      c = 2;
1116                      }
1117                    if (isprint(i) && i != ' ')
1118                      {
1119                      fprintf(outfile, "%c ", i);
1120                      c += 2;
1121                      }
1122                    else
1123                      {
1124                      fprintf(outfile, "\\x%02x ", i);
1125                      c += 5;
1126                      }
1127                    }
1128                  }
1129                fprintf(outfile, "\n");
1130                }
1131              }
1132          }          }
1133          }
1134    
1135        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1136        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1137          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1138    
1139        else if (do_showinfo)      if (to_file != NULL)
1140          {
1141          FILE *f = fopen((char *)to_file, "wb");
1142          if (f == NULL)
1143            {
1144            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1145            }
1146          else
1147          {          {
1148          uschar *start_bits = NULL;          uschar sbuf[8];
1149          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (true_size >> 24)  & 255;
1150          if (start_bits == NULL)          sbuf[1] = (true_size >> 16)  & 255;
1151            fprintf(outfile, "No starting character set\n");          sbuf[2] = (true_size >>  8)  & 255;
1152            sbuf[3] = (true_size)  & 255;
1153    
1154            sbuf[4] = (true_study_size >> 24)  & 255;
1155            sbuf[5] = (true_study_size >> 16)  & 255;
1156            sbuf[6] = (true_study_size >>  8)  & 255;
1157            sbuf[7] = (true_study_size)  & 255;
1158    
1159            if (fwrite(sbuf, 1, 8, f) < 8 ||
1160                fwrite(re, 1, true_size, f) < true_size)
1161              {
1162              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1163              }
1164          else          else
1165            {            {
1166            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1167            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1168              {              {
1169              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1170                    true_study_size)
1171                {                {
1172                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1173                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1174                }                }
1175                else fprintf(outfile, "Study data written to %s\n", to_file);
1176              }              }
           fprintf(outfile, "\n");  
1177            }            }
1178            fclose(f);
1179          }          }
1180    
1181          new_free(re);
1182          if (extra != NULL) new_free(extra);
1183          if (tables != NULL) new_free((void *)tables);
1184          continue;  /* With next regex */
1185        }        }
1186      }      }        /* End of non-POSIX compile */
1187    
1188    /* Read data lines and test them */    /* Read data lines and test them */
1189    
# Line 895  while (!done) Line 1193  while (!done)
1193      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
1194      int *use_offsets = offsets;      int *use_offsets = offsets;
1195      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1196        int callout_data = 0;
1197        int callout_data_set = 0;
1198      int count, c;      int count, c;
1199      int copystrings = 0;      int copystrings = 0;
1200        int find_match_limit = 0;
1201      int getstrings = 0;      int getstrings = 0;
1202      int getlist = 0;      int getlist = 0;
1203      int gmatched = 0;      int gmatched = 0;
1204      int start_offset = 0;      int start_offset = 0;
1205      int g_notempty = 0;      int g_notempty = 0;
1206        int use_dfa = 0;
1207    
1208      options = 0;      options = 0;
1209    
1210        pcre_callout = callout;
1211        first_callout = 1;
1212        callout_extra = 0;
1213        callout_count = 0;
1214        callout_fail_count = 999999;
1215        callout_fail_id = -1;
1216        show_malloc = 0;
1217    
1218      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1219      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1220        {        {
1221        done = 1;        done = 1;
1222        goto CONTINUE;        goto CONTINUE;
# Line 926  while (!done) Line 1236  while (!done)
1236        {        {
1237        int i = 0;        int i = 0;
1238        int n = 0;        int n = 0;
1239    
1240        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1241          {          {
1242          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 956  while (!done) Line 1267  while (!done)
1267              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1268            if (*pt == '}')            if (*pt == '}')
1269              {              {
1270              unsigned char buffer[8];              unsigned char buff8[8];
1271              int ii, utn;              int ii, utn;
1272              utn = ord2utf8(c, buffer);              utn = _pcre_ord2utf8(c, buff8);
1273              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1274              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1275              p = pt + 1;              p = pt + 1;
1276              break;              break;
1277              }              }
# Line 977  while (!done) Line 1288  while (!done)
1288            }            }
1289          break;          break;
1290    
1291          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1292          p--;          p--;
1293          continue;          continue;
1294    
1295            case '>':
1296            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1297            continue;
1298    
1299          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1300          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1301          continue;          continue;
# Line 990  while (!done) Line 1305  while (!done)
1305          continue;          continue;
1306    
1307          case 'C':          case 'C':
1308          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1309          copystrings |= 1 << n;            {
1310              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1311              copystrings |= 1 << n;
1312              }
1313            else if (isalnum(*p))
1314              {
1315              uschar name[256];
1316              uschar *npp = name;
1317              while (isalnum(*p)) *npp++ = *p++;
1318              *npp = 0;
1319              n = pcre_get_stringnumber(re, (char *)name);
1320              if (n < 0)
1321                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1322              else copystrings |= 1 << n;
1323              }
1324            else if (*p == '+')
1325              {
1326              callout_extra = 1;
1327              p++;
1328              }
1329            else if (*p == '-')
1330              {
1331              pcre_callout = NULL;
1332              p++;
1333              }
1334            else if (*p == '!')
1335              {
1336              callout_fail_id = 0;
1337              p++;
1338              while(isdigit(*p))
1339                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1340              callout_fail_count = 0;
1341              if (*p == '!')
1342                {
1343                p++;
1344                while(isdigit(*p))
1345                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1346                }
1347              }
1348            else if (*p == '*')
1349              {
1350              int sign = 1;
1351              callout_data = 0;
1352              if (*(++p) == '-') { sign = -1; p++; }
1353              while(isdigit(*p))
1354                callout_data = callout_data * 10 + *p++ - '0';
1355              callout_data *= sign;
1356              callout_data_set = 1;
1357              }
1358            continue;
1359    
1360            case 'D':
1361            if (posix || do_posix)
1362              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1363            else
1364              use_dfa = 1;
1365            continue;
1366    
1367            case 'F':
1368            options |= PCRE_DFA_SHORTEST;
1369          continue;          continue;
1370    
1371          case 'G':          case 'G':
1372          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1373          getstrings |= 1 << n;            {
1374              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1375              getstrings |= 1 << n;
1376              }
1377            else if (isalnum(*p))
1378              {
1379              uschar name[256];
1380              uschar *npp = name;
1381              while (isalnum(*p)) *npp++ = *p++;
1382              *npp = 0;
1383              n = pcre_get_stringnumber(re, (char *)name);
1384              if (n < 0)
1385                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1386              else getstrings |= 1 << n;
1387              }
1388          continue;          continue;
1389    
1390          case 'L':          case 'L':
1391          getlist = 1;          getlist = 1;
1392          continue;          continue;
1393    
1394            case 'M':
1395            find_match_limit = 1;
1396            continue;
1397    
1398          case 'N':          case 'N':
1399          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1400          continue;          continue;
# Line 1013  while (!done) Line 1405  while (!done)
1405            {            {
1406            size_offsets_max = n;            size_offsets_max = n;
1407            free(offsets);            free(offsets);
1408            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409            if (offsets == NULL)            if (offsets == NULL)
1410              {              {
1411              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1412                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1413              return 1;              yield = 1;
1414                goto EXIT;
1415              }              }
1416            }            }
1417          use_size_offsets = n;          use_size_offsets = n;
1418          if (n == 0) use_offsets = NULL;          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1419            continue;
1420    
1421            case 'P':
1422            options |= PCRE_PARTIAL;
1423            continue;
1424    
1425            case 'R':
1426            options |= PCRE_DFA_RESTART;
1427            continue;
1428    
1429            case 'S':
1430            show_malloc = 1;
1431          continue;          continue;
1432    
1433          case 'Z':          case 'Z':
1434          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1435          continue;          continue;
1436    
1437            case '?':
1438            options |= PCRE_NO_UTF8_CHECK;
1439            continue;
1440          }          }
1441        *q++ = c;        *q++ = c;
1442        }        }
1443      *q = 0;      *q = 0;
1444      len = q - dbuffer;      len = q - dbuffer;
1445    
1446        if ((all_use_dfa || use_dfa) && find_match_limit)
1447          {
1448          printf("**Match limit not relevant for DFA matching: ignored\n");
1449          find_match_limit = 0;
1450          }
1451    
1452      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1453      support timing. */      support timing or playing with the match limit or callout data. */
1454    
1455  #if !defined NOPOSIX  #if !defined NOPOSIX
1456      if (posix || do_posix)      if (posix || do_posix)
1457        {        {
1458        int rc;        int rc;
1459        int eflags = 0;        int eflags = 0;
1460        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1461          if (use_size_offsets > 0)
1462            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1463        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1464        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1465    
# Line 1050  while (!done) Line 1467  while (!done)
1467    
1468        if (rc != 0)        if (rc != 0)
1469          {          {
1470          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1471          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1472          }          }
1473        else        else
1474          {          {
1475          size_t i;          size_t i;
1476          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1477            {            {
1478            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1479              {              {
1480              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1481              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1482                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1483              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1484              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1485                {                {
1486                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1487                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1488                    outfile);
1489                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1490                }                }
1491              }              }
# Line 1088  while (!done) Line 1506  while (!done)
1506          register int i;          register int i;
1507          clock_t time_taken;          clock_t time_taken;
1508          clock_t start_time = clock();          clock_t start_time = clock();
1509    
1510            if (all_use_dfa || use_dfa)
1511              {
1512              int workspace[1000];
1513              for (i = 0; i < LOOPREPEAT; i++)
1514                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1515                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1516                  sizeof(workspace)/sizeof(int));
1517              }
1518            else
1519    
1520          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1521            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1522              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1523    
1524          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1525          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1526            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1527            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1528          }          }
1529    
1530        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1531          start_offset, options | g_notempty, use_offsets, use_size_offsets);        varying limits in order to find the minimum value. */
1532    
1533        if (count == 0)        if (find_match_limit)
1534          {          {
1535          fprintf(outfile, "Matched, but too many substrings\n");          int min = 0;
1536          count = use_size_offsets/3;          int mid = 64;
1537            int max = -1;
1538    
1539            if (extra == NULL)
1540              {
1541              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1542              extra->flags = 0;
1543              }
1544            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1545    
1546            for (;;)
1547              {
1548              extra->match_limit = mid;
1549              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1550                options | g_notempty, use_offsets, use_size_offsets);
1551              if (count == PCRE_ERROR_MATCHLIMIT)
1552                {
1553                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1554                min = mid;
1555                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1556                }
1557              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1558                                     count == PCRE_ERROR_PARTIAL)
1559                {
1560                if (mid == min + 1)
1561                  {
1562                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1563                  break;
1564                  }
1565                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1566                max = mid;
1567                mid = (min + mid)/2;
1568                }
1569              else break;    /* Some other error */
1570              }
1571    
1572            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1573            }
1574    
1575          /* If callout_data is set, use the interface with additional data */
1576    
1577          else if (callout_data_set)
1578            {
1579            if (extra == NULL)
1580              {
1581              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1582              extra->flags = 0;
1583              }
1584            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1585            extra->callout_data = &callout_data;
1586            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1587              options | g_notempty, use_offsets, use_size_offsets);
1588            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1589            }
1590    
1591          /* The normal case is just to do the match once, with the default
1592          value of match_limit. */
1593    
1594          else if (all_use_dfa || use_dfa)
1595            {
1596            int workspace[1000];
1597            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1598              options | g_notempty, use_offsets, use_size_offsets, workspace,
1599              sizeof(workspace)/sizeof(int));
1600            if (count == 0)
1601              {
1602              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1603              count = use_size_offsets/2;
1604              }
1605            }
1606    
1607          else
1608            {
1609            count = pcre_exec(re, extra, (char *)bptr, len,
1610              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1611            if (count == 0)
1612              {
1613              fprintf(outfile, "Matched, but too many substrings\n");
1614              count = use_size_offsets/3;
1615              }
1616          }          }
1617    
1618        /* Matched */        /* Matched */
# Line 1118  while (!done) Line 1627  while (!done)
1627            else            else
1628              {              {
1629              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1630              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1631                  use_offsets[i+1] - use_offsets[i], outfile);
1632              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1633              if (i == 0)              if (i == 0)
1634                {                {
1635                if (do_showrest)                if (do_showrest)
1636                  {                  {
1637                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1638                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1639                      outfile);
1640                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1641                  }                  }
1642                }                }
# Line 1183  while (!done) Line 1694  while (!done)
1694            }            }
1695          }          }
1696    
1697          /* There was a partial match */
1698    
1699          else if (count == PCRE_ERROR_PARTIAL)
1700            {
1701            fprintf(outfile, "Partial match");
1702            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1703              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1704                bptr + use_offsets[0]);
1705            fprintf(outfile, "\n");
1706            break;  /* Out of the /g loop */
1707            }
1708    
1709        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1710        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1711        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1712        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1713        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1714          that was checked before setting g_notempty. */
1715    
1716        else        else
1717          {          {
1718          if (g_notempty != 0)          if (g_notempty != 0)
1719            {            {
1720              int onechar = 1;
1721            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
1722            use_offsets[1] = start_offset + 1;            if (use_utf8)
1723                {
1724                while (start_offset + onechar < len)
1725                  {
1726                  int tb = bptr[start_offset+onechar];
1727                  if (tb <= 127) break;
1728                  tb &= 0xc0;
1729                  if (tb != 0 && tb != 0xc0) onechar++;
1730                  }
1731                }
1732              use_offsets[1] = start_offset + onechar;
1733            }            }
1734          else          else
1735            {            {
1736            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1737              {              {
1738              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1739              }              }
1740              else fprintf(outfile, "Error %d\n", count);
1741            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1742            }            }
1743          }          }
# Line 1245  while (!done) Line 1780  while (!done)
1780    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1781  #endif  #endif
1782    
1783    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1784    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1785    if (tables != NULL)    if (tables != NULL)
1786      {      {
1787      free((void *)tables);      new_free((void *)tables);
1788      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1789      }      }
1790    }    }
1791    
1792  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1793  return 0;  
1794    EXIT:
1795    
1796    if (infile != NULL && infile != stdin) fclose(infile);
1797    if (outfile != NULL && outfile != stdout) fclose(outfile);
1798    
1799    free(buffer);
1800    free(dbuffer);
1801    free(pbuffer);
1802    free(offsets);
1803    
1804    return yield;
1805  }  }
1806    
1807  /* End */  /* End of pcretest.c */

Legend:
Removed from v.57  
changed lines
  Added in v.77

  ViewVC Help
Powered by ViewVC 1.1.5