/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 55 by nigel, Sat Feb 24 21:39:46 2007 UTC revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49    /* We include pcre_internal.h because we need the internal info for displaying
50    the results of pcre_study() and we also need to know about the internal
51    macros, structures, and other internal data values; pcretest has "inside
52    information" compared to a program that strictly follows the PCRE API. */
53    
54    #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60  /* Use the internal info for displaying the results of pcre_study(). */  #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71    /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75    #include "pcre_printint.src"
76    
 #include "internal.h"  
77    
78  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
79  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 83  Makefile. */
83  #include "pcreposix.h"  #include "pcreposix.h"
84  #endif  #endif
85    
86    /* It is also possible, for the benefit of the version imported into Exim, to
87    build pcretest without support for UTF8 (define NOUTF8), without the interface
88    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89    function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
96  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 99  Makefile. */
99  #endif  #endif
100  #endif  #endif
101    
102  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
103    
104    #define BUFFER_SIZE 30000
105    #define PBUFFER_SIZE BUFFER_SIZE
106    #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
113    static int callout_count;
114    static int callout_extra;
115    static int callout_fail_count;
116    static int callout_fail_id;
117    static int first_callout;
118    static int show_malloc;
119    static int use_utf8;
120  static size_t gotten_store;  static size_t gotten_store;
121    
122    static uschar *pbuffer = NULL;
123    
124    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
125    
126  /*************************************************  /*************************************************
127  *       Convert character value to UTF-8         *  *          Read number from string               *
128  *************************************************/  *************************************************/
129    
130  /* This function takes an integer value in the range 0 - 0x7fffffff  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
131  and encodes it as a UTF-8 character in 0 to 6 bytes.  around with conditional compilation, just do the job by hand. It is only used
132    for unpicking the -o argument, so just keep it simple.
133    
134  Arguments:  Arguments:
135    cvalue     the character value    str           string to be converted
136    buffer     pointer to buffer for result - at least 6 bytes long    endptr        where to put the end pointer
137    
138  Returns:     number of characters placed in the buffer  Returns:        the unsigned long
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
139  */  */
140    
141  static int  static int
142  ord2utf8(int cvalue, unsigned char *buffer)  get_value(unsigned char *str, unsigned char **endptr)
143  {  {
144  register int i, j;  int result = 0;
145  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  while(*str != 0 && isspace(*str)) str++;
146    if (cvalue <= utf8_table1[i]) break;  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
147  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  *endptr = str;
148  if (cvalue < 0) return -1;  return(result);
 *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
 cvalue >>= 6 - i;  
 for (j = 0; j < i; j++)  
   {  
   *buffer++ = 0x80 | (cvalue & 0x3f);  
   cvalue >>= 6;  
   }  
 return i + 1;  
149  }  }
150    
151    
152    
153    
154  /*************************************************  /*************************************************
155  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
156  *************************************************/  *************************************************/
# Line 99  Returns:   >  0 => the number of bytes c Line 166  Returns:   >  0 => the number of bytes c
166             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
167  */  */
168    
169  int  #if !defined NOUTF8
170    
171    static int
172  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
173  {  {
174  int c = *buffer++;  int c = *buffer++;
# Line 117  if (i == 0 || i == 6) return 0;        / Line 186  if (i == 0 || i == 6) return 0;        /
186    
187  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
188    
189  d = c & utf8_table3[i];  s = 6*i;
190  s = 6 - i;  d = (c & utf8_table3[i]) << s;
191    
192  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
193    {    {
194    c = *buffer++;    c = *buffer++;
195    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
196      s -= 6;
197    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
198    }    }
199    
200  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
201    
202  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
203    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
204  if (j != i) return -(i+1);  if (j != i) return -(i+1);
205    
# Line 140  if (j != i) return -(i+1); Line 209  if (j != i) return -(i+1);
209  return i+1;  return i+1;
210  }  }
211    
212    #endif
213    
214    
215    
216    /*************************************************
217    *       Convert character value to UTF-8         *
218    *************************************************/
219    
220    /* This function takes an integer value in the range 0 - 0x7fffffff
221    and encodes it as a UTF-8 character in 0 to 6 bytes.
222    
223  /* Debugging function to print the internal form of the regex. This is the same  Arguments:
224  code as contained in pcre.c under the DEBUG macro. */    cvalue     the character value
225      buffer     pointer to buffer for result - at least 6 bytes long
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
226    
227    Returns:     number of characters placed in the buffer
228    */
229    
230  static void print_internals(pcre *re)  static int
231    ord2utf8(int cvalue, uschar *buffer)
232  {  {
233  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
234    for (i = 0; i < utf8_table1_size; i++)
235      if (cvalue <= utf8_table1[i]) break;
236    buffer += i;
237    for (j = i; j > 0; j--)
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);
240     cvalue >>= 6;
241     }
242    *buffer = utf8_table2[i] | cvalue;
243    return i + 1;
244    }
245    
 fprintf(outfile, "------------------------------------------------------------------\n");  
246    
 for(;;)  
   {  
   int c;  
   int charlength;  
247    
248    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /*************************************************
249    *             Print character string             *
250    *************************************************/
251    
252    if (*code >= OP_BRA)  /* Character string printing function. Must handle UTF-8 strings in utf8
253      {  mode. Yields number of characters printed. If handed a NULL file, just counts
254      if (*code - OP_BRA > EXTRACT_BASIC_MAX)  chars without printing. */
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
255    
256    else switch(*code)  static int pchars(unsigned char *p, int length, FILE *f)
257      {  {
258      case OP_END:  int c = 0;
259      fprintf(outfile, "    %s\n", OP_names[*code]);  int yield = 0;
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
260    
261        CLASS_REF_REPEAT:  while (length-- > 0)
262      {
263    #if !defined NOUTF8
264      if (use_utf8)
265        {
266        int rc = utf82ord(p, &c);
267    
268        switch(*code)      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
269          {
270          length -= rc - 1;
271          p += rc;
272          if (c < 256 && isprint(c))
273          {          {
274          case OP_CRSTAR:          if (f != NULL) fprintf(f, "%c", c);
275          case OP_CRMINSTAR:          yield++;
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
   
         default:  
         code--;  
276          }          }
277          else
278            {
279            int n;
280            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
281            yield += n;
282            }
283          continue;
284        }        }
285      break;      }
286    #endif
287    
288      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
289    
290      default:    if (isprint(c = *(p++)))
291      fprintf(outfile, "    %s", OP_names[*code]);      {
292      break;      if (f != NULL) fprintf(f, "%c", c);
293        yield++;
294        }
295      else
296        {
297        if (f != NULL) fprintf(f, "\\x%02x", c);
298        yield += 4;
299      }      }
   
   code++;  
   fprintf(outfile, "\n");  
300    }    }
301    
302    return yield;
303  }  }
304    
305    
306    
307  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
308    *              Callout function                  *
309    *************************************************/
310    
311    /* Called from PCRE as a result of the (?C) item. We print out where we are in
312    the match. Yield zero unless more callouts than the fail count, or the callout
313    data is not zero. */
314    
315  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
316  {  {
317  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
318  while (length-- > 0)  int i, pre_start, post_start, subject_length;
319    
320    if (callout_extra)
321    {    {
322    if (utf8)    fprintf(f, "Callout %d: last capture = %d\n",
323        cb->callout_number, cb->capture_last);
324    
325      for (i = 0; i < cb->capture_top * 2; i += 2)
326      {      {
327      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
328      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
329        else
330        {        {
331        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
332        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
333        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
334          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
335        }        }
336      }      }
337      }
338    
339     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
340    datails. On subsequent calls in the same match, we use pchars just to find the
341    printed lengths of the substrings. */
342    
343    if (f != NULL) fprintf(f, "--->");
344    
345    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
346    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347      cb->current_position - cb->start_match, f);
348    
349    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350    
351    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352      cb->subject_length - cb->current_position, f);
353    
354    if (f != NULL) fprintf(f, "\n");
355    
356    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  /* Always print appropriate indicators, with callout number if not already
357      else fprintf(outfile, "\\x%02x", c);  shown. For automatic callouts, show the pattern offset. */
358    
359    if (cb->callout_number == 255)
360      {
361      fprintf(outfile, "%+3d ", cb->pattern_position);
362      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
363      }
364    else
365      {
366      if (callout_extra) fprintf(outfile, "    ");
367        else fprintf(outfile, "%3d ", cb->callout_number);
368    }    }
369    
370    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371    fprintf(outfile, "^");
372    
373    if (post_start > 0)
374      {
375      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
376      fprintf(outfile, "^");
377      }
378    
379    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380      fprintf(outfile, " ");
381    
382    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383      pbuffer + cb->pattern_position);
384    
385    fprintf(outfile, "\n");
386    first_callout = 0;
387    
388    if (cb->callout_data != NULL)
389      {
390      int callout_data = *((int *)(cb->callout_data));
391      if (callout_data != 0)
392        {
393        fprintf(outfile, "Callout data = %d\n", callout_data);
394        return callout_data;
395        }
396      }
397    
398    return (cb->callout_number != callout_fail_id)? 0 :
399           (++callout_count >= callout_fail_count)? 1 : 0;
400  }  }
401    
402    
403    /*************************************************
404    *            Local malloc functions              *
405    *************************************************/
406    
407  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
408  compiled re. */  compiled re. */
409    
410  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
411  {  {
412    void *block = malloc(size);
413  gotten_store = size;  gotten_store = size;
414  if (log_store)  if (show_malloc)
415    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
416      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
417  return malloc(size);  }
418    
419    static void new_free(void *block)
420    {
421    if (show_malloc)
422      fprintf(outfile, "free             %p\n", block);
423    free(block);
424    }
425    
426    
427    /* For recursion malloc/free, to test stacking calls */
428    
429    static void *stack_malloc(size_t size)
430    {
431    void *block = malloc(size);
432    if (show_malloc)
433      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434    return block;
435  }  }
436    
437    static void stack_free(void *block)
438    {
439    if (show_malloc)
440      fprintf(outfile, "stack_free       %p\n", block);
441    free(block);
442    }
443    
444    
445    /*************************************************
446    *          Call pcre_fullinfo()                  *
447    *************************************************/
448    
449  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
450    
# Line 419  if ((rc = pcre_fullinfo(re, study, optio Line 457  if ((rc = pcre_fullinfo(re, study, optio
457    
458    
459    
460    /*************************************************
461    *         Byte flipping function                 *
462    *************************************************/
463    
464    static long int
465    byteflip(long int value, int n)
466    {
467    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
468    return ((value & 0x000000ff) << 24) |
469           ((value & 0x0000ff00) <<  8) |
470           ((value & 0x00ff0000) >>  8) |
471           ((value & 0xff000000) >> 24);
472    }
473    
474    
475    
476    
477    /*************************************************
478    *                Main Program                    *
479    *************************************************/
480    
481  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
482  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 435  int showinfo = 0; Line 493  int showinfo = 0;
493  int showstore = 0;  int showstore = 0;
494  int size_offsets = 45;  int size_offsets = 45;
495  int size_offsets_max;  int size_offsets_max;
496  int *offsets;  int *offsets = NULL;
497  #if !defined NOPOSIX  #if !defined NOPOSIX
498  int posix = 0;  int posix = 0;
499  #endif  #endif
500  int debug = 0;  int debug = 0;
501  int done = 0;  int done = 0;
502  unsigned char buffer[30000];  int all_use_dfa = 0;
503  unsigned char dbuffer[1024];  int yield = 0;
504    
505    unsigned char *buffer;
506    unsigned char *dbuffer;
507    
508  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
509    when I am debugging. */
510    
511    buffer = (unsigned char *)malloc(BUFFER_SIZE);
512    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
513    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
514    
515    /* The outfile variable is static so that new_malloc can use it. The _setmode()
516    stuff is some magic that I don't understand, but which apparently does good
517    things in Windows. It's related to line terminations.  */
518    
519    #if defined(_WIN32) || defined(WIN32)
520    _setmode( _fileno( stdout ), 0x8000 );
521    #endif  /* defined(_WIN32) || defined(WIN32) */
522    
523  outfile = stdout;  outfile = stdout;
524    
# Line 452  outfile = stdout; Line 526  outfile = stdout;
526    
527  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
528    {    {
529    char *endptr;    unsigned char *endptr;
530    
531    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
532      showstore = 1;      showstore = 1;
533    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
534    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
535    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
536    #if !defined NODFA
537      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
538    #endif
539    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
540        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
541            *endptr == 0))
542      {      {
543      op++;      op++;
544      argc--;      argc--;
# Line 468  while (argc > 1 && argv[op][0] == '-') Line 546  while (argc > 1 && argv[op][0] == '-')
546  #if !defined NOPOSIX  #if !defined NOPOSIX
547    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
548  #endif  #endif
549      else if (strcmp(argv[op], "-C") == 0)
550        {
551        int rc;
552        printf("PCRE version %s\n", pcre_version());
553        printf("Compiled with\n");
554        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
555        printf("  %sUTF-8 support\n", rc? "" : "No ");
556        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
557        printf("  %sUnicode properties support\n", rc? "" : "No ");
558        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
559        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
560        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
561        printf("  Internal link size = %d\n", rc);
562        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
563        printf("  POSIX malloc threshold = %d\n", rc);
564        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
565        printf("  Default match limit = %d\n", rc);
566        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
567        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
568        exit(0);
569        }
570    else    else
571      {      {
572      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
573      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
574      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
575             "  -i     show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n");
576    #if !defined NODFA
577        printf("  -dfa   force DFA matching for all subjects\n");
578    #endif
579        printf("  -i     show information about compiled pattern\n"
580               "  -m     output memory used information\n"
581             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
582  #if !defined NOPOSIX  #if !defined NOPOSIX
583      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
584  #endif  #endif
585      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
586             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
587      return 1;      yield = 1;
588        goto EXIT;
589      }      }
590    op++;    op++;
591    argc--;    argc--;
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 594  while (argc > 1 && argv[op][0] == '-')
594  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
595    
596  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
597  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
598  if (offsets == NULL)  if (offsets == NULL)
599    {    {
600    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
601      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
602    return 1;    yield = 1;
603      goto EXIT;
604    }    }
605    
606  /* Sort out the input and output files */  /* Sort out the input and output files */
607    
608  if (argc > 1)  if (argc > 1)
609    {    {
610    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
611    if (infile == NULL)    if (infile == NULL)
612      {      {
613      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
614      return 1;      yield = 1;
615        goto EXIT;
616      }      }
617    }    }
618    
619  if (argc > 2)  if (argc > 2)
620    {    {
621    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
622    if (outfile == NULL)    if (outfile == NULL)
623      {      {
624      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
625      return 1;      yield = 1;
626        goto EXIT;
627      }      }
628    }    }
629    
630  /* Set alternative malloc function */  /* Set alternative malloc function */
631    
632  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
633    pcre_free = new_free;
634    pcre_stack_malloc = stack_malloc;
635    pcre_stack_free = stack_free;
636    
637  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
638    
# Line 541  while (!done) Line 652  while (!done)
652    
653    const char *error;    const char *error;
654    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
655      unsigned char *to_file = NULL;
656    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
657      unsigned long int true_size, true_study_size = 0;
658      size_t size, regex_gotten_store;
659    int do_study = 0;    int do_study = 0;
660    int do_debug = debug;    int do_debug = debug;
661    int do_G = 0;    int do_G = 0;
662    int do_g = 0;    int do_g = 0;
663    int do_showinfo = showinfo;    int do_showinfo = showinfo;
664    int do_showrest = 0;    int do_showrest = 0;
665    int utf8 = 0;    int do_flip = 0;
666    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
667    
668      use_utf8 = 0;
669    
670    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
671    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
672    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
673      fflush(outfile);
674    
675    p = buffer;    p = buffer;
676    while (isspace(*p)) p++;    while (isspace(*p)) p++;
677    if (*p == 0) continue;    if (*p == 0) continue;
678    
679    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
680    complete, read more. */  
681      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
682        {
683        unsigned long int magic;
684        uschar sbuf[8];
685        FILE *f;
686    
687        p++;
688        pp = p + (int)strlen((char *)p);
689        while (isspace(pp[-1])) pp--;
690        *pp = 0;
691    
692        f = fopen((char *)p, "rb");
693        if (f == NULL)
694          {
695          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
696          continue;
697          }
698    
699        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
700    
701        true_size =
702          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
703        true_study_size =
704          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
705    
706        re = (real_pcre *)new_malloc(true_size);
707        regex_gotten_store = gotten_store;
708    
709        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
710    
711        magic = ((real_pcre *)re)->magic_number;
712        if (magic != MAGIC_NUMBER)
713          {
714          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
715            {
716            do_flip = 1;
717            }
718          else
719            {
720            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
721            fclose(f);
722            continue;
723            }
724          }
725    
726        fprintf(outfile, "Compiled regex%s loaded from %s\n",
727          do_flip? " (byte-inverted)" : "", p);
728    
729        /* Need to know if UTF-8 for printing data strings */
730    
731        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
732        use_utf8 = (options & PCRE_UTF8) != 0;
733    
734        /* Now see if there is any following study data */
735    
736        if (true_study_size != 0)
737          {
738          pcre_study_data *psd;
739    
740          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
741          extra->flags = PCRE_EXTRA_STUDY_DATA;
742    
743          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
744          extra->study_data = psd;
745    
746          if (fread(psd, 1, true_study_size, f) != true_study_size)
747            {
748            FAIL_READ:
749            fprintf(outfile, "Failed to read data from %s\n", p);
750            if (extra != NULL) new_free(extra);
751            if (re != NULL) new_free(re);
752            fclose(f);
753            continue;
754            }
755          fprintf(outfile, "Study data loaded from %s\n", p);
756          do_study = 1;     /* To get the data output if requested */
757          }
758        else fprintf(outfile, "No study data\n");
759    
760        fclose(f);
761        goto SHOW_INFO;
762        }
763    
764      /* In-line pattern (the usual case). Get the delimiter and seek the end of
765      the pattern; if it isn't complete, read more. */
766    
767    delimiter = *p++;    delimiter = *p++;
768    
# Line 582  while (!done) Line 784  while (!done)
784        }        }
785      if (*pp != 0) break;      if (*pp != 0) break;
786    
787      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
788      if (len < 256)      if (len < 256)
789        {        {
790        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 605  while (!done) Line 807  while (!done)
807    
808    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
809    
810    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
811      for callouts. */
812    
813    *pp++ = 0;    *pp++ = 0;
814      strcpy((char *)pbuffer, (char *)p);
815    
816    /* Look for options after final delimiter */    /* Look for options after final delimiter */
817    
# Line 619  while (!done) Line 823  while (!done)
823      {      {
824      switch (*pp++)      switch (*pp++)
825        {        {
826          case 'f': options |= PCRE_FIRSTLINE; break;
827        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
828        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
829        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 627  while (!done) Line 832  while (!done)
832    
833        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
834        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
835          case 'C': options |= PCRE_AUTO_CALLOUT; break;
836        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
837        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
838          case 'F': do_flip = 1; break;
839        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
840        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
841        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
842          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
843    
844  #if !defined NOPOSIX  #if !defined NOPOSIX
845        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 640  while (!done) Line 848  while (!done)
848        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
849        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
850        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
851        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
852          case '?': options |= PCRE_NO_UTF8_CHECK; break;
853    
854        case 'L':        case 'L':
855        ppp = pp;        ppp = pp;
856        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
857          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
858        *ppp = 0;        *ppp = 0;
859        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
860          {          {
# Line 655  while (!done) Line 865  while (!done)
865        pp = ppp;        pp = ppp;
866        break;        break;
867    
868        case '\n': case ' ': break;        case '>':
869          to_file = pp;
870          while (*pp != 0) pp++;
871          while (isspace(pp[-1])) pp--;
872          *pp = 0;
873          break;
874    
875          case '\r':                      /* So that it works in Windows */
876          case '\n':
877          case ' ':
878          break;
879    
880        default:        default:
881        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
882        goto SKIP_DATA;        goto SKIP_DATA;
# Line 671  while (!done) Line 892  while (!done)
892      {      {
893      int rc;      int rc;
894      int cflags = 0;      int cflags = 0;
895    
896      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
897      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
898        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
899      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
900    
901      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 903  while (!done)
903    
904      if (rc != 0)      if (rc != 0)
905        {        {
906        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
907        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
908        goto SKIP_DATA;        goto SKIP_DATA;
909        }        }
# Line 704  while (!done) Line 927  while (!done)
927          }          }
928        time_taken = clock() - start_time;        time_taken = clock() - start_time;
929        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
930          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
931          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
932        }        }
933    
934      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 721  while (!done) Line 944  while (!done)
944          {          {
945          for (;;)          for (;;)
946            {            {
947            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
948              {              {
949              done = 1;              done = 1;
950              goto CONTINUE;              goto CONTINUE;
# Line 739  while (!done) Line 962  while (!done)
962      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
963      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
964    
965        if (log_store)
966          fprintf(outfile, "Memory allocation (code space): %d\n",
967            (int)(gotten_store -
968                  sizeof(real_pcre) -
969                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
970    
971        /* Extract the size for possible writing before possibly flipping it,
972        and remember the store that was got. */
973    
974        true_size = ((real_pcre *)re)->size;
975        regex_gotten_store = gotten_store;
976    
977        /* If /S was present, study the regexp to generate additional info to
978        help with the matching. */
979    
980        if (do_study)
981          {
982          if (timeit)
983            {
984            register int i;
985            clock_t time_taken;
986            clock_t start_time = clock();
987            for (i = 0; i < LOOPREPEAT; i++)
988              extra = pcre_study(re, study_options, &error);
989            time_taken = clock() - start_time;
990            if (extra != NULL) free(extra);
991            fprintf(outfile, "  Study time %.3f milliseconds\n",
992              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
993                (double)CLOCKS_PER_SEC);
994            }
995          extra = pcre_study(re, study_options, &error);
996          if (error != NULL)
997            fprintf(outfile, "Failed to study: %s\n", error);
998          else if (extra != NULL)
999            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1000          }
1001    
1002        /* If the 'F' option was present, we flip the bytes of all the integer
1003        fields in the regex data block and the study block. This is to make it
1004        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1005        compiled on a different architecture. */
1006    
1007        if (do_flip)
1008          {
1009          real_pcre *rre = (real_pcre *)re;
1010          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1011          rre->size = byteflip(rre->size, sizeof(rre->size));
1012          rre->options = byteflip(rre->options, sizeof(rre->options));
1013          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1014          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1015          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1016          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1017          rre->name_table_offset = byteflip(rre->name_table_offset,
1018            sizeof(rre->name_table_offset));
1019          rre->name_entry_size = byteflip(rre->name_entry_size,
1020            sizeof(rre->name_entry_size));
1021          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1022    
1023          if (extra != NULL)
1024            {
1025            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1026            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1027            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1028            }
1029          }
1030    
1031        /* Extract information from the compiled data if required */
1032    
1033        SHOW_INFO:
1034    
1035      if (do_showinfo)      if (do_showinfo)
1036        {        {
1037        unsigned long int get_options;        unsigned long int get_options, all_options;
1038    #if !defined NOINFOCHECK
1039        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1040    #endif
1041        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1042        size_t size;        int nameentrysize, namecount;
1043          const uschar *nametable;
1044    
1045        if (do_debug) print_internals(re);        if (do_debug)
1046            {
1047            fprintf(outfile, "------------------------------------------------------------------\n");
1048            pcre_printint(re, outfile);
1049            }
1050    
1051        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1052        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1053        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1054        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1055        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1056        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1057          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1058          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1059          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1060    
1061    #if !defined NOINFOCHECK
1062        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1063        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1064          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 772  while (!done) Line 1076  while (!done)
1076            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1077              get_options, old_options);              get_options, old_options);
1078          }          }
1079    #endif
1080    
1081        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1082          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1083          size, gotten_store);          (int)size, (int)regex_gotten_store);
1084    
1085        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1086        if (backrefmax > 0)        if (backrefmax > 0)
1087          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1088    
1089          if (namecount > 0)
1090            {
1091            fprintf(outfile, "Named capturing subpatterns:\n");
1092            while (namecount-- > 0)
1093              {
1094              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1095                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1096                GET2(nametable, 0));
1097              nametable += nameentrysize;
1098              }
1099            }
1100    
1101          /* The NOPARTIAL bit is a private bit in the options, so we have
1102          to fish it out via our back door */
1103    
1104          all_options = ((real_pcre *)re)->options;
1105          if (do_flip)
1106            {
1107            all_options = byteflip(all_options, sizeof(all_options));
1108            }
1109    
1110          if ((all_options & PCRE_NOPARTIAL) != 0)
1111            fprintf(outfile, "Partial matching not supported\n");
1112    
1113        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1114          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1115            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1116            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1117            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1118            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1119              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1120            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1121            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1122            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1123            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1124            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1125              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1126    
1127        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1128          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 805  while (!done) Line 1137  while (!done)
1137          }          }
1138        else        else
1139          {          {
1140          if (isprint(first_char))          int ch = first_char & 255;
1141            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1142              "" : " (caseless)";
1143            if (isprint(ch))
1144              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1145          else          else
1146            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1147          }          }
1148    
1149        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 1152  while (!done)
1152          }          }
1153        else        else
1154          {          {
1155          if (isprint(need_char))          int ch = need_char & 255;
1156            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1157              "" : " (caseless)";
1158            if (isprint(ch))
1159              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1160          else          else
1161            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1162          }          }
       }  
1163    
1164      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1165      help with the matching. */        value, but it varies, depending on the computer architecture, and
1166          so messes up the test suite. (And with the /F option, it might be
1167          flipped.) */
1168    
1169      if (do_study)        if (do_study)
       {  
       if (timeit)  
1170          {          {
1171          register int i;          if (extra == NULL)
1172          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1173          clock_t start_time = clock();          else
1174          for (i = 0; i < LOOPREPEAT; i++)            {
1175            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1176          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1177          if (extra != NULL) free(extra);  
1178          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1179            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1180            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1181                {
1182                int i;
1183                int c = 24;
1184                fprintf(outfile, "Starting byte set: ");
1185                for (i = 0; i < 256; i++)
1186                  {
1187                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1188                    {
1189                    if (c > 75)
1190                      {
1191                      fprintf(outfile, "\n  ");
1192                      c = 2;
1193                      }
1194                    if (isprint(i) && i != ' ')
1195                      {
1196                      fprintf(outfile, "%c ", i);
1197                      c += 2;
1198                      }
1199                    else
1200                      {
1201                      fprintf(outfile, "\\x%02x ", i);
1202                      c += 5;
1203                      }
1204                    }
1205                  }
1206                fprintf(outfile, "\n");
1207                }
1208              }
1209          }          }
1210          }
1211    
1212        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1213        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1214          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1215    
1216        else if (do_showinfo)      if (to_file != NULL)
1217          {
1218          FILE *f = fopen((char *)to_file, "wb");
1219          if (f == NULL)
1220            {
1221            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1222            }
1223          else
1224          {          {
1225          uschar *start_bits = NULL;          uschar sbuf[8];
1226          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (true_size >> 24)  & 255;
1227          if (start_bits == NULL)          sbuf[1] = (true_size >> 16)  & 255;
1228            fprintf(outfile, "No starting character set\n");          sbuf[2] = (true_size >>  8)  & 255;
1229            sbuf[3] = (true_size)  & 255;
1230    
1231            sbuf[4] = (true_study_size >> 24)  & 255;
1232            sbuf[5] = (true_study_size >> 16)  & 255;
1233            sbuf[6] = (true_study_size >>  8)  & 255;
1234            sbuf[7] = (true_study_size)  & 255;
1235    
1236            if (fwrite(sbuf, 1, 8, f) < 8 ||
1237                fwrite(re, 1, true_size, f) < true_size)
1238              {
1239              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1240              }
1241          else          else
1242            {            {
1243            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1244            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1245              {              {
1246              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1247                    true_study_size)
1248                {                {
1249                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1250                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1251                }                }
1252                else fprintf(outfile, "Study data written to %s\n", to_file);
1253              }              }
           fprintf(outfile, "\n");  
1254            }            }
1255            fclose(f);
1256          }          }
1257    
1258          new_free(re);
1259          if (extra != NULL) new_free(extra);
1260          if (tables != NULL) new_free((void *)tables);
1261          continue;  /* With next regex */
1262        }        }
1263      }      }        /* End of non-POSIX compile */
1264    
1265    /* Read data lines and test them */    /* Read data lines and test them */
1266    
# Line 893  while (!done) Line 1268  while (!done)
1268      {      {
1269      unsigned char *q;      unsigned char *q;
1270      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
1271        int *use_offsets = offsets;
1272      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1273        int callout_data = 0;
1274        int callout_data_set = 0;
1275      int count, c;      int count, c;
1276      int copystrings = 0;      int copystrings = 0;
1277        int find_match_limit = 0;
1278      int getstrings = 0;      int getstrings = 0;
1279      int getlist = 0;      int getlist = 0;
1280      int gmatched = 0;      int gmatched = 0;
1281      int start_offset = 0;      int start_offset = 0;
1282      int g_notempty = 0;      int g_notempty = 0;
1283        int use_dfa = 0;
1284    
1285      options = 0;      options = 0;
1286    
1287        pcre_callout = callout;
1288        first_callout = 1;
1289        callout_extra = 0;
1290        callout_count = 0;
1291        callout_fail_count = 999999;
1292        callout_fail_id = -1;
1293        show_malloc = 0;
1294    
1295      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1296      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1297        {        {
1298        done = 1;        done = 1;
1299        goto CONTINUE;        goto CONTINUE;
# Line 925  while (!done) Line 1313  while (!done)
1313        {        {
1314        int i = 0;        int i = 0;
1315        int n = 0;        int n = 0;
1316    
1317        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1318          {          {
1319          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 947  while (!done) Line 1336  while (!done)
1336    
1337          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1338    
1339    #if !defined NOUTF8
1340          if (*p == '{')          if (*p == '{')
1341            {            {
1342            unsigned char *pt = p;            unsigned char *pt = p;
# Line 955  while (!done) Line 1345  while (!done)
1345              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1346            if (*pt == '}')            if (*pt == '}')
1347              {              {
1348              unsigned char buffer[8];              unsigned char buff8[8];
1349              int ii, utn;              int ii, utn;
1350              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1351              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1352              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1353              p = pt + 1;              p = pt + 1;
1354              break;              break;
1355              }              }
1356            /* Not correct form; fall through */            /* Not correct form; fall through */
1357            }            }
1358    #endif
1359    
1360          /* Ordinary \x */          /* Ordinary \x */
1361    
# Line 976  while (!done) Line 1367  while (!done)
1367            }            }
1368          break;          break;
1369    
1370          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1371          p--;          p--;
1372          continue;          continue;
1373    
1374            case '>':
1375            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1376            continue;
1377    
1378          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1379          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1380          continue;          continue;
# Line 989  while (!done) Line 1384  while (!done)
1384          continue;          continue;
1385    
1386          case 'C':          case 'C':
1387          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1388          copystrings |= 1 << n;            {
1389              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1390              copystrings |= 1 << n;
1391              }
1392            else if (isalnum(*p))
1393              {
1394              uschar name[256];
1395              uschar *npp = name;
1396              while (isalnum(*p)) *npp++ = *p++;
1397              *npp = 0;
1398              n = pcre_get_stringnumber(re, (char *)name);
1399              if (n < 0)
1400                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1401              else copystrings |= 1 << n;
1402              }
1403            else if (*p == '+')
1404              {
1405              callout_extra = 1;
1406              p++;
1407              }
1408            else if (*p == '-')
1409              {
1410              pcre_callout = NULL;
1411              p++;
1412              }
1413            else if (*p == '!')
1414              {
1415              callout_fail_id = 0;
1416              p++;
1417              while(isdigit(*p))
1418                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1419              callout_fail_count = 0;
1420              if (*p == '!')
1421                {
1422                p++;
1423                while(isdigit(*p))
1424                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1425                }
1426              }
1427            else if (*p == '*')
1428              {
1429              int sign = 1;
1430              callout_data = 0;
1431              if (*(++p) == '-') { sign = -1; p++; }
1432              while(isdigit(*p))
1433                callout_data = callout_data * 10 + *p++ - '0';
1434              callout_data *= sign;
1435              callout_data_set = 1;
1436              }
1437            continue;
1438    
1439    #if !defined NODFA
1440            case 'D':
1441    #if !defined NOPOSIX
1442            if (posix || do_posix)
1443              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1444            else
1445    #endif
1446              use_dfa = 1;
1447            continue;
1448    
1449            case 'F':
1450            options |= PCRE_DFA_SHORTEST;
1451          continue;          continue;
1452    #endif
1453    
1454          case 'G':          case 'G':
1455          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1456          getstrings |= 1 << n;            {
1457              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1458              getstrings |= 1 << n;
1459              }
1460            else if (isalnum(*p))
1461              {
1462              uschar name[256];
1463              uschar *npp = name;
1464              while (isalnum(*p)) *npp++ = *p++;
1465              *npp = 0;
1466              n = pcre_get_stringnumber(re, (char *)name);
1467              if (n < 0)
1468                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1469              else getstrings |= 1 << n;
1470              }
1471          continue;          continue;
1472    
1473          case 'L':          case 'L':
1474          getlist = 1;          getlist = 1;
1475          continue;          continue;
1476    
1477            case 'M':
1478            find_match_limit = 1;
1479            continue;
1480    
1481          case 'N':          case 'N':
1482          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1483          continue;          continue;
# Line 1010  while (!done) Line 1486  while (!done)
1486          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1487          if (n > size_offsets_max)          if (n > size_offsets_max)
1488            {            {
   
 if (offsets != NULL)  
   
           free(offsets);  
1489            size_offsets_max = n;            size_offsets_max = n;
1490            offsets = malloc(size_offsets_max * sizeof(int));            free(offsets);
1491              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1492            if (offsets == NULL)            if (offsets == NULL)
1493              {              {
1494              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1495                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1496              return 1;              yield = 1;
1497                goto EXIT;
1498              }              }
1499            }            }
1500          use_size_offsets = n;          use_size_offsets = n;
1501            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1502            continue;
1503    
1504  if (n == 0)          case 'P':
1505    {          options |= PCRE_PARTIAL;
1506    free(offsets);          continue;
1507    offsets = NULL;  
1508    size_offsets_max = 0;  #if !defined NODFA
1509    }          case 'R':
1510            options |= PCRE_DFA_RESTART;
1511            continue;
1512    #endif
1513    
1514            case 'S':
1515            show_malloc = 1;
1516          continue;          continue;
1517    
1518          case 'Z':          case 'Z':
1519          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1520          continue;          continue;
1521    
1522            case '?':
1523            options |= PCRE_NO_UTF8_CHECK;
1524            continue;
1525          }          }
1526        *q++ = c;        *q++ = c;
1527        }        }
1528      *q = 0;      *q = 0;
1529      len = q - dbuffer;      len = q - dbuffer;
1530    
1531        if ((all_use_dfa || use_dfa) && find_match_limit)
1532          {
1533          printf("**Match limit not relevant for DFA matching: ignored\n");
1534          find_match_limit = 0;
1535          }
1536    
1537      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1538      support timing. */      support timing or playing with the match limit or callout data. */
1539    
1540  #if !defined NOPOSIX  #if !defined NOPOSIX
1541      if (posix || do_posix)      if (posix || do_posix)
1542        {        {
1543        int rc;        int rc;
1544        int eflags = 0;        int eflags = 0;
1545        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1546          if (use_size_offsets > 0)
1547            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1548        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1549        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1550    
# Line 1059  if (n == 0) Line 1552  if (n == 0)
1552    
1553        if (rc != 0)        if (rc != 0)
1554          {          {
1555          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1556          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1557          }          }
1558        else        else
1559          {          {
1560          size_t i;          size_t i;
1561          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1562            {            {
1563            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1564              {              {
1565              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1566              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1567                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1568              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1569              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1570                {                {
1571                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1572                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1573                    outfile);
1574                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1575                }                }
1576              }              }
# Line 1097  if (n == 0) Line 1591  if (n == 0)
1591          register int i;          register int i;
1592          clock_t time_taken;          clock_t time_taken;
1593          clock_t start_time = clock();          clock_t start_time = clock();
1594    
1595    #if !defined NODFA
1596            if (all_use_dfa || use_dfa)
1597              {
1598              int workspace[1000];
1599              for (i = 0; i < LOOPREPEAT; i++)
1600                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1601                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1602                  sizeof(workspace)/sizeof(int));
1603              }
1604            else
1605    #endif
1606    
1607          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1608            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1609              start_offset, options | g_notempty, offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1610    
1611          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1612          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1613            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1614            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1615          }          }
1616    
1617        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1618          start_offset, options | g_notempty, offsets, use_size_offsets);        varying limits in order to find the minimum value. */
1619    
1620        if (count == 0)        if (find_match_limit)
1621          {          {
1622          fprintf(outfile, "Matched, but too many substrings\n");          int min = 0;
1623          count = use_size_offsets/3;          int mid = 64;
1624            int max = -1;
1625    
1626            if (extra == NULL)
1627              {
1628              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1629              extra->flags = 0;
1630              }
1631            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1632    
1633            for (;;)
1634              {
1635              extra->match_limit = mid;
1636              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1637                options | g_notempty, use_offsets, use_size_offsets);
1638              if (count == PCRE_ERROR_MATCHLIMIT)
1639                {
1640                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1641                min = mid;
1642                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1643                }
1644              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1645                                     count == PCRE_ERROR_PARTIAL)
1646                {
1647                if (mid == min + 1)
1648                  {
1649                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1650                  break;
1651                  }
1652                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1653                max = mid;
1654                mid = (min + mid)/2;
1655                }
1656              else break;    /* Some other error */
1657              }
1658    
1659            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1660            }
1661    
1662          /* If callout_data is set, use the interface with additional data */
1663    
1664          else if (callout_data_set)
1665            {
1666            if (extra == NULL)
1667              {
1668              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1669              extra->flags = 0;
1670              }
1671            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1672            extra->callout_data = &callout_data;
1673            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1674              options | g_notempty, use_offsets, use_size_offsets);
1675            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1676            }
1677    
1678          /* The normal case is just to do the match once, with the default
1679          value of match_limit. */
1680    
1681    #if !defined NODFA
1682          else if (all_use_dfa || use_dfa)
1683            {
1684            int workspace[1000];
1685            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1686              options | g_notempty, use_offsets, use_size_offsets, workspace,
1687              sizeof(workspace)/sizeof(int));
1688            if (count == 0)
1689              {
1690              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1691              count = use_size_offsets/2;
1692              }
1693            }
1694    #endif
1695    
1696          else
1697            {
1698            count = pcre_exec(re, extra, (char *)bptr, len,
1699              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1700            if (count == 0)
1701              {
1702              fprintf(outfile, "Matched, but too many substrings\n");
1703              count = use_size_offsets/3;
1704              }
1705          }          }
1706    
1707        /* Matched */        /* Matched */
# Line 1122  if (n == 0) Line 1711  if (n == 0)
1711          int i;          int i;
1712          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1713            {            {
1714            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1715              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1716            else            else
1717              {              {
1718              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1719              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1720                  use_offsets[i+1] - use_offsets[i], outfile);
1721              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1722              if (i == 0)              if (i == 0)
1723                {                {
1724                if (do_showrest)                if (do_showrest)
1725                  {                  {
1726                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1727                  pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1728                      outfile);
1729                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1730                  }                  }
1731                }                }
# Line 1146  if (n == 0) Line 1737  if (n == 0)
1737            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1738              {              {
1739              char copybuffer[16];              char copybuffer[16];
1740              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1741                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1742              if (rc < 0)              if (rc < 0)
1743                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 1160  if (n == 0) Line 1751  if (n == 0)
1751            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1752              {              {
1753              const char *substring;              const char *substring;
1754              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1755                i, &substring);                i, &substring);
1756              if (rc < 0)              if (rc < 0)
1757                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
# Line 1176  if (n == 0) Line 1767  if (n == 0)
1767          if (getlist)          if (getlist)
1768            {            {
1769            const char **stringlist;            const char **stringlist;
1770            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1771              &stringlist);              &stringlist);
1772            if (rc < 0)            if (rc < 0)
1773              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 1192  if (n == 0) Line 1783  if (n == 0)
1783            }            }
1784          }          }
1785    
1786          /* There was a partial match */
1787    
1788          else if (count == PCRE_ERROR_PARTIAL)
1789            {
1790            fprintf(outfile, "Partial match");
1791    #if !defined NODFA
1792            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1793              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1794                bptr + use_offsets[0]);
1795    #endif
1796            fprintf(outfile, "\n");
1797            break;  /* Out of the /g loop */
1798            }
1799    
1800        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1801        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1802        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1803        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1804        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1805          that was checked before setting g_notempty. */
1806    
1807        else        else
1808          {          {
1809          if (g_notempty != 0)          if (g_notempty != 0)
1810            {            {
1811            offsets[0] = start_offset;            int onechar = 1;
1812            offsets[1] = start_offset + 1;            use_offsets[0] = start_offset;
1813              if (use_utf8)
1814                {
1815                while (start_offset + onechar < len)
1816                  {
1817                  int tb = bptr[start_offset+onechar];
1818                  if (tb <= 127) break;
1819                  tb &= 0xc0;
1820                  if (tb != 0 && tb != 0xc0) onechar++;
1821                  }
1822                }
1823              use_offsets[1] = start_offset + onechar;
1824            }            }
1825          else          else
1826            {            {
1827            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1828              {              {
1829              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1830              }              }
1831              else fprintf(outfile, "Error %d\n", count);
1832            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1833            }            }
1834          }          }
# Line 1228  if (n == 0) Line 1845  if (n == 0)
1845        character. */        character. */
1846    
1847        g_notempty = 0;        g_notempty = 0;
1848        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1849          {          {
1850          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1851          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1852          }          }
1853    
1854        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1855    
1856        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1857    
1858        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1859    
1860        else        else
1861          {          {
1862          bptr += offsets[1];          bptr += use_offsets[1];
1863          len -= offsets[1];          len -= use_offsets[1];
1864          }          }
1865        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1866      }    /* End of loop for data lines */      }    /* End of loop for data lines */
# Line 1254  if (n == 0) Line 1871  if (n == 0)
1871    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1872  #endif  #endif
1873    
1874    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1875    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1876    if (tables != NULL)    if (tables != NULL)
1877      {      {
1878      free((void *)tables);      new_free((void *)tables);
1879      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1880      }      }
1881    }    }
1882    
1883  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1884  return 0;  
1885    EXIT:
1886    
1887    if (infile != NULL && infile != stdin) fclose(infile);
1888    if (outfile != NULL && outfile != stdout) fclose(outfile);
1889    
1890    free(buffer);
1891    free(dbuffer);
1892    free(pbuffer);
1893    free(offsets);
1894    
1895    return yield;
1896  }  }
1897    
1898  /* End */  /* End of pcretest.c */

Legend:
Removed from v.55  
changed lines
  Added in v.85

  ViewVC Help
Powered by ViewVC 1.1.5