/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 57 by nigel, Sat Feb 24 21:39:50 2007 UTC revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    
52    /* A number of things vary for Windows builds. Originally, pcretest opened its
53    input and output without "b"; then I was told that "b" was needed in some
54    environments, so it was added for release 5.0 to both the input and output. (It
55    makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71    #endif
72    
 /* Use the internal info for displaying the results of pcre_study(). */  
73    
74  #include "internal.h"  /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76    internal macros, structures, and other internal data values; pcretest has
77    "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84    #include "pcre_internal.h"
85    
86    /* We need access to the data tables that PCRE uses. So as not to have to keep
87    two copies, we include the source file here, changing the names of the external
88    symbols to prevent clashes. */
89    
90    #define _pcre_utf8_table1      utf8_table1
91    #define _pcre_utf8_table1_size utf8_table1_size
92    #define _pcre_utf8_table2      utf8_table2
93    #define _pcre_utf8_table3      utf8_table3
94    #define _pcre_utf8_table4      utf8_table4
95    #define _pcre_utt              utt
96    #define _pcre_utt_size         utt_size
97    #define _pcre_OP_lengths       OP_lengths
98    
99    #include "pcre_tables.c"
100    
101    /* We also need the pcre_printint() function for printing out compiled
102    patterns. This function is in a separate file so that it can be included in
103    pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111    #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
115    
116  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
117  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124    /* It is also possible, for the benefit of the version currently imported into
125    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126    interface to the DFA matcher (NODFA), and without the doublecheck of the old
127    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137    /* Other parameters */
138    
139  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
140  #ifdef CLK_TCK  #ifdef CLK_TCK
141  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 144  Makefile. */
144  #endif  #endif
145  #endif  #endif
146    
147  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
148    
149    #define LOOPREPEAT 500000
150    
151    /* Static variables */
152    
153  static FILE *outfile;  static FILE *outfile;
154  static int log_store = 0;  static int log_store = 0;
155    static int callout_count;
156    static int callout_extra;
157    static int callout_fail_count;
158    static int callout_fail_id;
159    static int debug_lengths;
160    static int first_callout;
161    static int locale_set = 0;
162    static int show_malloc;
163    static int use_utf8;
164  static size_t gotten_store;  static size_t gotten_store;
165    
166    /* The buffers grow automatically if very long input lines are encountered. */
167    
168    static int buffer_size = 50000;
169    static uschar *buffer = NULL;
170    static uschar *dbuffer = NULL;
171    static uschar *pbuffer = NULL;
172    
173    
174    
175    /*************************************************
176    *        Read or extend an input line            *
177    *************************************************/
178    
179    /* Input lines are read into buffer, but both patterns and data lines can be
180    continued over multiple input lines. In addition, if the buffer fills up, we
181    want to automatically expand it so as to be able to handle extremely large
182    lines that are needed for certain stress tests. When the input buffer is
183    expanded, the other two buffers must also be expanded likewise, and the
184    contents of pbuffer, which are a copy of the input for callouts, must be
185    preserved (for when expansion happens for a data line). This is not the most
186    efficient way of handling this, but hey, this is just a test program!
187    
188    Arguments:
189      f            the file to read
190      start        where in buffer to start (this *must* be within buffer)
191    
192    Returns:       pointer to the start of new data
193                   could be a copy of start, or could be moved
194                   NULL if no data read and EOF reached
195    */
196    
197    static uschar *
198    extend_inputline(FILE *f, uschar *start)
199    {
200    uschar *here = start;
201    
202    for (;;)
203      {
204      int rlen = buffer_size - (here - buffer);
205    
206      if (rlen > 1000)
207        {
208        int dlen;
209        if (fgets((char *)here, rlen,  f) == NULL)
210          return (here == start)? NULL : start;
211        dlen = (int)strlen((char *)here);
212        if (dlen > 0 && here[dlen - 1] == '\n') return start;
213        here += dlen;
214        }
215    
216      else
217        {
218        int new_buffer_size = 2*buffer_size;
219        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222    
223        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224          {
225          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226          exit(1);
227          }
228    
229        memcpy(new_buffer, buffer, buffer_size);
230        memcpy(new_pbuffer, pbuffer, buffer_size);
231    
232        buffer_size = new_buffer_size;
233    
234        start = new_buffer + (start - buffer);
235        here = new_buffer + (here - buffer);
236    
237        free(buffer);
238        free(dbuffer);
239        free(pbuffer);
240    
241        buffer = new_buffer;
242        dbuffer = new_dbuffer;
243        pbuffer = new_pbuffer;
244        }
245      }
246    
247    return NULL;  /* Control never gets here */
248    }
249    
250    
251    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
252    
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
253    
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
254    
255    
256  /*************************************************  /*************************************************
257  *       Convert character value to UTF-8         *  *          Read number from string               *
258  *************************************************/  *************************************************/
259    
260  /* This function takes an integer value in the range 0 - 0x7fffffff  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261  and encodes it as a UTF-8 character in 0 to 6 bytes.  around with conditional compilation, just do the job by hand. It is only used
262    for unpicking arguments, so just keep it simple.
263    
264  Arguments:  Arguments:
265    cvalue     the character value    str           string to be converted
266    buffer     pointer to buffer for result - at least 6 bytes long    endptr        where to put the end pointer
267    
268  Returns:     number of characters placed in the buffer  Returns:        the unsigned long
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
269  */  */
270    
271  static int  static int
272  ord2utf8(int cvalue, unsigned char *buffer)  get_value(unsigned char *str, unsigned char **endptr)
273  {  {
274  register int i, j;  int result = 0;
275  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  while(*str != 0 && isspace(*str)) str++;
276    if (cvalue <= utf8_table1[i]) break;  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
277  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  *endptr = str;
278  if (cvalue < 0) return -1;  return(result);
 *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
 cvalue >>= 6 - i;  
 for (j = 0; j < i; j++)  
   {  
   *buffer++ = 0x80 | (cvalue & 0x3f);  
   cvalue >>= 6;  
   }  
 return i + 1;  
279  }  }
280    
281    
282    
283    
284  /*************************************************  /*************************************************
285  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
286  *************************************************/  *************************************************/
# Line 92  return i + 1; Line 289  return i + 1;
289  and returns the value of the character.  and returns the value of the character.
290    
291  Argument:  Argument:
292    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
293    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
294    
295  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
296             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
297  */  */
298    
299  int  #if !defined NOUTF8
300  utf82ord(unsigned char *buffer, int *vptr)  
301    static int
302    utf82ord(unsigned char *utf8bytes, int *vptr)
303  {  {
304  int c = *buffer++;  int c = *utf8bytes++;
305  int d = c;  int d = c;
306  int i, j, s;  int i, j, s;
307    
# Line 117  if (i == 0 || i == 6) return 0;        / Line 316  if (i == 0 || i == 6) return 0;        /
316    
317  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
318    
319  d = c & utf8_table3[i];  s = 6*i;
320  s = 6 - i;  d = (c & utf8_table3[i]) << s;
321    
322  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
323    {    {
324    c = *buffer++;    c = *utf8bytes++;
325    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
326      s -= 6;
327    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
328    }    }
329    
330  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
331    
332  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
333    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
334  if (j != i) return -(i+1);  if (j != i) return -(i+1);
335    
# Line 140  if (j != i) return -(i+1); Line 339  if (j != i) return -(i+1);
339  return i+1;  return i+1;
340  }  }
341    
342    #endif
343    
344    
345    
346    /*************************************************
347    *       Convert character value to UTF-8         *
348    *************************************************/
349    
350    /* This function takes an integer value in the range 0 - 0x7fffffff
351    and encodes it as a UTF-8 character in 0 to 6 bytes.
352    
353  /* Debugging function to print the internal form of the regex. This is the same  Arguments:
354  code as contained in pcre.c under the DEBUG macro. */    cvalue     the character value
355      utf8bytes  pointer to buffer for result - at least 6 bytes long
356    
357  static const char *OP_names[] = {  Returns:     number of characters placed in the buffer
358    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  */
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
359    
360    #if !defined NOUTF8
361    
362  static void print_internals(pcre *re)  static int
363    ord2utf8(int cvalue, uschar *utf8bytes)
364  {  {
365  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
366    for (i = 0; i < utf8_table1_size; i++)
367  fprintf(outfile, "------------------------------------------------------------------\n");    if (cvalue <= utf8_table1[i]) break;
368    utf8bytes += i;
369    for (j = i; j > 0; j--)
370     {
371     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372     cvalue >>= 6;
373     }
374    *utf8bytes = utf8_table2[i] | cvalue;
375    return i + 1;
376    }
377    
378  for(;;)  #endif
   {  
   int c;  
   int charlength;  
379    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
380    
   if (*code >= OP_BRA)  
     {  
     if (*code - OP_BRA > EXTRACT_BASIC_MAX)  
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
381    
382    else switch(*code)  /*************************************************
383      {  *             Print character string             *
384      case OP_END:  *************************************************/
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
385    
386        CLASS_REF_REPEAT:  /* Character string printing function. Must handle UTF-8 strings in utf8
387    mode. Yields number of characters printed. If handed a NULL file, just counts
388    chars without printing. */
389    
390        switch(*code)  static int pchars(unsigned char *p, int length, FILE *f)
391          {  {
392          case OP_CRSTAR:  int c = 0;
393          case OP_CRMINSTAR:  int yield = 0;
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
394    
395          case OP_CRRANGE:  while (length-- > 0)
396          case OP_CRMINRANGE:    {
397          min = (code[1] << 8) + code[2];  #if !defined NOUTF8
398          max = (code[3] << 8) + code[4];    if (use_utf8)
399          if (max == 0) fprintf(outfile, "{%d,}", min);      {
400          else fprintf(outfile, "{%d,%d}", min, max);      int rc = utf82ord(p, &c);
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
401    
402          default:      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
403          code--;        {
404          length -= rc - 1;
405          p += rc;
406          if (PRINTHEX(c))
407            {
408            if (f != NULL) fprintf(f, "%c", c);
409            yield++;
410            }
411          else
412            {
413            int n = 4;
414            if (f != NULL) fprintf(f, "\\x{%02x}", c);
415            yield += (n <= 0x000000ff)? 2 :
416                     (n <= 0x00000fff)? 3 :
417                     (n <= 0x0000ffff)? 4 :
418                     (n <= 0x000fffff)? 5 : 6;
419          }          }
420          continue;
421        }        }
422      break;      }
423    #endif
424    
425      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
426    
427      default:    c = *p++;
428      fprintf(outfile, "    %s", OP_names[*code]);    if (PRINTHEX(c))
429      break;      {
430        if (f != NULL) fprintf(f, "%c", c);
431        yield++;
432        }
433      else
434        {
435        if (f != NULL) fprintf(f, "\\x%02x", c);
436        yield += 4;
437      }      }
   
   code++;  
   fprintf(outfile, "\n");  
438    }    }
439    
440    return yield;
441  }  }
442    
443    
444    
445  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
446    *              Callout function                  *
447    *************************************************/
448    
449    /* Called from PCRE as a result of the (?C) item. We print out where we are in
450    the match. Yield zero unless more callouts than the fail count, or the callout
451    data is not zero. */
452    
453  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
454  {  {
455  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
456  while (length-- > 0)  int i, pre_start, post_start, subject_length;
457    
458    if (callout_extra)
459    {    {
460    if (utf8)    fprintf(f, "Callout %d: last capture = %d\n",
461        cb->callout_number, cb->capture_last);
462    
463      for (i = 0; i < cb->capture_top * 2; i += 2)
464      {      {
465      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
466      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
467        else
468        {        {
469        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
470        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
472          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
473        }        }
474      }      }
475      }
476    
477     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
478    details. On subsequent calls in the same match, we use pchars just to find the
479    printed lengths of the substrings. */
480    
481    if (f != NULL) fprintf(f, "--->");
482    
483    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485      cb->current_position - cb->start_match, f);
486    
487    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488    
489    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490      cb->subject_length - cb->current_position, f);
491    
492    if (f != NULL) fprintf(f, "\n");
493    
494    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  /* Always print appropriate indicators, with callout number if not already
495      else fprintf(outfile, "\\x%02x", c);  shown. For automatic callouts, show the pattern offset. */
496    
497    if (cb->callout_number == 255)
498      {
499      fprintf(outfile, "%+3d ", cb->pattern_position);
500      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
501      }
502    else
503      {
504      if (callout_extra) fprintf(outfile, "    ");
505        else fprintf(outfile, "%3d ", cb->callout_number);
506      }
507    
508    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509    fprintf(outfile, "^");
510    
511    if (post_start > 0)
512      {
513      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514      fprintf(outfile, "^");
515      }
516    
517    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518      fprintf(outfile, " ");
519    
520    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521      pbuffer + cb->pattern_position);
522    
523    fprintf(outfile, "\n");
524    first_callout = 0;
525    
526    if (cb->callout_data != NULL)
527      {
528      int callout_data = *((int *)(cb->callout_data));
529      if (callout_data != 0)
530        {
531        fprintf(outfile, "Callout data = %d\n", callout_data);
532        return callout_data;
533        }
534    }    }
535    
536    return (cb->callout_number != callout_fail_id)? 0 :
537           (++callout_count >= callout_fail_count)? 1 : 0;
538  }  }
539    
540    
541    /*************************************************
542    *            Local malloc functions              *
543    *************************************************/
544    
545  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
546  compiled re. */  compiled re. */
547    
548  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
549  {  {
550    void *block = malloc(size);
551  gotten_store = size;  gotten_store = size;
552  if (log_store)  if (show_malloc)
553    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
554      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
 return malloc(size);  
555  }  }
556    
557    static void new_free(void *block)
558    {
559    if (show_malloc)
560      fprintf(outfile, "free             %p\n", block);
561    free(block);
562    }
563    
564    
565    /* For recursion malloc/free, to test stacking calls */
566    
567    static void *stack_malloc(size_t size)
568    {
569    void *block = malloc(size);
570    if (show_malloc)
571      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572    return block;
573    }
574    
575    static void stack_free(void *block)
576    {
577    if (show_malloc)
578      fprintf(outfile, "stack_free       %p\n", block);
579    free(block);
580    }
581    
582    
583    /*************************************************
584    *          Call pcre_fullinfo()                  *
585    *************************************************/
586    
587  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
588    
# Line 419  if ((rc = pcre_fullinfo(re, study, optio Line 595  if ((rc = pcre_fullinfo(re, study, optio
595    
596    
597    
598    /*************************************************
599    *         Byte flipping function                 *
600    *************************************************/
601    
602    static unsigned long int
603    byteflip(unsigned long int value, int n)
604    {
605    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
606    return ((value & 0x000000ff) << 24) |
607           ((value & 0x0000ff00) <<  8) |
608           ((value & 0x00ff0000) >>  8) |
609           ((value & 0xff000000) >> 24);
610    }
611    
612    
613    
614    
615    /*************************************************
616    *        Check match or recursion limit          *
617    *************************************************/
618    
619    static int
620    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621      int start_offset, int options, int *use_offsets, int use_size_offsets,
622      int flag, unsigned long int *limit, int errnumber, const char *msg)
623    {
624    int count;
625    int min = 0;
626    int mid = 64;
627    int max = -1;
628    
629    extra->flags |= flag;
630    
631    for (;;)
632      {
633      *limit = mid;
634    
635      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636        use_offsets, use_size_offsets);
637    
638      if (count == errnumber)
639        {
640        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641        min = mid;
642        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643        }
644    
645      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646                             count == PCRE_ERROR_PARTIAL)
647        {
648        if (mid == min + 1)
649          {
650          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651          break;
652          }
653        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654        max = mid;
655        mid = (min + mid)/2;
656        }
657      else break;    /* Some other error */
658      }
659    
660    extra->flags &= ~flag;
661    return count;
662    }
663    
664    
665    
666    /*************************************************
667    *         Case-independent strncmp() function    *
668    *************************************************/
669    
/* Compare at most n characters of two strings, ignoring case (as judged by
tolower()). Like strncmp(), the comparison stops at the first difference or
when the terminating zero of both strings has been compared, so equal strings
shorter than n are never read beyond their terminators.

Arguments:
  s         first string
  t         second string
  n         maximum number of characters to compare; a value that is zero
              or negative compares nothing and yields 0

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(const unsigned char *s, const unsigned char *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;      /* Both strings ended together: they are equal */
  }
return 0;
}
689    
690    
691    
692    /*************************************************
693    *         Check newline indicator                *
694    *************************************************/
695    
696    /* This is used both at compile and run-time to check for <xxx> escapes, where
697    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698    no match.
699    
700    Arguments:
701      p           points after the leading '<'
702      f           file for error message
703    
704    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
705    */
706    
707    static int
708    check_newline(uschar *p, FILE *f)
709    {
710    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715    fprintf(f, "Unknown newline type at: <%s\n", p);
716    return 0;
717    }
718    
719    
720    
721    /*************************************************
722    *             Usage function                     *
723    *************************************************/
724    
/* Write the command-line summary to stdout. The -dfa and -p lines are included
only when the corresponding feature has not been compiled out (NODFA and
NOPOSIX respectively). */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
750    
751    
752    
753    /*************************************************
754    *                Main Program                    *
755    *************************************************/
756    
757  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
758  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 431  int options = 0; Line 765  int options = 0;
765  int study_options = 0;  int study_options = 0;
766  int op = 1;  int op = 1;
767  int timeit = 0;  int timeit = 0;
768    int timeitm = 0;
769  int showinfo = 0;  int showinfo = 0;
770  int showstore = 0;  int showstore = 0;
771    int quiet = 0;
772  int size_offsets = 45;  int size_offsets = 45;
773  int size_offsets_max;  int size_offsets_max;
774  int *offsets;  int *offsets = NULL;
775  #if !defined NOPOSIX  #if !defined NOPOSIX
776  int posix = 0;  int posix = 0;
777  #endif  #endif
778  int debug = 0;  int debug = 0;
779  int done = 0;  int done = 0;
780  unsigned char buffer[30000];  int all_use_dfa = 0;
781  unsigned char dbuffer[1024];  int yield = 0;
782    int stack_size;
783    
784    /* These vectors store, end-to-end, a list of captured substring names. Assume
785    that 1024 is plenty long enough for the few names we'll be testing. */
786    
787  /* Static so that new_malloc can use it. */  uschar copynames[1024];
788    uschar getnames[1024];
789    
790    uschar *copynamesptr;
791    uschar *getnamesptr;
792    
793    /* Get buffers from malloc() so that Electric Fence will check their misuse
794    when I am debugging. They grow automatically when very long lines are read. */
795    
796    buffer = (unsigned char *)malloc(buffer_size);
797    dbuffer = (unsigned char *)malloc(buffer_size);
798    pbuffer = (unsigned char *)malloc(buffer_size);
799    
800    /* The outfile variable is static so that new_malloc can use it. */
801    
802  outfile = stdout;  outfile = stdout;
803    
804    /* The following  _setmode() stuff is some Windows magic that tells its runtime
805    library to translate CRLF into a single LF character. At least, that's what
806    I've been told: never having used Windows I take this all on trust. Originally
807    it set 0x8000, but then I was advised that _O_BINARY was better. */
808    
809    #if defined(_WIN32) || defined(WIN32)
810    _setmode( _fileno( stdout ), _O_BINARY );
811    #endif
812    
813  /* Scan options */  /* Scan options */
814    
815  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
816    {    {
817    char *endptr;    unsigned char *endptr;
818    
819    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
820      showstore = 1;      showstore = 1;
821    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
822      else if (strcmp(argv[op], "-b") == 0) debug = 1;
823    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
824    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
825    #if !defined NODFA
826      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
827    #endif
828    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
829        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
830            *endptr == 0))
831        {
832        op++;
833        argc--;
834        }
835      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
836        {
837        int both = argv[op][2] == 0;
838        int temp;
839        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
840                         *endptr == 0))
841          {
842          timeitm = temp;
843          op++;
844          argc--;
845          }
846        else timeitm = LOOPREPEAT;
847        if (both) timeit = timeitm;
848        }
849      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
850          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
851            *endptr == 0))
852      {      {
853    #if defined(_WIN32) || defined(WIN32)
854        printf("PCRE: -S not supported on this OS\n");
855        exit(1);
856    #else
857        int rc;
858        struct rlimit rlim;
859        getrlimit(RLIMIT_STACK, &rlim);
860        rlim.rlim_cur = stack_size * 1024 * 1024;
861        rc = setrlimit(RLIMIT_STACK, &rlim);
862        if (rc != 0)
863          {
864        printf("PCRE: setrlimit() failed with error %d\n", rc);
865        exit(1);
866          }
867      op++;      op++;
868      argc--;      argc--;
869    #endif
870      }      }
871  #if !defined NOPOSIX  #if !defined NOPOSIX
872    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
873  #endif  #endif
874      else if (strcmp(argv[op], "-C") == 0)
875        {
876        int rc;
877        printf("PCRE version %s\n", pcre_version());
878        printf("Compiled with\n");
879        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
880        printf("  %sUTF-8 support\n", rc? "" : "No ");
881        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
882        printf("  %sUnicode properties support\n", rc? "" : "No ");
883        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
885          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886          (rc == -2)? "ANYCRLF" :
887          (rc == -1)? "ANY" : "???");
888        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889        printf("  Internal link size = %d\n", rc);
890        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
891        printf("  POSIX malloc threshold = %d\n", rc);
892        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
893        printf("  Default match limit = %d\n", rc);
894        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
895        printf("  Default recursion depth limit = %d\n", rc);
896        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
898        goto EXIT;
899        }
900      else if (strcmp(argv[op], "-help") == 0 ||
901               strcmp(argv[op], "--help") == 0)
902        {
903        usage();
904        goto EXIT;
905        }
906    else    else
907      {      {
908      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
909      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
910      printf("  -d     debug: show compiled code; implies -i\n"      yield = 1;
911             "  -i     show information about compiled pattern\n"      goto EXIT;
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
912      }      }
913    op++;    op++;
914    argc--;    argc--;
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 917  while (argc > 1 && argv[op][0] == '-')
917  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
918    
919  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
920  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
921  if (offsets == NULL)  if (offsets == NULL)
922    {    {
923    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
924      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
925    return 1;    yield = 1;
926      goto EXIT;
927    }    }
928    
929  /* Sort out the input and output files */  /* Sort out the input and output files */
930    
931  if (argc > 1)  if (argc > 1)
932    {    {
933    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
934    if (infile == NULL)    if (infile == NULL)
935      {      {
936      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
937      return 1;      yield = 1;
938        goto EXIT;
939      }      }
940    }    }
941    
942  if (argc > 2)  if (argc > 2)
943    {    {
944    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
945    if (outfile == NULL)    if (outfile == NULL)
946      {      {
947      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
948      return 1;      yield = 1;
949        goto EXIT;
950      }      }
951    }    }
952    
953  /* Set alternative malloc function */  /* Set alternative malloc function */
954    
955  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
956    pcre_free = new_free;
957    pcre_stack_malloc = stack_malloc;
958    pcre_stack_free = stack_free;
959    
960  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
961    
962  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
963    
964  /* Main loop */  /* Main loop */
965    
# Line 541  while (!done) Line 975  while (!done)
975    
976    const char *error;    const char *error;
977    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
978      unsigned char *to_file = NULL;
979    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
980      unsigned long int true_size, true_study_size = 0;
981      size_t size, regex_gotten_store;
982    int do_study = 0;    int do_study = 0;
983    int do_debug = debug;    int do_debug = debug;
984    int do_G = 0;    int do_G = 0;
985    int do_g = 0;    int do_g = 0;
986    int do_showinfo = showinfo;    int do_showinfo = showinfo;
987    int do_showrest = 0;    int do_showrest = 0;
988    int utf8 = 0;    int do_flip = 0;
989    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
990    
991      use_utf8 = 0;
992      debug_lengths = 1;
993    
994    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
995    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
996    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
997      fflush(outfile);
998    
999    p = buffer;    p = buffer;
1000    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1001    if (*p == 0) continue;    if (*p == 0) continue;
1002    
1003    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1004    complete, read more. */  
1005      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1006        {
1007        unsigned long int magic, get_options;
1008        uschar sbuf[8];
1009        FILE *f;
1010    
1011        p++;
1012        pp = p + (int)strlen((char *)p);
1013        while (isspace(pp[-1])) pp--;
1014        *pp = 0;
1015    
1016        f = fopen((char *)p, "rb");
1017        if (f == NULL)
1018          {
1019          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1020          continue;
1021          }
1022    
1023        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1024    
1025        true_size =
1026          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1027        true_study_size =
1028          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1029    
1030        re = (real_pcre *)new_malloc(true_size);
1031        regex_gotten_store = gotten_store;
1032    
1033        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1034    
1035        magic = ((real_pcre *)re)->magic_number;
1036        if (magic != MAGIC_NUMBER)
1037          {
1038          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1039            {
1040            do_flip = 1;
1041            }
1042          else
1043            {
1044            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1045            fclose(f);
1046            continue;
1047            }
1048          }
1049    
1050        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1051          do_flip? " (byte-inverted)" : "", p);
1052    
1053        /* Need to know if UTF-8 for printing data strings */
1054    
1055        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1056        use_utf8 = (get_options & PCRE_UTF8) != 0;
1057    
1058        /* Now see if there is any following study data */
1059    
1060        if (true_study_size != 0)
1061          {
1062          pcre_study_data *psd;
1063    
1064          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1065          extra->flags = PCRE_EXTRA_STUDY_DATA;
1066    
1067          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1068          extra->study_data = psd;
1069    
1070          if (fread(psd, 1, true_study_size, f) != true_study_size)
1071            {
1072            FAIL_READ:
1073            fprintf(outfile, "Failed to read data from %s\n", p);
1074            if (extra != NULL) new_free(extra);
1075            if (re != NULL) new_free(re);
1076            fclose(f);
1077            continue;
1078            }
1079          fprintf(outfile, "Study data loaded from %s\n", p);
1080          do_study = 1;     /* To get the data output if requested */
1081          }
1082        else fprintf(outfile, "No study data\n");
1083    
1084        fclose(f);
1085        goto SHOW_INFO;
1086        }
1087    
1088      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1089      the pattern; if it isn't complete, read more. */
1090    
1091    delimiter = *p++;    delimiter = *p++;
1092    
# Line 571  while (!done) Line 1097  while (!done)
1097      }      }
1098    
1099    pp = p;    pp = p;
1100      poffset = p - buffer;
1101    
1102    for(;;)    for(;;)
1103      {      {
# Line 581  while (!done) Line 1108  while (!done)
1108        pp++;        pp++;
1109        }        }
1110      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1111      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1112      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1113        {        {
1114        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1115        done = 1;        done = 1;
# Line 599  while (!done) Line 1118  while (!done)
1118      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1119      }      }
1120    
1121      /* The buffer may have moved while being extended; reset the start of data
1122      pointer to the correct relative point in the buffer. */
1123    
1124      p = buffer + poffset;
1125    
1126    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1127    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1128    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1129    
1130    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1131    
1132    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1133      for callouts. */
1134    
1135    *pp++ = 0;    *pp++ = 0;
1136      strcpy((char *)pbuffer, (char *)p);
1137    
1138    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1139    
# Line 619  while (!done) Line 1145  while (!done)
1145      {      {
1146      switch (*pp++)      switch (*pp++)
1147        {        {
1148          case 'f': options |= PCRE_FIRSTLINE; break;
1149        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1150        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1151        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 627  while (!done) Line 1154  while (!done)
1154    
1155        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1156        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1157          case 'B': do_debug = 1; break;
1158          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1159        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1160        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1161          case 'F': do_flip = 1; break;
1162        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1163        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1164          case 'J': options |= PCRE_DUPNAMES; break;
1165        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1166          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1167    
1168  #if !defined NOPOSIX  #if !defined NOPOSIX
1169        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 640  while (!done) Line 1172  while (!done)
1172        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1173        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1174        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1175        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Z': debug_lengths = 0; break;
1176          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178    
1179        case 'L':        case 'L':
1180        ppp = pp;        ppp = pp;
1181        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1182          /* The '0' test is just in case this is an unterminated line. */
1183          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1184        *ppp = 0;        *ppp = 0;
1185        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1186          {          {
1187          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1188          goto SKIP_DATA;          goto SKIP_DATA;
1189          }          }
1190          locale_set = 1;
1191        tables = pcre_maketables();        tables = pcre_maketables();
1192        pp = ppp;        pp = ppp;
1193        break;        break;
1194    
1195        case '\n': case ' ': break;        case '>':
1196          to_file = pp;
1197          while (*pp != 0) pp++;
1198          while (isspace(pp[-1])) pp--;
1199          *pp = 0;
1200          break;
1201    
1202          case '<':
1203            {
1204            int x = check_newline(pp, outfile);
1205            if (x == 0) goto SKIP_DATA;
1206            options |= x;
1207            while (*pp++ != '>');
1208            }
1209          break;
1210    
1211          case '\r':                      /* So that it works in Windows */
1212          case '\n':
1213          case ' ':
1214          break;
1215    
1216        default:        default:
1217        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1218        goto SKIP_DATA;        goto SKIP_DATA;
# Line 671  while (!done) Line 1228  while (!done)
1228      {      {
1229      int rc;      int rc;
1230      int cflags = 0;      int cflags = 0;
1231    
1232      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1233      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1234        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1235        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1236        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1237    
1238      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1239    
1240      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 1242  while (!done)
1242    
1243      if (rc != 0)      if (rc != 0)
1244        {        {
1245        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1246        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1247        goto SKIP_DATA;        goto SKIP_DATA;
1248        }        }
# Line 692  while (!done) Line 1254  while (!done)
1254  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1255    
1256      {      {
1257      if (timeit)      if (timeit > 0)
1258        {        {
1259        register int i;        register int i;
1260        clock_t time_taken;        clock_t time_taken;
1261        clock_t start_time = clock();        clock_t start_time = clock();
1262        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1263          {          {
1264          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1265          if (re != NULL) free(re);          if (re != NULL) free(re);
1266          }          }
1267        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1268        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1269          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1270          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1271        }        }
1272    
1273      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 721  while (!done) Line 1283  while (!done)
1283          {          {
1284          for (;;)          for (;;)
1285            {            {
1286            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1287              {              {
1288              done = 1;              done = 1;
1289              goto CONTINUE;              goto CONTINUE;
# Line 739  while (!done) Line 1301  while (!done)
1301      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
1302      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
1303    
1304        if (log_store)
1305          fprintf(outfile, "Memory allocation (code space): %d\n",
1306            (int)(gotten_store -
1307                  sizeof(real_pcre) -
1308                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1309    
1310        /* Extract the size for possible writing before possibly flipping it,
1311        and remember the store that was got. */
1312    
1313        true_size = ((real_pcre *)re)->size;
1314        regex_gotten_store = gotten_store;
1315    
1316        /* If /S was present, study the regexp to generate additional info to
1317        help with the matching. */
1318    
1319        if (do_study)
1320          {
1321          if (timeit > 0)
1322            {
1323            register int i;
1324            clock_t time_taken;
1325            clock_t start_time = clock();
1326            for (i = 0; i < timeit; i++)
1327              extra = pcre_study(re, study_options, &error);
1328            time_taken = clock() - start_time;
1329            if (extra != NULL) free(extra);
1330            fprintf(outfile, "  Study time %.4f milliseconds\n",
1331              (((double)time_taken * 1000.0) / (double)timeit) /
1332                (double)CLOCKS_PER_SEC);
1333            }
1334          extra = pcre_study(re, study_options, &error);
1335          if (error != NULL)
1336            fprintf(outfile, "Failed to study: %s\n", error);
1337          else if (extra != NULL)
1338            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1339          }
1340    
1341        /* If the 'F' option was present, we flip the bytes of all the integer
1342        fields in the regex data block and the study block. This is to make it
1343        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1344        compiled on a different architecture. */
1345    
1346        if (do_flip)
1347          {
1348          real_pcre *rre = (real_pcre *)re;
1349          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1350          rre->size = byteflip(rre->size, sizeof(rre->size));
1351          rre->options = byteflip(rre->options, sizeof(rre->options));
1352          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1353          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1354          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1355          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1356          rre->name_table_offset = byteflip(rre->name_table_offset,
1357            sizeof(rre->name_table_offset));
1358          rre->name_entry_size = byteflip(rre->name_entry_size,
1359            sizeof(rre->name_entry_size));
1360          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1361    
1362          if (extra != NULL)
1363            {
1364            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1365            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1366            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1367            }
1368          }
1369    
1370        /* Extract information from the compiled data if required */
1371    
1372        SHOW_INFO:
1373    
1374        if (do_debug)
1375          {
1376          fprintf(outfile, "------------------------------------------------------------------\n");
1377          pcre_printint(re, outfile, debug_lengths);
1378          }
1379    
1380      if (do_showinfo)      if (do_showinfo)
1381        {        {
1382        unsigned long int get_options;        unsigned long int get_options, all_options;
1383    #if !defined NOINFOCHECK
1384        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1385        int count, backrefmax, first_char, need_char;  #endif
1386        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1387            hascrorlf;
1388        if (do_debug) print_internals(re);        int nameentrysize, namecount;
1389          const uschar *nametable;
1390    
1391        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1392        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1393        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1394        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1395        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1396        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1397          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1398          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1399          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1400          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1401          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1402          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1403    
1404    #if !defined NOINFOCHECK
1405        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1406        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1407          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 772  while (!done) Line 1419  while (!done)
1419            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1420              get_options, old_options);              get_options, old_options);
1421          }          }
1422    #endif
1423    
1424        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1425          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1426          size, gotten_store);          (int)size, (int)regex_gotten_store);
1427    
1428        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1429        if (backrefmax > 0)        if (backrefmax > 0)
1430          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1431    
1432          if (namecount > 0)
1433            {
1434            fprintf(outfile, "Named capturing subpatterns:\n");
1435            while (namecount-- > 0)
1436              {
1437              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1438                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1439                GET2(nametable, 0));
1440              nametable += nameentrysize;
1441              }
1442            }
1443    
1444          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1445          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1446    
1447          all_options = ((real_pcre *)re)->options;
1448          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1449    
1450        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1451          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1452            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1453            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1454            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1455            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1456              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1457            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1458            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1459            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1460            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1461            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1462              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1463              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1464              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1465    
1466          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1467    
1468          switch (get_options & PCRE_NEWLINE_BITS)
1469            {
1470            case PCRE_NEWLINE_CR:
1471            fprintf(outfile, "Forced newline sequence: CR\n");
1472            break;
1473    
1474            case PCRE_NEWLINE_LF:
1475            fprintf(outfile, "Forced newline sequence: LF\n");
1476            break;
1477    
1478            case PCRE_NEWLINE_CRLF:
1479            fprintf(outfile, "Forced newline sequence: CRLF\n");
1480            break;
1481    
1482            case PCRE_NEWLINE_ANYCRLF:
1483            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1484            break;
1485    
1486        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_ANY:
1487          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1488            break;
1489    
1490            default:
1491            break;
1492            }
1493    
1494        if (first_char == -1)        if (first_char == -1)
1495          {          {
1496          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1497          }          }
1498        else if (first_char < 0)        else if (first_char < 0)
1499          {          {
# Line 805  while (!done) Line 1501  while (!done)
1501          }          }
1502        else        else
1503          {          {
1504          if (isprint(first_char))          int ch = first_char & 255;
1505            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1506              "" : " (caseless)";
1507            if (PRINTHEX(ch))
1508              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1509          else          else
1510            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1511          }          }
1512    
1513        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 1516  while (!done)
1516          }          }
1517        else        else
1518          {          {
1519          if (isprint(need_char))          int ch = need_char & 255;
1520            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1521              "" : " (caseless)";
1522            if (PRINTHEX(ch))
1523              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1524          else          else
1525            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1526          }          }
       }  
1527    
1528      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1529      help with the matching. */        value, but it varies, depending on the computer architecture, and
1530          so messes up the test suite. (And with the /F option, it might be
1531          flipped.) */
1532    
1533      if (do_study)        if (do_study)
       {  
       if (timeit)  
1534          {          {
1535          register int i;          if (extra == NULL)
1536          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1537          clock_t start_time = clock();          else
1538          for (i = 0; i < LOOPREPEAT; i++)            {
1539            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1540          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1541          if (extra != NULL) free(extra);  
1542          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1543            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1544            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1545                {
1546                int i;
1547                int c = 24;
1548                fprintf(outfile, "Starting byte set: ");
1549                for (i = 0; i < 256; i++)
1550                  {
1551                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1552                    {
1553                    if (c > 75)
1554                      {
1555                      fprintf(outfile, "\n  ");
1556                      c = 2;
1557                      }
1558                    if (PRINTHEX(i) && i != ' ')
1559                      {
1560                      fprintf(outfile, "%c ", i);
1561                      c += 2;
1562                      }
1563                    else
1564                      {
1565                      fprintf(outfile, "\\x%02x ", i);
1566                      c += 5;
1567                      }
1568                    }
1569                  }
1570                fprintf(outfile, "\n");
1571                }
1572              }
1573          }          }
1574          }
1575    
1576        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1577        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1578          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1579    
1580        else if (do_showinfo)      if (to_file != NULL)
1581          {
1582          FILE *f = fopen((char *)to_file, "wb");
1583          if (f == NULL)
1584            {
1585            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1586            }
1587          else
1588          {          {
1589          uschar *start_bits = NULL;          uschar sbuf[8];
1590          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (true_size >> 24)  & 255;
1591          if (start_bits == NULL)          sbuf[1] = (true_size >> 16)  & 255;
1592            fprintf(outfile, "No starting character set\n");          sbuf[2] = (true_size >>  8)  & 255;
1593            sbuf[3] = (true_size)  & 255;
1594    
1595            sbuf[4] = (true_study_size >> 24)  & 255;
1596            sbuf[5] = (true_study_size >> 16)  & 255;
1597            sbuf[6] = (true_study_size >>  8)  & 255;
1598            sbuf[7] = (true_study_size)  & 255;
1599    
1600            if (fwrite(sbuf, 1, 8, f) < 8 ||
1601                fwrite(re, 1, true_size, f) < true_size)
1602              {
1603              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1604              }
1605          else          else
1606            {            {
1607            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1608            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1609              {              {
1610              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1611                    true_study_size)
1612                {                {
1613                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1614                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1615                }                }
1616                else fprintf(outfile, "Study data written to %s\n", to_file);
1617    
1618              }              }
           fprintf(outfile, "\n");  
1619            }            }
1620            fclose(f);
1621          }          }
1622    
1623          new_free(re);
1624          if (extra != NULL) new_free(extra);
1625          if (tables != NULL) new_free((void *)tables);
1626          continue;  /* With next regex */
1627        }        }
1628      }      }        /* End of non-POSIX compile */
1629    
1630    /* Read data lines and test them */    /* Read data lines and test them */
1631    
1632    for (;;)    for (;;)
1633      {      {
1634      unsigned char *q;      uschar *q;
1635      unsigned char *bptr = dbuffer;      uschar *bptr;
1636      int *use_offsets = offsets;      int *use_offsets = offsets;
1637      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1638        int callout_data = 0;
1639        int callout_data_set = 0;
1640      int count, c;      int count, c;
1641      int copystrings = 0;      int copystrings = 0;
1642        int find_match_limit = 0;
1643      int getstrings = 0;      int getstrings = 0;
1644      int getlist = 0;      int getlist = 0;
1645      int gmatched = 0;      int gmatched = 0;
1646      int start_offset = 0;      int start_offset = 0;
1647      int g_notempty = 0;      int g_notempty = 0;
1648        int use_dfa = 0;
1649    
1650      options = 0;      options = 0;
1651    
1652      if (infile == stdin) printf("data> ");      *copynames = 0;
1653      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1654    
1655        copynamesptr = copynames;
1656        getnamesptr = getnames;
1657    
1658        pcre_callout = callout;
1659        first_callout = 1;
1660        callout_extra = 0;
1661        callout_count = 0;
1662        callout_fail_count = 999999;
1663        callout_fail_id = -1;
1664        show_malloc = 0;
1665    
1666        if (extra != NULL) extra->flags &=
1667          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1668    
1669        len = 0;
1670        for (;;)
1671        {        {
1672        done = 1;        if (infile == stdin) printf("data> ");
1673        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1674            {
1675            if (len > 0) break;
1676            done = 1;
1677            goto CONTINUE;
1678            }
1679          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1680          len = (int)strlen((char *)buffer);
1681          if (buffer[len-1] == '\n') break;
1682        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1683    
     len = (int)strlen((char *)buffer);  
1684      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1685      buffer[len] = 0;      buffer[len] = 0;
1686      if (len == 0) break;      if (len == 0) break;
# Line 921  while (!done) Line 1688  while (!done)
1688      p = buffer;      p = buffer;
1689      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1690    
1691      q = dbuffer;      bptr = q = dbuffer;
1692      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1693        {        {
1694        int i = 0;        int i = 0;
1695        int n = 0;        int n = 0;
1696    
1697        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1698          {          {
1699          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 942  while (!done) Line 1710  while (!done)
1710          c -= '0';          c -= '0';
1711          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1712            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1713    
1714    #if !defined NOUTF8
1715            if (use_utf8 && c > 255)
1716              {
1717              unsigned char buff8[8];
1718              int ii, utn;
1719              utn = ord2utf8(c, buff8);
1720              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1721              c = buff8[ii];   /* Last byte */
1722              }
1723    #endif
1724          break;          break;
1725    
1726          case 'x':          case 'x':
1727    
1728          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1729    
1730    #if !defined NOUTF8
1731          if (*p == '{')          if (*p == '{')
1732            {            {
1733            unsigned char *pt = p;            unsigned char *pt = p;
# Line 956  while (!done) Line 1736  while (!done)
1736              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1737            if (*pt == '}')            if (*pt == '}')
1738              {              {
1739              unsigned char buffer[8];              unsigned char buff8[8];
1740              int ii, utn;              int ii, utn;
1741              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1742              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1743              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1744              p = pt + 1;              p = pt + 1;
1745              break;              break;
1746              }              }
1747            /* Not correct form; fall through */            /* Not correct form; fall through */
1748            }            }
1749    #endif
1750    
1751          /* Ordinary \x */          /* Ordinary \x */
1752    
# Line 977  while (!done) Line 1758  while (!done)
1758            }            }
1759          break;          break;
1760    
1761          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1762          p--;          p--;
1763          continue;          continue;
1764    
1765            case '>':
1766            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1767            continue;
1768    
1769          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1770          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1771          continue;          continue;
# Line 990  while (!done) Line 1775  while (!done)
1775          continue;          continue;
1776    
1777          case 'C':          case 'C':
1778          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1779          copystrings |= 1 << n;            {
1780              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1781              copystrings |= 1 << n;
1782              }
1783            else if (isalnum(*p))
1784              {
1785              uschar *npp = copynamesptr;
1786              while (isalnum(*p)) *npp++ = *p++;
1787              *npp++ = 0;
1788              *npp = 0;
1789              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1790              if (n < 0)
1791                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1792              copynamesptr = npp;
1793              }
1794            else if (*p == '+')
1795              {
1796              callout_extra = 1;
1797              p++;
1798              }
1799            else if (*p == '-')
1800              {
1801              pcre_callout = NULL;
1802              p++;
1803              }
1804            else if (*p == '!')
1805              {
1806              callout_fail_id = 0;
1807              p++;
1808              while(isdigit(*p))
1809                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1810              callout_fail_count = 0;
1811              if (*p == '!')
1812                {
1813                p++;
1814                while(isdigit(*p))
1815                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1816                }
1817              }
1818            else if (*p == '*')
1819              {
1820              int sign = 1;
1821              callout_data = 0;
1822              if (*(++p) == '-') { sign = -1; p++; }
1823              while(isdigit(*p))
1824                callout_data = callout_data * 10 + *p++ - '0';
1825              callout_data *= sign;
1826              callout_data_set = 1;
1827              }
1828            continue;
1829    
1830    #if !defined NODFA
1831            case 'D':
1832    #if !defined NOPOSIX
1833            if (posix || do_posix)
1834              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1835            else
1836    #endif
1837              use_dfa = 1;
1838            continue;
1839    
1840            case 'F':
1841            options |= PCRE_DFA_SHORTEST;
1842          continue;          continue;
1843    #endif
1844    
1845          case 'G':          case 'G':
1846          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1847          getstrings |= 1 << n;            {
1848              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1849              getstrings |= 1 << n;
1850              }
1851            else if (isalnum(*p))
1852              {
1853              uschar *npp = getnamesptr;
1854              while (isalnum(*p)) *npp++ = *p++;
1855              *npp++ = 0;
1856              *npp = 0;
1857              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1858              if (n < 0)
1859                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1860              getnamesptr = npp;
1861              }
1862          continue;          continue;
1863    
1864          case 'L':          case 'L':
1865          getlist = 1;          getlist = 1;
1866          continue;          continue;
1867    
1868            case 'M':
1869            find_match_limit = 1;
1870            continue;
1871    
1872          case 'N':          case 'N':
1873          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1874          continue;          continue;
# Line 1013  while (!done) Line 1879  while (!done)
1879            {            {
1880            size_offsets_max = n;            size_offsets_max = n;
1881            free(offsets);            free(offsets);
1882            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1883            if (offsets == NULL)            if (offsets == NULL)
1884              {              {
1885              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1886                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1887              return 1;              yield = 1;
1888                goto EXIT;
1889              }              }
1890            }            }
1891          use_size_offsets = n;          use_size_offsets = n;
1892          if (n == 0) use_offsets = NULL;          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1893            continue;
1894    
1895            case 'P':
1896            options |= PCRE_PARTIAL;
1897            continue;
1898    
1899            case 'Q':
1900            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1901            if (extra == NULL)
1902              {
1903              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1904              extra->flags = 0;
1905              }
1906            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1907            extra->match_limit_recursion = n;
1908            continue;
1909    
1910            case 'q':
1911            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1912            if (extra == NULL)
1913              {
1914              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1915              extra->flags = 0;
1916              }
1917            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1918            extra->match_limit = n;
1919            continue;
1920    
1921    #if !defined NODFA
1922            case 'R':
1923            options |= PCRE_DFA_RESTART;
1924            continue;
1925    #endif
1926    
1927            case 'S':
1928            show_malloc = 1;
1929          continue;          continue;
1930    
1931          case 'Z':          case 'Z':
1932          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1933          continue;          continue;
1934    
1935            case '?':
1936            options |= PCRE_NO_UTF8_CHECK;
1937            continue;
1938    
1939            case '<':
1940              {
1941              int x = check_newline(p, outfile);
1942              if (x == 0) goto NEXT_DATA;
1943              options |= x;
1944              while (*p++ != '>');
1945              }
1946            continue;
1947          }          }
1948        *q++ = c;        *q++ = c;
1949        }        }
1950      *q = 0;      *q = 0;
1951      len = q - dbuffer;      len = q - dbuffer;
1952    
1953        if ((all_use_dfa || use_dfa) && find_match_limit)
1954          {
1955          printf("**Match limit not relevant for DFA matching: ignored\n");
1956          find_match_limit = 0;
1957          }
1958    
1959      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1960      support timing. */      support timing or playing with the match limit or callout data. */
1961    
1962  #if !defined NOPOSIX  #if !defined NOPOSIX
1963      if (posix || do_posix)      if (posix || do_posix)
1964        {        {
1965        int rc;        int rc;
1966        int eflags = 0;        int eflags = 0;
1967        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1968          if (use_size_offsets > 0)
1969            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1970        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1971        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1972    
# Line 1050  while (!done) Line 1974  while (!done)
1974    
1975        if (rc != 0)        if (rc != 0)
1976          {          {
1977          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1978          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1979          }          }
1980          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1981                  != 0)
1982            {
1983            fprintf(outfile, "Matched with REG_NOSUB\n");
1984            }
1985        else        else
1986          {          {
1987          size_t i;          size_t i;
1988          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1989            {            {
1990            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1991              {              {
1992              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1993              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1994                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1995              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1996              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1997                {                {
1998                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1999                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2000                    outfile);
2001                fprintf(outfile, "\n");                fprintf(outfile, "\n");
2002                }                }
2003              }              }
# Line 1083  while (!done) Line 2013  while (!done)
2013    
2014      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2015        {        {
2016        if (timeit)        if (timeitm > 0)
2017          {          {
2018          register int i;          register int i;
2019          clock_t time_taken;          clock_t time_taken;
2020          clock_t start_time = clock();          clock_t start_time = clock();
2021          for (i = 0; i < LOOPREPEAT; i++)  
2022    #if !defined NODFA
2023            if (all_use_dfa || use_dfa)
2024              {
2025              int workspace[1000];
2026              for (i = 0; i < timeitm; i++)
2027                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2028                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2029                  sizeof(workspace)/sizeof(int));
2030              }
2031            else
2032    #endif
2033    
2034            for (i = 0; i < timeitm; i++)
2035            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2036              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2037    
2038          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2039          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2040            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2041            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2042            }
2043    
2044          /* If find_match_limit is set, we want to do repeated matches with
2045          varying limits in order to find the minimum value for the match limit and
2046          for the recursion limit. */
2047    
2048          if (find_match_limit)
2049            {
2050            if (extra == NULL)
2051              {
2052              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2053              extra->flags = 0;
2054              }
2055    
2056            (void)check_match_limit(re, extra, bptr, len, start_offset,
2057              options|g_notempty, use_offsets, use_size_offsets,
2058              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2059              PCRE_ERROR_MATCHLIMIT, "match()");
2060    
2061            count = check_match_limit(re, extra, bptr, len, start_offset,
2062              options|g_notempty, use_offsets, use_size_offsets,
2063              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2064              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2065          }          }
2066    
2067        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2068          start_offset, options | g_notempty, use_offsets, use_size_offsets);  
2069          else if (callout_data_set)
2070            {
2071            if (extra == NULL)
2072              {
2073              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2074              extra->flags = 0;
2075              }
2076            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2077            extra->callout_data = &callout_data;
2078            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2079              options | g_notempty, use_offsets, use_size_offsets);
2080            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2081            }
2082    
2083          /* The normal case is just to do the match once, with the default
2084          value of match_limit. */
2085    
2086    #if !defined NODFA
2087          else if (all_use_dfa || use_dfa)
2088            {
2089            int workspace[1000];
2090            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2091              options | g_notempty, use_offsets, use_size_offsets, workspace,
2092              sizeof(workspace)/sizeof(int));
2093            if (count == 0)
2094              {
2095              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2096              count = use_size_offsets/2;
2097              }
2098            }
2099    #endif
2100    
2101        if (count == 0)        else
2102          {          {
2103          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2104          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2105            if (count == 0)
2106              {
2107              fprintf(outfile, "Matched, but too many substrings\n");
2108              count = use_size_offsets/3;
2109              }
2110          }          }
2111    
2112        /* Matched */        /* Matched */
2113    
2114        if (count >= 0)        if (count >= 0)
2115          {          {
2116          int i;          int i, maxcount;
2117    
2118    #if !defined NODFA
2119            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2120    #endif
2121              maxcount = use_size_offsets/3;
2122    
2123            /* This is a check against a lunatic return value. */
2124    
2125            if (count > maxcount)
2126              {
2127              fprintf(outfile,
2128                "** PCRE error: returned count %d is too big for offset size %d\n",
2129                count, use_size_offsets);
2130              count = use_size_offsets/3;
2131              if (do_g || do_G)
2132                {
2133                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2134                do_g = do_G = FALSE;        /* Break g/G loop */
2135                }
2136              }
2137    
2138          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2139            {            {
2140            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1118  while (!done) Line 2142  while (!done)
2142            else            else
2143              {              {
2144              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2145              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
2146                  use_offsets[i+1] - use_offsets[i], outfile);
2147              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2148              if (i == 0)              if (i == 0)
2149                {                {
2150                if (do_showrest)                if (do_showrest)
2151                  {                  {
2152                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2153                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2154                      outfile);
2155                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2156                  }                  }
2157                }                }
# Line 1136  while (!done) Line 2162  while (!done)
2162            {            {
2163            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2164              {              {
2165              char copybuffer[16];              char copybuffer[256];
2166              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2167                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2168              if (rc < 0)              if (rc < 0)
# Line 1146  while (!done) Line 2172  while (!done)
2172              }              }
2173            }            }
2174    
2175            for (copynamesptr = copynames;
2176                 *copynamesptr != 0;
2177                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2178              {
2179              char copybuffer[256];
2180              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2181                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2182              if (rc < 0)
2183                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2184              else
2185                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2186              }
2187    
2188          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2189            {            {
2190            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1158  while (!done) Line 2197  while (!done)
2197              else              else
2198                {                {
2199                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2200                pcre_free_substring(substring);                pcre_free_substring(substring);
2201                }                }
2202              }              }
2203            }            }
2204    
2205            for (getnamesptr = getnames;
2206                 *getnamesptr != 0;
2207                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2208              {
2209              const char *substring;
2210              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2211                count, (char *)getnamesptr, &substring);
2212              if (rc < 0)
2213                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2214              else
2215                {
2216                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2217                pcre_free_substring(substring);
2218                }
2219              }
2220    
2221          if (getlist)          if (getlist)
2222            {            {
2223            const char **stringlist;            const char **stringlist;
# Line 1183  while (!done) Line 2237  while (!done)
2237            }            }
2238          }          }
2239    
2240          /* There was a partial match */
2241    
2242          else if (count == PCRE_ERROR_PARTIAL)
2243            {
2244            fprintf(outfile, "Partial match");
2245    #if !defined NODFA
2246            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2247              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2248                bptr + use_offsets[0]);
2249    #endif
2250            fprintf(outfile, "\n");
2251            break;  /* Out of the /g loop */
2252            }
2253    
2254        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2255        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2256        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2257        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2258        was checked before setting g_notempty. */  
2259          Complication arises in the case when the newline option is "any" or
2260          "anycrlf". If the previous match was at the end of a line terminated by
2261          CRLF, an advance of one character just passes the \r, whereas we should
2262          prefer the longer newline sequence, as does the code in pcre_exec().
2263          Fudge the offset value to achieve this.
2264    
2265          Otherwise, in the case of UTF-8 matching, the advance must be one
2266          character, not one byte. */
2267    
2268        else        else
2269          {          {
2270          if (g_notempty != 0)          if (g_notempty != 0)
2271            {            {
2272              int onechar = 1;
2273              unsigned int obits = ((real_pcre *)re)->options;
2274            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2275            use_offsets[1] = start_offset + 1;            if ((obits & PCRE_NEWLINE_BITS) == 0)
2276                {
2277                int d;
2278                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2279                obits = (d == '\r')? PCRE_NEWLINE_CR :
2280                        (d == '\n')? PCRE_NEWLINE_LF :
2281                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2282                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2283                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2284                }
2285              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2286                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2287                  &&
2288                  start_offset < len - 1 &&
2289                  bptr[start_offset] == '\r' &&
2290                  bptr[start_offset+1] == '\n')
2291                onechar++;
2292              else if (use_utf8)
2293                {
2294                while (start_offset + onechar < len)
2295                  {
2296                  int tb = bptr[start_offset+onechar];
2297                  if (tb <= 127) break;
2298                  tb &= 0xc0;
2299                  if (tb != 0 && tb != 0xc0) onechar++;
2300                  }
2301                }
2302              use_offsets[1] = start_offset + onechar;
2303            }            }
2304          else          else
2305            {            {
2306            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2307              {              {
2308              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2309              }              }
2310              else fprintf(outfile, "Error %d\n", count);
2311            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2312            }            }
2313          }          }
# Line 1219  while (!done) Line 2324  while (!done)
2324        character. */        character. */
2325    
2326        g_notempty = 0;        g_notempty = 0;
2327    
2328        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2329          {          {
2330          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1237  while (!done) Line 2343  while (!done)
2343          len -= use_offsets[1];          len -= use_offsets[1];
2344          }          }
2345        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2346    
2347        NEXT_DATA: continue;
2348      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2349    
2350    CONTINUE:    CONTINUE:
# Line 1245  while (!done) Line 2353  while (!done)
2353    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2354  #endif  #endif
2355    
2356    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2357    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2358    if (tables != NULL)    if (tables != NULL)
2359      {      {
2360      free((void *)tables);      new_free((void *)tables);
2361      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2362        locale_set = 0;
2363      }      }
2364    }    }
2365    
2366  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2367  return 0;  
2368    EXIT:
2369    
2370    if (infile != NULL && infile != stdin) fclose(infile);
2371    if (outfile != NULL && outfile != stdout) fclose(outfile);
2372    
2373    free(buffer);
2374    free(dbuffer);
2375    free(pbuffer);
2376    free(offsets);
2377    
2378    return yield;
2379  }  }
2380    
2381  /* End */  /* End of pcretest.c */

Legend:
Removed from v.57  
changed lines
  Added in v.227

  ViewVC Help
Powered by ViewVC 1.1.5