/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 376 by ph10, Sun Mar 1 12:00:59 2009 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48    #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* Use the internal info for displaying the results of pcre_study(). */  /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74  #include "internal.h"  #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85    /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95    #include "pcre_internal.h"
96    
97    /* We need access to some of the data tables that PCRE uses. So as not to have
98    to keep two copies, we include the source file here, changing the names of the
99    external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102    #define _pcre_utf8_table1      utf8_table1
103    #define _pcre_utf8_table1_size utf8_table1_size
104    #define _pcre_utf8_table2      utf8_table2
105    #define _pcre_utf8_table3      utf8_table3
106    #define _pcre_utf8_table4      utf8_table4
107    #define _pcre_utt              utt
108    #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110    #define _pcre_OP_lengths       OP_lengths
111    
112    #include "pcre_tables.c"
113    
114    /* We also need the pcre_printint() function for printing out compiled
115    patterns. This function is in a separate file so that it can be included in
116    pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in this file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124    #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
128    
129    /* It is possible to compile this test program without including support for
130    testing the POSIX interface, though this is not available via the standard
131    Makefile. */
132    
133    #if !defined NOPOSIX
134  #include "pcreposix.h"  #include "pcreposix.h"
135    #endif
136    
137    /* It is also possible, for the benefit of the version currently imported into
138    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139    interface to the DFA matcher (NODFA), and without the doublecheck of the old
140    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150    /* Other parameters */
151    
152  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
153  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 157 
157  #endif  #endif
158  #endif  #endif
159    
160  #define LOOPREPEAT 10000  /* This is the default loop count for timing. */
161    
162    #define LOOPREPEAT 500000
163    
164    /* Static variables */
165    
166  static FILE *outfile;  static FILE *outfile;
167  static int log_store = 0;  static int log_store = 0;
168    static int callout_count;
169    static int callout_extra;
170    static int callout_fail_count;
171    static int callout_fail_id;
172    static int debug_lengths;
173    static int first_callout;
174    static int locale_set = 0;
175    static int show_malloc;
176    static int use_utf8;
177    static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184    static uschar *pbuffer = NULL;
185    
186    
187    
188    /*************************************************
189    *        Read or extend an input line            *
190    *************************************************/
191    
192  /* Debugging function to print the internal form of the regex. This is the same  /* Input lines are read into buffer, but both patterns and data lines can be
193  code as contained in pcre.c under the DEBUG macro. */  continued over multiple input lines. In addition, if the buffer fills up, we
194    want to automatically expand it so as to be able to handle extremely large
195    lines that are needed for certain stress tests. When the input buffer is
196    expanded, the other two buffers must also be expanded likewise, and the
197    contents of pbuffer, which are a copy of the input for callouts, must be
198    preserved (for when expansion happens for a data line). This is not the most
199    optimal way of handling this, but hey, this is just a test program!
200    
201    Arguments:
202      f            the file to read
203      start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206    Returns:       pointer to the start of new data
207                   could be a copy of start, or could be moved
208                   NULL if no data read and EOF reached
209    */
210    
211  static const char *OP_names[] = {  static uschar *
212    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  extend_inputline(FILE *f, uschar *start, const char *prompt)
213    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  {
214    "Opt", "^", "$", "Any", "chars", "not",  uschar *here = start;
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
   
   
 static void print_internals(pcre *re, FILE *outfile)  
 {  
 unsigned char *code = ((real_pcre *)re)->code;  
   
 fprintf(outfile, "------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
215    
216          case OP_CRRANGE:  for (;;)
217          case OP_CRMINRANGE:    {
218          min = (code[1] << 8) + code[2];    int rlen = buffer_size - (here - buffer);
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
219    
220          default:    if (rlen > 1000)
221          code--;      {
222          }      int dlen;
223    
224        /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241        }        }
242      break;      else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247      /* Anything else is just a one-node item */        {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253      default:      dlen = (int)strlen((char *)here);
254      fprintf(outfile, "    %s", OP_names[*code]);      if (dlen > 0 && here[dlen - 1] == '\n') return start;
255      break;      here += dlen;
256      }      }
257    
258    code++;    else
259    fprintf(outfile, "\n");      {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269          }
270    
271        memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274        buffer_size = new_buffer_size;
275    
276        start = new_buffer + (start - buffer);
277        here = new_buffer + (here - buffer);
278    
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286        }
287    }    }
288    
289    return NULL;  /* Control never gets here */
290  }  }
291    
292    
293    
 /* Character string printing function. */  
294    
295  static void pchars(unsigned char *p, int length)  
296    
297    
298    /*************************************************
299    *          Read number from string               *
300    *************************************************/
301    
302    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303    around with conditional compilation, just do the job by hand. It is only used
304    for unpicking arguments, so just keep it simple.
305    
306    Arguments:
307      str           string to be converted
308      endptr        where to put the end pointer
309    
310    Returns:        the converted value, as an int
311    */
312    
/* Skips leading white space, then converts a run of decimal digits. *endptr is
set to the first character that was not consumed. A digit string too large for
an int yields the largest int value instead of overflowing, which would be
undefined behaviour for a signed type. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest int, computed here because this file does not include <limits.h>
directly. */
const int int_max = (int)(~0u >> 1);
int result = 0;

while (isspace(*str)) str++;      /* isspace(0) is false, so this stops at NUL */

while (isdigit(*str))
  {
  int digit = *str++ - '0';
  if (result > (int_max - digit)/10) result = int_max;   /* saturate */
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
322    
323    
324    
325    
326    /*************************************************
327    *            Convert UTF-8 string to value       *
328    *************************************************/
329    
330    /* This function takes one or more bytes that represents a UTF-8 character,
331    and returns the value of the character.
332    
333    Argument:
334      utf8bytes   a pointer to the byte vector
335      vptr        a pointer to an int to receive the value
336    
337    Returns:      >  0 => the number of bytes consumed
338                  -6 to 0 => malformed UTF-8 character at offset = (-return)
339    */
340    
341    #if !defined NOUTF8
342    
343    static int
344    utf82ord(unsigned char *utf8bytes, int *vptr)
345    {
346    int c = *utf8bytes++;
347    int d = c;
348    int i, j, s;
349    
350    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
351      {
352      if ((d & 0x80) == 0) break;
353      d <<= 1;
354      }
355    
356    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
357    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
358    
359    /* i now has a value in the range 1-5 */
360    
361    s = 6*i;
362    d = (c & utf8_table3[i]) << s;
363    
364    for (j = 0; j < i; j++)
365      {
366      c = *utf8bytes++;
367      if ((c & 0xc0) != 0x80) return -(j+1);
368      s -= 6;
369      d |= (c & 0x3f) << s;
370      }
371    
372    /* Check that encoding was the correct unique one */
373    
374    for (j = 0; j < utf8_table1_size; j++)
375      if (d <= utf8_table1[j]) break;
376    if (j != i) return -(i+1);
377    
378    /* Valid value */
379    
380    *vptr = d;
381    return i+1;
382    }
383    
384    #endif
385    
386    
387    
388    /*************************************************
389    *       Convert character value to UTF-8         *
390    *************************************************/
391    
392    /* This function takes an integer value in the range 0 - 0x7fffffff
393    and encodes it as a UTF-8 character in 1 to 6 bytes.
394    
395    Arguments:
396      cvalue     the character value
397      utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399    Returns:     number of bytes placed in the buffer
400    */
401    
402    #if !defined NOUTF8
403    
404    static int
405    ord2utf8(int cvalue, uschar *utf8bytes)
406    {
407    register int i, j;
408    for (i = 0; i < utf8_table1_size; i++)
409      if (cvalue <= utf8_table1[i]) break;
410    utf8bytes += i;
411    for (j = i; j > 0; j--)
412     {
413     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414     cvalue >>= 6;
415     }
416    *utf8bytes = utf8_table2[i] | cvalue;
417    return i + 1;
418    }
419    
420    #endif
421    
422    
423    
424    /*************************************************
425    *             Print character string             *
426    *************************************************/
427    
428    /* Character string printing function. Must handle UTF-8 strings in utf8
429    mode. Yields number of characters printed. If handed a NULL file, just counts
430    chars without printing. */
431    
432    static int pchars(unsigned char *p, int length, FILE *f)
433    {
434    int c = 0;
435    int yield = 0;
436    
437  while (length-- > 0)  while (length-- > 0)
438    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
439      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
440      if (use_utf8)
441        {
442        int rc = utf82ord(p, &c);
443    
444        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
445          {
446          length -= rc - 1;
447          p += rc;
448          if (PRINTHEX(c))
449            {
450            if (f != NULL) fprintf(f, "%c", c);
451            yield++;
452            }
453          else
454            {
455            int n = 4;
456            if (f != NULL) fprintf(f, "\\x{%02x}", c);
457            yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461            }
462          continue;
463          }
464        }
465    #endif
466    
467       /* Not UTF-8, or malformed UTF-8  */
468    
469      c = *p++;
470      if (PRINTHEX(c))
471        {
472        if (f != NULL) fprintf(f, "%c", c);
473        yield++;
474        }
475      else
476        {
477        if (f != NULL) fprintf(f, "\\x%02x", c);
478        yield += 4;
479        }
480      }
481    
482    return yield;
483    }
484    
485    
486    
487    /*************************************************
488    *              Callout function                  *
489    *************************************************/
490    
491    /* Called from PCRE as a result of the (?C) item. We print out where we are in
492    the match. Yield zero unless more callouts than the fail count, or the callout
493    data is not zero. */
494    
495    static int callout(pcre_callout_block *cb)
496    {
497    FILE *f = (first_callout | callout_extra)? outfile : NULL;
498    int i, pre_start, post_start, subject_length;
499    
500    if (callout_extra)
501      {
502      fprintf(f, "Callout %d: last capture = %d\n",
503        cb->callout_number, cb->capture_last);
504    
505      for (i = 0; i < cb->capture_top * 2; i += 2)
506        {
507        if (cb->offset_vector[i] < 0)
508          fprintf(f, "%2d: <unset>\n", i/2);
509        else
510          {
511          fprintf(f, "%2d: ", i/2);
512          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513            cb->offset_vector[i+1] - cb->offset_vector[i], f);
514          fprintf(f, "\n");
515          }
516        }
517      }
518    
519    /* Re-print the subject in canonical form, the first time or if giving full
520    datails. On subsequent calls in the same match, we use pchars just to find the
521    printed lengths of the substrings. */
522    
523    if (f != NULL) fprintf(f, "--->");
524    
525    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527      cb->current_position - cb->start_match, f);
528    
529    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530    
531    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532      cb->subject_length - cb->current_position, f);
533    
534    if (f != NULL) fprintf(f, "\n");
535    
536    /* Always print appropriate indicators, with callout number if not already
537    shown. For automatic callouts, show the pattern offset. */
538    
539    if (cb->callout_number == 255)
540      {
541      fprintf(outfile, "%+3d ", cb->pattern_position);
542      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
543      }
544    else
545      {
546      if (callout_extra) fprintf(outfile, "    ");
547        else fprintf(outfile, "%3d ", cb->callout_number);
548      }
549    
550    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551    fprintf(outfile, "^");
552    
553    if (post_start > 0)
554      {
555      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556      fprintf(outfile, "^");
557      }
558    
559    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560      fprintf(outfile, " ");
561    
562    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563      pbuffer + cb->pattern_position);
564    
565    fprintf(outfile, "\n");
566    first_callout = 0;
567    
568    if (cb->callout_data != NULL)
569      {
570      int callout_data = *((int *)(cb->callout_data));
571      if (callout_data != 0)
572        {
573        fprintf(outfile, "Callout data = %d\n", callout_data);
574        return callout_data;
575        }
576      }
577    
578    return (cb->callout_number != callout_fail_id)? 0 :
579           (++callout_count >= callout_fail_count)? 1 : 0;
580  }  }
581    
582    
583    /*************************************************
584    *            Local malloc functions              *
585    *************************************************/
586    
587  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
588  compiled re. */  compiled re. */
589    
590  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
591  {  {
592  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
593  return malloc(size);  gotten_store = size;
594    if (show_malloc)
595      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
596    return block;
597    }
598    
599    static void new_free(void *block)
600    {
601    if (show_malloc)
602      fprintf(outfile, "free             %p\n", block);
603    free(block);
604    }
605    
606    
607    /* For recursion malloc/free, to test stacking calls */
608    
609    static void *stack_malloc(size_t size)
610    {
611    void *block = malloc(size);
612    if (show_malloc)
613      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614    return block;
615    }
616    
617    static void stack_free(void *block)
618    {
619    if (show_malloc)
620      fprintf(outfile, "stack_free       %p\n", block);
621    free(block);
622    }
623    
624    
625    /*************************************************
626    *          Call pcre_fullinfo()                  *
627    *************************************************/
628    
629    /* Get one piece of information from the pcre_fullinfo() function */
630    
631    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632    {
633    int rc;
634    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636    }
637    
638    
639    
640    /*************************************************
641    *         Byte flipping function                 *
642    *************************************************/
643    
/* Reverses the byte order of the low-order bytes of value: two bytes when n
is 2, four bytes for any other n. Bits above those bytes do not appear in the
result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653    
654    
655    
656    
657    /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704    }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730  }  }
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740    no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
765    /*************************************************
766    *             Usage function                     *
767    *************************************************/
768    
/* Write the command-line summary to stdout. The lines describing readline
support, the -dfa option and the -p option depend on whether
SUPPORT_LIBREADLINE, NODFA and NOPOSIX are defined when this is compiled. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n", stdout);

fputs("  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
801    
802    
803    
804    /*************************************************
805    *                Main Program                    *
806    *************************************************/
807    
808  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
809  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
810  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 290  int options = 0; Line 816  int options = 0;
816  int study_options = 0;  int study_options = 0;
817  int op = 1;  int op = 1;
818  int timeit = 0;  int timeit = 0;
819    int timeitm = 0;
820  int showinfo = 0;  int showinfo = 0;
821    int showstore = 0;
822    int quiet = 0;
823    int size_offsets = 45;
824    int size_offsets_max;
825    int *offsets = NULL;
826    #if !defined NOPOSIX
827  int posix = 0;  int posix = 0;
828    #endif
829  int debug = 0;  int debug = 0;
830  int done = 0;  int done = 0;
831  unsigned char buffer[30000];  int all_use_dfa = 0;
832  unsigned char dbuffer[1024];  int yield = 0;
833    int stack_size;
834    
835    /* These vectors store, end-to-end, a list of captured substring names. Assume
836    that 1024 is plenty long enough for the few names we'll be testing. */
837    
838    uschar copynames[1024];
839    uschar getnames[1024];
840    
841    uschar *copynamesptr;
842    uschar *getnamesptr;
843    
844  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
845    when I am debugging. They grow automatically when very long lines are read. */
846    
847    buffer = (unsigned char *)malloc(buffer_size);
848    dbuffer = (unsigned char *)malloc(buffer_size);
849    pbuffer = (unsigned char *)malloc(buffer_size);
850    
851    /* The outfile variable is static so that new_malloc can use it. */
852    
853  outfile = stdout;  outfile = stdout;
854    
855    /* The following  _setmode() stuff is some Windows magic that tells its runtime
856    library to translate CRLF into a single LF character. At least, that's what
857    I've been told: never having used Windows I take this all on trust. Originally
858    it set 0x8000, but then I was advised that _O_BINARY was better. */
859    
860    #if defined(_WIN32) || defined(WIN32)
861    _setmode( _fileno( stdout ), _O_BINARY );
862    #endif
863    
864  /* Scan options */  /* Scan options */
865    
866  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
867    {    {
868    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
869    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
870      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
871        showstore = 1;
872      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
873      else if (strcmp(argv[op], "-b") == 0) debug = 1;
874    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
875    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
876    #if !defined NODFA
877      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
878    #endif
879      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
880          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
881            *endptr == 0))
882        {
883        op++;
884        argc--;
885        }
886      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
887        {
888        int both = argv[op][2] == 0;
889        int temp;
890        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891                         *endptr == 0))
892          {
893          timeitm = temp;
894          op++;
895          argc--;
896          }
897        else timeitm = LOOPREPEAT;
898        if (both) timeit = timeitm;
899        }
900      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
901          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
902            *endptr == 0))
903        {
904    #if defined(_WIN32) || defined(WIN32)
905        printf("PCRE: -S not supported on this OS\n");
906        exit(1);
907    #else
908        int rc;
909        struct rlimit rlim;
910        getrlimit(RLIMIT_STACK, &rlim);
911        rlim.rlim_cur = stack_size * 1024 * 1024;
912        rc = setrlimit(RLIMIT_STACK, &rlim);
913        if (rc != 0)
914          {
915        printf("PCRE: setrlimit() failed with error %d\n", rc);
916        exit(1);
917          }
918        op++;
919        argc--;
920    #endif
921        }
922    #if !defined NOPOSIX
923    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
924    #endif
925      else if (strcmp(argv[op], "-C") == 0)
926        {
927        int rc;
928        unsigned long int lrc;
929        printf("PCRE version %s\n", pcre_version());
930        printf("Compiled with\n");
931        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
932        printf("  %sUTF-8 support\n", rc? "" : "No ");
933        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
934        printf("  %sUnicode properties support\n", rc? "" : "No ");
935        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
936        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
937          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
938          (rc == -2)? "ANYCRLF" :
939          (rc == -1)? "ANY" : "???");
940        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
941        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
942                                         "all Unicode newlines");
943        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
944        printf("  Internal link size = %d\n", rc);
945        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
946        printf("  POSIX malloc threshold = %d\n", rc);
947        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
948        printf("  Default match limit = %ld\n", lrc);
949        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
950        printf("  Default recursion depth limit = %ld\n", lrc);
951        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
952        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
953        goto EXIT;
954        }
955      else if (strcmp(argv[op], "-help") == 0 ||
956               strcmp(argv[op], "--help") == 0)
957        {
958        usage();
959        goto EXIT;
960        }
961    else    else
962      {      {
963      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
964      return 1;      usage();
965        yield = 1;
966        goto EXIT;
967      }      }
968    op++;    op++;
969    argc--;    argc--;
970    }    }
971    
972    /* Get the store for the offsets vector, and remember what it was */
973    
974    size_offsets_max = size_offsets;
975    offsets = (int *)malloc(size_offsets_max * sizeof(int));
976    if (offsets == NULL)
977      {
978      printf("** Failed to get %d bytes of memory for offsets vector\n",
979        (int)(size_offsets_max * sizeof(int)));
980      yield = 1;
981      goto EXIT;
982      }
983    
984  /* Sort out the input and output files */  /* Sort out the input and output files */
985    
986  if (argc > 1)  if (argc > 1)
987    {    {
988    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
989    if (infile == NULL)    if (infile == NULL)
990      {      {
991      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
992      return 1;      yield = 1;
993        goto EXIT;
994      }      }
995    }    }
996    
997  if (argc > 2)  if (argc > 2)
998    {    {
999    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1000    if (outfile == NULL)    if (outfile == NULL)
1001      {      {
1002      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1003      return 1;      yield = 1;
1004        goto EXIT;
1005      }      }
1006    }    }
1007    
1008  /* Set alternative malloc function */  /* Set alternative malloc function */
1009    
1010  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1011    pcre_free = new_free;
1012    pcre_stack_malloc = stack_malloc;
1013    pcre_stack_free = stack_free;
1014    
1015  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1016    
1017  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1018    
1019  /* Main loop */  /* Main loop */
1020    
# Line 355  while (!done) Line 1022  while (!done)
1022    {    {
1023    pcre *re = NULL;    pcre *re = NULL;
1024    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
1025    
1026    #if !defined NOPOSIX  /* There are still compilers that require no indent */
1027    regex_t preg;    regex_t preg;
1028      int do_posix = 0;
1029    #endif
1030    
1031    const char *error;    const char *error;
1032    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
1033      unsigned char *to_file = NULL;
1034      const unsigned char *tables = NULL;
1035      unsigned long int true_size, true_study_size = 0;
1036      size_t size, regex_gotten_store;
1037    int do_study = 0;    int do_study = 0;
1038    int do_debug = 0;    int do_debug = debug;
1039    int do_posix = 0;    int do_G = 0;
1040    int erroroffset, len, delimiter;    int do_g = 0;
1041      int do_showinfo = showinfo;
1042      int do_showrest = 0;
1043      int do_flip = 0;
1044      int erroroffset, len, delimiter, poffset;
1045    
1046      use_utf8 = 0;
1047      debug_lengths = 1;
1048    
1049    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1050    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1051      fflush(outfile);
1052    
1053    p = buffer;    p = buffer;
1054    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1055    if (*p == 0) continue;    if (*p == 0) continue;
1056    
1057    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1058    complete, read more. */  
1059      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1060        {
1061        unsigned long int magic, get_options;
1062        uschar sbuf[8];
1063        FILE *f;
1064    
1065        p++;
1066        pp = p + (int)strlen((char *)p);
1067        while (isspace(pp[-1])) pp--;
1068        *pp = 0;
1069    
1070        f = fopen((char *)p, "rb");
1071        if (f == NULL)
1072          {
1073          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1074          continue;
1075          }
1076    
1077        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1078    
1079        true_size =
1080          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1081        true_study_size =
1082          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1083    
1084        re = (real_pcre *)new_malloc(true_size);
1085        regex_gotten_store = gotten_store;
1086    
1087        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1088    
1089        magic = ((real_pcre *)re)->magic_number;
1090        if (magic != MAGIC_NUMBER)
1091          {
1092          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1093            {
1094            do_flip = 1;
1095            }
1096          else
1097            {
1098            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1099            fclose(f);
1100            continue;
1101            }
1102          }
1103    
1104        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1105          do_flip? " (byte-inverted)" : "", p);
1106    
1107        /* Need to know if UTF-8 for printing data strings */
1108    
1109        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1110        use_utf8 = (get_options & PCRE_UTF8) != 0;
1111    
1112        /* Now see if there is any following study data */
1113    
1114        if (true_study_size != 0)
1115          {
1116          pcre_study_data *psd;
1117    
1118          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1119          extra->flags = PCRE_EXTRA_STUDY_DATA;
1120    
1121          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1122          extra->study_data = psd;
1123    
1124          if (fread(psd, 1, true_study_size, f) != true_study_size)
1125            {
1126            FAIL_READ:
1127            fprintf(outfile, "Failed to read data from %s\n", p);
1128            if (extra != NULL) new_free(extra);
1129            if (re != NULL) new_free(re);
1130            fclose(f);
1131            continue;
1132            }
1133          fprintf(outfile, "Study data loaded from %s\n", p);
1134          do_study = 1;     /* To get the data output if requested */
1135          }
1136        else fprintf(outfile, "No study data\n");
1137    
1138        fclose(f);
1139        goto SHOW_INFO;
1140        }
1141    
1142      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1143      the pattern; if it isn't complete, read more. */
1144    
1145    delimiter = *p++;    delimiter = *p++;
1146    
1147    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1148      {      {
1149      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1150      goto SKIP_DATA;      goto SKIP_DATA;
1151      }      }
1152    
1153    pp = p;    pp = p;
1154      poffset = p - buffer;
1155    
1156    for(;;)    for(;;)
1157      {      {
1158      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1159        {        {
1160        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1161        goto SKIP_DATA;          else if (*pp == delimiter) break;
1162          pp++;
1163        }        }
1164        if (*pp != 0) break;
1165      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if (fgets((char *)pp, len, infile) == NULL)  
1166        {        {
1167        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1168        done = 1;        done = 1;
# Line 406  while (!done) Line 1171  while (!done)
1171      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1172      }      }
1173    
1174    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1175      pointer to the correct relative point in the buffer. */
1176    
1177      p = buffer + poffset;
1178    
1179      /* If the first character after the delimiter is backslash, make
1180      the pattern end with backslash. This is purely to provide a way
1181      of testing for the error message when a pattern ends with backslash. */
1182    
1183      if (pp[1] == '\\') *pp++ = '\\';
1184    
1185      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1186      for callouts. */
1187    
1188    *pp++ = 0;    *pp++ = 0;
1189      strcpy((char *)pbuffer, (char *)p);
1190    
1191    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1192    
1193    options = 0;    options = 0;
1194    study_options = 0;    study_options = 0;
1195      log_store = showstore;  /* default from command line */
1196    
1197    while (*pp != 0)    while (*pp != 0)
1198      {      {
1199      switch (*pp++)      switch (*pp++)
1200        {        {
1201          case 'f': options |= PCRE_FIRSTLINE; break;
1202          case 'g': do_g = 1; break;
1203        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1204        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1205        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1206        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1207    
1208          case '+': do_showrest = 1; break;
1209        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1210        case 'D': do_debug = 1; break;        case 'B': do_debug = 1; break;
1211          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1212          case 'D': do_debug = do_showinfo = 1; break;
1213        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1214          case 'F': do_flip = 1; break;
1215          case 'G': do_G = 1; break;
1216          case 'I': do_showinfo = 1; break;
1217          case 'J': options |= PCRE_DUPNAMES; break;
1218          case 'M': log_store = 1; break;
1219          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1220    
1221    #if !defined NOPOSIX
1222        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1223    #endif
1224    
1225        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1226        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1227        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1228        case '\n': case ' ': break;        case 'Z': debug_lengths = 0; break;
1229          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1230          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1231    
1232          case 'L':
1233          ppp = pp;
1234          /* The '\r' test here is so that it works on Windows. */
1235          /* The '0' test is just in case this is an unterminated line. */
1236          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1237          *ppp = 0;
1238          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1239            {
1240            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1241            goto SKIP_DATA;
1242            }
1243          locale_set = 1;
1244          tables = pcre_maketables();
1245          pp = ppp;
1246          break;
1247    
1248          case '>':
1249          to_file = pp;
1250          while (*pp != 0) pp++;
1251          while (isspace(pp[-1])) pp--;
1252          *pp = 0;
1253          break;
1254    
1255          case '<':
1256            {
1257            if (strncmp((char *)pp, "JS>", 3) == 0)
1258              {
1259              options |= PCRE_JAVASCRIPT_COMPAT;
1260              pp += 3;
1261              }
1262            else
1263              {
1264              int x = check_newline(pp, outfile);
1265              if (x == 0) goto SKIP_DATA;
1266              options |= x;
1267              while (*pp++ != '>');
1268              }
1269            }
1270          break;
1271    
1272          case '\r':                      /* So that it works in Windows */
1273          case '\n':
1274          case ' ':
1275          break;
1276    
1277        default:        default:
1278        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1279        goto SKIP_DATA;        goto SKIP_DATA;
# Line 437  while (!done) Line 1281  while (!done)
1281      }      }
1282    
1283    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1284    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
1285      local character tables. */
1286    
1287    #if !defined NOPOSIX
1288    if (posix || do_posix)    if (posix || do_posix)
1289      {      {
1290      int rc;      int rc;
1291      int cflags = 0;      int cflags = 0;
1292    
1293      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1294      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1295        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1296        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1297        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1298    
1299      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1300    
1301      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 452  while (!done) Line 1303  while (!done)
1303    
1304      if (rc != 0)      if (rc != 0)
1305        {        {
1306        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1307        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1308        goto SKIP_DATA;        goto SKIP_DATA;
1309        }        }
# Line 461  while (!done) Line 1312  while (!done)
1312    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1313    
1314    else    else
1315    #endif  /* !defined NOPOSIX */
1316    
1317      {      {
1318      if (timeit)      if (timeit > 0)
1319        {        {
1320        register int i;        register int i;
1321        clock_t time_taken;        clock_t time_taken;
1322        clock_t start_time = clock();        clock_t start_time = clock();
1323        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1324          {          {
1325          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1326          if (re != NULL) free(re);          if (re != NULL) free(re);
1327          }          }
1328        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1329        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1330          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1331              (double)CLOCKS_PER_SEC);
1332        }        }
1333    
1334      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1335    
1336      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1337      if non-interactive. */      if non-interactive. */
# Line 490  while (!done) Line 1344  while (!done)
1344          {          {
1345          for (;;)          for (;;)
1346            {            {
1347            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1348              {              {
1349              done = 1;              done = 1;
1350              goto CONTINUE;              goto CONTINUE;
# Line 501  while (!done) Line 1355  while (!done)
1355            }            }
1356          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1357          }          }
1358        continue;        goto CONTINUE;
1359          }
1360    
1361        /* Compilation succeeded; print data if required. There are now two
1362        info-returning functions. The old one has a limited interface and
1363        returns only limited data. Check that it agrees with the newer one. */
1364    
1365        if (log_store)
1366          fprintf(outfile, "Memory allocation (code space): %d\n",
1367            (int)(gotten_store -
1368                  sizeof(real_pcre) -
1369                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1370    
1371        /* Extract the size for possible writing before possibly flipping it,
1372        and remember the store that was got. */
1373    
1374        true_size = ((real_pcre *)re)->size;
1375        regex_gotten_store = gotten_store;
1376    
1377        /* If /S was present, study the regexp to generate additional info to
1378        help with the matching. */
1379    
1380        if (do_study)
1381          {
1382          if (timeit > 0)
1383            {
1384            register int i;
1385            clock_t time_taken;
1386            clock_t start_time = clock();
1387            for (i = 0; i < timeit; i++)
1388              extra = pcre_study(re, study_options, &error);
1389            time_taken = clock() - start_time;
1390            if (extra != NULL) free(extra);
1391            fprintf(outfile, "  Study time %.4f milliseconds\n",
1392              (((double)time_taken * 1000.0) / (double)timeit) /
1393                (double)CLOCKS_PER_SEC);
1394            }
1395          extra = pcre_study(re, study_options, &error);
1396          if (error != NULL)
1397            fprintf(outfile, "Failed to study: %s\n", error);
1398          else if (extra != NULL)
1399            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1400          }
1401    
1402        /* If the 'F' option was present, we flip the bytes of all the integer
1403        fields in the regex data block and the study block. This is to make it
1404        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1405        compiled on a different architecture. */
1406    
1407        if (do_flip)
1408          {
1409          real_pcre *rre = (real_pcre *)re;
1410          rre->magic_number =
1411            byteflip(rre->magic_number, sizeof(rre->magic_number));
1412          rre->size = byteflip(rre->size, sizeof(rre->size));
1413          rre->options = byteflip(rre->options, sizeof(rre->options));
1414          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1415          rre->top_bracket =
1416            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1417          rre->top_backref =
1418            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1419          rre->first_byte =
1420            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1421          rre->req_byte =
1422            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1423          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1424            sizeof(rre->name_table_offset));
1425          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1426            sizeof(rre->name_entry_size));
1427          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1428            sizeof(rre->name_count));
1429    
1430          if (extra != NULL)
1431            {
1432            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1433            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1434            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1435            }
1436        }        }
1437    
1438      /* Compilation succeeded; print data if required */      /* Extract information from the compiled data if required */
1439    
1440      if (showinfo || do_debug)      SHOW_INFO:
1441    
1442        if (do_debug)
1443        {        {
1444        int first_char, count;        fprintf(outfile, "------------------------------------------------------------------\n");
1445          pcre_printint(re, outfile, debug_lengths);
1446          }
1447    
1448        if (debug || do_debug) print_internals(re, outfile);      if (do_showinfo)
1449          {
1450          unsigned long int get_options, all_options;
1451    #if !defined NOINFOCHECK
1452          int old_first_char, old_options, old_count;
1453    #endif
1454          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1455            hascrorlf;
1456          int nameentrysize, namecount;
1457          const uschar *nametable;
1458    
1459          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1460          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1461          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1462          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1463          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1464          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1465          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1466          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1467          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1468          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1469          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1470          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1471    
1472        count = pcre_info(re, &options, &first_char);  #if !defined NOINFOCHECK
1473          old_count = pcre_info(re, &old_options, &old_first_char);
1474        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1475          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
1476        else        else
1477          {          {
1478          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
1479          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1480            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
1481              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
1482              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
1483              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1484              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
1485              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
1486              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
1487              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1488              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
1489          if (first_char == -1)          }
1490            {  #endif
1491            fprintf(outfile, "First char at start or follows \\n\n");  
1492            }        if (size != regex_gotten_store) fprintf(outfile,
1493          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1494            (int)size, (int)regex_gotten_store);
1495    
1496          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1497          if (backrefmax > 0)
1498            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1499    
1500          if (namecount > 0)
1501            {
1502            fprintf(outfile, "Named capturing subpatterns:\n");
1503            while (namecount-- > 0)
1504            {            {
1505            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1506                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1507                GET2(nametable, 0));
1508              nametable += nameentrysize;
1509            }            }
1510            }
1511    
1512          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1513          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1514    
1515          all_options = ((real_pcre *)re)->options;
1516          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1517    
1518          if (get_options == 0) fprintf(outfile, "No options\n");
1519            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1520              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1521              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1522              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1523              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1524              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1525              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1526              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1527              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1528              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1529              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1530              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1531              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1532              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1533              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1534              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1535    
1536          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1537    
1538          switch (get_options & PCRE_NEWLINE_BITS)
1539            {
1540            case PCRE_NEWLINE_CR:
1541            fprintf(outfile, "Forced newline sequence: CR\n");
1542            break;
1543    
1544            case PCRE_NEWLINE_LF:
1545            fprintf(outfile, "Forced newline sequence: LF\n");
1546            break;
1547    
1548            case PCRE_NEWLINE_CRLF:
1549            fprintf(outfile, "Forced newline sequence: CRLF\n");
1550            break;
1551    
1552            case PCRE_NEWLINE_ANYCRLF:
1553            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1554            break;
1555    
1556            case PCRE_NEWLINE_ANY:
1557            fprintf(outfile, "Forced newline sequence: ANY\n");
1558            break;
1559    
1560            default:
1561            break;
1562            }
1563    
1564          if (first_char == -1)
1565            {
1566            fprintf(outfile, "First char at start or follows newline\n");
1567            }
1568          else if (first_char < 0)
1569            {
1570            fprintf(outfile, "No first char\n");
1571            }
1572          else
1573            {
1574            int ch = first_char & 255;
1575            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1576              "" : " (caseless)";
1577            if (PRINTHEX(ch))
1578              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1579            else
1580              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1581            }
1582    
1583          if (need_char < 0)
1584            {
1585            fprintf(outfile, "No need char\n");
1586            }
1587          else
1588            {
1589            int ch = need_char & 255;
1590            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1591              "" : " (caseless)";
1592            if (PRINTHEX(ch))
1593              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1594            else
1595              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1596            }
1597    
1598          /* Don't output study size; at present it is in any case a fixed
1599          value, but it varies, depending on the computer architecture, and
1600          so messes up the test suite. (And with the /F option, it might be
1601          flipped.) */
1602    
1603          if (do_study)
1604            {
1605            if (extra == NULL)
1606              fprintf(outfile, "Study returned NULL\n");
1607          else          else
1608            {            {
1609            if (isprint(first_char))            uschar *start_bits = NULL;
1610              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1611    
1612              if (start_bits == NULL)
1613                fprintf(outfile, "No starting byte set\n");
1614            else            else
1615              fprintf(outfile, "First char = %d\n", first_char);              {
1616                int i;
1617                int c = 24;
1618                fprintf(outfile, "Starting byte set: ");
1619                for (i = 0; i < 256; i++)
1620                  {
1621                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1622                    {
1623                    if (c > 75)
1624                      {
1625                      fprintf(outfile, "\n  ");
1626                      c = 2;
1627                      }
1628                    if (PRINTHEX(i) && i != ' ')
1629                      {
1630                      fprintf(outfile, "%c ", i);
1631                      c += 2;
1632                      }
1633                    else
1634                      {
1635                      fprintf(outfile, "\\x%02x ", i);
1636                      c += 5;
1637                      }
1638                    }
1639                  }
1640                fprintf(outfile, "\n");
1641                }
1642            }            }
1643          }          }
1644        }        }
1645    
1646      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1647      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1648        the study length, in big-endian order. */
1649      if (do_study)  
1650        if (to_file != NULL)
1651        {        {
1652        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1653          if (f == NULL)
1654          {          {
1655          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
1656          }          }
1657          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
1658          {          {
1659          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1660          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (uschar)((true_size >> 24) & 255);
1661            fprintf(outfile, "No starting character set\n");          sbuf[1] = (uschar)((true_size >> 16) & 255);
1662            sbuf[2] = (uschar)((true_size >>  8) & 255);
1663            sbuf[3] = (uschar)((true_size) & 255);
1664    
1665            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1666            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1667            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1668            sbuf[7] = (uschar)((true_study_size) & 255);
1669    
1670            if (fwrite(sbuf, 1, 8, f) < 8 ||
1671                fwrite(re, 1, true_size, f) < true_size)
1672              {
1673              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1674              }
1675          else          else
1676            {            {
1677            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1678            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1679              {              {
1680              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1681                    true_study_size)
1682                {                {
1683                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1684                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1685                }                }
1686                else fprintf(outfile, "Study data written to %s\n", to_file);
1687    
1688              }              }
           fprintf(outfile, "\n");  
1689            }            }
1690            fclose(f);
1691          }          }
1692    
1693          new_free(re);
1694          if (extra != NULL) new_free(extra);
1695          if (tables != NULL) new_free((void *)tables);
1696          continue;  /* With next regex */
1697        }        }
1698      }      }        /* End of non-POSIX compile */
1699    
1700    /* Read data lines and test them */    /* Read data lines and test them */
1701    
1702    for (;;)    for (;;)
1703      {      {
1704      unsigned char *q;      uschar *q;
1705        uschar *bptr;
1706        int *use_offsets = offsets;
1707        int use_size_offsets = size_offsets;
1708        int callout_data = 0;
1709        int callout_data_set = 0;
1710      int count, c;      int count, c;
1711      int offsets[45];      int copystrings = 0;
1712      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
1713        int getstrings = 0;
1714        int getlist = 0;
1715        int gmatched = 0;
1716        int start_offset = 0;
1717        int g_notempty = 0;
1718        int use_dfa = 0;
1719    
1720      options = 0;      options = 0;
1721    
1722      if (infile == stdin) printf("  data> ");      *copynames = 0;
1723      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1724    
1725        copynamesptr = copynames;
1726        getnamesptr = getnames;
1727    
1728        pcre_callout = callout;
1729        first_callout = 1;
1730        callout_extra = 0;
1731        callout_count = 0;
1732        callout_fail_count = 999999;
1733        callout_fail_id = -1;
1734        show_malloc = 0;
1735    
1736        if (extra != NULL) extra->flags &=
1737          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1738    
1739        len = 0;
1740        for (;;)
1741        {        {
1742        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1743        goto CONTINUE;          {
1744            if (len > 0) break;
1745            done = 1;
1746            goto CONTINUE;
1747            }
1748          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1749          len = (int)strlen((char *)buffer);
1750          if (buffer[len-1] == '\n') break;
1751        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1752    
     len = (int)strlen((char *)buffer);  
1753      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1754      buffer[len] = 0;      buffer[len] = 0;
1755      if (len == 0) break;      if (len == 0) break;
# Line 637  while (!done) Line 1757  while (!done)
1757      p = buffer;      p = buffer;
1758      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1759    
1760      q = dbuffer;      bptr = q = dbuffer;
1761      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1762        {        {
1763        int i = 0;        int i = 0;
1764        int n = 0;        int n = 0;
1765    
1766        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1767          {          {
1768          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 658  while (!done) Line 1779  while (!done)
1779          c -= '0';          c -= '0';
1780          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1781            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1782    
1783    #if !defined NOUTF8
1784            if (use_utf8 && c > 255)
1785              {
1786              unsigned char buff8[8];
1787              int ii, utn;
1788              utn = ord2utf8(c, buff8);
1789              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1790              c = buff8[ii];   /* Last byte */
1791              }
1792    #endif
1793          break;          break;
1794    
1795          case 'x':          case 'x':
1796    
1797            /* Handle \x{..} specially - new Perl thing for utf8 */
1798    
1799    #if !defined NOUTF8
1800            if (*p == '{')
1801              {
1802              unsigned char *pt = p;
1803              c = 0;
1804              while (isxdigit(*(++pt)))
1805                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1806              if (*pt == '}')
1807                {
1808                unsigned char buff8[8];
1809                int ii, utn;
1810                if (use_utf8)
1811                  {
1812                  utn = ord2utf8(c, buff8);
1813                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1814                  c = buff8[ii];   /* Last byte */
1815                  }
1816                else
1817                 {
1818                 if (c > 255)
1819                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1820                     "UTF-8 mode is not enabled.\n"
1821                     "** Truncation will probably give the wrong result.\n", c);
1822                 }
1823                p = pt + 1;
1824                break;
1825                }
1826              /* Not correct form; fall through */
1827              }
1828    #endif
1829    
1830            /* Ordinary \x */
1831    
1832          c = 0;          c = 0;
1833          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1834            {            {
# Line 669  while (!done) Line 1837  while (!done)
1837            }            }
1838          break;          break;
1839    
1840          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1841          p--;          p--;
1842          continue;          continue;
1843    
1844            case '>':
1845            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1846            continue;
1847    
1848          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1849          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1850          continue;          continue;
# Line 681  while (!done) Line 1853  while (!done)
1853          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1854          continue;          continue;
1855    
1856            case 'C':
1857            if (isdigit(*p))    /* Set copy string */
1858              {
1859              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1860              copystrings |= 1 << n;
1861              }
1862            else if (isalnum(*p))
1863              {
1864              uschar *npp = copynamesptr;
1865              while (isalnum(*p)) *npp++ = *p++;
1866              *npp++ = 0;
1867              *npp = 0;
1868              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1869              if (n < 0)
1870                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1871              copynamesptr = npp;
1872              }
1873            else if (*p == '+')
1874              {
1875              callout_extra = 1;
1876              p++;
1877              }
1878            else if (*p == '-')
1879              {
1880              pcre_callout = NULL;
1881              p++;
1882              }
1883            else if (*p == '!')
1884              {
1885              callout_fail_id = 0;
1886              p++;
1887              while(isdigit(*p))
1888                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1889              callout_fail_count = 0;
1890              if (*p == '!')
1891                {
1892                p++;
1893                while(isdigit(*p))
1894                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1895                }
1896              }
1897            else if (*p == '*')
1898              {
1899              int sign = 1;
1900              callout_data = 0;
1901              if (*(++p) == '-') { sign = -1; p++; }
1902              while(isdigit(*p))
1903                callout_data = callout_data * 10 + *p++ - '0';
1904              callout_data *= sign;
1905              callout_data_set = 1;
1906              }
1907            continue;
1908    
1909    #if !defined NODFA
1910            case 'D':
1911    #if !defined NOPOSIX
1912            if (posix || do_posix)
1913              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1914            else
1915    #endif
1916              use_dfa = 1;
1917            continue;
1918    
1919            case 'F':
1920            options |= PCRE_DFA_SHORTEST;
1921            continue;
1922    #endif
1923    
1924            case 'G':
1925            if (isdigit(*p))
1926              {
1927              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1928              getstrings |= 1 << n;
1929              }
1930            else if (isalnum(*p))
1931              {
1932              uschar *npp = getnamesptr;
1933              while (isalnum(*p)) *npp++ = *p++;
1934              *npp++ = 0;
1935              *npp = 0;
1936              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1937              if (n < 0)
1938                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1939              getnamesptr = npp;
1940              }
1941            continue;
1942    
1943            case 'L':
1944            getlist = 1;
1945            continue;
1946    
1947            case 'M':
1948            find_match_limit = 1;
1949            continue;
1950    
1951            case 'N':
1952            options |= PCRE_NOTEMPTY;
1953            continue;
1954    
1955          case 'O':          case 'O':
1956          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1957          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1958              {
1959              size_offsets_max = n;
1960              free(offsets);
1961              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1962              if (offsets == NULL)
1963                {
1964                printf("** Failed to get %d bytes of memory for offsets vector\n",
1965                  (int)(size_offsets_max * sizeof(int)));
1966                yield = 1;
1967                goto EXIT;
1968                }
1969              }
1970            use_size_offsets = n;
1971            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1972            continue;
1973    
1974            case 'P':
1975            options |= PCRE_PARTIAL;
1976            continue;
1977    
1978            case 'Q':
1979            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1980            if (extra == NULL)
1981              {
1982              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1983              extra->flags = 0;
1984              }
1985            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1986            extra->match_limit_recursion = n;
1987            continue;
1988    
1989            case 'q':
1990            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1991            if (extra == NULL)
1992              {
1993              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1994              extra->flags = 0;
1995              }
1996            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1997            extra->match_limit = n;
1998            continue;
1999    
2000    #if !defined NODFA
2001            case 'R':
2002            options |= PCRE_DFA_RESTART;
2003            continue;
2004    #endif
2005    
2006            case 'S':
2007            show_malloc = 1;
2008          continue;          continue;
2009    
2010          case 'Z':          case 'Z':
2011          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2012          continue;          continue;
2013    
2014            case '?':
2015            options |= PCRE_NO_UTF8_CHECK;
2016            continue;
2017    
2018            case '<':
2019              {
2020              int x = check_newline(p, outfile);
2021              if (x == 0) goto NEXT_DATA;
2022              options |= x;
2023              while (*p++ != '>');
2024              }
2025            continue;
2026          }          }
2027        *q++ = c;        *q++ = c;
2028        }        }
2029      *q = 0;      *q = 0;
2030      len = q - dbuffer;      len = q - dbuffer;
2031    
2032        /* Move the data to the end of the buffer so that a read over the end of
2033        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2034        we are using the POSIX interface, we must include the terminating zero. */
2035    
2036    #if !defined NOPOSIX
2037        if (posix || do_posix)
2038          {
2039          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2040          bptr += buffer_size - len - 1;
2041          }
2042        else
2043    #endif
2044          {
2045          memmove(bptr + buffer_size - len, bptr, len);
2046          bptr += buffer_size - len;
2047          }
2048    
2049        if ((all_use_dfa || use_dfa) && find_match_limit)
2050          {
2051          printf("**Match limit not relevant for DFA matching: ignored\n");
2052          find_match_limit = 0;
2053          }
2054    
2055      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2056      support timing. */      support timing or playing with the match limit or callout data. */
2057    
2058    #if !defined NOPOSIX
2059      if (posix || do_posix)      if (posix || do_posix)
2060        {        {
2061        int rc;        int rc;
2062        int eflags = 0;        int eflags = 0;
2063        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
2064          if (use_size_offsets > 0)
2065            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2066        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2067        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2068    
2069        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
2070    
2071        if (rc != 0)        if (rc != 0)
2072          {          {
2073          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2074          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2075          }          }
2076          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2077                  != 0)
2078            {
2079            fprintf(outfile, "Matched with REG_NOSUB\n");
2080            }
2081        else        else
2082          {          {
2083          size_t i;          size_t i;
2084          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2085            {            {
2086            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2087              {              {
2088              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2089              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2090                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2091              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2092                if (i == 0 && do_showrest)
2093                  {
2094                  fprintf(outfile, " 0+ ");
2095                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2096                    outfile);
2097                  fprintf(outfile, "\n");
2098                  }
2099              }              }
2100            }            }
2101          }          }
2102          free(pmatch);
2103        }        }
2104    
2105      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
2106    
2107      else      else
2108    #endif  /* !defined NOPOSIX */
2109    
2110        for (;; gmatched++)    /* Loop for /g or /G */
2111        {        {
2112        if (timeit)        if (timeitm > 0)
2113          {          {
2114          register int i;          register int i;
2115          clock_t time_taken;          clock_t time_taken;
2116          clock_t start_time = clock();          clock_t start_time = clock();
2117          for (i = 0; i < 4000; i++)  
2118            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
2119              size_offsets);          if (all_use_dfa || use_dfa)
2120              {
2121              int workspace[1000];
2122              for (i = 0; i < timeitm; i++)
2123                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2124                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2125                  sizeof(workspace)/sizeof(int));
2126              }
2127            else
2128    #endif
2129    
2130            for (i = 0; i < timeitm; i++)
2131              count = pcre_exec(re, extra, (char *)bptr, len,
2132                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2133    
2134          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2135          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2136            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2137                (double)CLOCKS_PER_SEC);
2138            }
2139    
2140          /* If find_match_limit is set, we want to do repeated matches with
2141          varying limits in order to find the minimum value for the match limit and
2142          for the recursion limit. */
2143    
2144          if (find_match_limit)
2145            {
2146            if (extra == NULL)
2147              {
2148              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2149              extra->flags = 0;
2150              }
2151    
2152            (void)check_match_limit(re, extra, bptr, len, start_offset,
2153              options|g_notempty, use_offsets, use_size_offsets,
2154              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2155              PCRE_ERROR_MATCHLIMIT, "match()");
2156    
2157            count = check_match_limit(re, extra, bptr, len, start_offset,
2158              options|g_notempty, use_offsets, use_size_offsets,
2159              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2160              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2161            }
2162    
2163          /* If callout_data is set, use the interface with additional data */
2164    
2165          else if (callout_data_set)
2166            {
2167            if (extra == NULL)
2168              {
2169              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2170              extra->flags = 0;
2171              }
2172            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2173            extra->callout_data = &callout_data;
2174            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2175              options | g_notempty, use_offsets, use_size_offsets);
2176            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2177          }          }
2178    
2179        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2180          size_offsets);        value of match_limit. */
2181    
2182    #if !defined NODFA
2183          else if (all_use_dfa || use_dfa)
2184            {
2185            int workspace[1000];
2186            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2187              options | g_notempty, use_offsets, use_size_offsets, workspace,
2188              sizeof(workspace)/sizeof(int));
2189            if (count == 0)
2190              {
2191              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2192              count = use_size_offsets/2;
2193              }
2194            }
2195    #endif
2196    
2197        if (count == 0)        else
2198          {          {
2199          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2200          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2201            if (count == 0)
2202              {
2203              fprintf(outfile, "Matched, but too many substrings\n");
2204              count = use_size_offsets/3;
2205              }
2206          }          }
2207    
2208          /* Matched */
2209    
2210        if (count >= 0)        if (count >= 0)
2211          {          {
2212          int i;          int i, maxcount;
2213          count *= 2;  
2214          for (i = 0; i < count; i += 2)  #if !defined NODFA
2215            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2216    #endif
2217              maxcount = use_size_offsets/3;
2218    
2219            /* This is a check against a lunatic return value. */
2220    
2221            if (count > maxcount)
2222              {
2223              fprintf(outfile,
2224                "** PCRE error: returned count %d is too big for offset size %d\n",
2225                count, use_size_offsets);
2226              count = use_size_offsets/3;
2227              if (do_g || do_G)
2228                {
2229                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2230                do_g = do_G = FALSE;        /* Break g/G loop */
2231                }
2232              }
2233    
2234            for (i = 0; i < count * 2; i += 2)
2235            {            {
2236            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2237              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2238            else            else
2239              {              {
2240              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2241              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2242                  use_offsets[i+1] - use_offsets[i], outfile);
2243              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2244                if (i == 0)
2245                  {
2246                  if (do_showrest)
2247                    {
2248                    fprintf(outfile, " 0+ ");
2249                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2250                      outfile);
2251                    fprintf(outfile, "\n");
2252                    }
2253                  }
2254                }
2255              }
2256    
2257            for (i = 0; i < 32; i++)
2258              {
2259              if ((copystrings & (1 << i)) != 0)
2260                {
2261                char copybuffer[256];
2262                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2263                  i, copybuffer, sizeof(copybuffer));
2264                if (rc < 0)
2265                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2266                else
2267                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2268                }
2269              }
2270    
2271            for (copynamesptr = copynames;
2272                 *copynamesptr != 0;
2273                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2274              {
2275              char copybuffer[256];
2276              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2277                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2278              if (rc < 0)
2279                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2280              else
2281                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2282              }
2283    
2284            for (i = 0; i < 32; i++)
2285              {
2286              if ((getstrings & (1 << i)) != 0)
2287                {
2288                const char *substring;
2289                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2290                  i, &substring);
2291                if (rc < 0)
2292                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2293                else
2294                  {
2295                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2296                  pcre_free_substring(substring);
2297                  }
2298                }
2299              }
2300    
2301            for (getnamesptr = getnames;
2302                 *getnamesptr != 0;
2303                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2304              {
2305              const char *substring;
2306              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2307                count, (char *)getnamesptr, &substring);
2308              if (rc < 0)
2309                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2310              else
2311                {
2312                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2313                pcre_free_substring(substring);
2314              }              }
2315            }            }
2316    
2317            if (getlist)
2318              {
2319              const char **stringlist;
2320              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2321                &stringlist);
2322              if (rc < 0)
2323                fprintf(outfile, "get substring list failed %d\n", rc);
2324              else
2325                {
2326                for (i = 0; i < count; i++)
2327                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2328                if (stringlist[i] != NULL)
2329                  fprintf(outfile, "string list not terminated by NULL\n");
2330                /* free((void *)stringlist); */
2331                pcre_free_substring_list(stringlist);
2332                }
2333              }
2334            }
2335    
2336          /* There was a partial match */
2337    
2338          else if (count == PCRE_ERROR_PARTIAL)
2339            {
2340            fprintf(outfile, "Partial match");
2341    #if !defined NODFA
2342            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2343              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2344                bptr + use_offsets[0]);
2345    #endif
2346            fprintf(outfile, "\n");
2347            break;  /* Out of the /g loop */
2348          }          }
2349    
2350          /* Failed to match. If this is a /g or /G loop and we previously set
2351          g_notempty after a null match, this is not necessarily the end. We want
2352          to advance the start offset, and continue. We won't be at the end of the
2353          string - that was checked before setting g_notempty.
2354    
2355          Complication arises in the case when the newline option is "any" or
2356          "anycrlf". If the previous match was at the end of a line terminated by
2357          CRLF, an advance of one character just passes the \r, whereas we should
2358          prefer the longer newline sequence, as does the code in pcre_exec().
2359          Fudge the offset value to achieve this.
2360    
2361          Otherwise, in the case of UTF-8 matching, the advance must be one
2362          character, not one byte. */
2363    
2364        else        else
2365          {          {
2366          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2367              {
2368              int onechar = 1;
2369              unsigned int obits = ((real_pcre *)re)->options;
2370              use_offsets[0] = start_offset;
2371              if ((obits & PCRE_NEWLINE_BITS) == 0)
2372                {
2373                int d;
2374                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2375                obits = (d == '\r')? PCRE_NEWLINE_CR :
2376                        (d == '\n')? PCRE_NEWLINE_LF :
2377                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2378                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2379                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2380                }
2381              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2382                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2383                  &&
2384                  start_offset < len - 1 &&
2385                  bptr[start_offset] == '\r' &&
2386                  bptr[start_offset+1] == '\n')
2387                onechar++;
2388              else if (use_utf8)
2389                {
2390                while (start_offset + onechar < len)
2391                  {
2392                  int tb = bptr[start_offset+onechar];
2393                  if (tb <= 127) break;
2394                  tb &= 0xc0;
2395                  if (tb != 0 && tb != 0xc0) onechar++;
2396                  }
2397                }
2398              use_offsets[1] = start_offset + onechar;
2399              }
2400            else
2401              {
2402              if (count == PCRE_ERROR_NOMATCH)
2403                {
2404                if (gmatched == 0) fprintf(outfile, "No match\n");
2405                }
2406            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2407              break;  /* Out of the /g loop */
2408              }
2409          }          }
2410        }  
2411      }        /* If not /g or /G we are done */
2412    
2413          if (!do_g && !do_G) break;
2414    
2415          /* If we have matched an empty string, first check to see if we are at
2416          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2417          what Perl's /g option does. This turns out to be rather cunning. First
2418          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2419          same point. If this fails (picked up above) we advance to the next
2420          character. */
2421    
2422          g_notempty = 0;
2423    
2424          if (use_offsets[0] == use_offsets[1])
2425            {
2426            if (use_offsets[0] == len) break;
2427            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2428            }
2429    
2430          /* For /g, update the start offset, leaving the rest alone */
2431    
2432          if (do_g) start_offset = use_offsets[1];
2433    
2434          /* For /G, update the pointer and length */
2435    
2436          else
2437            {
2438            bptr += use_offsets[1];
2439            len -= use_offsets[1];
2440            }
2441          }  /* End of loop for /g and /G */
2442    
2443        NEXT_DATA: continue;
2444        }    /* End of loop for data lines */
2445    
2446    CONTINUE:    CONTINUE:
2447    
2448    #if !defined NOPOSIX
2449    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2450    if (re != NULL) free(re);  #endif
2451    if (extra != NULL) free(extra);  
2452      if (re != NULL) new_free(re);
2453      if (extra != NULL) new_free(extra);
2454      if (tables != NULL)
2455        {
2456        new_free((void *)tables);
2457        setlocale(LC_CTYPE, "C");
2458        locale_set = 0;
2459        }
2460    }    }
2461    
2462  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2463  return 0;  
2464    EXIT:
2465    
2466    if (infile != NULL && infile != stdin) fclose(infile);
2467    if (outfile != NULL && outfile != stdout) fclose(outfile);
2468    
2469    free(buffer);
2470    free(dbuffer);
2471    free(pbuffer);
2472    free(offsets);
2473    
2474    return yield;
2475  }  }
2476    
2477  /* End */  /* End of pcretest.c */

Legend:
Removed from v.23  
changed lines
  Added in v.376

  ViewVC Help
Powered by ViewVC 1.1.5