/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 351 by ph10, Fri Jul 4 18:27:16 2008 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95    #include "pcre_internal.h"
96    
97    /* We need access to some of the data tables that PCRE uses. So as not to have
98    to keep two copies, we include the source file here, changing the names of the
99    external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102    #define _pcre_utf8_table1      utf8_table1
103    #define _pcre_utf8_table1_size utf8_table1_size
104    #define _pcre_utf8_table2      utf8_table2
105    #define _pcre_utf8_table3      utf8_table3
106    #define _pcre_utf8_table4      utf8_table4
107    #define _pcre_utt              utt
108    #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110    #define _pcre_OP_lengths       OP_lengths
111    
112    #include "pcre_tables.c"
113    
114    /* We also need the pcre_printint() function for printing out compiled
115    patterns. This function is in a separate file so that it can be included in
116    pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in this file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124    #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
 #include "internal.h"  
128    
129  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
130  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 134  Makefile. */
134  #include "pcreposix.h"  #include "pcreposix.h"
135  #endif  #endif
136    
137    /* It is also possible, for the benefit of the version currently imported into
138    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139    interface to the DFA matcher (NODFA), and without the doublecheck of the old
140    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150    /* Other parameters */
151    
152  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
153  #ifdef CLK_TCK  #ifdef CLK_TCK
154  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 157  Makefile. */
157  #endif  #endif
158  #endif  #endif
159    
160  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
161    
162    #define LOOPREPEAT 500000
163    
164    /* Static variables */
165    
166  static FILE *outfile;  static FILE *outfile;
167  static int log_store = 0;  static int log_store = 0;
168    static int callout_count;
169    static int callout_extra;
170    static int callout_fail_count;
171    static int callout_fail_id;
172    static int debug_lengths;
173    static int first_callout;
174    static int locale_set = 0;
175    static int show_malloc;
176    static int use_utf8;
177  static size_t gotten_store;  static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184    static uschar *pbuffer = NULL;
185    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
186    
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
187    
188    /*************************************************
189    *        Read or extend an input line            *
190    *************************************************/
191    
192  static void print_internals(pcre *re)  /* Input lines are read into buffer, but both patterns and data lines can be
193  {  continued over multiple input lines. In addition, if the buffer fills up, we
194  unsigned char *code = ((real_pcre *)re)->code;  want to automatically expand it so as to be able to handle extremely large
195    lines that are needed for certain stress tests. When the input buffer is
196    expanded, the other two buffers must also be expanded likewise, and the
197    contents of pbuffer, which are a copy of the input for callouts, must be
198    preserved (for when expansion happens for a data line). This is not the most
199    optimal way of handling this, but hey, this is just a test program!
200    
201    Arguments:
202      f            the file to read
203      start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206    Returns:       pointer to the start of new data
207                   could be a copy of start, or could be moved
208                   NULL if no data read and EOF reached
209    */
210    
211  fprintf(outfile, "------------------------------------------------------------------\n");  static uschar *
212    extend_inputline(FILE *f, uschar *start, const char *prompt)
213    {
214    uschar *here = start;
215    
216  for(;;)  for (;;)
217    {    {
218    int c;    int rlen = buffer_size - (here - buffer);
   int charlength;  
219    
220    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));    if (rlen > 1000)
   
   if (*code >= OP_BRA)  
221      {      {
222      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      int dlen;
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
223    
224        CLASS_REF_REPEAT:      /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228        switch(*code)  #ifdef SUPPORT_LIBREADLINE
229          {      if (isatty(fileno(f)))
230          case OP_CRSTAR:        {
231          case OP_CRMINSTAR:        size_t len;
232          case OP_CRPLUS:        char *s = readline(prompt);
233          case OP_CRMINPLUS:        if (s == NULL) return (here == start)? NULL : start;
234          case OP_CRQUERY:        len = strlen(s);
235          case OP_CRMINQUERY:        if (len > 0) add_history(s);
236          fprintf(outfile, "%s", OP_names[*code]);        if (len > rlen - 1) len = rlen - 1;
237          break;        memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241          }
242        else
243    #endif
244    
245          case OP_CRRANGE:      /* Read the next line by normal means, prompting if the file is stdin. */
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
246    
247          default:        {
248          code--;        if (f == stdin) printf(prompt);
249          }        if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253        dlen = (int)strlen((char *)here);
254        if (dlen > 0 && here[dlen - 1] == '\n') return start;
255        here += dlen;
256        }
257    
258      else
259        {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269        }        }
     break;  
270    
271      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274        buffer_size = new_buffer_size;
275    
276      default:      start = new_buffer + (start - buffer);
277      fprintf(outfile, "    %s", OP_names[*code]);      here = new_buffer + (here - buffer);
278      break;  
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286      }      }
287      }
288    
289    return NULL;  /* Control never gets here */
290    }
291    
292    code++;  
293    fprintf(outfile, "\n");  
294    
295    
296    
297    
298    /*************************************************
299    *          Read number from string               *
300    *************************************************/
301    
302    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303    around with conditional compilation, just do the job by hand. It is only used
304    for unpicking arguments, so just keep it simple.
305    
306    Arguments:
307      str           string to be converted
308      endptr        where to put the end pointer
309    
310    Returns:        the converted value, as an int
311    */
312    
/* Convert a decimal number at the start of a string, by hand.

Leading white space is skipped, then consecutive decimal digits are
accumulated. No sign is recognised and no overflow check is made.

Arguments:
  str           string to be converted
  endptr        receives a pointer to the first character not consumed

Returns:        the accumulated value (zero if there were no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

while (isspace(*cursor)) cursor++;

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
322    
323    
324    
325    
326    /*************************************************
327    *            Convert UTF-8 string to value       *
328    *************************************************/
329    
330    /* This function takes one or more bytes that represents a UTF-8 character,
331    and returns the value of the character.
332    
333    Argument:
334      utf8bytes   a pointer to the byte vector
335      vptr        a pointer to an int to receive the value
336    
337    Returns:      >  0 => the number of bytes consumed
338                  -6 to 0 => malformed UTF-8 character at offset = (-return)
339    */
340    
341    #if !defined NOUTF8
342    
343    static int
344    utf82ord(unsigned char *utf8bytes, int *vptr)
345    {
346    int c = *utf8bytes++;
347    int d = c;
348    int i, j, s;
349    
350    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
351      {
352      if ((d & 0x80) == 0) break;
353      d <<= 1;
354      }
355    
356    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
357    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
358    
359    /* i now has a value in the range 1-5 */
360    
361    s = 6*i;
362    d = (c & utf8_table3[i]) << s;
363    
364    for (j = 0; j < i; j++)
365      {
366      c = *utf8bytes++;
367      if ((c & 0xc0) != 0x80) return -(j+1);
368      s -= 6;
369      d |= (c & 0x3f) << s;
370    }    }
371    
372    /* Check that encoding was the correct unique one */
373    
374    for (j = 0; j < utf8_table1_size; j++)
375      if (d <= utf8_table1[j]) break;
376    if (j != i) return -(i+1);
377    
378    /* Valid value */
379    
380    *vptr = d;
381    return i+1;
382    }
383    
384    #endif
385    
386    
387    
388    /*************************************************
389    *       Convert character value to UTF-8         *
390    *************************************************/
391    
392    /* This function takes an integer value in the range 0 - 0x7fffffff
393    and encodes it as a UTF-8 character in 1 to 6 bytes.
394    
395    Arguments:
396      cvalue     the character value
397      utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399    Returns:     number of characters placed in the buffer
400    */
401    
402    #if !defined NOUTF8
403    
404    static int
405    ord2utf8(int cvalue, uschar *utf8bytes)
406    {
407    register int i, j;
408    for (i = 0; i < utf8_table1_size; i++)
409      if (cvalue <= utf8_table1[i]) break;
410    utf8bytes += i;
411    for (j = i; j > 0; j--)
412     {
413     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414     cvalue >>= 6;
415     }
416    *utf8bytes = utf8_table2[i] | cvalue;
417    return i + 1;
418  }  }
419    
420    #endif
421    
422    
 /* Character string printing function. */  
423    
424  static void pchars(unsigned char *p, int length)  /*************************************************
425    *             Print character string             *
426    *************************************************/
427    
428    /* Character string printing function. Must handle UTF-8 strings in utf8
429    mode. Yields number of characters printed. If handed a NULL file, just counts
430    chars without printing. */
431    
432    static int pchars(unsigned char *p, int length, FILE *f)
433  {  {
434  int c;  int c = 0;
435    int yield = 0;
436    
437  while (length-- > 0)  while (length-- > 0)
438    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
439      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
440      if (use_utf8)
441        {
442        int rc = utf82ord(p, &c);
443    
444        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
445          {
446          length -= rc - 1;
447          p += rc;
448          if (PRINTHEX(c))
449            {
450            if (f != NULL) fprintf(f, "%c", c);
451            yield++;
452            }
453          else
454            {
455            int n = 4;
456            if (f != NULL) fprintf(f, "\\x{%02x}", c);
457            yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461            }
462          continue;
463          }
464        }
465    #endif
466    
467       /* Not UTF-8, or malformed UTF-8  */
468    
469      c = *p++;
470      if (PRINTHEX(c))
471        {
472        if (f != NULL) fprintf(f, "%c", c);
473        yield++;
474        }
475      else
476        {
477        if (f != NULL) fprintf(f, "\\x%02x", c);
478        yield += 4;
479        }
480      }
481    
482    return yield;
483    }
484    
485    
486    
487    /*************************************************
488    *              Callout function                  *
489    *************************************************/
490    
491    /* Called from PCRE as a result of the (?C) item. We print out where we are in
492    the match. Yield zero unless more callouts than the fail count, or the callout
493    data is not zero. */
494    
495    static int callout(pcre_callout_block *cb)
496    {
497    FILE *f = (first_callout | callout_extra)? outfile : NULL;
498    int i, pre_start, post_start, subject_length;
499    
500    if (callout_extra)
501      {
502      fprintf(f, "Callout %d: last capture = %d\n",
503        cb->callout_number, cb->capture_last);
504    
505      for (i = 0; i < cb->capture_top * 2; i += 2)
506        {
507        if (cb->offset_vector[i] < 0)
508          fprintf(f, "%2d: <unset>\n", i/2);
509        else
510          {
511          fprintf(f, "%2d: ", i/2);
512          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513            cb->offset_vector[i+1] - cb->offset_vector[i], f);
514          fprintf(f, "\n");
515          }
516        }
517      }
518    
519    /* Re-print the subject in canonical form, the first time or if giving full
520    datails. On subsequent calls in the same match, we use pchars just to find the
521    printed lengths of the substrings. */
522    
523    if (f != NULL) fprintf(f, "--->");
524    
525    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527      cb->current_position - cb->start_match, f);
528    
529    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530    
531    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532      cb->subject_length - cb->current_position, f);
533    
534    if (f != NULL) fprintf(f, "\n");
535    
536    /* Always print appropriate indicators, with callout number if not already
537    shown. For automatic callouts, show the pattern offset. */
538    
539    if (cb->callout_number == 255)
540      {
541      fprintf(outfile, "%+3d ", cb->pattern_position);
542      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
543      }
544    else
545      {
546      if (callout_extra) fprintf(outfile, "    ");
547        else fprintf(outfile, "%3d ", cb->callout_number);
548      }
549    
550    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551    fprintf(outfile, "^");
552    
553    if (post_start > 0)
554      {
555      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556      fprintf(outfile, "^");
557      }
558    
559    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560      fprintf(outfile, " ");
561    
562    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563      pbuffer + cb->pattern_position);
564    
565    fprintf(outfile, "\n");
566    first_callout = 0;
567    
568    if (cb->callout_data != NULL)
569      {
570      int callout_data = *((int *)(cb->callout_data));
571      if (callout_data != 0)
572        {
573        fprintf(outfile, "Callout data = %d\n", callout_data);
574        return callout_data;
575        }
576      }
577    
578    return (cb->callout_number != callout_fail_id)? 0 :
579           (++callout_count >= callout_fail_count)? 1 : 0;
580  }  }
581    
582    
583    /*************************************************
584    *            Local malloc functions              *
585    *************************************************/
586    
587  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
588  compiled re. */  compiled re. */
589    
590  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
591  {  {
592    void *block = malloc(size);
593  gotten_store = size;  gotten_store = size;
594  if (log_store)  if (show_malloc)
595    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
596      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
597  return malloc(size);  }
598    
599    static void new_free(void *block)
600    {
601    if (show_malloc)
602      fprintf(outfile, "free             %p\n", block);
603    free(block);
604  }  }
605    
606    
607    /* For recursion malloc/free, to test stacking calls */
608    
609    static void *stack_malloc(size_t size)
610    {
611    void *block = malloc(size);
612    if (show_malloc)
613      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614    return block;
615    }
616    
617    static void stack_free(void *block)
618    {
619    if (show_malloc)
620      fprintf(outfile, "stack_free       %p\n", block);
621    free(block);
622    }
623    
624    
625    /*************************************************
626    *          Call pcre_fullinfo()                  *
627    *************************************************/
628    
629  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
630    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 637  if ((rc = pcre_fullinfo(re, study, optio
637    
638    
639    
640    /*************************************************
641    *         Byte flipping function                 *
642    *************************************************/
643    
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes; any other value reverses the
            four low-order bytes

Returns:    the flipped value; bits above the bytes involved are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653    
654    
655    
656    
657    /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704    }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
/* Compare at most n characters of two strings, ignoring case. As with
strncmp(), the comparison also ends at a terminating zero, so two equal
strings that are shorter than n are never read beyond their ends. A count
that is zero or negative compares nothing and yields 0. */

static int
strncmpic(uschar *s, uschar *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;          /* Both strings ended at the same place */
  }
return 0;
}
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740    no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
765    /*************************************************
766    *             Usage function                     *
767    *************************************************/
768    
/* Write the command-line help text to stdout. The lines for readline, DFA
matching and the POSIX interface depend on how pcretest was built. */

static void
usage(void)
{
/* The whole message is assembled at compile time by string concatenation and
written in one go. */

static const char text[] =
  "Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n"
#ifdef SUPPORT_LIBREADLINE
  "If input is a terminal, readline() is used to read from it.\n"
#else
  "This version of pcretest is not linked with readline().\n"
#endif
  "\nOptions:\n"
  "  -b       show compiled code (bytecode)\n"
  "  -C       show PCRE compile-time options and exit\n"
  "  -d       debug: show compiled code and information (-b and -i)\n"
#if !defined NODFA
  "  -dfa     force DFA matching for all subjects\n"
#endif
  "  -help    show usage information\n"
  "  -i       show information about compiled patterns\n"
  "  -m       output memory used information\n"
  "  -o <n>   set size of offsets vector to <n>\n"
#if !defined NOPOSIX
  "  -p       use POSIX interface\n"
#endif
  "  -q       quiet: do not output PCRE version number at start\n"
  "  -S <n>   set stack size to <n> megabytes\n"
  "  -s       output store (memory) used information\n"
  "  -t       time compilation and execution\n"
  "  -t <n>   time compilation and execution, repeating <n> times\n"
  "  -tm      time execution (matching) only\n"
  "  -tm <n>  time execution (matching) only, repeating <n> times\n";

fputs(text, stdout);
}
801    
802    
803    
804    /*************************************************
805    *                Main Program                    *
806    *************************************************/
807    
808  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
809  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 315  int options = 0; Line 816  int options = 0;
816  int study_options = 0;  int study_options = 0;
817  int op = 1;  int op = 1;
818  int timeit = 0;  int timeit = 0;
819    int timeitm = 0;
820  int showinfo = 0;  int showinfo = 0;
821  int showstore = 0;  int showstore = 0;
822    int quiet = 0;
823    int size_offsets = 45;
824    int size_offsets_max;
825    int *offsets = NULL;
826    #if !defined NOPOSIX
827  int posix = 0;  int posix = 0;
828    #endif
829  int debug = 0;  int debug = 0;
830  int done = 0;  int done = 0;
831  unsigned char buffer[30000];  int all_use_dfa = 0;
832  unsigned char dbuffer[1024];  int yield = 0;
833    int stack_size;
834    
835    /* These vectors store, end-to-end, a list of captured substring names. Assume
836    that 1024 is plenty long enough for the few names we'll be testing. */
837    
838    uschar copynames[1024];
839    uschar getnames[1024];
840    
841    uschar *copynamesptr;
842    uschar *getnamesptr;
843    
844  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
845    when I am debugging. They grow automatically when very long lines are read. */
846    
847    buffer = (unsigned char *)malloc(buffer_size);
848    dbuffer = (unsigned char *)malloc(buffer_size);
849    pbuffer = (unsigned char *)malloc(buffer_size);
850    
851    /* The outfile variable is static so that new_malloc can use it. */
852    
853  outfile = stdout;  outfile = stdout;
854    
855    /* The following  _setmode() stuff is some Windows magic that tells its runtime
856    library to translate CRLF into a single LF character. At least, that's what
857    I've been told: never having used Windows I take this all on trust. Originally
858    it set 0x8000, but then I was advised that _O_BINARY was better. */
859    
860    #if defined(_WIN32) || defined(WIN32)
861    _setmode( _fileno( stdout ), _O_BINARY );
862    #endif
863    
864  /* Scan options */  /* Scan options */
865    
866  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
867    {    {
868      unsigned char *endptr;
869    
870    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
871      showstore = 1;      showstore = 1;
872    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
873      else if (strcmp(argv[op], "-b") == 0) debug = 1;
874    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
875    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
876    #if !defined NODFA
877      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
878    #endif
879      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
880          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
881            *endptr == 0))
882        {
883        op++;
884        argc--;
885        }
886      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
887        {
888        int both = argv[op][2] == 0;
889        int temp;
890        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891                         *endptr == 0))
892          {
893          timeitm = temp;
894          op++;
895          argc--;
896          }
897        else timeitm = LOOPREPEAT;
898        if (both) timeit = timeitm;
899        }
900      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
901          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
902            *endptr == 0))
903        {
904    #if defined(_WIN32) || defined(WIN32)
905        printf("PCRE: -S not supported on this OS\n");
906        exit(1);
907    #else
908        int rc;
909        struct rlimit rlim;
910        getrlimit(RLIMIT_STACK, &rlim);
911        rlim.rlim_cur = stack_size * 1024 * 1024;
912        rc = setrlimit(RLIMIT_STACK, &rlim);
913        if (rc != 0)
914          {
915        printf("PCRE: setrlimit() failed with error %d\n", rc);
916        exit(1);
917          }
918        op++;
919        argc--;
920    #endif
921        }
922    #if !defined NOPOSIX
923    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
924    #endif
925      else if (strcmp(argv[op], "-C") == 0)
926        {
927        int rc;
928        printf("PCRE version %s\n", pcre_version());
929        printf("Compiled with\n");
930        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
931        printf("  %sUTF-8 support\n", rc? "" : "No ");
932        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
933        printf("  %sUnicode properties support\n", rc? "" : "No ");
934        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
935        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
936          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
937          (rc == -2)? "ANYCRLF" :
938          (rc == -1)? "ANY" : "???");
939        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
940        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
941                                         "all Unicode newlines");
942        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
943        printf("  Internal link size = %d\n", rc);
944        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
945        printf("  POSIX malloc threshold = %d\n", rc);
946        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
947        printf("  Default match limit = %d\n", rc);
948        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
949        printf("  Default recursion depth limit = %d\n", rc);
950        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
951        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
952        goto EXIT;
953        }
954      else if (strcmp(argv[op], "-help") == 0 ||
955               strcmp(argv[op], "--help") == 0)
956        {
957        usage();
958        goto EXIT;
959        }
960    else    else
961      {      {
962      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
963      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
964      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
965             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
966      }      }
967    op++;    op++;
968    argc--;    argc--;
969    }    }
970    
971    /* Get the store for the offsets vector, and remember what it was */
972    
973    size_offsets_max = size_offsets;
974    offsets = (int *)malloc(size_offsets_max * sizeof(int));
975    if (offsets == NULL)
976      {
977      printf("** Failed to get %d bytes of memory for offsets vector\n",
978        (int)(size_offsets_max * sizeof(int)));
979      yield = 1;
980      goto EXIT;
981      }
982    
983  /* Sort out the input and output files */  /* Sort out the input and output files */
984    
985  if (argc > 1)  if (argc > 1)
986    {    {
987    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
988    if (infile == NULL)    if (infile == NULL)
989      {      {
990      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
991      return 1;      yield = 1;
992        goto EXIT;
993      }      }
994    }    }
995    
996  if (argc > 2)  if (argc > 2)
997    {    {
998    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
999    if (outfile == NULL)    if (outfile == NULL)
1000      {      {
1001      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1002      return 1;      yield = 1;
1003        goto EXIT;
1004      }      }
1005    }    }
1006    
1007  /* Set alternative malloc function */  /* Set alternative malloc function */
1008    
1009  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1010    pcre_free = new_free;
1011    pcre_stack_malloc = stack_malloc;
1012    pcre_stack_free = stack_free;
1013    
1014  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1015    
1016  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1017    
1018  /* Main loop */  /* Main loop */
1019    
# Line 391  while (!done) Line 1024  while (!done)
1024    
1025  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
1026    regex_t preg;    regex_t preg;
1027      int do_posix = 0;
1028  #endif  #endif
1029    
1030    const char *error;    const char *error;
1031    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1032    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
1033      const unsigned char *tables = NULL;
1034      unsigned long int true_size, true_study_size = 0;
1035      size_t size, regex_gotten_store;
1036    int do_study = 0;    int do_study = 0;
1037    int do_debug = debug;    int do_debug = debug;
1038    int do_G = 0;    int do_G = 0;
1039    int do_g = 0;    int do_g = 0;
1040    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1041    int do_showrest = 0;    int do_showrest = 0;
1042    int do_posix = 0;    int do_flip = 0;
1043    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1044    
1045      use_utf8 = 0;
1046      debug_lengths = 1;
1047    
1048    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1049    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1050      fflush(outfile);
1051    
1052    p = buffer;    p = buffer;
1053    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1054    if (*p == 0) continue;    if (*p == 0) continue;
1055    
1056    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1057    complete, read more. */  
1058      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1059        {
1060        unsigned long int magic, get_options;
1061        uschar sbuf[8];
1062        FILE *f;
1063    
1064        p++;
1065        pp = p + (int)strlen((char *)p);
1066        while (isspace(pp[-1])) pp--;
1067        *pp = 0;
1068    
1069        f = fopen((char *)p, "rb");
1070        if (f == NULL)
1071          {
1072          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1073          continue;
1074          }
1075    
1076        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1077    
1078        true_size =
1079          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1080        true_study_size =
1081          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1082    
1083        re = (real_pcre *)new_malloc(true_size);
1084        regex_gotten_store = gotten_store;
1085    
1086        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1087    
1088        magic = ((real_pcre *)re)->magic_number;
1089        if (magic != MAGIC_NUMBER)
1090          {
1091          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1092            {
1093            do_flip = 1;
1094            }
1095          else
1096            {
1097            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1098            fclose(f);
1099            continue;
1100            }
1101          }
1102    
1103        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1104          do_flip? " (byte-inverted)" : "", p);
1105    
1106        /* Need to know if UTF-8 for printing data strings */
1107    
1108        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1109        use_utf8 = (get_options & PCRE_UTF8) != 0;
1110    
1111        /* Now see if there is any following study data */
1112    
1113        if (true_study_size != 0)
1114          {
1115          pcre_study_data *psd;
1116    
1117          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1118          extra->flags = PCRE_EXTRA_STUDY_DATA;
1119    
1120          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1121          extra->study_data = psd;
1122    
1123          if (fread(psd, 1, true_study_size, f) != true_study_size)
1124            {
1125            FAIL_READ:
1126            fprintf(outfile, "Failed to read data from %s\n", p);
1127            if (extra != NULL) new_free(extra);
1128            if (re != NULL) new_free(re);
1129            fclose(f);
1130            continue;
1131            }
1132          fprintf(outfile, "Study data loaded from %s\n", p);
1133          do_study = 1;     /* To get the data output if requested */
1134          }
1135        else fprintf(outfile, "No study data\n");
1136    
1137        fclose(f);
1138        goto SHOW_INFO;
1139        }
1140    
1141      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1142      the pattern; if it isn't complete, read more. */
1143    
1144    delimiter = *p++;    delimiter = *p++;
1145    
1146    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1147      {      {
1148      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1149      goto SKIP_DATA;      goto SKIP_DATA;
1150      }      }
1151    
1152    pp = p;    pp = p;
1153      poffset = p - buffer;
1154    
1155    for(;;)    for(;;)
1156      {      {
# Line 435  while (!done) Line 1161  while (!done)
1161        pp++;        pp++;
1162        }        }
1163      if (*pp != 0) break;      if (*pp != 0) break;
1164        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1165        {        {
1166        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1167        done = 1;        done = 1;
# Line 453  while (!done) Line 1170  while (!done)
1170      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1171      }      }
1172    
1173      /* The buffer may have moved while being extended; reset the start of data
1174      pointer to the correct relative point in the buffer. */
1175    
1176      p = buffer + poffset;
1177    
1178    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1179    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1180    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1181    
1182    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1183    
1184    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1185      for callouts. */
1186    
1187    *pp++ = 0;    *pp++ = 0;
1188      strcpy((char *)pbuffer, (char *)p);
1189    
1190    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1191    
# Line 473  while (!done) Line 1197  while (!done)
1197      {      {
1198      switch (*pp++)      switch (*pp++)
1199        {        {
1200          case 'f': options |= PCRE_FIRSTLINE; break;
1201        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1202        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1203        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 481  while (!done) Line 1206  while (!done)
1206    
1207        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1208        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1209          case 'B': do_debug = 1; break;
1210          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1211        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1212        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1213          case 'F': do_flip = 1; break;
1214        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1215        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1216          case 'J': options |= PCRE_DUPNAMES; break;
1217        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1218          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1219    
1220  #if !defined NOPOSIX  #if !defined NOPOSIX
1221        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 494  while (!done) Line 1224  while (!done)
1224        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1225        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1226        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1227          case 'Z': debug_lengths = 0; break;
1228          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1229          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1230    
1231        case 'L':        case 'L':
1232        ppp = pp;        ppp = pp;
1233        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1234          /* The '0' test is just in case this is an unterminated line. */
1235          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1236        *ppp = 0;        *ppp = 0;
1237        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1238          {          {
1239          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1240          goto SKIP_DATA;          goto SKIP_DATA;
1241          }          }
1242          locale_set = 1;
1243        tables = pcre_maketables();        tables = pcre_maketables();
1244        pp = ppp;        pp = ppp;
1245        break;        break;
1246    
1247        case '\n': case ' ': break;        case '>':
1248          to_file = pp;
1249          while (*pp != 0) pp++;
1250          while (isspace(pp[-1])) pp--;
1251          *pp = 0;
1252          break;
1253    
1254          case '<':
1255            {
1256            if (strncmp((char *)pp, "JS>", 3) == 0)
1257              {
1258              options |= PCRE_JAVASCRIPT_COMPAT;
1259              pp += 3;
1260              }
1261            else
1262              {
1263              int x = check_newline(pp, outfile);
1264              if (x == 0) goto SKIP_DATA;
1265              options |= x;
1266              while (*pp++ != '>');
1267              }
1268            }
1269          break;
1270    
1271          case '\r':                      /* So that it works in Windows */
1272          case '\n':
1273          case ' ':
1274          break;
1275    
1276        default:        default:
1277        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1278        goto SKIP_DATA;        goto SKIP_DATA;
# Line 524  while (!done) Line 1288  while (!done)
1288      {      {
1289      int rc;      int rc;
1290      int cflags = 0;      int cflags = 0;
1291    
1292      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1293      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1294        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1295        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1296        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1297    
1298      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1299    
1300      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 1302  while (!done)
1302    
1303      if (rc != 0)      if (rc != 0)
1304        {        {
1305        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1306        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1307        goto SKIP_DATA;        goto SKIP_DATA;
1308        }        }
# Line 545  while (!done) Line 1314  while (!done)
1314  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1315    
1316      {      {
1317      if (timeit)      if (timeit > 0)
1318        {        {
1319        register int i;        register int i;
1320        clock_t time_taken;        clock_t time_taken;
1321        clock_t start_time = clock();        clock_t start_time = clock();
1322        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1323          {          {
1324          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1325          if (re != NULL) free(re);          if (re != NULL) free(re);
1326          }          }
1327        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1328        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1329          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1330          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1331        }        }
1332    
1333      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 574  while (!done) Line 1343  while (!done)
1343          {          {
1344          for (;;)          for (;;)
1345            {            {
1346            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1347              {              {
1348              done = 1;              done = 1;
1349              goto CONTINUE;              goto CONTINUE;
# Line 592  while (!done) Line 1361  while (!done)
1361      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
1362      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
1363    
1364        if (log_store)
1365          fprintf(outfile, "Memory allocation (code space): %d\n",
1366            (int)(gotten_store -
1367                  sizeof(real_pcre) -
1368                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1369    
1370        /* Extract the size for possible writing before possibly flipping it,
1371        and remember the store that was got. */
1372    
1373        true_size = ((real_pcre *)re)->size;
1374        regex_gotten_store = gotten_store;
1375    
1376        /* If /S was present, study the regexp to generate additional info to
1377        help with the matching. */
1378    
1379        if (do_study)
1380          {
1381          if (timeit > 0)
1382            {
1383            register int i;
1384            clock_t time_taken;
1385            clock_t start_time = clock();
1386            for (i = 0; i < timeit; i++)
1387              extra = pcre_study(re, study_options, &error);
1388            time_taken = clock() - start_time;
1389            if (extra != NULL) free(extra);
1390            fprintf(outfile, "  Study time %.4f milliseconds\n",
1391              (((double)time_taken * 1000.0) / (double)timeit) /
1392                (double)CLOCKS_PER_SEC);
1393            }
1394          extra = pcre_study(re, study_options, &error);
1395          if (error != NULL)
1396            fprintf(outfile, "Failed to study: %s\n", error);
1397          else if (extra != NULL)
1398            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1399          }
1400    
1401        /* If the 'F' option was present, we flip the bytes of all the integer
1402        fields in the regex data block and the study block. This is to make it
1403        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1404        compiled on a different architecture. */
1405    
1406        if (do_flip)
1407          {
1408          real_pcre *rre = (real_pcre *)re;
1409          rre->magic_number =
1410            byteflip(rre->magic_number, sizeof(rre->magic_number));
1411          rre->size = byteflip(rre->size, sizeof(rre->size));
1412          rre->options = byteflip(rre->options, sizeof(rre->options));
1413          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1414          rre->top_bracket =
1415            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1416          rre->top_backref =
1417            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1418          rre->first_byte =
1419            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1420          rre->req_byte =
1421            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1422          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1423            sizeof(rre->name_table_offset));
1424          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1425            sizeof(rre->name_entry_size));
1426          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1427            sizeof(rre->name_count));
1428    
1429          if (extra != NULL)
1430            {
1431            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1432            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1433            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1434            }
1435          }
1436    
1437        /* Extract information from the compiled data if required */
1438    
1439        SHOW_INFO:
1440    
1441        if (do_debug)
1442          {
1443          fprintf(outfile, "------------------------------------------------------------------\n");
1444          pcre_printint(re, outfile, debug_lengths);
1445          }
1446    
1447      if (do_showinfo)      if (do_showinfo)
1448        {        {
1449          unsigned long int get_options, all_options;
1450    #if !defined NOINFOCHECK
1451        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1452        int count, backrefmax, first_char, need_char;  #endif
1453        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1454            hascrorlf;
1455        if (do_debug) print_internals(re);        int nameentrysize, namecount;
1456          const uschar *nametable;
1457    
1458        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1459        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1460        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1461        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1462        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1463        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1464          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1465          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1466          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1467          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1468          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1469          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1470    
1471    #if !defined NOINFOCHECK
1472        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1473        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1474          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 620  while (!done) Line 1482  while (!done)
1482            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1483              first_char, old_first_char);              first_char, old_first_char);
1484    
1485          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
1486            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1487              old_options);              get_options, old_options);
1488          }          }
1489    #endif
1490    
1491        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1492          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1493          size, gotten_store);          (int)size, (int)regex_gotten_store);
1494    
1495        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1496        if (backrefmax > 0)        if (backrefmax > 0)
1497          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
1498    
1499        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
1500          fprintf(outfile, "Case state changes\n");          {
1501            fprintf(outfile, "Named capturing subpatterns:\n");
1502            while (namecount-- > 0)
1503              {
1504              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1505                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1506                GET2(nametable, 0));
1507              nametable += nameentrysize;
1508              }
1509            }
1510    
1511          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1512          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1513    
1514          all_options = ((real_pcre *)re)->options;
1515          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1516    
1517          if (get_options == 0) fprintf(outfile, "No options\n");
1518            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1519              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1520              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1521              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1522              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1523              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1524              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1525              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1526              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1527              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1528              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1529              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1530              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1531              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1532              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1533              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1534    
1535          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1536    
1537          switch (get_options & PCRE_NEWLINE_BITS)
1538            {
1539            case PCRE_NEWLINE_CR:
1540            fprintf(outfile, "Forced newline sequence: CR\n");
1541            break;
1542    
1543            case PCRE_NEWLINE_LF:
1544            fprintf(outfile, "Forced newline sequence: LF\n");
1545            break;
1546    
1547            case PCRE_NEWLINE_CRLF:
1548            fprintf(outfile, "Forced newline sequence: CRLF\n");
1549            break;
1550    
1551            case PCRE_NEWLINE_ANYCRLF:
1552            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1553            break;
1554    
1555            case PCRE_NEWLINE_ANY:
1556            fprintf(outfile, "Forced newline sequence: ANY\n");
1557            break;
1558    
1559            default:
1560            break;
1561            }
1562    
1563        if (first_char == -1)        if (first_char == -1)
1564          {          {
1565          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1566          }          }
1567        else if (first_char < 0)        else if (first_char < 0)
1568          {          {
# Line 656  while (!done) Line 1570  while (!done)
1570          }          }
1571        else        else
1572          {          {
1573          if (isprint(first_char))          int ch = first_char & 255;
1574            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1575              "" : " (caseless)";
1576            if (PRINTHEX(ch))
1577              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1578          else          else
1579            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1580          }          }
1581    
1582        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 1585  while (!done)
1585          }          }
1586        else        else
1587          {          {
1588          if (isprint(need_char))          int ch = need_char & 255;
1589            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1590              "" : " (caseless)";
1591            if (PRINTHEX(ch))
1592              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1593          else          else
1594            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1595          }          }
       }  
1596    
1597      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1598      help with the matching. */        value, but it varies, depending on the computer architecture, and
1599          so messes up the test suite. (And with the /F option, it might be
1600          flipped.) */
1601    
1602      if (do_study)        if (do_study)
       {  
       if (timeit)  
1603          {          {
1604          register int i;          if (extra == NULL)
1605          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1606          clock_t start_time = clock();          else
1607          for (i = 0; i < LOOPREPEAT; i++)            {
1608            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1609          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1610          if (extra != NULL) free(extra);  
1611          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1612            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1613            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1614                {
1615                int i;
1616                int c = 24;
1617                fprintf(outfile, "Starting byte set: ");
1618                for (i = 0; i < 256; i++)
1619                  {
1620                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1621                    {
1622                    if (c > 75)
1623                      {
1624                      fprintf(outfile, "\n  ");
1625                      c = 2;
1626                      }
1627                    if (PRINTHEX(i) && i != ' ')
1628                      {
1629                      fprintf(outfile, "%c ", i);
1630                      c += 2;
1631                      }
1632                    else
1633                      {
1634                      fprintf(outfile, "\\x%02x ", i);
1635                      c += 5;
1636                      }
1637                    }
1638                  }
1639                fprintf(outfile, "\n");
1640                }
1641              }
1642          }          }
1643          }
1644    
1645        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1646        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1647          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1648    
1649        else if (do_showinfo)      if (to_file != NULL)
1650          {
1651          FILE *f = fopen((char *)to_file, "wb");
1652          if (f == NULL)
1653            {
1654            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1655            }
1656          else
1657          {          {
1658          uschar *start_bits = NULL;          uschar sbuf[8];
1659          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (uschar)((true_size >> 24) & 255);
1660          if (start_bits == NULL)          sbuf[1] = (uschar)((true_size >> 16) & 255);
1661            fprintf(outfile, "No starting character set\n");          sbuf[2] = (uschar)((true_size >>  8) & 255);
1662            sbuf[3] = (uschar)((true_size) & 255);
1663    
1664            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1665            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1666            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1667            sbuf[7] = (uschar)((true_study_size) & 255);
1668    
1669            if (fwrite(sbuf, 1, 8, f) < 8 ||
1670                fwrite(re, 1, true_size, f) < true_size)
1671              {
1672              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1673              }
1674          else          else
1675            {            {
1676            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1677            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1678              {              {
1679              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1680                    true_study_size)
1681                {                {
1682                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1683                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1684                }                }
1685                else fprintf(outfile, "Study data written to %s\n", to_file);
1686    
1687              }              }
           fprintf(outfile, "\n");  
1688            }            }
1689            fclose(f);
1690          }          }
1691    
1692          new_free(re);
1693          if (extra != NULL) new_free(extra);
1694          if (tables != NULL) new_free((void *)tables);
1695          continue;  /* With next regex */
1696        }        }
1697      }      }        /* End of non-POSIX compile */
1698    
1699    /* Read data lines and test them */    /* Read data lines and test them */
1700    
1701    for (;;)    for (;;)
1702      {      {
1703      unsigned char *q;      uschar *q;
1704      unsigned char *bptr = dbuffer;      uschar *bptr;
1705        int *use_offsets = offsets;
1706        int use_size_offsets = size_offsets;
1707        int callout_data = 0;
1708        int callout_data_set = 0;
1709      int count, c;      int count, c;
1710      int copystrings = 0;      int copystrings = 0;
1711        int find_match_limit = 0;
1712      int getstrings = 0;      int getstrings = 0;
1713      int getlist = 0;      int getlist = 0;
1714      int gmatched = 0;      int gmatched = 0;
1715      int start_offset = 0;      int start_offset = 0;
1716      int g_notempty = 0;      int g_notempty = 0;
1717      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
1718    
1719      options = 0;      options = 0;
1720    
1721      if (infile == stdin) printf("data> ");      *copynames = 0;
1722      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1723    
1724        copynamesptr = copynames;
1725        getnamesptr = getnames;
1726    
1727        pcre_callout = callout;
1728        first_callout = 1;
1729        callout_extra = 0;
1730        callout_count = 0;
1731        callout_fail_count = 999999;
1732        callout_fail_id = -1;
1733        show_malloc = 0;
1734    
1735        if (extra != NULL) extra->flags &=
1736          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1737    
1738        len = 0;
1739        for (;;)
1740        {        {
1741        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1742        goto CONTINUE;          {
1743            if (len > 0) break;
1744            done = 1;
1745            goto CONTINUE;
1746            }
1747          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1748          len = (int)strlen((char *)buffer);
1749          if (buffer[len-1] == '\n') break;
1750        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1751    
     len = (int)strlen((char *)buffer);  
1752      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1753      buffer[len] = 0;      buffer[len] = 0;
1754      if (len == 0) break;      if (len == 0) break;
# Line 772  while (!done) Line 1756  while (!done)
1756      p = buffer;      p = buffer;
1757      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1758    
1759      q = dbuffer;      bptr = q = dbuffer;
1760      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1761        {        {
1762        int i = 0;        int i = 0;
1763        int n = 0;        int n = 0;
1764    
1765        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1766          {          {
1767          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 793  while (!done) Line 1778  while (!done)
1778          c -= '0';          c -= '0';
1779          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1780            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1781    
1782    #if !defined NOUTF8
1783            if (use_utf8 && c > 255)
1784              {
1785              unsigned char buff8[8];
1786              int ii, utn;
1787              utn = ord2utf8(c, buff8);
1788              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1789              c = buff8[ii];   /* Last byte */
1790              }
1791    #endif
1792          break;          break;
1793    
1794          case 'x':          case 'x':
1795    
1796            /* Handle \x{..} specially - new Perl thing for utf8 */
1797    
1798    #if !defined NOUTF8
1799            if (*p == '{')
1800              {
1801              unsigned char *pt = p;
1802              c = 0;
1803              while (isxdigit(*(++pt)))
1804                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1805              if (*pt == '}')
1806                {
1807                unsigned char buff8[8];
1808                int ii, utn;
1809                utn = ord2utf8(c, buff8);
1810                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1811                c = buff8[ii];   /* Last byte */
1812                p = pt + 1;
1813                break;
1814                }
1815              /* Not correct form; fall through */
1816              }
1817    #endif
1818    
1819            /* Ordinary \x */
1820    
1821          c = 0;          c = 0;
1822          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1823            {            {
# Line 804  while (!done) Line 1826  while (!done)
1826            }            }
1827          break;          break;
1828    
1829          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1830          p--;          p--;
1831          continue;          continue;
1832    
1833            case '>':
1834            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1835            continue;
1836    
1837          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1838          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1839          continue;          continue;
# Line 817  while (!done) Line 1843  while (!done)
1843          continue;          continue;
1844    
1845          case 'C':          case 'C':
1846          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1847          copystrings |= 1 << n;            {
1848              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1849              copystrings |= 1 << n;
1850              }
1851            else if (isalnum(*p))
1852              {
1853              uschar *npp = copynamesptr;
1854              while (isalnum(*p)) *npp++ = *p++;
1855              *npp++ = 0;
1856              *npp = 0;
1857              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1858              if (n < 0)
1859                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1860              copynamesptr = npp;
1861              }
1862            else if (*p == '+')
1863              {
1864              callout_extra = 1;
1865              p++;
1866              }
1867            else if (*p == '-')
1868              {
1869              pcre_callout = NULL;
1870              p++;
1871              }
1872            else if (*p == '!')
1873              {
1874              callout_fail_id = 0;
1875              p++;
1876              while(isdigit(*p))
1877                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1878              callout_fail_count = 0;
1879              if (*p == '!')
1880                {
1881                p++;
1882                while(isdigit(*p))
1883                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1884                }
1885              }
1886            else if (*p == '*')
1887              {
1888              int sign = 1;
1889              callout_data = 0;
1890              if (*(++p) == '-') { sign = -1; p++; }
1891              while(isdigit(*p))
1892                callout_data = callout_data * 10 + *p++ - '0';
1893              callout_data *= sign;
1894              callout_data_set = 1;
1895              }
1896            continue;
1897    
1898    #if !defined NODFA
1899            case 'D':
1900    #if !defined NOPOSIX
1901            if (posix || do_posix)
1902              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1903            else
1904    #endif
1905              use_dfa = 1;
1906            continue;
1907    
1908            case 'F':
1909            options |= PCRE_DFA_SHORTEST;
1910          continue;          continue;
1911    #endif
1912    
1913          case 'G':          case 'G':
1914          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1915          getstrings |= 1 << n;            {
1916              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1917              getstrings |= 1 << n;
1918              }
1919            else if (isalnum(*p))
1920              {
1921              uschar *npp = getnamesptr;
1922              while (isalnum(*p)) *npp++ = *p++;
1923              *npp++ = 0;
1924              *npp = 0;
1925              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1926              if (n < 0)
1927                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1928              getnamesptr = npp;
1929              }
1930          continue;          continue;
1931    
1932          case 'L':          case 'L':
1933          getlist = 1;          getlist = 1;
1934          continue;          continue;
1935    
1936            case 'M':
1937            find_match_limit = 1;
1938            continue;
1939    
1940          case 'N':          case 'N':
1941          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1942          continue;          continue;
1943    
1944          case 'O':          case 'O':
1945          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1946          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1947              {
1948              size_offsets_max = n;
1949              free(offsets);
1950              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1951              if (offsets == NULL)
1952                {
1953                printf("** Failed to get %d bytes of memory for offsets vector\n",
1954                  (int)(size_offsets_max * sizeof(int)));
1955                yield = 1;
1956                goto EXIT;
1957                }
1958              }
1959            use_size_offsets = n;
1960            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1961            continue;
1962    
1963            case 'P':
1964            options |= PCRE_PARTIAL;
1965            continue;
1966    
1967            case 'Q':
1968            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1969            if (extra == NULL)
1970              {
1971              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1972              extra->flags = 0;
1973              }
1974            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1975            extra->match_limit_recursion = n;
1976            continue;
1977    
1978            case 'q':
1979            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1980            if (extra == NULL)
1981              {
1982              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1983              extra->flags = 0;
1984              }
1985            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1986            extra->match_limit = n;
1987            continue;
1988    
1989    #if !defined NODFA
1990            case 'R':
1991            options |= PCRE_DFA_RESTART;
1992            continue;
1993    #endif
1994    
1995            case 'S':
1996            show_malloc = 1;
1997          continue;          continue;
1998    
1999          case 'Z':          case 'Z':
2000          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2001          continue;          continue;
2002    
2003            case '?':
2004            options |= PCRE_NO_UTF8_CHECK;
2005            continue;
2006    
2007            case '<':
2008              {
2009              int x = check_newline(p, outfile);
2010              if (x == 0) goto NEXT_DATA;
2011              options |= x;
2012              while (*p++ != '>');
2013              }
2014            continue;
2015          }          }
2016        *q++ = c;        *q++ = c;
2017        }        }
2018      *q = 0;      *q = 0;
2019      len = q - dbuffer;      len = q - dbuffer;
2020    
2021        if ((all_use_dfa || use_dfa) && find_match_limit)
2022          {
2023          printf("**Match limit not relevant for DFA matching: ignored\n");
2024          find_match_limit = 0;
2025          }
2026    
2027      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2028      support timing. */      support timing or playing with the match limit or callout data. */
2029    
2030  #if !defined NOPOSIX  #if !defined NOPOSIX
2031      if (posix || do_posix)      if (posix || do_posix)
2032        {        {
2033        int rc;        int rc;
2034        int eflags = 0;        int eflags = 0;
2035        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
2036          if (use_size_offsets > 0)
2037            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2038        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2039        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2040    
2041        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2042    
2043        if (rc != 0)        if (rc != 0)
2044          {          {
2045          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2046          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2047          }          }
2048          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2049                  != 0)
2050            {
2051            fprintf(outfile, "Matched with REG_NOSUB\n");
2052            }
2053        else        else
2054          {          {
2055          size_t i;          size_t i;
2056          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2057            {            {
2058            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2059              {              {
2060              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2061              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2062                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2063              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2064              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
2065                {                {
2066                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
2067                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2068                    outfile);
2069                fprintf(outfile, "\n");                fprintf(outfile, "\n");
2070                }                }
2071              }              }
2072            }            }
2073          }          }
2074          free(pmatch);
2075        }        }
2076    
2077      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 896  while (!done) Line 2081  while (!done)
2081    
2082      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2083        {        {
2084        if (timeit)        if (timeitm > 0)
2085          {          {
2086          register int i;          register int i;
2087          clock_t time_taken;          clock_t time_taken;
2088          clock_t start_time = clock();          clock_t start_time = clock();
2089          for (i = 0; i < LOOPREPEAT; i++)  
2090    #if !defined NODFA
2091            if (all_use_dfa || use_dfa)
2092              {
2093              int workspace[1000];
2094              for (i = 0; i < timeitm; i++)
2095                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2096                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2097                  sizeof(workspace)/sizeof(int));
2098              }
2099            else
2100    #endif
2101    
2102            for (i = 0; i < timeitm; i++)
2103            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2104              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2105    
2106          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2107          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2108            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2109            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2110            }
2111    
2112          /* If find_match_limit is set, we want to do repeated matches with
2113          varying limits in order to find the minimum value for the match limit and
2114          for the recursion limit. */
2115    
2116          if (find_match_limit)
2117            {
2118            if (extra == NULL)
2119              {
2120              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2121              extra->flags = 0;
2122              }
2123    
2124            (void)check_match_limit(re, extra, bptr, len, start_offset,
2125              options|g_notempty, use_offsets, use_size_offsets,
2126              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2127              PCRE_ERROR_MATCHLIMIT, "match()");
2128    
2129            count = check_match_limit(re, extra, bptr, len, start_offset,
2130              options|g_notempty, use_offsets, use_size_offsets,
2131              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2132              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2133          }          }
2134    
2135        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2136          start_offset, options | g_notempty, offsets, size_offsets);  
2137          else if (callout_data_set)
2138            {
2139            if (extra == NULL)
2140              {
2141              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2142              extra->flags = 0;
2143              }
2144            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2145            extra->callout_data = &callout_data;
2146            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2147              options | g_notempty, use_offsets, use_size_offsets);
2148            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2149            }
2150    
2151          /* The normal case is just to do the match once, with the default
2152          value of match_limit. */
2153    
2154    #if !defined NODFA
2155          else if (all_use_dfa || use_dfa)
2156            {
2157            int workspace[1000];
2158            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2159              options | g_notempty, use_offsets, use_size_offsets, workspace,
2160              sizeof(workspace)/sizeof(int));
2161            if (count == 0)
2162              {
2163              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2164              count = use_size_offsets/2;
2165              }
2166            }
2167    #endif
2168    
2169        if (count == 0)        else
2170          {          {
2171          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2172          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2173            if (count == 0)
2174              {
2175              fprintf(outfile, "Matched, but too many substrings\n");
2176              count = use_size_offsets/3;
2177              }
2178          }          }
2179    
2180        /* Matched */        /* Matched */
2181    
2182        if (count >= 0)        if (count >= 0)
2183          {          {
2184          int i;          int i, maxcount;
2185    
2186    #if !defined NODFA
2187            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2188    #endif
2189              maxcount = use_size_offsets/3;
2190    
2191            /* This is a check against a lunatic return value. */
2192    
2193            if (count > maxcount)
2194              {
2195              fprintf(outfile,
2196                "** PCRE error: returned count %d is too big for offset size %d\n",
2197                count, use_size_offsets);
2198              count = use_size_offsets/3;
2199              if (do_g || do_G)
2200                {
2201                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2202                do_g = do_G = FALSE;        /* Break g/G loop */
2203                }
2204              }
2205    
2206          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2207            {            {
2208            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2209              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2210            else            else
2211              {              {
2212              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2213              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2214                  use_offsets[i+1] - use_offsets[i], outfile);
2215              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2216              if (i == 0)              if (i == 0)
2217                {                {
2218                if (do_showrest)                if (do_showrest)
2219                  {                  {
2220                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2221                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2222                      outfile);
2223                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2224                  }                  }
2225                }                }
# Line 949  while (!done) Line 2230  while (!done)
2230            {            {
2231            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2232              {              {
2233              char copybuffer[16];              char copybuffer[256];
2234              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2235                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2236              if (rc < 0)              if (rc < 0)
2237                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 959  while (!done) Line 2240  while (!done)
2240              }              }
2241            }            }
2242    
2243            for (copynamesptr = copynames;
2244                 *copynamesptr != 0;
2245                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2246              {
2247              char copybuffer[256];
2248              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2249                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2250              if (rc < 0)
2251                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2252              else
2253                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2254              }
2255    
2256          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2257            {            {
2258            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2259              {              {
2260              const char *substring;              const char *substring;
2261              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2262                i, &substring);                i, &substring);
2263              if (rc < 0)              if (rc < 0)
2264                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2265              else              else
2266                {                {
2267                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2268                free((void *)substring);                pcre_free_substring(substring);
2269                }                }
2270              }              }
2271            }            }
2272    
2273            for (getnamesptr = getnames;
2274                 *getnamesptr != 0;
2275                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2276              {
2277              const char *substring;
2278              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2279                count, (char *)getnamesptr, &substring);
2280              if (rc < 0)
2281                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2282              else
2283                {
2284                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2285                pcre_free_substring(substring);
2286                }
2287              }
2288    
2289          if (getlist)          if (getlist)
2290            {            {
2291            const char **stringlist;            const char **stringlist;
2292            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2293              &stringlist);              &stringlist);
2294            if (rc < 0)            if (rc < 0)
2295              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 2299  while (!done)
2299                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2300              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2301                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2302              free((void *)stringlist);              /* free((void *)stringlist); */
2303                pcre_free_substring_list(stringlist);
2304              }              }
2305            }            }
2306          }          }
2307    
2308          /* There was a partial match */
2309    
2310          else if (count == PCRE_ERROR_PARTIAL)
2311            {
2312            fprintf(outfile, "Partial match");
2313    #if !defined NODFA
2314            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2315              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2316                bptr + use_offsets[0]);
2317    #endif
2318            fprintf(outfile, "\n");
2319            break;  /* Out of the /g loop */
2320            }
2321    
2322        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2323        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2324        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2325        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2326        was checked before setting PCRE_NOTEMPTY. */  
2327          Complication arises in the case when the newline option is "any" or
2328          "anycrlf". If the previous match was at the end of a line terminated by
2329          CRLF, an advance of one character just passes the \r, whereas we should
2330          prefer the longer newline sequence, as does the code in pcre_exec().
2331          Fudge the offset value to achieve this.
2332    
2333          Otherwise, in the case of UTF-8 matching, the advance must be one
2334          character, not one byte. */
2335    
2336        else        else
2337          {          {
2338          if (g_notempty != 0)          if (g_notempty != 0)
2339            {            {
2340            offsets[0] = start_offset;            int onechar = 1;
2341            offsets[1] = start_offset + 1;            unsigned int obits = ((real_pcre *)re)->options;
2342              use_offsets[0] = start_offset;
2343              if ((obits & PCRE_NEWLINE_BITS) == 0)
2344                {
2345                int d;
2346                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2347                obits = (d == '\r')? PCRE_NEWLINE_CR :
2348                        (d == '\n')? PCRE_NEWLINE_LF :
2349                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2350                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2351                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2352                }
2353              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2354                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2355                  &&
2356                  start_offset < len - 1 &&
2357                  bptr[start_offset] == '\r' &&
2358                  bptr[start_offset+1] == '\n')
2359                onechar++;
2360              else if (use_utf8)
2361                {
2362                while (start_offset + onechar < len)
2363                  {
2364                  int tb = bptr[start_offset+onechar];
2365                  if (tb <= 127) break;
2366                  tb &= 0xc0;
2367                  if (tb != 0 && tb != 0xc0) onechar++;
2368                  }
2369                }
2370              use_offsets[1] = start_offset + onechar;
2371            }            }
2372          else          else
2373            {            {
2374            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2375              {              {
2376              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2377              }              }
2378              else fprintf(outfile, "Error %d\n", count);
2379            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2380            }            }
2381          }          }
# Line 1025  while (!done) Line 2387  while (!done)
2387        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2388        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
2389        what Perl's /g option does. This turns out to be rather cunning. First        what Perl's /g option does. This turns out to be rather cunning. First
2390        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2391        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
2392          character. */
2393    
2394        g_notempty = 0;        g_notempty = 0;
2395        if (offsets[0] == offsets[1])  
2396          if (use_offsets[0] == use_offsets[1])
2397          {          {
2398          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
2399          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2400          }          }
2401    
2402        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2403    
2404        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
2405    
2406        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2407    
2408        else        else
2409          {          {
2410          bptr += offsets[1];          bptr += use_offsets[1];
2411          len -= offsets[1];          len -= use_offsets[1];
2412          }          }
2413        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2414    
2415        NEXT_DATA: continue;
2416      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2417    
2418    CONTINUE:    CONTINUE:
# Line 1055  while (!done) Line 2421  while (!done)
2421    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2422  #endif  #endif
2423    
2424    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2425    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2426    if (tables != NULL)    if (tables != NULL)
2427      {      {
2428      free((void *)tables);      new_free((void *)tables);
2429      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2430        locale_set = 0;
2431      }      }
2432    }    }
2433    
2434  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2435  return 0;  
2436    EXIT:
2437    
2438    if (infile != NULL && infile != stdin) fclose(infile);
2439    if (outfile != NULL && outfile != stdout) fclose(outfile);
2440    
2441    free(buffer);
2442    free(dbuffer);
2443    free(pbuffer);
2444    free(offsets);
2445    
2446    return yield;
2447  }  }
2448    
2449  /* End */  /* End of pcretest.c */

Legend:
Removed from v.43  
changed lines
  Added in v.351

  ViewVC Help
Powered by ViewVC 1.1.5