/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #include <unistd.h>
53    #include <readline/readline.h>
54    #include <readline/history.h>
55    #endif
56    
57    
58    /* A number of things vary for Windows builds. Originally, pcretest opened its
59    input and output without "b"; then I was told that "b" was needed in some
60    environments, so it was added for release 5.0 to both the input and output. (It
61    makes no difference on Unix-like systems.) Later I was told that it is wrong
62    for the input on Windows. I've now abstracted the modes into two macros that
63    are set here, to make it easier to fiddle with them, and removed "b" from the
64    input mode under Windows. */
65    
66    #if defined(_WIN32) || defined(WIN32)
67    #include <io.h>                /* For _setmode() */
68    #include <fcntl.h>             /* For _O_BINARY */
69    #define INPUT_MODE   "r"
70    #define OUTPUT_MODE  "wb"
71    
72    #else
73    #include <sys/time.h>          /* These two includes are needed */
74    #include <sys/resource.h>      /* for setrlimit(). */
75    #define INPUT_MODE   "rb"
76    #define OUTPUT_MODE  "wb"
77    #endif
78    
 /* Use the internal info for displaying the results of pcre_study(). */  
79    
80  #include "internal.h"  /* We have to include pcre_internal.h because we need the internal info for
81    displaying the results of pcre_study() and we also need to know about the
82    internal macros, structures, and other internal data values; pcretest has
83    "inside information" compared to a program that strictly follows the PCRE API.
84    
85    Although pcre_internal.h does itself include pcre.h, we explicitly include it
86    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87    appropriately for an application, not for building PCRE. */
88    
89    #include "pcre.h"
90    #include "pcre_internal.h"
91    
92    /* We need access to the data tables that PCRE uses. So as not to have to keep
93    two copies, we include the source file here, changing the names of the external
94    symbols to prevent clashes. */
95    
96    #define _pcre_utf8_table1      utf8_table1
97    #define _pcre_utf8_table1_size utf8_table1_size
98    #define _pcre_utf8_table2      utf8_table2
99    #define _pcre_utf8_table3      utf8_table3
100    #define _pcre_utf8_table4      utf8_table4
101    #define _pcre_utt              utt
102    #define _pcre_utt_size         utt_size
103    #define _pcre_utt_names        utt_names
104    #define _pcre_OP_lengths       OP_lengths
105    
106    #include "pcre_tables.c"
107    
108    /* We also need the pcre_printint() function for printing out compiled
109    patterns. This function is in a separate file so that it can be included in
110    pcre_compile.c when that module is compiled with debugging enabled.
111    
112    The definition of the macro PRINTABLE, which determines whether to print an
113    output character as-is or as a hex value when showing compiled patterns, is
114    contained in this file. We use it here also, in cases when the locale has not
115    been explicitly changed, so as to get consistent output from systems that
116    differ in their output from isprint() even in the "C" locale. */
117    
118    #include "pcre_printint.src"
119    
120    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
121    
122    
123  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
124  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 128  Makefile. */
128  #include "pcreposix.h"  #include "pcreposix.h"
129  #endif  #endif
130    
131    /* It is also possible, for the benefit of the version currently imported into
132    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133    interface to the DFA matcher (NODFA), and without the doublecheck of the old
134    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135    UTF8 support if PCRE is built without it. */
136    
137    #ifndef SUPPORT_UTF8
138    #ifndef NOUTF8
139    #define NOUTF8
140    #endif
141    #endif
142    
143    
144    /* Other parameters */
145    
146  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
147  #ifdef CLK_TCK  #ifdef CLK_TCK
148  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 151  Makefile. */
151  #endif  #endif
152  #endif  #endif
153    
154  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
155    
156    #define LOOPREPEAT 500000
157    
158    /* Static variables */
159    
160  static FILE *outfile;  static FILE *outfile;
161  static int log_store = 0;  static int log_store = 0;
162    static int callout_count;
163    static int callout_extra;
164    static int callout_fail_count;
165    static int callout_fail_id;
166    static int debug_lengths;
167    static int first_callout;
168    static int locale_set = 0;
169    static int show_malloc;
170    static int use_utf8;
171  static size_t gotten_store;  static size_t gotten_store;
172    
173    /* The buffers grow automatically if very long input lines are encountered. */
174    
175    static int buffer_size = 50000;
176    static uschar *buffer = NULL;
177    static uschar *dbuffer = NULL;
178    static uschar *pbuffer = NULL;
179    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
180    
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
181    
182    /*************************************************
183    *        Read or extend an input line            *
184    *************************************************/
185    
186  static void print_internals(pcre *re)  /* Input lines are read into buffer, but both patterns and data lines can be
187  {  continued over multiple input lines. In addition, if the buffer fills up, we
188  unsigned char *code = ((real_pcre *)re)->code;  want to automatically expand it so as to be able to handle extremely large
189    lines that are needed for certain stress tests. When the input buffer is
190    expanded, the other two buffers must also be expanded likewise, and the
191    contents of pbuffer, which are a copy of the input for callouts, must be
192    preserved (for when expansion happens for a data line). This is not the most
193    optimal way of handling this, but hey, this is just a test program!
194    
195    Arguments:
196      f            the file to read
197      start        where in buffer to start (this *must* be within buffer)
198      prompt       for stdin or readline()
199    
200    Returns:       pointer to the start of new data
201                   could be a copy of start, or could be moved
202                   NULL if no data read and EOF reached
203    */
204    
205  fprintf(outfile, "------------------------------------------------------------------\n");  static uschar *
206    extend_inputline(FILE *f, uschar *start, const char *prompt)
207    {
208    uschar *here = start;
209    
210  for(;;)  for (;;)
211    {    {
212    int c;    int rlen = buffer_size - (here - buffer);
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
213    
214    if (*code >= OP_BRA)    if (rlen > 1000)
215      {      {
216      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      int dlen;
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
217    
218        CLASS_REF_REPEAT:      /* If libreadline support is required, use readline() to read a line if the
219        input is a terminal. Note that readline() removes the trailing newline, so
220        we must put it back again, to be compatible with fgets(). */
221    
222        switch(*code)  #ifdef SUPPORT_LIBREADLINE
223          {      if (isatty(fileno(f)))
224          case OP_CRSTAR:        {
225          case OP_CRMINSTAR:        size_t len;
226          case OP_CRPLUS:        char *s = readline(prompt);
227          case OP_CRMINPLUS:        if (s == NULL) return (here == start)? NULL : start;
228          case OP_CRQUERY:        len = strlen(s);
229          case OP_CRMINQUERY:        if (len > 0) add_history(s);
230          fprintf(outfile, "%s", OP_names[*code]);        if (len > rlen - 1) len = rlen - 1;
231          break;        memcpy(here, s, len);
232          here[len] = '\n';
233          here[len+1] = 0;
234          free(s);
235          }
236        else
237    #endif
238    
239          case OP_CRRANGE:      /* Read the next line by normal means, prompting if the file is stdin. */
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
240    
241          default:        {
242          code--;        if (f == stdin) printf(prompt);
243          }        if (fgets((char *)here, rlen,  f) == NULL)
244            return (here == start)? NULL : start;
245        }        }
     break;  
246    
247      /* Anything else is just a one-node item */      dlen = (int)strlen((char *)here);
248        if (dlen > 0 && here[dlen - 1] == '\n') return start;
249        here += dlen;
250        }
251    
252      default:    else
253      fprintf(outfile, "    %s", OP_names[*code]);      {
254      break;      int new_buffer_size = 2*buffer_size;
255        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
258    
259        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
260          {
261          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
262          exit(1);
263          }
264    
265        memcpy(new_buffer, buffer, buffer_size);
266        memcpy(new_pbuffer, pbuffer, buffer_size);
267    
268        buffer_size = new_buffer_size;
269    
270        start = new_buffer + (start - buffer);
271        here = new_buffer + (here - buffer);
272    
273        free(buffer);
274        free(dbuffer);
275        free(pbuffer);
276    
277        buffer = new_buffer;
278        dbuffer = new_dbuffer;
279        pbuffer = new_pbuffer;
280      }      }
281      }
282    
283    return NULL;  /* Control never gets here */
284    }
285    
286    
287    
288    
289    
290    
291    
292    /*************************************************
293    *          Read number from string               *
294    *************************************************/
295    
296    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
297    around with conditional compilation, just do the job by hand. It is only used
298    for unpicking arguments, so just keep it simple.
299    
300    Arguments:
301      str           string to be converted
302      endptr        where to put the end pointer
303    
304    Returns:        the value, as an int
305    */
306    
/* Skip leading white space, then accumulate a run of decimal digits. The
position of the first character that is not part of the number is handed back
through endptr; if there are no digits the result is zero. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

for (; *cursor != 0 && isspace(*cursor); cursor++) { /* skip */ }

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
316    
317    
318    
319    
320    /*************************************************
321    *            Convert UTF-8 string to value       *
322    *************************************************/
323    
324    code++;  /* This function takes one or more bytes that represents a UTF-8 character,
325    fprintf(outfile, "\n");  and returns the value of the character.
326    
327    Argument:
328      utf8bytes   a pointer to the byte vector
329      vptr        a pointer to an int to receive the value
330    
331    Returns:      >  0 => the number of bytes consumed
332                  -6 to 0 => malformed UTF-8 character at offset = (-return)
333    */
334    
335    #if !defined NOUTF8
336    
337    static int
338    utf82ord(unsigned char *utf8bytes, int *vptr)
339    {
340    int c = *utf8bytes++;
341    int d = c;
342    int i, j, s;
343    
344    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
345      {
346      if ((d & 0x80) == 0) break;
347      d <<= 1;
348      }
349    
350    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
351    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
352    
353    /* i now has a value in the range 1-5 */
354    
355    s = 6*i;
356    d = (c & utf8_table3[i]) << s;
357    
358    for (j = 0; j < i; j++)
359      {
360      c = *utf8bytes++;
361      if ((c & 0xc0) != 0x80) return -(j+1);
362      s -= 6;
363      d |= (c & 0x3f) << s;
364    }    }
365    
366    /* Check that encoding was the correct unique one */
367    
368    for (j = 0; j < utf8_table1_size; j++)
369      if (d <= utf8_table1[j]) break;
370    if (j != i) return -(i+1);
371    
372    /* Valid value */
373    
374    *vptr = d;
375    return i+1;
376    }
377    
378    #endif
379    
380    
381    
382    /*************************************************
383    *       Convert character value to UTF-8         *
384    *************************************************/
385    
386    /* This function takes an integer value in the range 0 - 0x7fffffff
387    and encodes it as a UTF-8 character in 1 to 6 bytes.
388    
389    Arguments:
390      cvalue     the character value
391      utf8bytes  pointer to buffer for result - at least 6 bytes long
392    
393    Returns:     number of bytes placed in the buffer
394    */
395    
396    #if !defined NOUTF8
397    
398    static int
399    ord2utf8(int cvalue, uschar *utf8bytes)
400    {
401    register int i, j;
402    for (i = 0; i < utf8_table1_size; i++)
403      if (cvalue <= utf8_table1[i]) break;
404    utf8bytes += i;
405    for (j = i; j > 0; j--)
406     {
407     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
408     cvalue >>= 6;
409     }
410    *utf8bytes = utf8_table2[i] | cvalue;
411    return i + 1;
412  }  }
413    
414    #endif
415    
416    
 /* Character string printing function. */  
417    
418  static void pchars(unsigned char *p, int length)  /*************************************************
419    *             Print character string             *
420    *************************************************/
421    
422    /* Character string printing function. Must handle UTF-8 strings in utf8
423    mode. Yields number of characters printed. If handed a NULL file, just counts
424    chars without printing. */
425    
426    static int pchars(unsigned char *p, int length, FILE *f)
427  {  {
428  int c;  int c = 0;
429    int yield = 0;
430    
431  while (length-- > 0)  while (length-- > 0)
432    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
433      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
434      if (use_utf8)
435        {
436        int rc = utf82ord(p, &c);
437    
438        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
439          {
440          length -= rc - 1;
441          p += rc;
442          if (PRINTHEX(c))
443            {
444            if (f != NULL) fprintf(f, "%c", c);
445            yield++;
446            }
447          else
448            {
449            int n = 4;
450            if (f != NULL) fprintf(f, "\\x{%02x}", c);
451            yield += (n <= 0x000000ff)? 2 :
452                     (n <= 0x00000fff)? 3 :
453                     (n <= 0x0000ffff)? 4 :
454                     (n <= 0x000fffff)? 5 : 6;
455            }
456          continue;
457          }
458        }
459    #endif
460    
461       /* Not UTF-8, or malformed UTF-8  */
462    
463      c = *p++;
464      if (PRINTHEX(c))
465        {
466        if (f != NULL) fprintf(f, "%c", c);
467        yield++;
468        }
469      else
470        {
471        if (f != NULL) fprintf(f, "\\x%02x", c);
472        yield += 4;
473        }
474      }
475    
476    return yield;
477  }  }
478    
479    
480    
481    /*************************************************
482    *              Callout function                  *
483    *************************************************/
484    
485    /* Called from PCRE as a result of the (?C) item. We print out where we are in
486    the match. Yield zero unless more callouts than the fail count, or the callout
487    data is not zero. */
488    
489    static int callout(pcre_callout_block *cb)
490    {
491    FILE *f = (first_callout | callout_extra)? outfile : NULL;
492    int i, pre_start, post_start, subject_length;
493    
494    if (callout_extra)
495      {
496      fprintf(f, "Callout %d: last capture = %d\n",
497        cb->callout_number, cb->capture_last);
498    
499      for (i = 0; i < cb->capture_top * 2; i += 2)
500        {
501        if (cb->offset_vector[i] < 0)
502          fprintf(f, "%2d: <unset>\n", i/2);
503        else
504          {
505          fprintf(f, "%2d: ", i/2);
506          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
507            cb->offset_vector[i+1] - cb->offset_vector[i], f);
508          fprintf(f, "\n");
509          }
510        }
511      }
512    
513    /* Re-print the subject in canonical form, the first time or if giving full
514    datails. On subsequent calls in the same match, we use pchars just to find the
515    printed lengths of the substrings. */
516    
517    if (f != NULL) fprintf(f, "--->");
518    
519    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
520    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
521      cb->current_position - cb->start_match, f);
522    
523    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
524    
525    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
526      cb->subject_length - cb->current_position, f);
527    
528    if (f != NULL) fprintf(f, "\n");
529    
530    /* Always print appropriate indicators, with callout number if not already
531    shown. For automatic callouts, show the pattern offset. */
532    
533    if (cb->callout_number == 255)
534      {
535      fprintf(outfile, "%+3d ", cb->pattern_position);
536      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
537      }
538    else
539      {
540      if (callout_extra) fprintf(outfile, "    ");
541        else fprintf(outfile, "%3d ", cb->callout_number);
542      }
543    
544    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
545    fprintf(outfile, "^");
546    
547    if (post_start > 0)
548      {
549      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
550      fprintf(outfile, "^");
551      }
552    
553    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
554      fprintf(outfile, " ");
555    
556    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
557      pbuffer + cb->pattern_position);
558    
559    fprintf(outfile, "\n");
560    first_callout = 0;
561    
562    if (cb->callout_data != NULL)
563      {
564      int callout_data = *((int *)(cb->callout_data));
565      if (callout_data != 0)
566        {
567        fprintf(outfile, "Callout data = %d\n", callout_data);
568        return callout_data;
569        }
570      }
571    
572    return (cb->callout_number != callout_fail_id)? 0 :
573           (++callout_count >= callout_fail_count)? 1 : 0;
574    }
575    
576    
577    /*************************************************
578    *            Local malloc functions              *
579    *************************************************/
580    
581  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
582  compiled re. */  compiled re. */
583    
584  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
585  {  {
586    void *block = malloc(size);
587  gotten_store = size;  gotten_store = size;
588  if (log_store)  if (show_malloc)
589    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
590      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
591  return malloc(size);  }
592    
593    static void new_free(void *block)
594    {
595    if (show_malloc)
596      fprintf(outfile, "free             %p\n", block);
597    free(block);
598  }  }
599    
600    
601    /* For recursion malloc/free, to test stacking calls */
602    
603    static void *stack_malloc(size_t size)
604    {
605    void *block = malloc(size);
606    if (show_malloc)
607      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
608    return block;
609    }
610    
611    static void stack_free(void *block)
612    {
613    if (show_malloc)
614      fprintf(outfile, "stack_free       %p\n", block);
615    free(block);
616    }
617    
618    
619    /*************************************************
620    *          Call pcre_fullinfo()                  *
621    *************************************************/
622    
623  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
624    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 631  if ((rc = pcre_fullinfo(re, study, optio
631    
632    
633    
634    /*************************************************
635    *         Byte flipping function                 *
636    *************************************************/
637    
/* Reverse the byte order of a value. When n is 2 the two low-order bytes are
swapped; for any other n the four low-order bytes are reversed. Bits above the
bytes being handled do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
const unsigned long int b0 = value & 0x000000ff;
const unsigned long int b1 = (value & 0x0000ff00) >> 8;
const unsigned long int b2 = (value & 0x00ff0000) >> 16;
const unsigned long int b3 = (value & 0xff000000) >> 24;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
647    
648    
649    
650    
651    /*************************************************
652    *        Check match or recursion limit          *
653    *************************************************/
654    
655    static int
656    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657      int start_offset, int options, int *use_offsets, int use_size_offsets,
658      int flag, unsigned long int *limit, int errnumber, const char *msg)
659    {
660    int count;
661    int min = 0;
662    int mid = 64;
663    int max = -1;
664    
665    extra->flags |= flag;
666    
667    for (;;)
668      {
669      *limit = mid;
670    
671      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672        use_offsets, use_size_offsets);
673    
674      if (count == errnumber)
675        {
676        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
677        min = mid;
678        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
679        }
680    
681      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682                             count == PCRE_ERROR_PARTIAL)
683        {
684        if (mid == min + 1)
685          {
686          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
687          break;
688          }
689        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
690        max = mid;
691        mid = (min + mid)/2;
692        }
693      else break;    /* Some other error */
694      }
695    
696    extra->flags &= ~flag;
697    return count;
698    }
699    
700    
701    
702    /*************************************************
703    *         Case-independent strncmp() function    *
704    *************************************************/
705    
706    /*
707    Arguments:
708      s         first string
709      t         second string
710      n         number of characters to compare
711    
712    Returns:    < 0, = 0, or > 0, according to the comparison
713    */
714    
715    static int
716    strncmpic(uschar *s, uschar *t, int n)
717    {
718    while (n--)
719      {
720      int c = tolower(*s++) - tolower(*t++);
721      if (c) return c;
722      }
723    return 0;
724    }
725    
726    
727    
728    /*************************************************
729    *         Check newline indicator                *
730    *************************************************/
731    
732    /* This is used both at compile and run-time to check for <xxx> escapes, where
733    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
734    no match.
735    
736    Arguments:
737      p           points after the leading '<'
738      f           file for error message
739    
740    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
741    */
742    
743    static int
744    check_newline(uschar *p, FILE *f)
745    {
746    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753    fprintf(f, "Unknown newline type at: <%s\n", p);
754    return 0;
755    }
756    
757    
758    
759    /*************************************************
760    *             Usage function                     *
761    *************************************************/
762    
/* Write the command-line summary to stdout. Lines for -dfa and -p are
included only when NODFA and NOPOSIX, respectively, are not defined, and the
readline line depends on SUPPORT_LIBREADLINE. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
795    
796    
797    
798    /*************************************************
799    *                Main Program                    *
800    *************************************************/
801    
802  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
803  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 315  int options = 0; Line 810  int options = 0;
810  int study_options = 0;  int study_options = 0;
811  int op = 1;  int op = 1;
812  int timeit = 0;  int timeit = 0;
813    int timeitm = 0;
814  int showinfo = 0;  int showinfo = 0;
815  int showstore = 0;  int showstore = 0;
816    int quiet = 0;
817    int size_offsets = 45;
818    int size_offsets_max;
819    int *offsets = NULL;
820    #if !defined NOPOSIX
821  int posix = 0;  int posix = 0;
822    #endif
823  int debug = 0;  int debug = 0;
824  int done = 0;  int done = 0;
825  unsigned char buffer[30000];  int all_use_dfa = 0;
826  unsigned char dbuffer[1024];  int yield = 0;
827    int stack_size;
828    
829    /* These vectors store, end-to-end, a list of captured substring names. Assume
830    that 1024 is plenty long enough for the few names we'll be testing. */
831    
832    uschar copynames[1024];
833    uschar getnames[1024];
834    
835  /* Static so that new_malloc can use it. */  uschar *copynamesptr;
836    uschar *getnamesptr;
837    
838    /* Get buffers from malloc() so that Electric Fence will check their misuse
839    when I am debugging. They grow automatically when very long lines are read. */
840    
841    buffer = (unsigned char *)malloc(buffer_size);
842    dbuffer = (unsigned char *)malloc(buffer_size);
843    pbuffer = (unsigned char *)malloc(buffer_size);
844    
845    /* The outfile variable is static so that new_malloc can use it. */
846    
847  outfile = stdout;  outfile = stdout;
848    
849    /* The following  _setmode() stuff is some Windows magic that tells its runtime
850    library to translate CRLF into a single LF character. At least, that's what
851    I've been told: never having used Windows I take this all on trust. Originally
852    it set 0x8000, but then I was advised that _O_BINARY was better. */
853    
854    #if defined(_WIN32) || defined(WIN32)
855    _setmode( _fileno( stdout ), _O_BINARY );
856    #endif
857    
858  /* Scan options */  /* Scan options */
859    
860  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
861    {    {
862      unsigned char *endptr;
863    
864    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
865      showstore = 1;      showstore = 1;
866    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867      else if (strcmp(argv[op], "-b") == 0) debug = 1;
868    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
870    #if !defined NODFA
871      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
872    #endif
873      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
874          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
875            *endptr == 0))
876        {
877        op++;
878        argc--;
879        }
880      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
881        {
882        int both = argv[op][2] == 0;
883        int temp;
884        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
885                         *endptr == 0))
886          {
887          timeitm = temp;
888          op++;
889          argc--;
890          }
891        else timeitm = LOOPREPEAT;
892        if (both) timeit = timeitm;
893        }
894      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
896            *endptr == 0))
897        {
898    #if defined(_WIN32) || defined(WIN32)
899        printf("PCRE: -S not supported on this OS\n");
900        exit(1);
901    #else
902        int rc;
903        struct rlimit rlim;
904        getrlimit(RLIMIT_STACK, &rlim);
905        rlim.rlim_cur = stack_size * 1024 * 1024;
906        rc = setrlimit(RLIMIT_STACK, &rlim);
907        if (rc != 0)
908          {
909        printf("PCRE: setrlimit() failed with error %d\n", rc);
910        exit(1);
911          }
912        op++;
913        argc--;
914    #endif
915        }
916    #if !defined NOPOSIX
917    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
918    #endif
919      else if (strcmp(argv[op], "-C") == 0)
920        {
921        int rc;
922        printf("PCRE version %s\n", pcre_version());
923        printf("Compiled with\n");
924        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
925        printf("  %sUTF-8 support\n", rc? "" : "No ");
926        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927        printf("  %sUnicode properties support\n", rc? "" : "No ");
928        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
930          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931          (rc == -2)? "ANYCRLF" :
932          (rc == -1)? "ANY" : "???");
933        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935                                         "all Unicode newlines");
936        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937        printf("  Internal link size = %d\n", rc);
938        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939        printf("  POSIX malloc threshold = %d\n", rc);
940        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941        printf("  Default match limit = %d\n", rc);
942        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943        printf("  Default recursion depth limit = %d\n", rc);
944        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
946        goto EXIT;
947        }
948      else if (strcmp(argv[op], "-help") == 0 ||
949               strcmp(argv[op], "--help") == 0)
950        {
951        usage();
952        goto EXIT;
953        }
954    else    else
955      {      {
956      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
957      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
958      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
959             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
960      }      }
961    op++;    op++;
962    argc--;    argc--;
963    }    }
964    
965    /* Get the store for the offsets vector, and remember what it was */
966    
967    size_offsets_max = size_offsets;
968    offsets = (int *)malloc(size_offsets_max * sizeof(int));
969    if (offsets == NULL)
970      {
971      printf("** Failed to get %d bytes of memory for offsets vector\n",
972        (int)(size_offsets_max * sizeof(int)));
973      yield = 1;
974      goto EXIT;
975      }
976    
977  /* Sort out the input and output files */  /* Sort out the input and output files */
978    
979  if (argc > 1)  if (argc > 1)
980    {    {
981    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
982    if (infile == NULL)    if (infile == NULL)
983      {      {
984      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
985      return 1;      yield = 1;
986        goto EXIT;
987      }      }
988    }    }
989    
990  if (argc > 2)  if (argc > 2)
991    {    {
992    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
993    if (outfile == NULL)    if (outfile == NULL)
994      {      {
995      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
996      return 1;      yield = 1;
997        goto EXIT;
998      }      }
999    }    }
1000    
1001  /* Set alternative malloc function */  /* Set alternative malloc function */
1002    
1003  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1004    pcre_free = new_free;
1005    pcre_stack_malloc = stack_malloc;
1006    pcre_stack_free = stack_free;
1007    
1008  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1009    
1010  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1011    
1012  /* Main loop */  /* Main loop */
1013    
# Line 391  while (!done) Line 1018  while (!done)
1018    
1019  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
1020    regex_t preg;    regex_t preg;
1021      int do_posix = 0;
1022  #endif  #endif
1023    
1024    const char *error;    const char *error;
1025    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1026    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
1027      const unsigned char *tables = NULL;
1028      unsigned long int true_size, true_study_size = 0;
1029      size_t size, regex_gotten_store;
1030    int do_study = 0;    int do_study = 0;
1031    int do_debug = debug;    int do_debug = debug;
1032    int do_G = 0;    int do_G = 0;
1033    int do_g = 0;    int do_g = 0;
1034    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1035    int do_showrest = 0;    int do_showrest = 0;
1036    int do_posix = 0;    int do_flip = 0;
1037    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1038    
1039      use_utf8 = 0;
1040      debug_lengths = 1;
1041    
1042    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1043    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044      fflush(outfile);
1045    
1046    p = buffer;    p = buffer;
1047    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1048    if (*p == 0) continue;    if (*p == 0) continue;
1049    
1050    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1051    complete, read more. */  
1052      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1053        {
1054        unsigned long int magic, get_options;
1055        uschar sbuf[8];
1056        FILE *f;
1057    
1058        p++;
1059        pp = p + (int)strlen((char *)p);
1060        while (isspace(pp[-1])) pp--;
1061        *pp = 0;
1062    
1063        f = fopen((char *)p, "rb");
1064        if (f == NULL)
1065          {
1066          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1067          continue;
1068          }
1069    
1070        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1071    
1072        true_size =
1073          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1074        true_study_size =
1075          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1076    
1077        re = (real_pcre *)new_malloc(true_size);
1078        regex_gotten_store = gotten_store;
1079    
1080        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1081    
1082        magic = ((real_pcre *)re)->magic_number;
1083        if (magic != MAGIC_NUMBER)
1084          {
1085          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1086            {
1087            do_flip = 1;
1088            }
1089          else
1090            {
1091            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1092            fclose(f);
1093            continue;
1094            }
1095          }
1096    
1097        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1098          do_flip? " (byte-inverted)" : "", p);
1099    
1100        /* Need to know if UTF-8 for printing data strings */
1101    
1102        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103        use_utf8 = (get_options & PCRE_UTF8) != 0;
1104    
1105        /* Now see if there is any following study data */
1106    
1107        if (true_study_size != 0)
1108          {
1109          pcre_study_data *psd;
1110    
1111          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1112          extra->flags = PCRE_EXTRA_STUDY_DATA;
1113    
1114          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1115          extra->study_data = psd;
1116    
1117          if (fread(psd, 1, true_study_size, f) != true_study_size)
1118            {
1119            FAIL_READ:
1120            fprintf(outfile, "Failed to read data from %s\n", p);
1121            if (extra != NULL) new_free(extra);
1122            if (re != NULL) new_free(re);
1123            fclose(f);
1124            continue;
1125            }
1126          fprintf(outfile, "Study data loaded from %s\n", p);
1127          do_study = 1;     /* To get the data output if requested */
1128          }
1129        else fprintf(outfile, "No study data\n");
1130    
1131        fclose(f);
1132        goto SHOW_INFO;
1133        }
1134    
1135      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1136      the pattern; if it isn't complete, read more. */
1137    
1138    delimiter = *p++;    delimiter = *p++;
1139    
1140    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1141      {      {
1142      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143      goto SKIP_DATA;      goto SKIP_DATA;
1144      }      }
1145    
1146    pp = p;    pp = p;
1147      poffset = p - buffer;
1148    
1149    for(;;)    for(;;)
1150      {      {
# Line 435  while (!done) Line 1155  while (!done)
1155        pp++;        pp++;
1156        }        }
1157      if (*pp != 0) break;      if (*pp != 0) break;
1158        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1159        {        {
1160        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1161        done = 1;        done = 1;
# Line 453  while (!done) Line 1164  while (!done)
1164      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1165      }      }
1166    
1167      /* The buffer may have moved while being extended; reset the start of data
1168      pointer to the correct relative point in the buffer. */
1169    
1170      p = buffer + poffset;
1171    
1172    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1173    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1174    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1175    
1176    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1177    
1178    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1179      for callouts. */
1180    
1181    *pp++ = 0;    *pp++ = 0;
1182      strcpy((char *)pbuffer, (char *)p);
1183    
1184    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1185    
# Line 473  while (!done) Line 1191  while (!done)
1191      {      {
1192      switch (*pp++)      switch (*pp++)
1193        {        {
1194          case 'f': options |= PCRE_FIRSTLINE; break;
1195        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1196        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1197        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 481  while (!done) Line 1200  while (!done)
1200    
1201        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1202        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1203          case 'B': do_debug = 1; break;
1204          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1206        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207          case 'F': do_flip = 1; break;
1208        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1209        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1210          case 'J': options |= PCRE_DUPNAMES; break;
1211        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1212          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1213    
1214  #if !defined NOPOSIX  #if !defined NOPOSIX
1215        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 494  while (!done) Line 1218  while (!done)
1218        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1219        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1220        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1221          case 'Z': debug_lengths = 0; break;
1222          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224    
1225        case 'L':        case 'L':
1226        ppp = pp;        ppp = pp;
1227        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1228          /* The '0' test is just in case this is an unterminated line. */
1229          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1230        *ppp = 0;        *ppp = 0;
1231        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1232          {          {
1233          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1234          goto SKIP_DATA;          goto SKIP_DATA;
1235          }          }
1236          locale_set = 1;
1237        tables = pcre_maketables();        tables = pcre_maketables();
1238        pp = ppp;        pp = ppp;
1239        break;        break;
1240    
1241        case '\n': case ' ': break;        case '>':
1242          to_file = pp;
1243          while (*pp != 0) pp++;
1244          while (isspace(pp[-1])) pp--;
1245          *pp = 0;
1246          break;
1247    
1248          case '<':
1249            {
1250            if (strncmp((char *)pp, "JS>", 3) == 0)
1251              {
1252              options |= PCRE_JAVASCRIPT_COMPAT;
1253              pp += 3;
1254              }
1255            else
1256              {
1257              int x = check_newline(pp, outfile);
1258              if (x == 0) goto SKIP_DATA;
1259              options |= x;
1260              while (*pp++ != '>');
1261              }
1262            }
1263          break;
1264    
1265          case '\r':                      /* So that it works in Windows */
1266          case '\n':
1267          case ' ':
1268          break;
1269    
1270        default:        default:
1271        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1272        goto SKIP_DATA;        goto SKIP_DATA;
# Line 524  while (!done) Line 1282  while (!done)
1282      {      {
1283      int rc;      int rc;
1284      int cflags = 0;      int cflags = 0;
1285    
1286      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1287      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1288        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1289        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1290        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1291    
1292      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1293    
1294      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 1296  while (!done)
1296    
1297      if (rc != 0)      if (rc != 0)
1298        {        {
1299        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1300        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1301        goto SKIP_DATA;        goto SKIP_DATA;
1302        }        }
# Line 545  while (!done) Line 1308  while (!done)
1308  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1309    
1310      {      {
1311      if (timeit)      if (timeit > 0)
1312        {        {
1313        register int i;        register int i;
1314        clock_t time_taken;        clock_t time_taken;
1315        clock_t start_time = clock();        clock_t start_time = clock();
1316        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1317          {          {
1318          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1319          if (re != NULL) free(re);          if (re != NULL) free(re);
1320          }          }
1321        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1322        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1323          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1324          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1325        }        }
1326    
1327      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 574  while (!done) Line 1337  while (!done)
1337          {          {
1338          for (;;)          for (;;)
1339            {            {
1340            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1341              {              {
1342              done = 1;              done = 1;
1343              goto CONTINUE;              goto CONTINUE;
# Line 592  while (!done) Line 1355  while (!done)
1355      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
1356      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
1357    
1358        if (log_store)
1359          fprintf(outfile, "Memory allocation (code space): %d\n",
1360            (int)(gotten_store -
1361                  sizeof(real_pcre) -
1362                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1363    
1364        /* Extract the size for possible writing before possibly flipping it,
1365        and remember the store that was got. */
1366    
1367        true_size = ((real_pcre *)re)->size;
1368        regex_gotten_store = gotten_store;
1369    
1370        /* If /S was present, study the regexp to generate additional info to
1371        help with the matching. */
1372    
1373        if (do_study)
1374          {
1375          if (timeit > 0)
1376            {
1377            register int i;
1378            clock_t time_taken;
1379            clock_t start_time = clock();
1380            for (i = 0; i < timeit; i++)
1381              extra = pcre_study(re, study_options, &error);
1382            time_taken = clock() - start_time;
1383            if (extra != NULL) free(extra);
1384            fprintf(outfile, "  Study time %.4f milliseconds\n",
1385              (((double)time_taken * 1000.0) / (double)timeit) /
1386                (double)CLOCKS_PER_SEC);
1387            }
1388          extra = pcre_study(re, study_options, &error);
1389          if (error != NULL)
1390            fprintf(outfile, "Failed to study: %s\n", error);
1391          else if (extra != NULL)
1392            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1393          }
1394    
1395        /* If the 'F' option was present, we flip the bytes of all the integer
1396        fields in the regex data block and the study block. This is to make it
1397        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1398        compiled on a different architecture. */
1399    
1400        if (do_flip)
1401          {
1402          real_pcre *rre = (real_pcre *)re;
1403          rre->magic_number =
1404            byteflip(rre->magic_number, sizeof(rre->magic_number));
1405          rre->size = byteflip(rre->size, sizeof(rre->size));
1406          rre->options = byteflip(rre->options, sizeof(rre->options));
1407          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1408          rre->top_bracket =
1409            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1410          rre->top_backref =
1411            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1412          rre->first_byte =
1413            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1414          rre->req_byte =
1415            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1416          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1417            sizeof(rre->name_table_offset));
1418          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1419            sizeof(rre->name_entry_size));
1420          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1421            sizeof(rre->name_count));
1422    
1423          if (extra != NULL)
1424            {
1425            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1426            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1427            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1428            }
1429          }
1430    
1431        /* Extract information from the compiled data if required */
1432    
1433        SHOW_INFO:
1434    
1435        if (do_debug)
1436          {
1437          fprintf(outfile, "------------------------------------------------------------------\n");
1438          pcre_printint(re, outfile, debug_lengths);
1439          }
1440    
1441      if (do_showinfo)      if (do_showinfo)
1442        {        {
1443          unsigned long int get_options, all_options;
1444    #if !defined NOINFOCHECK
1445        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1446        int count, backrefmax, first_char, need_char;  #endif
1447        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1448            hascrorlf;
1449        if (do_debug) print_internals(re);        int nameentrysize, namecount;
1450          const uschar *nametable;
1451    
1452        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1453        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1454        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1455        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1456        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1457        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1458          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1459          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1460          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1461          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1462          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1463          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1464    
1465    #if !defined NOINFOCHECK
1466        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1467        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1468          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 620  while (!done) Line 1476  while (!done)
1476            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1477              first_char, old_first_char);              first_char, old_first_char);
1478    
1479          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
1480            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1481              old_options);              get_options, old_options);
1482          }          }
1483    #endif
1484    
1485        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1486          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1487          size, gotten_store);          (int)size, (int)regex_gotten_store);
1488    
1489        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1490        if (backrefmax > 0)        if (backrefmax > 0)
1491          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
1492    
1493        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
1494          fprintf(outfile, "Case state changes\n");          {
1495            fprintf(outfile, "Named capturing subpatterns:\n");
1496            while (namecount-- > 0)
1497              {
1498              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1499                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1500                GET2(nametable, 0));
1501              nametable += nameentrysize;
1502              }
1503            }
1504    
1505          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1506          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1507    
1508          all_options = ((real_pcre *)re)->options;
1509          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1510    
1511          if (get_options == 0) fprintf(outfile, "No options\n");
1512            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1513              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1514              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1515              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1516              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1517              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1518              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1519              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1520              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1521              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1522              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1523              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1524              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1525              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1526              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1527              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1528    
1529          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1530    
1531          switch (get_options & PCRE_NEWLINE_BITS)
1532            {
1533            case PCRE_NEWLINE_CR:
1534            fprintf(outfile, "Forced newline sequence: CR\n");
1535            break;
1536    
1537            case PCRE_NEWLINE_LF:
1538            fprintf(outfile, "Forced newline sequence: LF\n");
1539            break;
1540    
1541            case PCRE_NEWLINE_CRLF:
1542            fprintf(outfile, "Forced newline sequence: CRLF\n");
1543            break;
1544    
1545            case PCRE_NEWLINE_ANYCRLF:
1546            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1547            break;
1548    
1549            case PCRE_NEWLINE_ANY:
1550            fprintf(outfile, "Forced newline sequence: ANY\n");
1551            break;
1552    
1553            default:
1554            break;
1555            }
1556    
1557        if (first_char == -1)        if (first_char == -1)
1558          {          {
1559          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1560          }          }
1561        else if (first_char < 0)        else if (first_char < 0)
1562          {          {
# Line 656  while (!done) Line 1564  while (!done)
1564          }          }
1565        else        else
1566          {          {
1567          if (isprint(first_char))          int ch = first_char & 255;
1568            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1569              "" : " (caseless)";
1570            if (PRINTHEX(ch))
1571              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1572          else          else
1573            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1574          }          }
1575    
1576        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 1579  while (!done)
1579          }          }
1580        else        else
1581          {          {
1582          if (isprint(need_char))          int ch = need_char & 255;
1583            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1584              "" : " (caseless)";
1585            if (PRINTHEX(ch))
1586              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1587          else          else
1588            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1589          }          }
       }  
1590    
1591      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1592      help with the matching. */        value, but it varies, depending on the computer architecture, and
1593          so messes up the test suite. (And with the /F option, it might be
1594          flipped.) */
1595    
1596      if (do_study)        if (do_study)
       {  
       if (timeit)  
1597          {          {
1598          register int i;          if (extra == NULL)
1599          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1600          clock_t start_time = clock();          else
1601          for (i = 0; i < LOOPREPEAT; i++)            {
1602            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1603          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1604          if (extra != NULL) free(extra);  
1605          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1606            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1607            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1608                {
1609                int i;
1610                int c = 24;
1611                fprintf(outfile, "Starting byte set: ");
1612                for (i = 0; i < 256; i++)
1613                  {
1614                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1615                    {
1616                    if (c > 75)
1617                      {
1618                      fprintf(outfile, "\n  ");
1619                      c = 2;
1620                      }
1621                    if (PRINTHEX(i) && i != ' ')
1622                      {
1623                      fprintf(outfile, "%c ", i);
1624                      c += 2;
1625                      }
1626                    else
1627                      {
1628                      fprintf(outfile, "\\x%02x ", i);
1629                      c += 5;
1630                      }
1631                    }
1632                  }
1633                fprintf(outfile, "\n");
1634                }
1635              }
1636          }          }
1637          }
1638    
1639        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1640        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1641          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1642    
1643        else if (do_showinfo)      if (to_file != NULL)
1644          {
1645          FILE *f = fopen((char *)to_file, "wb");
1646          if (f == NULL)
1647            {
1648            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1649            }
1650          else
1651          {          {
1652          uschar *start_bits = NULL;          uschar sbuf[8];
1653          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (uschar)((true_size >> 24) & 255);
1654          if (start_bits == NULL)          sbuf[1] = (uschar)((true_size >> 16) & 255);
1655            fprintf(outfile, "No starting character set\n");          sbuf[2] = (uschar)((true_size >>  8) & 255);
1656            sbuf[3] = (uschar)((true_size) & 255);
1657    
1658            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1659            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1660            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1661            sbuf[7] = (uschar)((true_study_size) & 255);
1662    
1663            if (fwrite(sbuf, 1, 8, f) < 8 ||
1664                fwrite(re, 1, true_size, f) < true_size)
1665              {
1666              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1667              }
1668          else          else
1669            {            {
1670            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1671            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1672              {              {
1673              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1674                    true_study_size)
1675                {                {
1676                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1677                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1678                }                }
1679                else fprintf(outfile, "Study data written to %s\n", to_file);
1680    
1681              }              }
           fprintf(outfile, "\n");  
1682            }            }
1683            fclose(f);
1684          }          }
1685    
1686          new_free(re);
1687          if (extra != NULL) new_free(extra);
1688          if (tables != NULL) new_free((void *)tables);
1689          continue;  /* With next regex */
1690        }        }
1691      }      }        /* End of non-POSIX compile */
1692    
1693    /* Read data lines and test them */    /* Read data lines and test them */
1694    
1695    for (;;)    for (;;)
1696      {      {
1697      unsigned char *q;      uschar *q;
1698      unsigned char *bptr = dbuffer;      uschar *bptr;
1699        int *use_offsets = offsets;
1700        int use_size_offsets = size_offsets;
1701        int callout_data = 0;
1702        int callout_data_set = 0;
1703      int count, c;      int count, c;
1704      int copystrings = 0;      int copystrings = 0;
1705        int find_match_limit = 0;
1706      int getstrings = 0;      int getstrings = 0;
1707      int getlist = 0;      int getlist = 0;
1708      int gmatched = 0;      int gmatched = 0;
1709      int start_offset = 0;      int start_offset = 0;
1710      int g_notempty = 0;      int g_notempty = 0;
1711      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
1712    
1713      options = 0;      options = 0;
1714    
1715      if (infile == stdin) printf("data> ");      *copynames = 0;
1716      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1717    
1718        copynamesptr = copynames;
1719        getnamesptr = getnames;
1720    
1721        pcre_callout = callout;
1722        first_callout = 1;
1723        callout_extra = 0;
1724        callout_count = 0;
1725        callout_fail_count = 999999;
1726        callout_fail_id = -1;
1727        show_malloc = 0;
1728    
1729        if (extra != NULL) extra->flags &=
1730          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1731    
1732        len = 0;
1733        for (;;)
1734        {        {
1735        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1736        goto CONTINUE;          {
1737            if (len > 0) break;
1738            done = 1;
1739            goto CONTINUE;
1740            }
1741          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1742          len = (int)strlen((char *)buffer);
1743          if (buffer[len-1] == '\n') break;
1744        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1745    
     len = (int)strlen((char *)buffer);  
1746      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1747      buffer[len] = 0;      buffer[len] = 0;
1748      if (len == 0) break;      if (len == 0) break;
# Line 772  while (!done) Line 1750  while (!done)
1750      p = buffer;      p = buffer;
1751      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1752    
1753      q = dbuffer;      bptr = q = dbuffer;
1754      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1755        {        {
1756        int i = 0;        int i = 0;
1757        int n = 0;        int n = 0;
1758    
1759        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1760          {          {
1761          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 793  while (!done) Line 1772  while (!done)
1772          c -= '0';          c -= '0';
1773          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1774            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1775    
1776    #if !defined NOUTF8
1777            if (use_utf8 && c > 255)
1778              {
1779              unsigned char buff8[8];
1780              int ii, utn;
1781              utn = ord2utf8(c, buff8);
1782              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1783              c = buff8[ii];   /* Last byte */
1784              }
1785    #endif
1786          break;          break;
1787    
1788          case 'x':          case 'x':
1789    
1790            /* Handle \x{..} specially - new Perl thing for utf8 */
1791    
1792    #if !defined NOUTF8
1793            if (*p == '{')
1794              {
1795              unsigned char *pt = p;
1796              c = 0;
1797              while (isxdigit(*(++pt)))
1798                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1799              if (*pt == '}')
1800                {
1801                unsigned char buff8[8];
1802                int ii, utn;
1803                utn = ord2utf8(c, buff8);
1804                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1805                c = buff8[ii];   /* Last byte */
1806                p = pt + 1;
1807                break;
1808                }
1809              /* Not correct form; fall through */
1810              }
1811    #endif
1812    
1813            /* Ordinary \x */
1814    
1815          c = 0;          c = 0;
1816          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1817            {            {
# Line 804  while (!done) Line 1820  while (!done)
1820            }            }
1821          break;          break;
1822    
1823          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1824          p--;          p--;
1825          continue;          continue;
1826    
1827            case '>':
1828            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1829            continue;
1830    
1831          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1832          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1833          continue;          continue;
# Line 817  while (!done) Line 1837  while (!done)
1837          continue;          continue;
1838    
1839          case 'C':          case 'C':
1840          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1841          copystrings |= 1 << n;            {
1842              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1843              copystrings |= 1 << n;
1844              }
1845            else if (isalnum(*p))
1846              {
1847              uschar *npp = copynamesptr;
1848              while (isalnum(*p)) *npp++ = *p++;
1849              *npp++ = 0;
1850              *npp = 0;
1851              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1852              if (n < 0)
1853                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1854              copynamesptr = npp;
1855              }
1856            else if (*p == '+')
1857              {
1858              callout_extra = 1;
1859              p++;
1860              }
1861            else if (*p == '-')
1862              {
1863              pcre_callout = NULL;
1864              p++;
1865              }
1866            else if (*p == '!')
1867              {
1868              callout_fail_id = 0;
1869              p++;
1870              while(isdigit(*p))
1871                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1872              callout_fail_count = 0;
1873              if (*p == '!')
1874                {
1875                p++;
1876                while(isdigit(*p))
1877                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1878                }
1879              }
1880            else if (*p == '*')
1881              {
1882              int sign = 1;
1883              callout_data = 0;
1884              if (*(++p) == '-') { sign = -1; p++; }
1885              while(isdigit(*p))
1886                callout_data = callout_data * 10 + *p++ - '0';
1887              callout_data *= sign;
1888              callout_data_set = 1;
1889              }
1890            continue;
1891    
1892    #if !defined NODFA
1893            case 'D':
1894    #if !defined NOPOSIX
1895            if (posix || do_posix)
1896              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1897            else
1898    #endif
1899              use_dfa = 1;
1900            continue;
1901    
1902            case 'F':
1903            options |= PCRE_DFA_SHORTEST;
1904          continue;          continue;
1905    #endif
1906    
1907          case 'G':          case 'G':
1908          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1909          getstrings |= 1 << n;            {
1910              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1911              getstrings |= 1 << n;
1912              }
1913            else if (isalnum(*p))
1914              {
1915              uschar *npp = getnamesptr;
1916              while (isalnum(*p)) *npp++ = *p++;
1917              *npp++ = 0;
1918              *npp = 0;
1919              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1920              if (n < 0)
1921                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1922              getnamesptr = npp;
1923              }
1924          continue;          continue;
1925    
1926          case 'L':          case 'L':
1927          getlist = 1;          getlist = 1;
1928          continue;          continue;
1929    
1930            case 'M':
1931            find_match_limit = 1;
1932            continue;
1933    
1934          case 'N':          case 'N':
1935          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1936          continue;          continue;
1937    
1938          case 'O':          case 'O':
1939          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1940          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1941              {
1942              size_offsets_max = n;
1943              free(offsets);
1944              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1945              if (offsets == NULL)
1946                {
1947                printf("** Failed to get %d bytes of memory for offsets vector\n",
1948                  (int)(size_offsets_max * sizeof(int)));
1949                yield = 1;
1950                goto EXIT;
1951                }
1952              }
1953            use_size_offsets = n;
1954            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1955            continue;
1956    
1957            case 'P':
1958            options |= PCRE_PARTIAL;
1959            continue;
1960    
1961            case 'Q':
1962            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1963            if (extra == NULL)
1964              {
1965              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1966              extra->flags = 0;
1967              }
1968            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1969            extra->match_limit_recursion = n;
1970            continue;
1971    
1972            case 'q':
1973            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1974            if (extra == NULL)
1975              {
1976              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1977              extra->flags = 0;
1978              }
1979            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1980            extra->match_limit = n;
1981            continue;
1982    
1983    #if !defined NODFA
1984            case 'R':
1985            options |= PCRE_DFA_RESTART;
1986            continue;
1987    #endif
1988    
1989            case 'S':
1990            show_malloc = 1;
1991          continue;          continue;
1992    
1993          case 'Z':          case 'Z':
1994          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1995          continue;          continue;
1996    
1997            case '?':
1998            options |= PCRE_NO_UTF8_CHECK;
1999            continue;
2000    
2001            case '<':
2002              {
2003              int x = check_newline(p, outfile);
2004              if (x == 0) goto NEXT_DATA;
2005              options |= x;
2006              while (*p++ != '>');
2007              }
2008            continue;
2009          }          }
2010        *q++ = c;        *q++ = c;
2011        }        }
2012      *q = 0;      *q = 0;
2013      len = q - dbuffer;      len = q - dbuffer;
2014    
2015        if ((all_use_dfa || use_dfa) && find_match_limit)
2016          {
2017          printf("**Match limit not relevant for DFA matching: ignored\n");
2018          find_match_limit = 0;
2019          }
2020    
2021      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2022      support timing. */      support timing or playing with the match limit or callout data. */
2023    
2024  #if !defined NOPOSIX  #if !defined NOPOSIX
2025      if (posix || do_posix)      if (posix || do_posix)
2026        {        {
2027        int rc;        int rc;
2028        int eflags = 0;        int eflags = 0;
2029        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
2030          if (use_size_offsets > 0)
2031            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2032        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2033        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2034    
2035        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2036    
2037        if (rc != 0)        if (rc != 0)
2038          {          {
2039          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2040          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2041          }          }
2042          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2043                  != 0)
2044            {
2045            fprintf(outfile, "Matched with REG_NOSUB\n");
2046            }
2047        else        else
2048          {          {
2049          size_t i;          size_t i;
2050          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2051            {            {
2052            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2053              {              {
2054              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2055              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2056                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2057              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2058              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
2059                {                {
2060                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
2061                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2062                    outfile);
2063                fprintf(outfile, "\n");                fprintf(outfile, "\n");
2064                }                }
2065              }              }
2066            }            }
2067          }          }
2068          free(pmatch);
2069        }        }
2070    
2071      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 896  while (!done) Line 2075  while (!done)
2075    
2076      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2077        {        {
2078        if (timeit)        if (timeitm > 0)
2079          {          {
2080          register int i;          register int i;
2081          clock_t time_taken;          clock_t time_taken;
2082          clock_t start_time = clock();          clock_t start_time = clock();
2083          for (i = 0; i < LOOPREPEAT; i++)  
2084    #if !defined NODFA
2085            if (all_use_dfa || use_dfa)
2086              {
2087              int workspace[1000];
2088              for (i = 0; i < timeitm; i++)
2089                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2090                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2091                  sizeof(workspace)/sizeof(int));
2092              }
2093            else
2094    #endif
2095    
2096            for (i = 0; i < timeitm; i++)
2097            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2098              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2099    
2100          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2101          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2102            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2103            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2104            }
2105    
2106          /* If find_match_limit is set, we want to do repeated matches with
2107          varying limits in order to find the minimum value for the match limit and
2108          for the recursion limit. */
2109    
2110          if (find_match_limit)
2111            {
2112            if (extra == NULL)
2113              {
2114              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2115              extra->flags = 0;
2116              }
2117    
2118            (void)check_match_limit(re, extra, bptr, len, start_offset,
2119              options|g_notempty, use_offsets, use_size_offsets,
2120              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2121              PCRE_ERROR_MATCHLIMIT, "match()");
2122    
2123            count = check_match_limit(re, extra, bptr, len, start_offset,
2124              options|g_notempty, use_offsets, use_size_offsets,
2125              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2126              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2127          }          }
2128    
2129        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2130          start_offset, options | g_notempty, offsets, size_offsets);  
2131          else if (callout_data_set)
2132            {
2133            if (extra == NULL)
2134              {
2135              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2136              extra->flags = 0;
2137              }
2138            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2139            extra->callout_data = &callout_data;
2140            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2141              options | g_notempty, use_offsets, use_size_offsets);
2142            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2143            }
2144    
2145          /* The normal case is just to do the match once, with the default
2146          value of match_limit. */
2147    
2148    #if !defined NODFA
2149          else if (all_use_dfa || use_dfa)
2150            {
2151            int workspace[1000];
2152            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2153              options | g_notempty, use_offsets, use_size_offsets, workspace,
2154              sizeof(workspace)/sizeof(int));
2155            if (count == 0)
2156              {
2157              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2158              count = use_size_offsets/2;
2159              }
2160            }
2161    #endif
2162    
2163        if (count == 0)        else
2164          {          {
2165          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2166          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2167            if (count == 0)
2168              {
2169              fprintf(outfile, "Matched, but too many substrings\n");
2170              count = use_size_offsets/3;
2171              }
2172          }          }
2173    
2174        /* Matched */        /* Matched */
2175    
2176        if (count >= 0)        if (count >= 0)
2177          {          {
2178          int i;          int i, maxcount;
2179    
2180    #if !defined NODFA
2181            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2182    #endif
2183              maxcount = use_size_offsets/3;
2184    
2185            /* This is a check against a lunatic return value. */
2186    
2187            if (count > maxcount)
2188              {
2189              fprintf(outfile,
2190                "** PCRE error: returned count %d is too big for offset size %d\n",
2191                count, use_size_offsets);
2192              count = use_size_offsets/3;
2193              if (do_g || do_G)
2194                {
2195                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2196                do_g = do_G = FALSE;        /* Break g/G loop */
2197                }
2198              }
2199    
2200          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2201            {            {
2202            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2203              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2204            else            else
2205              {              {
2206              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2207              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2208                  use_offsets[i+1] - use_offsets[i], outfile);
2209              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2210              if (i == 0)              if (i == 0)
2211                {                {
2212                if (do_showrest)                if (do_showrest)
2213                  {                  {
2214                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2215                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2216                      outfile);
2217                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2218                  }                  }
2219                }                }
# Line 949  while (!done) Line 2224  while (!done)
2224            {            {
2225            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2226              {              {
2227              char copybuffer[16];              char copybuffer[256];
2228              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2229                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2230              if (rc < 0)              if (rc < 0)
2231                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 959  while (!done) Line 2234  while (!done)
2234              }              }
2235            }            }
2236    
2237            for (copynamesptr = copynames;
2238                 *copynamesptr != 0;
2239                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2240              {
2241              char copybuffer[256];
2242              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2243                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2244              if (rc < 0)
2245                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2246              else
2247                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2248              }
2249    
2250          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2251            {            {
2252            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2253              {              {
2254              const char *substring;              const char *substring;
2255              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2256                i, &substring);                i, &substring);
2257              if (rc < 0)              if (rc < 0)
2258                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2259              else              else
2260                {                {
2261                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2262                free((void *)substring);                pcre_free_substring(substring);
2263                }                }
2264              }              }
2265            }            }
2266    
2267            for (getnamesptr = getnames;
2268                 *getnamesptr != 0;
2269                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2270              {
2271              const char *substring;
2272              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2273                count, (char *)getnamesptr, &substring);
2274              if (rc < 0)
2275                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2276              else
2277                {
2278                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2279                pcre_free_substring(substring);
2280                }
2281              }
2282    
2283          if (getlist)          if (getlist)
2284            {            {
2285            const char **stringlist;            const char **stringlist;
2286            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2287              &stringlist);              &stringlist);
2288            if (rc < 0)            if (rc < 0)
2289              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 2293  while (!done)
2293                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2294              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2295                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2296              free((void *)stringlist);              /* free((void *)stringlist); */
2297                pcre_free_substring_list(stringlist);
2298              }              }
2299            }            }
2300          }          }
2301    
2302          /* There was a partial match */
2303    
2304          else if (count == PCRE_ERROR_PARTIAL)
2305            {
2306            fprintf(outfile, "Partial match");
2307    #if !defined NODFA
2308            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2309              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2310                bptr + use_offsets[0]);
2311    #endif
2312            fprintf(outfile, "\n");
2313            break;  /* Out of the /g loop */
2314            }
2315    
2316        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2317        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2318        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2319        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2320        was checked before setting PCRE_NOTEMPTY. */  
2321          Complication arises in the case when the newline option is "any" or
2322          "anycrlf". If the previous match was at the end of a line terminated by
2323          CRLF, an advance of one character just passes the \r, whereas we should
2324          prefer the longer newline sequence, as does the code in pcre_exec().
2325          Fudge the offset value to achieve this.
2326    
2327          Otherwise, in the case of UTF-8 matching, the advance must be one
2328          character, not one byte. */
2329    
2330        else        else
2331          {          {
2332          if (g_notempty != 0)          if (g_notempty != 0)
2333            {            {
2334            offsets[0] = start_offset;            int onechar = 1;
2335            offsets[1] = start_offset + 1;            unsigned int obits = ((real_pcre *)re)->options;
2336              use_offsets[0] = start_offset;
2337              if ((obits & PCRE_NEWLINE_BITS) == 0)
2338                {
2339                int d;
2340                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2341                obits = (d == '\r')? PCRE_NEWLINE_CR :
2342                        (d == '\n')? PCRE_NEWLINE_LF :
2343                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2344                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2345                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2346                }
2347              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2348                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2349                  &&
2350                  start_offset < len - 1 &&
2351                  bptr[start_offset] == '\r' &&
2352                  bptr[start_offset+1] == '\n')
2353                onechar++;
2354              else if (use_utf8)
2355                {
2356                while (start_offset + onechar < len)
2357                  {
2358                  int tb = bptr[start_offset+onechar];
2359                  if (tb <= 127) break;
2360                  tb &= 0xc0;
2361                  if (tb != 0 && tb != 0xc0) onechar++;
2362                  }
2363                }
2364              use_offsets[1] = start_offset + onechar;
2365            }            }
2366          else          else
2367            {            {
2368            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2369              {              {
2370              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2371              }              }
2372              else fprintf(outfile, "Error %d\n", count);
2373            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2374            }            }
2375          }          }
# Line 1025  while (!done) Line 2381  while (!done)
2381        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2382        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
2383        what Perl's /g options does. This turns out to be rather cunning. First        what Perl's /g options does. This turns out to be rather cunning. First
2384        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2385        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
2386          character. */
2387    
2388        g_notempty = 0;        g_notempty = 0;
2389        if (offsets[0] == offsets[1])  
2390          if (use_offsets[0] == use_offsets[1])
2391          {          {
2392          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
2393          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2394          }          }
2395    
2396        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2397    
2398        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
2399    
2400        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2401    
2402        else        else
2403          {          {
2404          bptr += offsets[1];          bptr += use_offsets[1];
2405          len -= offsets[1];          len -= use_offsets[1];
2406          }          }
2407        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2408    
2409        NEXT_DATA: continue;
2410      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2411    
2412    CONTINUE:    CONTINUE:
# Line 1055  while (!done) Line 2415  while (!done)
2415    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2416  #endif  #endif
2417    
2418    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2419    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2420    if (tables != NULL)    if (tables != NULL)
2421      {      {
2422      free((void *)tables);      new_free((void *)tables);
2423      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2424        locale_set = 0;
2425      }      }
2426    }    }
2427    
2428  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2429  return 0;  
2430    EXIT:
2431    
2432    if (infile != NULL && infile != stdin) fclose(infile);
2433    if (outfile != NULL && outfile != stdout) fclose(outfile);
2434    
2435    free(buffer);
2436    free(dbuffer);
2437    free(pbuffer);
2438    free(offsets);
2439    
2440    return yield;
2441  }  }
2442    
2443  /* End */  /* End of pcretest.c */

Legend:
Removed from v.43  
changed lines
  Added in v.336

  ViewVC Help
Powered by ViewVC 1.1.5