/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC revision 389 by ph10, Sun Mar 15 18:24:05 2009 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48    #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95    #include "pcre_internal.h"
96    
97    /* We need access to some of the data tables that PCRE uses. So as not to have
98    to keep two copies, we include the source file here, changing the names of the
99    external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102    #define _pcre_utf8_table1      utf8_table1
103    #define _pcre_utf8_table1_size utf8_table1_size
104    #define _pcre_utf8_table2      utf8_table2
105    #define _pcre_utf8_table3      utf8_table3
106    #define _pcre_utf8_table4      utf8_table4
107    #define _pcre_utt              utt
108    #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110    #define _pcre_OP_lengths       OP_lengths
111    
112    #include "pcre_tables.c"
113    
114    /* We also need the pcre_printint() function for printing out compiled
115    patterns. This function is in a separate file so that it can be included in
116    pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in this file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124    #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
128    
129    /* It is possible to compile this test program without including support for
130    testing the POSIX interface, though this is not available via the standard
131    Makefile. */
132    
133  #include "internal.h"  #if !defined NOPOSIX
134  #include "pcreposix.h"  #include "pcreposix.h"
135    #endif
136    
137    /* It is also possible, for the benefit of the version currently imported into
138    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139    interface to the DFA matcher (NODFA), and without the doublecheck of the old
140    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150    /* Other parameters */
151    
152  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
153  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 157 
157  #endif  #endif
158  #endif  #endif
159    
160    /* This is the default loop count for timing. */
161    
162    #define LOOPREPEAT 500000
163    
164    /* Static variables */
165    
166  static FILE *outfile;  static FILE *outfile;
167  static int log_store = 0;  static int log_store = 0;
168    static int callout_count;
169    static int callout_extra;
170    static int callout_fail_count;
171    static int callout_fail_id;
172    static int debug_lengths;
173    static int first_callout;
174    static int locale_set = 0;
175    static int show_malloc;
176    static int use_utf8;
177    static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184    static uschar *pbuffer = NULL;
185    
186    
187    
188  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
189  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
190    *************************************************/
191    
192  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  /* Input lines are read into buffer, but both patterns and data lines can be
193    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  continued over multiple input lines. In addition, if the buffer fills up, we
194    "not",  want to automatically expand it so as to be able to handle extremely large
195    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  lines that are needed for certain stress tests. When the input buffer is
196    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
197    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
198    "*", "*?", "+", "+?", "?", "??", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
199    "class", "Ref",  optimal way of handling this, but hey, this is just a test program!
200    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  
201    "Brazero", "Braminzero", "Bra"  Arguments:
202  };    f            the file to read
203      start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205  static void print_internals(pcre *re)  
206  {  Returns:       pointer to the start of new data
207  unsigned char *code = ((real_pcre *)re)->code;                 could be a copy of start, or could be moved
208                   NULL if no data read and EOF reached
209  printf("------------------------------------------------------------------\n");  */
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   printf("%3d ", code - ((real_pcre *)re)->code);  
   
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     break;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
   
       code++;  
       printf("    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
210    
211          case OP_CRRANGE:  static uschar *
212          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start, const char *prompt)
213          min = (code[1] << 8) + code[2];  {
214          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
215    
216          default:  for (;;)
217          code--;    {
218          }    int rlen = buffer_size - (here - buffer);
219    
220      if (rlen > 1000)
221        {
222        int dlen;
223    
224        /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241          }
242        else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247          {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253        dlen = (int)strlen((char *)here);
254        if (dlen > 0 && here[dlen - 1] == '\n') return start;
255        here += dlen;
256        }
257    
258      else
259        {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269        }        }
     break;  
270    
271      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274      default:      buffer_size = new_buffer_size;
275      printf("    %s", OP_names[*code]);  
276      break;      start = new_buffer + (start - buffer);
277        here = new_buffer + (here - buffer);
278    
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286      }      }
287      }
288    
289    return NULL;  /* Control never gets here */
290    }
291    
292    
293    code++;  
294    printf("\n");  
295    
296    
297    
298    /*************************************************
299    *          Read number from string               *
300    *************************************************/
301    
302    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303    around with conditional compilation, just do the job by hand. It is only used
304    for unpicking arguments, so just keep it simple.
305    
306    Arguments:
307      str           string to be converted
308      endptr        where to put the end pointer
309    
310    Returns:        the converted value, as an int
311    */
312    
313    static int
314    get_value(unsigned char *str, unsigned char **endptr)
315    {
316    int result = 0;
317    while(*str != 0 && isspace(*str)) str++;
318    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
319    *endptr = str;
320    return(result);
321    }
322    
323    
324    
325    
326    /*************************************************
327    *            Convert UTF-8 string to value       *
328    *************************************************/
329    
330    /* This function takes one or more bytes that represent a UTF-8 character,
331    and returns the value of the character.
332    
333    Argument:
334      utf8bytes   a pointer to the byte vector
335      vptr        a pointer to an int to receive the value
336    
337    Returns:      >  0 => the number of bytes consumed
338                  -6 to 0 => malformed UTF-8 character at offset = (-return)
339    */
340    
341    #if !defined NOUTF8
342    
343    static int
344    utf82ord(unsigned char *utf8bytes, int *vptr)
345    {
346    int c = *utf8bytes++;
347    int d = c;
348    int i, j, s;
349    
350    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
351      {
352      if ((d & 0x80) == 0) break;
353      d <<= 1;
354      }
355    
356    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
357    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
358    
359    /* i now has a value in the range 1-5 */
360    
361    s = 6*i;
362    d = (c & utf8_table3[i]) << s;
363    
364    for (j = 0; j < i; j++)
365      {
366      c = *utf8bytes++;
367      if ((c & 0xc0) != 0x80) return -(j+1);
368      s -= 6;
369      d |= (c & 0x3f) << s;
370    }    }
371    
372    /* Check that encoding was the correct unique one */
373    
374    for (j = 0; j < utf8_table1_size; j++)
375      if (d <= utf8_table1[j]) break;
376    if (j != i) return -(i+1);
377    
378    /* Valid value */
379    
380    *vptr = d;
381    return i+1;
382    }
383    
384    #endif
385    
386    
387    
388    /*************************************************
389    *       Convert character value to UTF-8         *
390    *************************************************/
391    
392    /* This function takes an integer value in the range 0 - 0x7fffffff
393    and encodes it as a UTF-8 character in 1 to 6 bytes.
394    
395    Arguments:
396      cvalue     the character value
397      utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399    Returns:     number of bytes placed in the buffer
400    */
401    
402    #if !defined NOUTF8
403    
404    static int
405    ord2utf8(int cvalue, uschar *utf8bytes)
406    {
407    register int i, j;
408    for (i = 0; i < utf8_table1_size; i++)
409      if (cvalue <= utf8_table1[i]) break;
410    utf8bytes += i;
411    for (j = i; j > 0; j--)
412     {
413     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414     cvalue >>= 6;
415     }
416    *utf8bytes = utf8_table2[i] | cvalue;
417    return i + 1;
418  }  }
419    
420    #endif
421    
422    
423    
424  /* Character string printing function. */  /*************************************************
425    *             Print character string             *
426    *************************************************/
427    
428    /* Character string printing function. Must handle UTF-8 strings in utf8
429    mode. Yields number of characters printed. If handed a NULL file, just counts
430    chars without printing. */
431    
432  static void pchars(unsigned char *p, int length)  static int pchars(unsigned char *p, int length, FILE *f)
433  {  {
434  int c;  int c = 0;
435    int yield = 0;
436    
437  while (length-- > 0)  while (length-- > 0)
438    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
439      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
440      if (use_utf8)
441        {
442        int rc = utf82ord(p, &c);
443    
444        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
445          {
446          length -= rc - 1;
447          p += rc;
448          if (PRINTHEX(c))
449            {
450            if (f != NULL) fprintf(f, "%c", c);
451            yield++;
452            }
453          else
454            {
455            int n = 4;
456            if (f != NULL) fprintf(f, "\\x{%02x}", c);
457            yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461            }
462          continue;
463          }
464        }
465    #endif
466    
467       /* Not UTF-8, or malformed UTF-8  */
468    
469      c = *p++;
470      if (PRINTHEX(c))
471        {
472        if (f != NULL) fprintf(f, "%c", c);
473        yield++;
474        }
475      else
476        {
477        if (f != NULL) fprintf(f, "\\x%02x", c);
478        yield += 4;
479        }
480      }
481    
482    return yield;
483    }
484    
485    
486    
487    /*************************************************
488    *              Callout function                  *
489    *************************************************/
490    
491    /* Called from PCRE as a result of the (?C) item. We print out where we are in
492    the match. Yield zero unless more callouts than the fail count, or the callout
493    data is not zero. */
494    
495    static int callout(pcre_callout_block *cb)
496    {
497    FILE *f = (first_callout | callout_extra)? outfile : NULL;
498    int i, pre_start, post_start, subject_length;
499    
500    if (callout_extra)
501      {
502      fprintf(f, "Callout %d: last capture = %d\n",
503        cb->callout_number, cb->capture_last);
504    
505      for (i = 0; i < cb->capture_top * 2; i += 2)
506        {
507        if (cb->offset_vector[i] < 0)
508          fprintf(f, "%2d: <unset>\n", i/2);
509        else
510          {
511          fprintf(f, "%2d: ", i/2);
512          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513            cb->offset_vector[i+1] - cb->offset_vector[i], f);
514          fprintf(f, "\n");
515          }
516        }
517      }
518    
519    /* Re-print the subject in canonical form, the first time or if giving full
520    details. On subsequent calls in the same match, we use pchars just to find the
521    printed lengths of the substrings. */
522    
523    if (f != NULL) fprintf(f, "--->");
524    
525    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527      cb->current_position - cb->start_match, f);
528    
529    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530    
531    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532      cb->subject_length - cb->current_position, f);
533    
534    if (f != NULL) fprintf(f, "\n");
535    
536    /* Always print appropriate indicators, with callout number if not already
537    shown. For automatic callouts, show the pattern offset. */
538    
539    if (cb->callout_number == 255)
540      {
541      fprintf(outfile, "%+3d ", cb->pattern_position);
542      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
543      }
544    else
545      {
546      if (callout_extra) fprintf(outfile, "    ");
547        else fprintf(outfile, "%3d ", cb->callout_number);
548      }
549    
550    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551    fprintf(outfile, "^");
552    
553    if (post_start > 0)
554      {
555      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556      fprintf(outfile, "^");
557      }
558    
559    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560      fprintf(outfile, " ");
561    
562    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563      pbuffer + cb->pattern_position);
564    
565    fprintf(outfile, "\n");
566    first_callout = 0;
567    
568    if (cb->callout_data != NULL)
569      {
570      int callout_data = *((int *)(cb->callout_data));
571      if (callout_data != 0)
572        {
573        fprintf(outfile, "Callout data = %d\n", callout_data);
574        return callout_data;
575        }
576      }
577    
578    return (cb->callout_number != callout_fail_id)? 0 :
579           (++callout_count >= callout_fail_count)? 1 : 0;
580  }  }
581    
582    
583    /*************************************************
584    *            Local malloc functions              *
585    *************************************************/
586    
587  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
588  compiled re. */  compiled re. */
589    
590  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
591  {  {
592  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
593  return malloc(size);  gotten_store = size;
594    if (show_malloc)
595      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
596    return block;
597    }
598    
599    static void new_free(void *block)
600    {
601    if (show_malloc)
602      fprintf(outfile, "free             %p\n", block);
603    free(block);
604    }
605    
606    
607    /* For recursion malloc/free, to test stacking calls */
608    
609    static void *stack_malloc(size_t size)
610    {
611    void *block = malloc(size);
612    if (show_malloc)
613      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614    return block;
615    }
616    
617    static void stack_free(void *block)
618    {
619    if (show_malloc)
620      fprintf(outfile, "stack_free       %p\n", block);
621    free(block);
622    }
623    
624    
625    /*************************************************
626    *          Call pcre_fullinfo()                  *
627    *************************************************/
628    
629    /* Get one piece of information from the pcre_fullinfo() function */
630    
631    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632    {
633    int rc;
634    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636    }
637    
638    
639    
640    /*************************************************
641    *         Byte flipping function                 *
642    *************************************************/
643    
644    static unsigned long int
645    byteflip(unsigned long int value, int n)
646    {
647    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
648    return ((value & 0x000000ff) << 24) |
649           ((value & 0x0000ff00) <<  8) |
650           ((value & 0x00ff0000) >>  8) |
651           ((value & 0xff000000) >> 24);
652    }
653    
654    
655    
656    
657    /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704  }  }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730    }
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, ANY, BSR_ANYCRLF, or BSR_UNICODE. Print a message
740    and return 0 if there is no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx or PCRE_BSR_xxx flag, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command-line help text to stdout. Three parts of the text depend
on how the program was built: the line about readline() is selected by
SUPPORT_LIBREADLINE, the -dfa option is listed unless NODFA is defined, and the
-p option is listed unless NOPOSIX is defined. */

static void
usage(void)
{
FILE *out = stdout;

fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", out);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", out);
#else
fputs("This version of pcretest is not linked with readline().\n", out);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      out);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", out);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", out);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", out);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n", out);
}
802    
803    
804    
805    /*************************************************
806    *                Main Program                    *
807    *************************************************/
808    
809  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
810  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
811  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 259  int main(int argc, char **argv) Line 815  int main(int argc, char **argv)
815  FILE *infile = stdin;  FILE *infile = stdin;
816  int options = 0;  int options = 0;
817  int study_options = 0;  int study_options = 0;
818    int default_find_match_limit = FALSE;
819  int op = 1;  int op = 1;
820  int timeit = 0;  int timeit = 0;
821    int timeitm = 0;
822  int showinfo = 0;  int showinfo = 0;
823    int showstore = 0;
824    int quiet = 0;
825    int size_offsets = 45;
826    int size_offsets_max;
827    int *offsets = NULL;
828    #if !defined NOPOSIX
829  int posix = 0;  int posix = 0;
830    #endif
831  int debug = 0;  int debug = 0;
832  unsigned char buffer[30000];  int done = 0;
833  unsigned char dbuffer[1024];  int all_use_dfa = 0;
834    int yield = 0;
835    int stack_size;
836    
837    /* These vectors store, end-to-end, a list of captured substring names. Assume
838    that 1024 is plenty long enough for the few names we'll be testing. */
839    
840  /* Static so that new_malloc can use it. */  uschar copynames[1024];
841    uschar getnames[1024];
842    
843    uschar *copynamesptr;
844    uschar *getnamesptr;
845    
846    /* Get buffers from malloc() so that Electric Fence will check their misuse
847    when I am debugging. They grow automatically when very long lines are read. */
848    
849    buffer = (unsigned char *)malloc(buffer_size);
850    dbuffer = (unsigned char *)malloc(buffer_size);
851    pbuffer = (unsigned char *)malloc(buffer_size);
852    
853    /* The outfile variable is static so that new_malloc can use it. */
854    
855  outfile = stdout;  outfile = stdout;
856    
857    /* The following  _setmode() stuff is some Windows magic that tells its runtime
858    library to translate CRLF into a single LF character. At least, that's what
859    I've been told: never having used Windows I take this all on trust. Originally
860    it set 0x8000, but then I was advised that _O_BINARY was better. */
861    
862    #if defined(_WIN32) || defined(WIN32)
863    _setmode( _fileno( stdout ), _O_BINARY );
864    #endif
865    
866  /* Scan options */  /* Scan options */
867    
868  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
869    {    {
870    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
871    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
872      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873        showstore = 1;
874      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875      else if (strcmp(argv[op], "-b") == 0) debug = 1;
876    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879    #if !defined NODFA
880      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881    #endif
882      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884            *endptr == 0))
885        {
886        op++;
887        argc--;
888        }
889      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890        {
891        int both = argv[op][2] == 0;
892        int temp;
893        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894                         *endptr == 0))
895          {
896          timeitm = temp;
897          op++;
898          argc--;
899          }
900        else timeitm = LOOPREPEAT;
901        if (both) timeit = timeitm;
902        }
903      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905            *endptr == 0))
906        {
907    #if defined(_WIN32) || defined(WIN32)
908        printf("PCRE: -S not supported on this OS\n");
909        exit(1);
910    #else
911        int rc;
912        struct rlimit rlim;
913        getrlimit(RLIMIT_STACK, &rlim);
914        rlim.rlim_cur = stack_size * 1024 * 1024;
915        rc = setrlimit(RLIMIT_STACK, &rlim);
916        if (rc != 0)
917          {
918        printf("PCRE: setrlimit() failed with error %d\n", rc);
919        exit(1);
920          }
921        op++;
922        argc--;
923    #endif
924        }
925    #if !defined NOPOSIX
926    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
927    #endif
928      else if (strcmp(argv[op], "-C") == 0)
929        {
930        int rc;
931        unsigned long int lrc;
932        printf("PCRE version %s\n", pcre_version());
933        printf("Compiled with\n");
934        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935        printf("  %sUTF-8 support\n", rc? "" : "No ");
936        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937        printf("  %sUnicode properties support\n", rc? "" : "No ");
938        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
940          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
941          (rc == -2)? "ANYCRLF" :
942          (rc == -1)? "ANY" : "???");
943        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
944        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
945                                         "all Unicode newlines");
946        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
947        printf("  Internal link size = %d\n", rc);
948        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
949        printf("  POSIX malloc threshold = %d\n", rc);
950        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
951        printf("  Default match limit = %ld\n", lrc);
952        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
953        printf("  Default recursion depth limit = %ld\n", lrc);
954        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
955        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
956        goto EXIT;
957        }
958      else if (strcmp(argv[op], "-help") == 0 ||
959               strcmp(argv[op], "--help") == 0)
960        {
961        usage();
962        goto EXIT;
963        }
964    else    else
965      {      {
966      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
967      return 1;      usage();
968        yield = 1;
969        goto EXIT;
970      }      }
971    op++;    op++;
972    argc--;    argc--;
973    }    }
974    
975    /* Get the store for the offsets vector, and remember what it was */
976    
977    size_offsets_max = size_offsets;
978    offsets = (int *)malloc(size_offsets_max * sizeof(int));
979    if (offsets == NULL)
980      {
981      printf("** Failed to get %d bytes of memory for offsets vector\n",
982        (int)(size_offsets_max * sizeof(int)));
983      yield = 1;
984      goto EXIT;
985      }
986    
987  /* Sort out the input and output files */  /* Sort out the input and output files */
988    
989  if (argc > 1)  if (argc > 1)
990    {    {
991    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
992    if (infile == NULL)    if (infile == NULL)
993      {      {
994      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
995      return 1;      yield = 1;
996        goto EXIT;
997      }      }
998    }    }
999    
1000  if (argc > 2)  if (argc > 2)
1001    {    {
1002    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1003    if (outfile == NULL)    if (outfile == NULL)
1004      {      {
1005      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1006      return 1;      yield = 1;
1007        goto EXIT;
1008      }      }
1009    }    }
1010    
1011  /* Set alternative malloc function */  /* Set alternative malloc function */
1012    
1013  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1014    pcre_free = new_free;
1015    pcre_stack_malloc = stack_malloc;
1016    pcre_stack_free = stack_free;
1017    
1018  /* Heading line, then prompt for first re if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1019    
1020  fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
 fprintf(outfile, "PCRE version %s\n\n", pcre_version());  
1021    
1022  /* Main loop */  /* Main loop */
1023    
1024  for (;;)  while (!done)
1025    {    {
1026    pcre *re = NULL;    pcre *re = NULL;
1027    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
1028    
1029    #if !defined NOPOSIX  /* There are still compilers that require no indent */
1030    regex_t preg;    regex_t preg;
   char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
1031    int do_posix = 0;    int do_posix = 0;
1032    int erroroffset, len, delimiter;  #endif
1033    
1034    if (infile == stdin) printf("  re> ");    const char *error;
1035    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    unsigned char *p, *pp, *ppp;
1036    if (infile != stdin) fprintf(outfile, (char *)buffer);    unsigned char *to_file = NULL;
1037      const unsigned char *tables = NULL;
1038      unsigned long int true_size, true_study_size = 0;
1039      size_t size, regex_gotten_store;
1040      int do_study = 0;
1041      int do_debug = debug;
1042      int do_G = 0;
1043      int do_g = 0;
1044      int do_showinfo = showinfo;
1045      int do_showrest = 0;
1046      int do_flip = 0;
1047      int erroroffset, len, delimiter, poffset;
1048    
1049      use_utf8 = 0;
1050      debug_lengths = 1;
1051    
1052      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
1053      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1054      fflush(outfile);
1055    
1056    p = buffer;    p = buffer;
1057    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1058    if (*p == 0) continue;    if (*p == 0) continue;
1059    
1060    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1061    complete, read more. */  
1062      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1063        {
1064        unsigned long int magic, get_options;
1065        uschar sbuf[8];
1066        FILE *f;
1067    
1068        p++;
1069        pp = p + (int)strlen((char *)p);
1070        while (isspace(pp[-1])) pp--;
1071        *pp = 0;
1072    
1073        f = fopen((char *)p, "rb");
1074        if (f == NULL)
1075          {
1076          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1077          continue;
1078          }
1079    
1080        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1081    
1082        true_size =
1083          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1084        true_study_size =
1085          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1086    
1087        re = (real_pcre *)new_malloc(true_size);
1088        regex_gotten_store = gotten_store;
1089    
1090        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1091    
1092        magic = ((real_pcre *)re)->magic_number;
1093        if (magic != MAGIC_NUMBER)
1094          {
1095          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1096            {
1097            do_flip = 1;
1098            }
1099          else
1100            {
1101            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1102            fclose(f);
1103            continue;
1104            }
1105          }
1106    
1107        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1108          do_flip? " (byte-inverted)" : "", p);
1109    
1110        /* Need to know if UTF-8 for printing data strings */
1111    
1112        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1113        use_utf8 = (get_options & PCRE_UTF8) != 0;
1114    
1115        /* Now see if there is any following study data */
1116    
1117        if (true_study_size != 0)
1118          {
1119          pcre_study_data *psd;
1120    
1121          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1122          extra->flags = PCRE_EXTRA_STUDY_DATA;
1123    
1124          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1125          extra->study_data = psd;
1126    
1127          if (fread(psd, 1, true_study_size, f) != true_study_size)
1128            {
1129            FAIL_READ:
1130            fprintf(outfile, "Failed to read data from %s\n", p);
1131            if (extra != NULL) new_free(extra);
1132            if (re != NULL) new_free(re);
1133            fclose(f);
1134            continue;
1135            }
1136          fprintf(outfile, "Study data loaded from %s\n", p);
1137          do_study = 1;     /* To get the data output if requested */
1138          }
1139        else fprintf(outfile, "No study data\n");
1140    
1141        fclose(f);
1142        goto SHOW_INFO;
1143        }
1144    
1145      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1146      the pattern; if it isn't complete, read more. */
1147    
1148    delimiter = *p++;    delimiter = *p++;
1149    
1150    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1151      {      {
1152      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1153      goto SKIP_DATA;      goto SKIP_DATA;
1154      }      }
1155    
1156    pp = p;    pp = p;
1157      poffset = p - buffer;
1158    
1159    for(;;)    for(;;)
1160      {      {
1161      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1162        {        {
1163        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1164        goto SKIP_DATA;          else if (*pp == delimiter) break;
1165          pp++;
1166        }        }
1167        if (*pp != 0) break;
1168      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if (fgets((char *)pp, len, infile) == NULL)  
1169        {        {
1170        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1171        goto END_OFF;        done = 1;
1172          goto CONTINUE;
1173        }        }
1174      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1175      }      }
1176    
1177    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1178      pointer to the correct relative point in the buffer. */
1179    
1180      p = buffer + poffset;
1181    
1182      /* If the first character after the delimiter is backslash, make
1183      the pattern end with backslash. This is purely to provide a way
1184      of testing for the error message when a pattern ends with backslash. */
1185    
1186      if (pp[1] == '\\') *pp++ = '\\';
1187    
1188      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1189      for callouts. */
1190    
1191    *pp++ = 0;    *pp++ = 0;
1192      strcpy((char *)pbuffer, (char *)p);
1193    
1194    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1195    
1196    options = 0;    options = 0;
1197    study_options = 0;    study_options = 0;
1198      log_store = showstore;  /* default from command line */
1199    
1200    while (*pp != 0)    while (*pp != 0)
1201      {      {
1202      switch (*pp++)      switch (*pp++)
1203        {        {
1204          case 'f': options |= PCRE_FIRSTLINE; break;
1205          case 'g': do_g = 1; break;
1206        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1207        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1208        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1209        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1210    
1211          case '+': do_showrest = 1; break;
1212        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1213        case 'D': do_debug = 1; break;        case 'B': do_debug = 1; break;
1214          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1215          case 'D': do_debug = do_showinfo = 1; break;
1216        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1217          case 'F': do_flip = 1; break;
1218          case 'G': do_G = 1; break;
1219          case 'I': do_showinfo = 1; break;
1220          case 'J': options |= PCRE_DUPNAMES; break;
1221          case 'M': log_store = 1; break;
1222          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1223    
1224    #if !defined NOPOSIX
1225        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1226    #endif
1227    
1228        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1229        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
1230        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1231        case '\n': case ' ': break;        case 'Z': debug_lengths = 0; break;
1232          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1233          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1234    
1235          case 'L':
1236          ppp = pp;
1237          /* The '\r' test here is so that it works on Windows. */
1238          /* The '0' test is just in case this is an unterminated line. */
1239          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1240          *ppp = 0;
1241          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1242            {
1243            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1244            goto SKIP_DATA;
1245            }
1246          locale_set = 1;
1247          tables = pcre_maketables();
1248          pp = ppp;
1249          break;
1250    
1251          case '>':
1252          to_file = pp;
1253          while (*pp != 0) pp++;
1254          while (isspace(pp[-1])) pp--;
1255          *pp = 0;
1256          break;
1257    
1258          case '<':
1259            {
1260            if (strncmp((char *)pp, "JS>", 3) == 0)
1261              {
1262              options |= PCRE_JAVASCRIPT_COMPAT;
1263              pp += 3;
1264              }
1265            else
1266              {
1267              int x = check_newline(pp, outfile);
1268              if (x == 0) goto SKIP_DATA;
1269              options |= x;
1270              while (*pp++ != '>');
1271              }
1272            }
1273          break;
1274    
1275          case '\r':                      /* So that it works in Windows */
1276          case '\n':
1277          case ' ':
1278          break;
1279    
1280        default:        default:
1281        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1282        goto SKIP_DATA;        goto SKIP_DATA;
1283        }        }
1284      }      }
1285    
1286    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1287    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
1288      local character tables. */
1289    
1290    #if !defined NOPOSIX
1291    if (posix || do_posix)    if (posix || do_posix)
1292      {      {
1293      int rc;      int rc;
1294      int cflags = 0;      int cflags = 0;
1295    
1296      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1297      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1298        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1299        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1300        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1301    
1302      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1303    
1304      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 422  for (;;) Line 1306  for (;;)
1306    
1307      if (rc != 0)      if (rc != 0)
1308        {        {
1309        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1310        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1311        goto SKIP_DATA;        goto SKIP_DATA;
1312        }        }
# Line 431  for (;;) Line 1315  for (;;)
1315    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1316    
1317    else    else
1318    #endif  /* !defined NOPOSIX */
1319    
1320      {      {
1321      if (timeit)      if (timeit > 0)
1322        {        {
1323        register int i;        register int i;
1324        clock_t time_taken;        clock_t time_taken;
1325        clock_t start_time = clock();        clock_t start_time = clock();
1326        for (i = 0; i < 4000; i++)        for (i = 0; i < timeit; i++)
1327          {          {
1328          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1329          if (re != NULL) free(re);          if (re != NULL) free(re);
1330          }          }
1331        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1332        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1333          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1334              (double)CLOCKS_PER_SEC);
1335        }        }
1336    
1337      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1338    
1339      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1340      if non-interactive. */      if non-interactive. */
# Line 460  for (;;) Line 1347  for (;;)
1347          {          {
1348          for (;;)          for (;;)
1349            {            {
1350            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1351              goto END_OFF;              {
1352                done = 1;
1353                goto CONTINUE;
1354                }
1355            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
1356            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
1357            if (len == 0) break;            if (len == 0) break;
1358            }            }
1359          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1360          }          }
1361        continue;        goto CONTINUE;
1362          }
1363    
1364        /* Compilation succeeded; print data if required. There are now two
1365        info-returning functions. The old one has a limited interface and
1366        returns only limited data. Check that it agrees with the newer one. */
1367    
1368        if (log_store)
1369          fprintf(outfile, "Memory allocation (code space): %d\n",
1370            (int)(gotten_store -
1371                  sizeof(real_pcre) -
1372                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1373    
1374        /* Extract the size for possible writing before possibly flipping it,
1375        and remember the store that was got. */
1376    
1377        true_size = ((real_pcre *)re)->size;
1378        regex_gotten_store = gotten_store;
1379    
1380        /* If /S was present, study the regexp to generate additional info to
1381        help with the matching. */
1382    
1383        if (do_study)
1384          {
1385          if (timeit > 0)
1386            {
1387            register int i;
1388            clock_t time_taken;
1389            clock_t start_time = clock();
1390            for (i = 0; i < timeit; i++)
1391              extra = pcre_study(re, study_options, &error);
1392            time_taken = clock() - start_time;
1393            if (extra != NULL) free(extra);
1394            fprintf(outfile, "  Study time %.4f milliseconds\n",
1395              (((double)time_taken * 1000.0) / (double)timeit) /
1396                (double)CLOCKS_PER_SEC);
1397            }
1398          extra = pcre_study(re, study_options, &error);
1399          if (error != NULL)
1400            fprintf(outfile, "Failed to study: %s\n", error);
1401          else if (extra != NULL)
1402            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1403        }        }
1404    
1405      /* Compilation succeeded; print data if required */      /* If the 'F' option was present, we flip the bytes of all the integer
1406        fields in the regex data block and the study block. This is to make it
1407        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1408        compiled on a different architecture. */
1409    
1410      if (showinfo || do_debug)      if (do_flip)
1411        {        {
1412        int first_char, count;        real_pcre *rre = (real_pcre *)re;
1413          rre->magic_number =
1414            byteflip(rre->magic_number, sizeof(rre->magic_number));
1415          rre->size = byteflip(rre->size, sizeof(rre->size));
1416          rre->options = byteflip(rre->options, sizeof(rre->options));
1417          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1418          rre->top_bracket =
1419            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1420          rre->top_backref =
1421            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1422          rre->first_byte =
1423            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1424          rre->req_byte =
1425            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1426          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1427            sizeof(rre->name_table_offset));
1428          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1429            sizeof(rre->name_entry_size));
1430          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1431            sizeof(rre->name_count));
1432    
1433        if (debug || do_debug) print_internals(re);        if (extra != NULL)
1434            {
1435            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1436            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1437            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1438            }
1439          }
1440    
1441        count = pcre_info(re, &options, &first_char);      /* Extract information from the compiled data if required */
1442    
1443        SHOW_INFO:
1444    
1445        if (do_debug)
1446          {
1447          fprintf(outfile, "------------------------------------------------------------------\n");
1448          pcre_printint(re, outfile, debug_lengths);
1449          }
1450    
1451        if (do_showinfo)
1452          {
1453          unsigned long int get_options, all_options;
1454    #if !defined NOINFOCHECK
1455          int old_first_char, old_options, old_count;
1456    #endif
1457          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1458            hascrorlf;
1459          int nameentrysize, namecount;
1460          const uschar *nametable;
1461    
1462          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1463          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1464          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1465          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1466          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1467          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1468          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1469          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1470          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1471          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1472          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1473          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1474    
1475    #if !defined NOINFOCHECK
1476          old_count = pcre_info(re, &old_options, &old_first_char);
1477        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1478          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
1479        else        else
1480          {          {
1481          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
1482          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1483            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
1484              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
1485              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
1486              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1487              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
1488              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
1489              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
1490              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1491          if (first_char == -1)              get_options, old_options);
1492            {          }
1493            fprintf(outfile, "First char at start or follows \\n\n");  #endif
1494            }  
1495          else if (first_char < 0)        if (size != regex_gotten_store) fprintf(outfile,
1496            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1497            (int)size, (int)regex_gotten_store);
1498    
1499          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1500          if (backrefmax > 0)
1501            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1502    
1503          if (namecount > 0)
1504            {
1505            fprintf(outfile, "Named capturing subpatterns:\n");
1506            while (namecount-- > 0)
1507            {            {
1508            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1509                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1510                GET2(nametable, 0));
1511              nametable += nameentrysize;
1512            }            }
1513            }
1514    
1515          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1516          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1517    
1518          all_options = ((real_pcre *)re)->options;
1519          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1520    
1521          if (get_options == 0) fprintf(outfile, "No options\n");
1522            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1523              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1524              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1525              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1526              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1527              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1528              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1529              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1530              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1531              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1532              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1533              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1534              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1535              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1536              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1537              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1538    
1539          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1540    
1541          switch (get_options & PCRE_NEWLINE_BITS)
1542            {
1543            case PCRE_NEWLINE_CR:
1544            fprintf(outfile, "Forced newline sequence: CR\n");
1545            break;
1546    
1547            case PCRE_NEWLINE_LF:
1548            fprintf(outfile, "Forced newline sequence: LF\n");
1549            break;
1550    
1551            case PCRE_NEWLINE_CRLF:
1552            fprintf(outfile, "Forced newline sequence: CRLF\n");
1553            break;
1554    
1555            case PCRE_NEWLINE_ANYCRLF:
1556            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1557            break;
1558    
1559            case PCRE_NEWLINE_ANY:
1560            fprintf(outfile, "Forced newline sequence: ANY\n");
1561            break;
1562    
1563            default:
1564            break;
1565            }
1566    
1567          if (first_char == -1)
1568            {
1569            fprintf(outfile, "First char at start or follows newline\n");
1570            }
1571          else if (first_char < 0)
1572            {
1573            fprintf(outfile, "No first char\n");
1574            }
1575          else
1576            {
1577            int ch = first_char & 255;
1578            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1579              "" : " (caseless)";
1580            if (PRINTHEX(ch))
1581              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1582            else
1583              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1584            }
1585    
1586          if (need_char < 0)
1587            {
1588            fprintf(outfile, "No need char\n");
1589            }
1590          else
1591            {
1592            int ch = need_char & 255;
1593            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1594              "" : " (caseless)";
1595            if (PRINTHEX(ch))
1596              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1597            else
1598              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1599            }
1600    
1601          /* Don't output study size; at present it is in any case a fixed
1602          value, but it varies, depending on the computer architecture, and
1603          so messes up the test suite. (And with the /F option, it might be
1604          flipped.) */
1605    
1606          if (do_study)
1607            {
1608            if (extra == NULL)
1609              fprintf(outfile, "Study returned NULL\n");
1610          else          else
1611            {            {
1612            if (isprint(first_char))            uschar *start_bits = NULL;
1613              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1614    
1615              if (start_bits == NULL)
1616                fprintf(outfile, "No starting byte set\n");
1617            else            else
1618              fprintf(outfile, "First char = %d\n", first_char);              {
1619                int i;
1620                int c = 24;
1621                fprintf(outfile, "Starting byte set: ");
1622                for (i = 0; i < 256; i++)
1623                  {
1624                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1625                    {
1626                    if (c > 75)
1627                      {
1628                      fprintf(outfile, "\n  ");
1629                      c = 2;
1630                      }
1631                    if (PRINTHEX(i) && i != ' ')
1632                      {
1633                      fprintf(outfile, "%c ", i);
1634                      c += 2;
1635                      }
1636                    else
1637                      {
1638                      fprintf(outfile, "\\x%02x ", i);
1639                      c += 5;
1640                      }
1641                    }
1642                  }
1643                fprintf(outfile, "\n");
1644                }
1645            }            }
1646          }          }
1647        }        }
1648    
1649      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1650      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1651        the study length, in big-endian order. */
1652    
1653      if (do_study)      if (to_file != NULL)
1654        {        {
1655        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1656          if (f == NULL)
1657          {          {
1658          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < 4000; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
1659          }          }
1660          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
1661          {          {
1662          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1663          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (uschar)((true_size >> 24) & 255);
1664            fprintf(outfile, "No starting character set\n");          sbuf[1] = (uschar)((true_size >> 16) & 255);
1665            sbuf[2] = (uschar)((true_size >>  8) & 255);
1666            sbuf[3] = (uschar)((true_size) & 255);
1667    
1668            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1669            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1670            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1671            sbuf[7] = (uschar)((true_study_size) & 255);
1672    
1673            if (fwrite(sbuf, 1, 8, f) < 8 ||
1674                fwrite(re, 1, true_size, f) < true_size)
1675              {
1676              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1677              }
1678          else          else
1679            {            {
1680            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1681            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1682              {              {
1683              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1684                    true_study_size)
1685                {                {
1686                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1687                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1688                }                }
1689                else fprintf(outfile, "Study data written to %s\n", to_file);
1690    
1691              }              }
           fprintf(outfile, "\n");  
1692            }            }
1693            fclose(f);
1694          }          }
1695    
1696          new_free(re);
1697          if (extra != NULL) new_free(extra);
1698          if (tables != NULL) new_free((void *)tables);
1699          continue;  /* With next regex */
1700        }        }
1701      }      }        /* End of non-POSIX compile */
1702    
1703    /* Read data lines and test them */    /* Read data lines and test them */
1704    
1705    for (;;)    for (;;)
1706      {      {
1707      unsigned char *pp;      uschar *q;
1708        uschar *bptr;
1709        int *use_offsets = offsets;
1710        int use_size_offsets = size_offsets;
1711        int callout_data = 0;
1712        int callout_data_set = 0;
1713      int count, c;      int count, c;
1714      int offsets[30];      int copystrings = 0;
1715      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = default_find_match_limit;
1716        int getstrings = 0;
1717        int getlist = 0;
1718        int gmatched = 0;
1719        int start_offset = 0;
1720        int g_notempty = 0;
1721        int use_dfa = 0;
1722    
1723      options = 0;      options = 0;
1724    
1725      if (infile == stdin) printf("  data> ");      *copynames = 0;
1726      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      *getnames = 0;
1727      if (infile != stdin) fprintf(outfile, (char *)buffer);  
1728        copynamesptr = copynames;
1729        getnamesptr = getnames;
1730    
1731        pcre_callout = callout;
1732        first_callout = 1;
1733        callout_extra = 0;
1734        callout_count = 0;
1735        callout_fail_count = 999999;
1736        callout_fail_id = -1;
1737        show_malloc = 0;
1738    
1739        if (extra != NULL) extra->flags &=
1740          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1741    
1742        len = 0;
1743        for (;;)
1744          {
1745          if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1746            {
1747            if (len > 0) break;
1748            done = 1;
1749            goto CONTINUE;
1750            }
1751          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1752          len = (int)strlen((char *)buffer);
1753          if (buffer[len-1] == '\n') break;
1754          }
1755    
     len = (int)strlen((char *)buffer);  
1756      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1757      buffer[len] = 0;      buffer[len] = 0;
1758      if (len == 0) break;      if (len == 0) break;
# Line 599  for (;;) Line 1760  for (;;)
1760      p = buffer;      p = buffer;
1761      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1762    
1763      pp = dbuffer;      bptr = q = dbuffer;
1764      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1765        {        {
1766        int i = 0;        int i = 0;
1767        int n = 0;        int n = 0;
1768    
1769        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1770          {          {
1771          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 620  for (;;) Line 1782  for (;;)
1782          c -= '0';          c -= '0';
1783          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1784            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1785    
1786    #if !defined NOUTF8
1787            if (use_utf8 && c > 255)
1788              {
1789              unsigned char buff8[8];
1790              int ii, utn;
1791              utn = ord2utf8(c, buff8);
1792              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1793              c = buff8[ii];   /* Last byte */
1794              }
1795    #endif
1796          break;          break;
1797    
1798          case 'x':          case 'x':
1799    
1800            /* Handle \x{..} specially - new Perl thing for utf8 */
1801    
1802    #if !defined NOUTF8
1803            if (*p == '{')
1804              {
1805              unsigned char *pt = p;
1806              c = 0;
1807              while (isxdigit(*(++pt)))
1808                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1809              if (*pt == '}')
1810                {
1811                unsigned char buff8[8];
1812                int ii, utn;
1813                if (use_utf8)
1814                  {
1815                  utn = ord2utf8(c, buff8);
1816                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817                  c = buff8[ii];   /* Last byte */
1818                  }
1819                else
1820                 {
1821                 if (c > 255)
1822                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1823                     "UTF-8 mode is not enabled.\n"
1824                     "** Truncation will probably give the wrong result.\n", c);
1825                 }
1826                p = pt + 1;
1827                break;
1828                }
1829              /* Not correct form; fall through */
1830              }
1831    #endif
1832    
1833            /* Ordinary \x */
1834    
1835          c = 0;          c = 0;
1836          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1837            {            {
# Line 631  for (;;) Line 1840  for (;;)
1840            }            }
1841          break;          break;
1842    
1843          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1844          p--;          p--;
1845          continue;          continue;
1846    
1847            case '>':
1848            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1849            continue;
1850    
1851          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1852          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1853          continue;          continue;
# Line 643  for (;;) Line 1856  for (;;)
1856          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1857          continue;          continue;
1858    
1859          case 'E':          case 'C':
1860          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1861              {
1862              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863              copystrings |= 1 << n;
1864              }
1865            else if (isalnum(*p))
1866              {
1867              uschar *npp = copynamesptr;
1868              while (isalnum(*p)) *npp++ = *p++;
1869              *npp++ = 0;
1870              *npp = 0;
1871              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1872              if (n < 0)
1873                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1874              copynamesptr = npp;
1875              }
1876            else if (*p == '+')
1877              {
1878              callout_extra = 1;
1879              p++;
1880              }
1881            else if (*p == '-')
1882              {
1883              pcre_callout = NULL;
1884              p++;
1885              }
1886            else if (*p == '!')
1887              {
1888              callout_fail_id = 0;
1889              p++;
1890              while(isdigit(*p))
1891                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1892              callout_fail_count = 0;
1893              if (*p == '!')
1894                {
1895                p++;
1896                while(isdigit(*p))
1897                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1898                }
1899              }
1900            else if (*p == '*')
1901              {
1902              int sign = 1;
1903              callout_data = 0;
1904              if (*(++p) == '-') { sign = -1; p++; }
1905              while(isdigit(*p))
1906                callout_data = callout_data * 10 + *p++ - '0';
1907              callout_data *= sign;
1908              callout_data_set = 1;
1909              }
1910            continue;
1911    
1912    #if !defined NODFA
1913            case 'D':
1914    #if !defined NOPOSIX
1915            if (posix || do_posix)
1916              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1917            else
1918    #endif
1919              use_dfa = 1;
1920            continue;
1921    
1922            case 'F':
1923            options |= PCRE_DFA_SHORTEST;
1924            continue;
1925    #endif
1926    
1927            case 'G':
1928            if (isdigit(*p))
1929              {
1930              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1931              getstrings |= 1 << n;
1932              }
1933            else if (isalnum(*p))
1934              {
1935              uschar *npp = getnamesptr;
1936              while (isalnum(*p)) *npp++ = *p++;
1937              *npp++ = 0;
1938              *npp = 0;
1939              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1940              if (n < 0)
1941                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1942              getnamesptr = npp;
1943              }
1944          continue;          continue;
1945    
1946          case 'I':          case 'L':
1947          options |= PCRE_CASELESS;          getlist = 1;
1948          continue;          continue;
1949    
1950          case 'M':          case 'M':
1951          options |= PCRE_MULTILINE;          find_match_limit = 1;
1952          continue;          continue;
1953    
1954          case 'S':          case 'N':
1955          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1956          continue;          continue;
1957    
1958          case 'O':          case 'O':
1959          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1960          if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n > size_offsets_max)
1961              {
1962              size_offsets_max = n;
1963              free(offsets);
1964              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1965              if (offsets == NULL)
1966                {
1967                printf("** Failed to get %d bytes of memory for offsets vector\n",
1968                  (int)(size_offsets_max * sizeof(int)));
1969                yield = 1;
1970                goto EXIT;
1971                }
1972              }
1973            use_size_offsets = n;
1974            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1975            continue;
1976    
1977            case 'P':
1978            options |= PCRE_PARTIAL;
1979            continue;
1980    
1981            case 'Q':
1982            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1983            if (extra == NULL)
1984              {
1985              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1986              extra->flags = 0;
1987              }
1988            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1989            extra->match_limit_recursion = n;
1990            continue;
1991    
1992            case 'q':
1993            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1994            if (extra == NULL)
1995              {
1996              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1997              extra->flags = 0;
1998              }
1999            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2000            extra->match_limit = n;
2001            continue;
2002    
2003    #if !defined NODFA
2004            case 'R':
2005            options |= PCRE_DFA_RESTART;
2006            continue;
2007    #endif
2008    
2009            case 'S':
2010            show_malloc = 1;
2011          continue;          continue;
2012    
2013            case 'Y':
2014            options |= PCRE_NO_START_OPTIMIZE;
2015            continue;
2016    
2017          case 'Z':          case 'Z':
2018          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2019          continue;          continue;
2020    
2021            case '?':
2022            options |= PCRE_NO_UTF8_CHECK;
2023            continue;
2024    
2025            case '<':
2026              {
2027              int x = check_newline(p, outfile);
2028              if (x == 0) goto NEXT_DATA;
2029              options |= x;
2030              while (*p++ != '>');
2031              }
2032            continue;
2033          }          }
2034        *pp++ = c;        *q++ = c;
2035          }
2036        *q = 0;
2037        len = q - dbuffer;
2038    
2039        /* Move the data to the end of the buffer so that a read over the end of
2040        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2041        we are using the POSIX interface, we must include the terminating zero. */
2042    
2043    #if !defined NOPOSIX
2044        if (posix || do_posix)
2045          {
2046          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2047          bptr += buffer_size - len - 1;
2048          }
2049        else
2050    #endif
2051          {
2052          memmove(bptr + buffer_size - len, bptr, len);
2053          bptr += buffer_size - len;
2054          }
2055    
2056        if ((all_use_dfa || use_dfa) && find_match_limit)
2057          {
2058          printf("**Match limit not relevant for DFA matching: ignored\n");
2059          find_match_limit = 0;
2060        }        }
     *pp = 0;  
     len = pp - dbuffer;  
2061    
2062      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2063      support timing. */      support timing or playing with the match limit or callout data. */
2064    
2065    #if !defined NOPOSIX
2066      if (posix || do_posix)      if (posix || do_posix)
2067        {        {
2068        int rc;        int rc;
2069        int eflags = 0;        int eflags = 0;
2070        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
2071          if (use_size_offsets > 0)
2072            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2073        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2074        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2075          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2076    
2077        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
2078    
2079        if (rc != 0)        if (rc != 0)
2080          {          {
2081          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2082          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2083          }          }
2084          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2085                  != 0)
2086            {
2087            fprintf(outfile, "Matched with REG_NOSUB\n");
2088            }
2089        else        else
2090          {          {
2091          int i;          size_t i;
2092          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2093            {            {
2094            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2095              {              {
2096              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
2097              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2098                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2099              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2100                if (i == 0 && do_showrest)
2101                  {
2102                  fprintf(outfile, " 0+ ");
2103                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2104                    outfile);
2105                  fprintf(outfile, "\n");
2106                  }
2107              }              }
2108            }            }
2109          }          }
2110          free(pmatch);
2111        }        }
2112    
2113      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
2114    
2115      else      else
2116    #endif  /* !defined NOPOSIX */
2117    
2118        for (;; gmatched++)    /* Loop for /g or /G */
2119        {        {
2120        if (timeit)        if (timeitm > 0)
2121          {          {
2122          register int i;          register int i;
2123          clock_t time_taken;          clock_t time_taken;
2124          clock_t start_time = clock();          clock_t start_time = clock();
2125          for (i = 0; i < 4000; i++)  
2126            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
2127              size_offsets);          if (all_use_dfa || use_dfa)
2128              {
2129              int workspace[1000];
2130              for (i = 0; i < timeitm; i++)
2131                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2132                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2133                  sizeof(workspace)/sizeof(int));
2134              }
2135            else
2136    #endif
2137    
2138            for (i = 0; i < timeitm; i++)
2139              count = pcre_exec(re, extra, (char *)bptr, len,
2140                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2141    
2142          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2143          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2144            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2145                (double)CLOCKS_PER_SEC);
2146            }
2147    
2148          /* If find_match_limit is set, we want to do repeated matches with
2149          varying limits in order to find the minimum value for the match limit and
2150          for the recursion limit. */
2151    
2152          if (find_match_limit)
2153            {
2154            if (extra == NULL)
2155              {
2156              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2157              extra->flags = 0;
2158              }
2159    
2160            (void)check_match_limit(re, extra, bptr, len, start_offset,
2161              options|g_notempty, use_offsets, use_size_offsets,
2162              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2163              PCRE_ERROR_MATCHLIMIT, "match()");
2164    
2165            count = check_match_limit(re, extra, bptr, len, start_offset,
2166              options|g_notempty, use_offsets, use_size_offsets,
2167              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2168              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2169            }
2170    
2171          /* If callout_data is set, use the interface with additional data */
2172    
2173          else if (callout_data_set)
2174            {
2175            if (extra == NULL)
2176              {
2177              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2178              extra->flags = 0;
2179              }
2180            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2181            extra->callout_data = &callout_data;
2182            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2183              options | g_notempty, use_offsets, use_size_offsets);
2184            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2185          }          }
2186    
2187        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2188          size_offsets);        value of match_limit. */
2189    
2190    #if !defined NODFA
2191          else if (all_use_dfa || use_dfa)
2192            {
2193            int workspace[1000];
2194            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2195              options | g_notempty, use_offsets, use_size_offsets, workspace,
2196              sizeof(workspace)/sizeof(int));
2197            if (count == 0)
2198              {
2199              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2200              count = use_size_offsets/2;
2201              }
2202            }
2203    #endif
2204    
2205        if (count == 0)        else
2206          {          {
2207          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2208          count = size_offsets/2;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2209            if (count == 0)
2210              {
2211              fprintf(outfile, "Matched, but too many substrings\n");
2212              count = use_size_offsets/3;
2213              }
2214          }          }
2215    
2216          /* Matched */
2217    
2218        if (count >= 0)        if (count >= 0)
2219          {          {
2220          int i;          int i, maxcount;
2221          count *= 2;  
2222          for (i = 0; i < count; i += 2)  #if !defined NODFA
2223            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2224    #endif
2225              maxcount = use_size_offsets/3;
2226    
2227            /* This is a check against a lunatic return value. */
2228    
2229            if (count > maxcount)
2230              {
2231              fprintf(outfile,
2232                "** PCRE error: returned count %d is too big for offset size %d\n",
2233                count, use_size_offsets);
2234              count = use_size_offsets/3;
2235              if (do_g || do_G)
2236                {
2237                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2238                do_g = do_G = FALSE;        /* Break g/G loop */
2239                }
2240              }
2241    
2242            for (i = 0; i < count * 2; i += 2)
2243            {            {
2244            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2245              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2246            else            else
2247              {              {
2248              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2249              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2250                  use_offsets[i+1] - use_offsets[i], outfile);
2251              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2252                if (i == 0)
2253                  {
2254                  if (do_showrest)
2255                    {
2256                    fprintf(outfile, " 0+ ");
2257                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2258                      outfile);
2259                    fprintf(outfile, "\n");
2260                    }
2261                  }
2262                }
2263              }
2264    
2265            for (i = 0; i < 32; i++)
2266              {
2267              if ((copystrings & (1 << i)) != 0)
2268                {
2269                char copybuffer[256];
2270                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2271                  i, copybuffer, sizeof(copybuffer));
2272                if (rc < 0)
2273                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2274                else
2275                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2276                }
2277              }
2278    
2279            for (copynamesptr = copynames;
2280                 *copynamesptr != 0;
2281                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2282              {
2283              char copybuffer[256];
2284              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2285                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2286              if (rc < 0)
2287                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2288              else
2289                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2290              }
2291    
2292            for (i = 0; i < 32; i++)
2293              {
2294              if ((getstrings & (1 << i)) != 0)
2295                {
2296                const char *substring;
2297                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2298                  i, &substring);
2299                if (rc < 0)
2300                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2301                else
2302                  {
2303                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2304                  pcre_free_substring(substring);
2305                  }
2306                }
2307              }
2308    
2309            for (getnamesptr = getnames;
2310                 *getnamesptr != 0;
2311                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2312              {
2313              const char *substring;
2314              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2315                count, (char *)getnamesptr, &substring);
2316              if (rc < 0)
2317                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2318              else
2319                {
2320                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2321                pcre_free_substring(substring);
2322              }              }
2323            }            }
2324    
2325            if (getlist)
2326              {
2327              const char **stringlist;
2328              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2329                &stringlist);
2330              if (rc < 0)
2331                fprintf(outfile, "get substring list failed %d\n", rc);
2332              else
2333                {
2334                for (i = 0; i < count; i++)
2335                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2336                if (stringlist[i] != NULL)
2337                  fprintf(outfile, "string list not terminated by NULL\n");
2338                /* free((void *)stringlist); */
2339                pcre_free_substring_list(stringlist);
2340                }
2341              }
2342            }
2343    
2344          /* There was a partial match */
2345    
2346          else if (count == PCRE_ERROR_PARTIAL)
2347            {
2348            fprintf(outfile, "Partial match");
2349    #if !defined NODFA
2350            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2351              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2352                bptr + use_offsets[0]);
2353    #endif
2354            fprintf(outfile, "\n");
2355            break;  /* Out of the /g loop */
2356          }          }
2357    
2358          /* Failed to match. If this is a /g or /G loop and we previously set
2359          g_notempty after a null match, this is not necessarily the end. We want
2360          to advance the start offset, and continue. We won't be at the end of the
2361          string - that was checked before setting g_notempty.
2362    
2363          Complication arises in the case when the newline option is "any" or
2364          "anycrlf". If the previous match was at the end of a line terminated by
2365          CRLF, an advance of one character just passes the \r, whereas we should
2366          prefer the longer newline sequence, as does the code in pcre_exec().
2367          Fudge the offset value to achieve this.
2368    
2369          Otherwise, in the case of UTF-8 matching, the advance must be one
2370          character, not one byte. */
2371    
2372        else        else
2373          {          {
2374          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2375              {
2376              int onechar = 1;
2377              unsigned int obits = ((real_pcre *)re)->options;
2378              use_offsets[0] = start_offset;
2379              if ((obits & PCRE_NEWLINE_BITS) == 0)
2380                {
2381                int d;
2382                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2383                obits = (d == '\r')? PCRE_NEWLINE_CR :
2384                        (d == '\n')? PCRE_NEWLINE_LF :
2385                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2386                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2387                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2388                }
2389              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2390                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2391                  &&
2392                  start_offset < len - 1 &&
2393                  bptr[start_offset] == '\r' &&
2394                  bptr[start_offset+1] == '\n')
2395                onechar++;
2396              else if (use_utf8)
2397                {
2398                while (start_offset + onechar < len)
2399                  {
2400                  int tb = bptr[start_offset+onechar];
2401                  if (tb <= 127) break;
2402                  tb &= 0xc0;
2403                  if (tb != 0 && tb != 0xc0) onechar++;
2404                  }
2405                }
2406              use_offsets[1] = start_offset + onechar;
2407              }
2408            else
2409              {
2410              if (count == PCRE_ERROR_NOMATCH)
2411                {
2412                if (gmatched == 0) fprintf(outfile, "No match\n");
2413                }
2414            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2415              break;  /* Out of the /g loop */
2416              }
2417          }          }
       }  
     }  
2418    
2419          /* If not /g or /G we are done */
2420    
2421          if (!do_g && !do_G) break;
2422    
2423          /* If we have matched an empty string, first check to see if we are at
2424          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2425          what Perl's /g options does. This turns out to be rather cunning. First
2426          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2427          same point. If this fails (picked up above) we advance to the next
2428          character. */
2429    
2430          g_notempty = 0;
2431    
2432          if (use_offsets[0] == use_offsets[1])
2433            {
2434            if (use_offsets[0] == len) break;
2435            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2436            }
2437    
2438          /* For /g, update the start offset, leaving the rest alone */
2439    
2440          if (do_g) start_offset = use_offsets[1];
2441    
2442          /* For /G, update the pointer and length */
2443    
2444          else
2445            {
2446            bptr += use_offsets[1];
2447            len -= use_offsets[1];
2448            }
2449          }  /* End of loop for /g and /G */
2450    
2451        NEXT_DATA: continue;
2452        }    /* End of loop for data lines */
2453    
2454      CONTINUE:
2455    
2456    #if !defined NOPOSIX
2457    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2458    if (re != NULL) free(re);  #endif
2459    if (extra != NULL) free(extra);  
2460      if (re != NULL) new_free(re);
2461      if (extra != NULL) new_free(extra);
2462      if (tables != NULL)
2463        {
2464        new_free((void *)tables);
2465        setlocale(LC_CTYPE, "C");
2466        locale_set = 0;
2467        }
2468    }    }
2469    
2470  END_OFF:  if (infile == stdin) fprintf(outfile, "\n");
2471  fprintf(outfile, "\n");  
2472  return 0;  EXIT:
2473    
2474    if (infile != NULL && infile != stdin) fclose(infile);
2475    if (outfile != NULL && outfile != stdout) fclose(outfile);
2476    
2477    free(buffer);
2478    free(dbuffer);
2479    free(pbuffer);
2480    free(offsets);
2481    
2482    return yield;
2483  }  }
2484    
2485  /* End */  /* End of pcretest.c */

Legend:
Removed from v.3  
changed lines
  Added in v.389

  ViewVC Help
Powered by ViewVC 1.1.5