/[pcre]/code/tags/pcre-6.0/pcretest.c
ViewVC logotype

Diff of /code/tags/pcre-6.0/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 9 by nigel, Sat Feb 24 21:38:13 2007 UTC revision 59 by nigel, Sat Feb 24 21:39:54 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32    #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
41    static int utf8_table1[] = {
42      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
43    
44    static int utf8_table2[] = {
45      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
46    
47    static int utf8_table3[] = {
48      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
49    
50    
51    /*************************************************
52    *       Convert character value to UTF-8         *
53    *************************************************/
54    
55    /* This function takes an integer value in the range 0 - 0x7fffffff
56    and encodes it as a UTF-8 character in 0 to 6 bytes.
57    
58    Arguments:
59      cvalue     the character value
60      buffer     pointer to buffer for result - at least 6 bytes long
61    
62    Returns:     number of characters placed in the buffer
63                 -1 if input character is negative
64                 0 if input character is positive but too big (only when
65                 int is longer than 32 bits)
66    */
67    
68    static int
69    ord2utf8(int cvalue, unsigned char *buffer)
70    {
71    register int i, j;
72    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
73      if (cvalue <= utf8_table1[i]) break;
74    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
75    if (cvalue < 0) return -1;
76    
77    buffer += i;
78    for (j = i; j > 0; j--)
79     {
80     *buffer-- = 0x80 | (cvalue & 0x3f);
81     cvalue >>= 6;
82     }
83    *buffer = utf8_table2[i] | cvalue;
84    return i + 1;
85    }
86    
87    
88    /*************************************************
89    *            Convert UTF-8 string to value       *
90    *************************************************/
91    
92    /* This function takes one or more bytes that represents a UTF-8 character,
93    and returns the value of the character.
94    
95    Argument:
96      buffer   a pointer to the byte vector
97      vptr     a pointer to an int to receive the value
98    
99    Returns:   >  0 => the number of bytes consumed
100               -6 to 0 => malformed UTF-8 character at offset = (-return)
101    */
102    
103    int
104    utf82ord(unsigned char *buffer, int *vptr)
105    {
106    int c = *buffer++;
107    int d = c;
108    int i, j, s;
109    
110    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
111      {
112      if ((d & 0x80) == 0) break;
113      d <<= 1;
114      }
115    
116    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
117    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
118    
119    /* i now has a value in the range 1-5 */
120    
121    s = 6*i;
122    d = (c & utf8_table3[i]) << s;
123    
124    for (j = 0; j < i; j++)
125      {
126      c = *buffer++;
127      if ((c & 0xc0) != 0x80) return -(j+1);
128      s -= 6;
129      d |= (c & 0x3f) << s;
130      }
131    
132    /* Check that encoding was the correct unique one */
133    
134    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
135      if (d <= utf8_table1[j]) break;
136    if (j != i) return -(i+1);
137    
138    /* Valid value */
139    
140    *vptr = d;
141    return i+1;
142    }
143    
144    
145    
146    
147    
148    
# Line 32  code as contained in pcre.c under the DE Line 151  code as contained in pcre.c under the DE
151    
152  static const char *OP_names[] = {  static const char *OP_names[] = {
153    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
154    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
155    "not",    "Opt", "^", "$", "Any", "chars", "not",
156    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
159    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
160    "class", "Ref",    "class", "Ref", "Recurse",
161    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
162    "Brazero", "Braminzero", "Bra"    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
163      "Brazero", "Braminzero", "Branumber", "Bra"
164  };  };
165    
166    
# Line 48  static void print_internals(pcre *re) Line 168  static void print_internals(pcre *re)
168  {  {
169  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
170    
171  printf("------------------------------------------------------------------\n");  fprintf(outfile, "------------------------------------------------------------------\n");
172    
173  for(;;)  for(;;)
174    {    {
175    int c;    int c;
176    int charlength;    int charlength;
177    
178    printf("%3d ", code - ((real_pcre *)re)->code);    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
179    
180    if (*code >= OP_BRA)    if (*code >= OP_BRA)
181      {      {
182      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      if (*code - OP_BRA > EXTRACT_BASIC_MAX)
183          fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
184        else
185          fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
186      code += 2;      code += 2;
187      }      }
188    
189    else switch(*code)    else switch(*code)
190      {      {
191      case OP_END:      case OP_END:
192      printf("    %s\n", OP_names[*code]);      fprintf(outfile, "    %s\n", OP_names[*code]);
193      printf("------------------------------------------------------------------\n");      fprintf(outfile, "------------------------------------------------------------------\n");
194      return;      return;
195    
196        case OP_OPT:
197        fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
198        code++;
199        break;
200    
201      case OP_CHARS:      case OP_CHARS:
202      charlength = *(++code);      charlength = *(++code);
203      printf("%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
204      while (charlength-- > 0)      while (charlength-- > 0)
205        if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);        if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
206            else fprintf(outfile, "\\x%02x", c);
207      break;      break;
208    
209      case OP_KETRMAX:      case OP_KETRMAX:
# Line 83  for(;;) Line 212  for(;;)
212      case OP_KET:      case OP_KET:
213      case OP_ASSERT:      case OP_ASSERT:
214      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
215        case OP_ASSERTBACK:
216        case OP_ASSERTBACK_NOT:
217      case OP_ONCE:      case OP_ONCE:
218      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      case OP_COND:
219        case OP_BRANUMBER:
220        case OP_REVERSE:
221        case OP_CREF:
222        fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
223      code += 2;      code += 2;
224      break;      break;
225    
# Line 101  for(;;) Line 236  for(;;)
236      case OP_TYPEQUERY:      case OP_TYPEQUERY:
237      case OP_TYPEMINQUERY:      case OP_TYPEMINQUERY:
238      if (*code >= OP_TYPESTAR)      if (*code >= OP_TYPESTAR)
239        printf("    %s", OP_names[code[1]]);        fprintf(outfile, "    %s", OP_names[code[1]]);
240      else if (isprint(c = code[1])) printf("    %c", c);      else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);
241        else printf("    \\x%02x", c);        else fprintf(outfile, "    \\x%02x", c);
242      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
243      break;      break;
244    
245      case OP_EXACT:      case OP_EXACT:
246      case OP_UPTO:      case OP_UPTO:
247      case OP_MINUPTO:      case OP_MINUPTO:
248      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);
249        else printf("    \\x%02x{", c);        else fprintf(outfile, "    \\x%02x{", c);
250      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) fprintf(outfile, ",");
251      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
252      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) fprintf(outfile, "?");
253      code += 3;      code += 3;
254      break;      break;
255    
256      case OP_TYPEEXACT:      case OP_TYPEEXACT:
257      case OP_TYPEUPTO:      case OP_TYPEUPTO:
258      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
259      printf("    %s{", OP_names[code[3]]);      fprintf(outfile, "    %s{", OP_names[code[3]]);
260      if (*code != OP_TYPEEXACT) printf(",");      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
261      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
262      if (*code == OP_TYPEMINUPTO) printf("?");      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
263      code += 3;      code += 3;
264      break;      break;
265    
266      case OP_NOT:      case OP_NOT:
267      if (isprint(c = *(++code))) printf("    [^%c]", c);      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);
268        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
269      break;      break;
270    
271      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 139  for(;;) Line 274  for(;;)
274      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
275      case OP_NOTQUERY:      case OP_NOTQUERY:
276      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
277      if (isprint(c = code[1])) printf("    [^%c]", c);      if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);
278        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
279      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
280      break;      break;
281    
282      case OP_NOTEXACT:      case OP_NOTEXACT:
283      case OP_NOTUPTO:      case OP_NOTUPTO:
284      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
285      if (isprint(c = code[3])) printf("    [^%c]{", c);      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);
286        else printf("    [^\\x%02x]{", c);        else fprintf(outfile, "    [^\\x%02x]{", c);
287      if (*code != OP_NOTEXACT) printf(",");      if (*code != OP_NOTEXACT) fprintf(outfile, ",");
288      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
289      if (*code == OP_NOTMINUPTO) printf("?");      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
290      code += 3;      code += 3;
291      break;      break;
292    
293      case OP_REF:      case OP_REF:
294      printf("    \\%d", *(++code));      fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);
295      code++;      code += 3;
296      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
297    
298      case OP_CLASS:      case OP_CLASS:
299        {        {
300        int i, min, max;        int i, min, max;
   
301        code++;        code++;
302        printf("    [");        fprintf(outfile, "    [");
303    
304        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
305          {          {
# Line 174  for(;;) Line 308  for(;;)
308            int j;            int j;
309            for (j = i+1; j < 256; j++)            for (j = i+1; j < 256; j++)
310              if ((code[j/8] & (1 << (j&7))) == 0) break;              if ((code[j/8] & (1 << (j&7))) == 0) break;
311            if (i == '-' || i == ']') printf("\\");            if (i == '-' || i == ']') fprintf(outfile, "\\");
312            if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
313            if (--j > i)            if (--j > i)
314              {              {
315              printf("-");              fprintf(outfile, "-");
316              if (j == '-' || j == ']') printf("\\");              if (j == '-' || j == ']') fprintf(outfile, "\\");
317              if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
318              }              }
319            i = j;            i = j;
320            }            }
321          }          }
322        printf("]");        fprintf(outfile, "]");
323        code += 32;        code += 32;
324    
325        CLASS_REF_REPEAT:        CLASS_REF_REPEAT:
# Line 198  for(;;) Line 332  for(;;)
332          case OP_CRMINPLUS:          case OP_CRMINPLUS:
333          case OP_CRQUERY:          case OP_CRQUERY:
334          case OP_CRMINQUERY:          case OP_CRMINQUERY:
335          printf("%s", OP_names[*code]);          fprintf(outfile, "%s", OP_names[*code]);
336          break;          break;
337    
338          case OP_CRRANGE:          case OP_CRRANGE:
339          case OP_CRMINRANGE:          case OP_CRMINRANGE:
340          min = (code[1] << 8) + code[2];          min = (code[1] << 8) + code[2];
341          max = (code[3] << 8) + code[4];          max = (code[3] << 8) + code[4];
342          if (max == 0) printf("{%d,}", min);          if (max == 0) fprintf(outfile, "{%d,}", min);
343          else printf("{%d,%d}", min, max);          else fprintf(outfile, "{%d,%d}", min, max);
344          if (*code == OP_CRMINRANGE) printf("?");          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
345          code += 4;          code += 4;
346          break;          break;
347    
# Line 220  for(;;) Line 354  for(;;)
354      /* Anything else is just a one-node item */      /* Anything else is just a one-node item */
355    
356      default:      default:
357      printf("    %s", OP_names[*code]);      fprintf(outfile, "    %s", OP_names[*code]);
358      break;      break;
359      }      }
360    
361    code++;    code++;
362    printf("\n");    fprintf(outfile, "\n");
363    }    }
364  }  }
365    
366    
367    
368  /* Character string printing function. */  /* Character string printing function. A "normal" and a UTF-8 version. */
369    
370  static void pchars(unsigned char *p, int length)  static void pchars(unsigned char *p, int length, int utf8)
371  {  {
372  int c;  int c;
373  while (length-- > 0)  while (length-- > 0)
374      {
375      if (utf8)
376        {
377        int rc = utf82ord(p, &c);
378        if (rc > 0)
379          {
380          length -= rc - 1;
381          p += rc;
382          if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
383            else fprintf(outfile, "\\x{%02x}", c);
384          continue;
385          }
386        }
387    
388       /* Not UTF-8, or malformed UTF-8  */
389    
390    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
391      else fprintf(outfile, "\\x%02x", c);      else fprintf(outfile, "\\x%02x", c);
392      }
393  }  }
394    
395    
# Line 248  compiled re. */ Line 399  compiled re. */
399    
400  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
401  {  {
402  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
403    if (log_store)
404      fprintf(outfile, "Memory allocation (code space): %d\n",
405        (int)((int)size - offsetof(real_pcre, code[0])));
406  return malloc(size);  return malloc(size);
407  }  }
408    
409    
410    
411    
412    /* Get one piece of information from the pcre_fullinfo() function */
413    
414    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
415    {
416    int rc;
417    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
418      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
419    }
420    
421    
422    
423    
424  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
425  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
426  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 266  int study_options = 0; Line 433  int study_options = 0;
433  int op = 1;  int op = 1;
434  int timeit = 0;  int timeit = 0;
435  int showinfo = 0;  int showinfo = 0;
436    int showstore = 0;
437    int size_offsets = 45;
438    int size_offsets_max;
439    int *offsets;
440    #if !defined NOPOSIX
441  int posix = 0;  int posix = 0;
442    #endif
443  int debug = 0;  int debug = 0;
444    int done = 0;
445  unsigned char buffer[30000];  unsigned char buffer[30000];
446  unsigned char dbuffer[1024];  unsigned char dbuffer[1024];
447    
# Line 279  outfile = stdout; Line 453  outfile = stdout;
453    
454  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
455    {    {
456    if (strcmp(argv[op], "-s") == 0) log_store = 1;    char *endptr;
457    
458      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459        showstore = 1;
460    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
464          ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
465        {
466        op++;
467        argc--;
468        }
469    #if !defined NOPOSIX
470    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
471    #endif
472    else    else
473      {      {
474      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
475        printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
476        printf("  -d     debug: show compiled code; implies -i\n"
477               "  -i     show information about compiled pattern\n"
478               "  -o <n> set size of offsets vector to <n>\n");
479    #if !defined NOPOSIX
480        printf("  -p     use POSIX interface\n");
481    #endif
482        printf("  -s     output store information\n"
483               "  -t     time compilation and execution\n");
484      return 1;      return 1;
485      }      }
486    op++;    op++;
487    argc--;    argc--;
488    }    }
489    
490    /* Get the store for the offsets vector, and remember what it was */
491    
492    size_offsets_max = size_offsets;
493    offsets = malloc(size_offsets_max * sizeof(int));
494    if (offsets == NULL)
495      {
496      printf("** Failed to get %d bytes of memory for offsets vector\n",
497        size_offsets_max * sizeof(int));
498      return 1;
499      }
500    
501  /* Sort out the input and output files */  /* Sort out the input and output files */
502    
503  if (argc > 1)  if (argc > 1)
# Line 319  if (argc > 2) Line 524  if (argc > 2)
524    
525  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
526    
527  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
528    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
529  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
530    
531  /* Main loop */  /* Main loop */
532    
533  for (;;)  while (!done)
534    {    {
535    pcre *re = NULL;    pcre *re = NULL;
536    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
537    
538    #if !defined NOPOSIX  /* There are still compilers that require no indent */
539    regex_t preg;    regex_t preg;
540      int do_posix = 0;
541    #endif
542    
543    const char *error;    const char *error;
544    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
545      const unsigned char *tables = NULL;
546    int do_study = 0;    int do_study = 0;
547    int do_debug = 0;    int do_debug = debug;
548    int do_posix = 0;    int do_G = 0;
549      int do_g = 0;
550      int do_showinfo = showinfo;
551      int do_showrest = 0;
552      int utf8 = 0;
553    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
554    
555    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
556    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
557    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
558    
559    p = buffer;    p = buffer;
560    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 351  for (;;) Line 565  for (;;)
565    
566    delimiter = *p++;    delimiter = *p++;
567    
568    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
569      {      {
570      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
571      goto SKIP_DATA;      goto SKIP_DATA;
572      }      }
573    
# Line 361  for (;;) Line 575  for (;;)
575    
576    for(;;)    for(;;)
577      {      {
578      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
579          {
580          if (*pp == '\\' && pp[1] != 0) pp++;
581            else if (*pp == delimiter) break;
582          pp++;
583          }
584      if (*pp != 0) break;      if (*pp != 0) break;
585    
586      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 375  for (;;) Line 594  for (;;)
594      if (fgets((char *)pp, len, infile) == NULL)      if (fgets((char *)pp, len, infile) == NULL)
595        {        {
596        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
597        goto END_OFF;        done = 1;
598          goto CONTINUE;
599        }        }
600      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
601      }      }
602    
603      /* If the first character after the delimiter is backslash, make
604      the pattern end with backslash. This is purely to provide a way
605      of testing for the error message when a pattern ends with backslash. */
606    
607      if (pp[1] == '\\') *pp++ = '\\';
608    
609    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
610    
611    *pp++ = 0;    *pp++ = 0;
# Line 388  for (;;) Line 614  for (;;)
614    
615    options = 0;    options = 0;
616    study_options = 0;    study_options = 0;
617      log_store = showstore;  /* default from command line */
618    
619    while (*pp != 0)    while (*pp != 0)
620      {      {
621      switch (*pp++)      switch (*pp++)
622        {        {
623          case 'g': do_g = 1; break;
624        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
625        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
626        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
627        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
628    
629          case '+': do_showrest = 1; break;
630        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
631        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
632        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
633          case 'G': do_G = 1; break;
634          case 'I': do_showinfo = 1; break;
635          case 'M': log_store = 1; break;
636    
637    #if !defined NOPOSIX
638        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
639    #endif
640    
641        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
642        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
643        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
644          case '8': options |= PCRE_UTF8; utf8 = 1; break;
645    
646          case 'L':
647          ppp = pp;
648          while (*ppp != '\n' && *ppp != ' ') ppp++;
649          *ppp = 0;
650          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
651            {
652            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
653            goto SKIP_DATA;
654            }
655          tables = pcre_maketables();
656          pp = ppp;
657          break;
658    
659        case '\n': case ' ': break;        case '\n': case ' ': break;
660        default:        default:
661        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 410  for (;;) Line 663  for (;;)
663        }        }
664      }      }
665    
666    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
667    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
668      local character tables. */
669    
670    #if !defined NOPOSIX
671    if (posix || do_posix)    if (posix || do_posix)
672      {      {
673      int rc;      int rc;
# Line 435  for (;;) Line 690  for (;;)
690    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
691    
692    else    else
693    #endif  /* !defined NOPOSIX */
694    
695      {      {
696      if (timeit)      if (timeit)
697        {        {
698        register int i;        register int i;
699        clock_t time_taken;        clock_t time_taken;
700        clock_t start_time = clock();        clock_t start_time = clock();
701        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
702          {          {
703          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
704          if (re != NULL) free(re);          if (re != NULL) free(re);
705          }          }
706        time_taken = clock() - start_time;        time_taken = clock() - start_time;
707        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
708          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
709            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
710        }        }
711    
712      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
713    
714      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
715      if non-interactive. */      if non-interactive. */
# Line 465  for (;;) Line 723  for (;;)
723          for (;;)          for (;;)
724            {            {
725            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
726              goto END_OFF;              {
727                done = 1;
728                goto CONTINUE;
729                }
730            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
731            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
732            if (len == 0) break;            if (len == 0) break;
733            }            }
734          fprintf(outfile, "\n");          fprintf(outfile, "\n");
735          }          }
736        continue;        goto CONTINUE;
737        }        }
738    
739      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
740        info-returning functions. The old one has a limited interface and
741        returns only limited data. Check that it agrees with the newer one. */
742    
743      if (showinfo || do_debug)      if (do_showinfo)
744        {        {
745        int first_char, count;        unsigned long int get_options;
746          int old_first_char, old_options, old_count;
747        if (debug || do_debug) print_internals(re);        int count, backrefmax, first_char, need_char;
748          size_t size;
749    
750          if (do_debug) print_internals(re);
751    
752          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
753          new_info(re, NULL, PCRE_INFO_SIZE, &size);
754          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
755          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
756          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
757          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
758    
759        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
760        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
761          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
762        else        else
763          {          {
764          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
765          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
766            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
767              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
768              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
769              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
770              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
771              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
772              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
773              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
774          if (first_char == -1)              get_options, old_options);
775            {          }
776            fprintf(outfile, "First char at start or follows \\n\n");  
777            }        if (size != gotten_store) fprintf(outfile,
778          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
779            {          size, gotten_store);
780            fprintf(outfile, "No first char\n");  
781            }        fprintf(outfile, "Capturing subpattern count = %d\n", count);
782          if (backrefmax > 0)
783            fprintf(outfile, "Max back reference = %d\n", backrefmax);
784          if (get_options == 0) fprintf(outfile, "No options\n");
785            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
786              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
787              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
788              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
789              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
790              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
791              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
792              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
793              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
794              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
795    
796          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
797            fprintf(outfile, "Case state changes\n");
798    
799          if (first_char == -1)
800            {
801            fprintf(outfile, "First char at start or follows \\n\n");
802            }
803          else if (first_char < 0)
804            {
805            fprintf(outfile, "No first char\n");
806            }
807          else
808            {
809            if (isprint(first_char))
810              fprintf(outfile, "First char = \'%c\'\n", first_char);
811          else          else
812            {            fprintf(outfile, "First char = %d\n", first_char);
813            if (isprint(first_char))          }
814              fprintf(outfile, "First char = \'%c\'\n", first_char);  
815            else        if (need_char < 0)
816              fprintf(outfile, "First char = %d\n", first_char);          {
817            }          fprintf(outfile, "No need char\n");
818            }
819          else
820            {
821            if (isprint(need_char))
822              fprintf(outfile, "Need char = \'%c\'\n", need_char);
823            else
824              fprintf(outfile, "Need char = %d\n", need_char);
825          }          }
826        }        }
827    
# Line 526  for (;;) Line 835  for (;;)
835          register int i;          register int i;
836          clock_t time_taken;          clock_t time_taken;
837          clock_t start_time = clock();          clock_t start_time = clock();
838          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
839            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
840          time_taken = clock() - start_time;          time_taken = clock() - start_time;
841          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
842          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
843            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
844              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
845          }          }
846    
847        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 540  for (;;) Line 850  for (;;)
850        else if (extra == NULL)        else if (extra == NULL)
851          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
852    
853        /* This looks at internal information. A bit kludgy to do it this        else if (do_showinfo)
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
854          {          {
855          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
856          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
857            if (start_bits == NULL)
858            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
859          else          else
860            {            {
# Line 555  for (;;) Line 863  for (;;)
863            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
864            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
865              {              {
866              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
867                {                {
868                if (c > 75)                if (c > 75)
869                  {                  {
# Line 585  for (;;) Line 893  for (;;)
893    for (;;)    for (;;)
894      {      {
895      unsigned char *q;      unsigned char *q;
896        unsigned char *bptr = dbuffer;
897        int *use_offsets = offsets;
898        int use_size_offsets = size_offsets;
899      int count, c;      int count, c;
900      int offsets[30];      int copystrings = 0;
901      int size_offsets = sizeof(offsets)/sizeof(int);      int getstrings = 0;
902        int getlist = 0;
903        int gmatched = 0;
904        int start_offset = 0;
905        int g_notempty = 0;
906    
907      options = 0;      options = 0;
908    
909      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
910      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
911      if (infile != stdin) fprintf(outfile, (char *)buffer);        {
912          done = 1;
913          goto CONTINUE;
914          }
915        if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
916    
917      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
918      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 627  for (;;) Line 946  for (;;)
946          break;          break;
947    
948          case 'x':          case 'x':
949    
950            /* Handle \x{..} specially - new Perl thing for utf8 */
951    
952            if (*p == '{')
953              {
954              unsigned char *pt = p;
955              c = 0;
956              while (isxdigit(*(++pt)))
957                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
958              if (*pt == '}')
959                {
960                unsigned char buffer[8];
961                int ii, utn;
962                utn = ord2utf8(c, buffer);
963                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
964                c = buffer[ii];   /* Last byte */
965                p = pt + 1;
966                break;
967                }
968              /* Not correct form; fall through */
969              }
970    
971            /* Ordinary \x */
972    
973          c = 0;          c = 0;
974          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
975            {            {
# Line 647  for (;;) Line 990  for (;;)
990          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
991          continue;          continue;
992    
993          case 'E':          case 'C':
994          options |= PCRE_DOLLAR_ENDONLY;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
995            copystrings |= 1 << n;
996          continue;          continue;
997    
998          case 'I':          case 'G':
999          options |= PCRE_CASELESS;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1000            getstrings |= 1 << n;
1001          continue;          continue;
1002    
1003          case 'M':          case 'L':
1004          options |= PCRE_MULTILINE;          getlist = 1;
1005          continue;          continue;
1006    
1007          case 'S':          case 'N':
1008          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1009          continue;          continue;
1010    
1011          case 'O':          case 'O':
1012          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1014              {
1015              size_offsets_max = n;
1016              free(offsets);
1017              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1018              if (offsets == NULL)
1019                {
1020                printf("** Failed to get %d bytes of memory for offsets vector\n",
1021                  size_offsets_max * sizeof(int));
1022                return 1;
1023                }
1024              }
1025            use_size_offsets = n;
1026            if (n == 0) use_offsets = NULL;
1027          continue;          continue;
1028    
1029          case 'Z':          case 'Z':
# Line 680  for (;;) Line 1038  for (;;)
1038      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1039      support timing. */      support timing. */
1040    
1041    #if !defined NOPOSIX
1042      if (posix || do_posix)      if (posix || do_posix)
1043        {        {
1044        int rc;        int rc;
1045        int eflags = 0;        int eflags = 0;
1046        regmatch_t pmatch[30];        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1047        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1048        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1049    
1050        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1051    
1052        if (rc != 0)        if (rc != 0)
1053          {          {
# Line 699  for (;;) Line 1057  for (;;)
1057        else        else
1058          {          {
1059          size_t i;          size_t i;
1060          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < use_size_offsets; i++)
1061            {            {
1062            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1063              {              {
1064              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1065              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
1066                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1067              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1068                if (i == 0 && do_showrest)
1069                  {
1070                  fprintf(outfile, " 0+ ");
1071                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1072                  fprintf(outfile, "\n");
1073                  }
1074              }              }
1075            }            }
1076          }          }
1077          free(pmatch);
1078        }        }
1079    
1080      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1081    
1082      else      else
1083    #endif  /* !defined NOPOSIX */
1084    
1085        for (;; gmatched++)    /* Loop for /g or /G */
1086        {        {
1087        if (timeit)        if (timeit)
1088          {          {
1089          register int i;          register int i;
1090          clock_t time_taken;          clock_t time_taken;
1091          clock_t start_time = clock();          clock_t start_time = clock();
1092          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1093            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1094              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1095          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1096          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1097            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
1098              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1099          }          }
1100    
1101        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
1102          size_offsets);          start_offset, options | g_notempty, use_offsets, use_size_offsets);
1103    
1104        if (count == 0)        if (count == 0)
1105          {          {
1106          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1107          count = size_offsets/2;          count = use_size_offsets/3;
1108          }          }
1109    
1110          /* Matched */
1111    
1112        if (count >= 0)        if (count >= 0)
1113          {          {
1114          int i;          int i;
1115          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1116            {            {
1117            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1118              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1119            else            else
1120              {              {
1121              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1122              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1123              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1124                if (i == 0)
1125                  {
1126                  if (do_showrest)
1127                    {
1128                    fprintf(outfile, " 0+ ");
1129                    pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1130                    fprintf(outfile, "\n");
1131                    }
1132                  }
1133                }
1134              }
1135    
1136            for (i = 0; i < 32; i++)
1137              {
1138              if ((copystrings & (1 << i)) != 0)
1139                {
1140                char copybuffer[16];
1141                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1142                  i, copybuffer, sizeof(copybuffer));
1143                if (rc < 0)
1144                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1145                else
1146                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1147                }
1148              }
1149    
1150            for (i = 0; i < 32; i++)
1151              {
1152              if ((getstrings & (1 << i)) != 0)
1153                {
1154                const char *substring;
1155                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1156                  i, &substring);
1157                if (rc < 0)
1158                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1159                else
1160                  {
1161                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1162                  /* free((void *)substring); */
1163                  pcre_free_substring(substring);
1164                  }
1165                }
1166              }
1167    
1168            if (getlist)
1169              {
1170              const char **stringlist;
1171              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1172                &stringlist);
1173              if (rc < 0)
1174                fprintf(outfile, "get substring list failed %d\n", rc);
1175              else
1176                {
1177                for (i = 0; i < count; i++)
1178                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1179                if (stringlist[i] != NULL)
1180                  fprintf(outfile, "string list not terminated by NULL\n");
1181                /* free((void *)stringlist); */
1182                pcre_free_substring_list(stringlist);
1183              }              }
1184            }            }
1185          }          }
1186    
1187          /* Failed to match. If this is a /g or /G loop and we previously set
1188          g_notempty after a null match, this is not necessarily the end.
1189          We want to advance the start offset, and continue. Fudge the offset
1190          values to achieve this. We won't be at the end of the string - that
1191          was checked before setting g_notempty. */
1192    
1193        else        else
1194          {          {
1195          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1196            else fprintf(outfile, "Error %d\n", count);            {
1197              use_offsets[0] = start_offset;
1198              use_offsets[1] = start_offset + 1;
1199              }
1200            else
1201              {
1202              if (gmatched == 0)   /* Error if no previous matches */
1203                {
1204                if (count == -1) fprintf(outfile, "No match\n");
1205                  else fprintf(outfile, "Error %d\n", count);
1206                }
1207              break;  /* Out of the /g loop */
1208              }
1209          }          }
       }  
     }  
1210    
1211          /* If not /g or /G we are done */
1212    
1213          if (!do_g && !do_G) break;
1214    
1215          /* If we have matched an empty string, first check to see if we are at
1216          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1217          what Perl's /g option does. This turns out to be rather cunning. First
1218          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1219          same point. If this fails (picked up above) we advance to the next
1220          character. */
1221    
1222          g_notempty = 0;
1223          if (use_offsets[0] == use_offsets[1])
1224            {
1225            if (use_offsets[0] == len) break;
1226            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1227            }
1228    
1229          /* For /g, update the start offset, leaving the rest alone */
1230    
1231          if (do_g) start_offset = use_offsets[1];
1232    
1233          /* For /G, update the pointer and length */
1234    
1235          else
1236            {
1237            bptr += use_offsets[1];
1238            len -= use_offsets[1];
1239            }
1240          }  /* End of loop for /g and /G */
1241        }    /* End of loop for data lines */
1242    
1243      CONTINUE:
1244    
1245    #if !defined NOPOSIX
1246    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1247    #endif
1248    
1249    if (re != NULL) free(re);    if (re != NULL) free(re);
1250    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1251      if (tables != NULL)
1252        {
1253        free((void *)tables);
1254        setlocale(LC_CTYPE, "C");
1255        }
1256    }    }
1257    
 END_OFF:  
1258  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1259  return 0;  return 0;
1260  }  }

Legend:
Removed from v.9  
changed lines
  Added in v.59

  ViewVC Help
Powered by ViewVC 1.1.5