/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 37 by nigel, Sat Feb 24 21:39:09 2007 UTC revision 59 by nigel, Sat Feb 24 21:39:54 2007 UTC
# Line 34  Makefile. */ Line 34  Makefile. */
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
41    static int utf8_table1[] = {
42      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
43    
44    static int utf8_table2[] = {
45      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
46    
47    static int utf8_table3[] = {
48      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
49    
50    
51    /*************************************************
52    *       Convert character value to UTF-8         *
53    *************************************************/
54    
55    /* This function takes an integer value in the range 0 - 0x7fffffff
56    and encodes it as a UTF-8 character in 0 to 6 bytes.
57    
58    Arguments:
59      cvalue     the character value
60      buffer     pointer to buffer for result - at least 6 bytes long
61    
62    Returns:     number of characters placed in the buffer
63                 -1 if input character is negative
64                 0 if input character is positive but too big (only when
65                 int is longer than 32 bits)
66    */
67    
68    static int
69    ord2utf8(int cvalue, unsigned char *buffer)
70    {
71    register int i, j;
72    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
73      if (cvalue <= utf8_table1[i]) break;
74    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
75    if (cvalue < 0) return -1;
76    
77    buffer += i;
78    for (j = i; j > 0; j--)
79     {
80     *buffer-- = 0x80 | (cvalue & 0x3f);
81     cvalue >>= 6;
82     }
83    *buffer = utf8_table2[i] | cvalue;
84    return i + 1;
85    }
86    
87    
88    /*************************************************
89    *            Convert UTF-8 string to value       *
90    *************************************************/
91    
92    /* This function takes one or more bytes that represent a UTF-8 character,
93    and returns the value of the character.
94    
95    Argument:
96      buffer   a pointer to the byte vector
97      vptr     a pointer to an int to receive the value
98    
99    Returns:   >  0 => the number of bytes consumed
100               -6 to 0 => malformed UTF-8 character at offset = (-return)
101    */
102    
103    int
104    utf82ord(unsigned char *buffer, int *vptr)
105    {
106    int c = *buffer++;
107    int d = c;
108    int i, j, s;
109    
110    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
111      {
112      if ((d & 0x80) == 0) break;
113      d <<= 1;
114      }
115    
116    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
117    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
118    
119    /* i now has a value in the range 1-5 */
120    
121    s = 6*i;
122    d = (c & utf8_table3[i]) << s;
123    
124    for (j = 0; j < i; j++)
125      {
126      c = *buffer++;
127      if ((c & 0xc0) != 0x80) return -(j+1);
128      s -= 6;
129      d |= (c & 0x3f) << s;
130      }
131    
132    /* Check that encoding was the correct unique one */
133    
134    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
135      if (d <= utf8_table1[j]) break;
136    if (j != i) return -(i+1);
137    
138    /* Valid value */
139    
140    *vptr = d;
141    return i+1;
142    }
143    
144    
145    
146    
147    
148    
# Line 48  static const char *OP_names[] = { Line 157  static const char *OP_names[] = {
157    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
159    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
160    "class", "Ref",    "class", "Ref", "Recurse",
161    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
162    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
163    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Branumber", "Bra"
164  };  };
165    
166    
# Line 70  for(;;) Line 179  for(;;)
179    
180    if (*code >= OP_BRA)    if (*code >= OP_BRA)
181      {      {
182      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      if (*code - OP_BRA > EXTRACT_BASIC_MAX)
183          fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
184        else
185          fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
186      code += 2;      code += 2;
187      }      }
188    
# Line 86  for(;;) Line 198  for(;;)
198      code++;      code++;
199      break;      break;
200    
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
201      case OP_CHARS:      case OP_CHARS:
202      charlength = *(++code);      charlength = *(++code);
203      fprintf(outfile, "%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
# Line 113  for(;;) Line 215  for(;;)
215      case OP_ASSERTBACK:      case OP_ASSERTBACK:
216      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
217      case OP_ONCE:      case OP_ONCE:
218      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      case OP_COND:
219      code += 2;      case OP_BRANUMBER:
     break;  
   
220      case OP_REVERSE:      case OP_REVERSE:
221        case OP_CREF:
222      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
223      code += 2;      code += 2;
224      break;      break;
# Line 190  for(;;) Line 291  for(;;)
291      break;      break;
292    
293      case OP_REF:      case OP_REF:
294      fprintf(outfile, "    \\%d", *(++code));      fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);
295      code++;      code += 3;
296      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
297    
298      case OP_CLASS:      case OP_CLASS:
# Line 264  for(;;) Line 365  for(;;)
365    
366    
367    
368  /* Character string printing function. */  /* Character string printing function. A "normal" and a UTF-8 version. */
369    
370  static void pchars(unsigned char *p, int length)  static void pchars(unsigned char *p, int length, int utf8)
371  {  {
372  int c;  int c;
373  while (length-- > 0)  while (length-- > 0)
374      {
375      if (utf8)
376        {
377        int rc = utf82ord(p, &c);
378        if (rc > 0)
379          {
380          length -= rc - 1;
381          p += rc;
382          if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
383            else fprintf(outfile, "\\x{%02x}", c);
384          continue;
385          }
386        }
387    
388       /* Not UTF-8, or malformed UTF-8  */
389    
390    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
391      else fprintf(outfile, "\\x%02x", c);      else fprintf(outfile, "\\x%02x", c);
392      }
393  }  }
394    
395    
# Line 281  compiled re. */ Line 399  compiled re. */
399    
400  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
401  {  {
402    gotten_store = size;
403  if (log_store)  if (log_store)
404    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "Memory allocation (code space): %d\n",
405      (int)((int)size - offsetof(real_pcre, code[0])));      (int)((int)size - offsetof(real_pcre, code[0])));
# Line 289  return malloc(size); Line 408  return malloc(size);
408    
409    
410    
411    
412    /* Get one piece of information from the pcre_fullinfo() function */
413    
414    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
415    {
416    int rc;
417    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
418      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
419    }
420    
421    
422    
423    
424  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
425  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
426  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 302  int op = 1; Line 434  int op = 1;
434  int timeit = 0;  int timeit = 0;
435  int showinfo = 0;  int showinfo = 0;
436  int showstore = 0;  int showstore = 0;
437    int size_offsets = 45;
438    int size_offsets_max;
439    int *offsets;
440    #if !defined NOPOSIX
441  int posix = 0;  int posix = 0;
442    #endif
443  int debug = 0;  int debug = 0;
444  int done = 0;  int done = 0;
445  unsigned char buffer[30000];  unsigned char buffer[30000];
# Line 316  outfile = stdout; Line 453  outfile = stdout;
453    
454  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
455    {    {
456      char *endptr;
457    
458    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459      showstore = 1;      showstore = 1;
460    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
464          ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
465        {
466        op++;
467        argc--;
468        }
469    #if !defined NOPOSIX
470    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
471    #endif
472    else    else
473      {      {
474      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
475      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
476      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
477             "  -i   show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
478             "  -p   use POSIX interface\n"             "  -o <n> set size of offsets vector to <n>\n");
479             "  -s   output store information\n"  #if !defined NOPOSIX
480             "  -t   time compilation and execution\n");      printf("  -p     use POSIX interface\n");
481    #endif
482        printf("  -s     output store information\n"
483               "  -t     time compilation and execution\n");
484      return 1;      return 1;
485      }      }
486    op++;    op++;
487    argc--;    argc--;
488    }    }
489    
490    /* Get the store for the offsets vector, and remember what it was */
491    
492    size_offsets_max = size_offsets;
493    offsets = malloc(size_offsets_max * sizeof(int));
494    if (offsets == NULL)
495      {
496      printf("** Failed to get %d bytes of memory for offsets vector\n",
497        size_offsets_max * sizeof(int));
498      return 1;
499      }
500    
501  /* Sort out the input and output files */  /* Sort out the input and output files */
502    
503  if (argc > 1)  if (argc > 1)
# Line 376  while (!done) Line 537  while (!done)
537    
538  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
539    regex_t preg;    regex_t preg;
540      int do_posix = 0;
541  #endif  #endif
542    
543    const char *error;    const char *error;
544    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
545    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
546    int do_study = 0;    int do_study = 0;
547    int do_debug = debug;    int do_debug = debug;
548    int do_G = 0;    int do_G = 0;
549    int do_g = 0;    int do_g = 0;
550    int do_showinfo = showinfo;    int do_showinfo = showinfo;
551    int do_showrest = 0;    int do_showrest = 0;
552    int do_posix = 0;    int utf8 = 0;
553    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
554    
555    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
# Line 479  while (!done) Line 641  while (!done)
641        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
642        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
643        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
644          case '8': options |= PCRE_UTF8; utf8 = 1; break;
645    
646        case 'L':        case 'L':
647        ppp = pp;        ppp = pp;
# Line 573  while (!done) Line 736  while (!done)
736        goto CONTINUE;        goto CONTINUE;
737        }        }
738    
739      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
740        info-returning functions. The old one has a limited interface and
741        returns only limited data. Check that it agrees with the newer one. */
742    
743      if (do_showinfo)      if (do_showinfo)
744        {        {
745        int first_char, count;        unsigned long int get_options;
746          int old_first_char, old_options, old_count;
747          int count, backrefmax, first_char, need_char;
748          size_t size;
749    
750        if (do_debug) print_internals(re);        if (do_debug) print_internals(re);
751    
752        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
753          new_info(re, NULL, PCRE_INFO_SIZE, &size);
754          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
755          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
756          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
757          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
758    
759          old_count = pcre_info(re, &old_options, &old_first_char);
760        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
761          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
762        else        else
763          {          {
764          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
765          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
766            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
767              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
768              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
769              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
770              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
771              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
772              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
773              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
774              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
775            }
         if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)  
           fprintf(outfile, "Case state changes\n");  
776    
777          if (first_char == -1)        if (size != gotten_store) fprintf(outfile,
778            {          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
779            fprintf(outfile, "First char at start or follows \\n\n");          size, gotten_store);
780            }  
781          else if (first_char < 0)        fprintf(outfile, "Capturing subpattern count = %d\n", count);
782            {        if (backrefmax > 0)
783            fprintf(outfile, "No first char\n");          fprintf(outfile, "Max back reference = %d\n", backrefmax);
784            }        if (get_options == 0) fprintf(outfile, "No options\n");
785            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
786              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
787              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
788              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
789              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
790              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
791              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
792              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
793              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
794              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
795    
796          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
797            fprintf(outfile, "Case state changes\n");
798    
799          if (first_char == -1)
800            {
801            fprintf(outfile, "First char at start or follows \\n\n");
802            }
803          else if (first_char < 0)
804            {
805            fprintf(outfile, "No first char\n");
806            }
807          else
808            {
809            if (isprint(first_char))
810              fprintf(outfile, "First char = \'%c\'\n", first_char);
811          else          else
812            {            fprintf(outfile, "First char = %d\n", first_char);
813            if (isprint(first_char))          }
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
814    
815          if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)        if (need_char < 0)
816            {          {
817            int req_char = ((real_pcre *)re)->req_char;          fprintf(outfile, "No need char\n");
818            if (isprint(req_char))          }
819              fprintf(outfile, "Req char = \'%c\'\n", req_char);        else
820            else          {
821              fprintf(outfile, "Req char = %d\n", req_char);          if (isprint(need_char))
822            }            fprintf(outfile, "Need char = \'%c\'\n", need_char);
823          else fprintf(outfile, "No req char\n");          else
824              fprintf(outfile, "Need char = %d\n", need_char);
825          }          }
826        }        }
827    
# Line 654  while (!done) Line 850  while (!done)
850        else if (extra == NULL)        else if (extra == NULL)
851          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
852    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
853        else if (do_showinfo)        else if (do_showinfo)
854          {          {
855          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
856          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
857            if (start_bits == NULL)
858            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
859          else          else
860            {            {
# Line 669  while (!done) Line 863  while (!done)
863            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
864            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
865              {              {
866              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
867                {                {
868                if (c > 75)                if (c > 75)
869                  {                  {
# Line 700  while (!done) Line 894  while (!done)
894      {      {
895      unsigned char *q;      unsigned char *q;
896      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
897        int *use_offsets = offsets;
898        int use_size_offsets = size_offsets;
899      int count, c;      int count, c;
900      int copystrings = 0;      int copystrings = 0;
901      int getstrings = 0;      int getstrings = 0;
902      int getlist = 0;      int getlist = 0;
903        int gmatched = 0;
904      int start_offset = 0;      int start_offset = 0;
905      int offsets[45];      int g_notempty = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
906    
907      options = 0;      options = 0;
908    
# Line 750  while (!done) Line 946  while (!done)
946          break;          break;
947    
948          case 'x':          case 'x':
949    
950            /* Handle \x{..} specially - new Perl thing for utf8 */
951    
952            if (*p == '{')
953              {
954              unsigned char *pt = p;
955              c = 0;
956              while (isxdigit(*(++pt)))
957                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
958              if (*pt == '}')
959                {
960                unsigned char buffer[8];
961                int ii, utn;
962                utn = ord2utf8(c, buffer);
963                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
964                c = buffer[ii];   /* Last byte */
965                p = pt + 1;
966                break;
967                }
968              /* Not correct form; fall through */
969              }
970    
971            /* Ordinary \x */
972    
973          c = 0;          c = 0;
974          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
975            {            {
# Line 790  while (!done) Line 1010  while (!done)
1010    
1011          case 'O':          case 'O':
1012          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1014              {
1015              size_offsets_max = n;
1016              free(offsets);
1017              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1018              if (offsets == NULL)
1019                {
1020                printf("** Failed to get %d bytes of memory for offsets vector\n",
1021                  size_offsets_max * sizeof(int));
1022                return 1;
1023                }
1024              }
1025            use_size_offsets = n;
1026            if (n == 0) use_offsets = NULL;
1027          continue;          continue;
1028    
1029          case 'Z':          case 'Z':
# Line 810  while (!done) Line 1043  while (!done)
1043        {        {
1044        int rc;        int rc;
1045        int eflags = 0;        int eflags = 0;
1046        regmatch_t pmatch[30];        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1047        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1048        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1049    
1050        rc = regexec(&preg, (const char *)bptr,        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);  
1051    
1052        if (rc != 0)        if (rc != 0)
1053          {          {
# Line 825  while (!done) Line 1057  while (!done)
1057        else        else
1058          {          {
1059          size_t i;          size_t i;
1060          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < use_size_offsets; i++)
1061            {            {
1062            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1063              {              {
1064              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1065              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
1066                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1067              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1068              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1069                {                {
1070                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1071                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1072                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1073                }                }
1074              }              }
1075            }            }
1076          }          }
1077          free(pmatch);
1078        }        }
1079    
1080      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 849  while (!done) Line 1082  while (!done)
1082      else      else
1083  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1084    
1085      for (;;)      for (;; gmatched++)    /* Loop for /g or /G */
1086        {        {
1087        if (timeit)        if (timeit)
1088          {          {
# Line 858  while (!done) Line 1091  while (!done)
1091          clock_t start_time = clock();          clock_t start_time = clock();
1092          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1093            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1094              (do_g? start_offset : 0), options, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1095          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1096          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1097            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
# Line 866  while (!done) Line 1099  while (!done)
1099          }          }
1100    
1101        count = pcre_exec(re, extra, (char *)bptr, len,        count = pcre_exec(re, extra, (char *)bptr, len,
1102          (do_g? start_offset : 0), options, offsets, size_offsets);          start_offset, options | g_notempty, use_offsets, use_size_offsets);
1103    
1104        if (count == 0)        if (count == 0)
1105          {          {
1106          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1107          count = size_offsets/3;          count = use_size_offsets/3;
1108          }          }
1109    
1110          /* Matched */
1111    
1112        if (count >= 0)        if (count >= 0)
1113          {          {
1114          int i;          int i;
1115          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1116            {            {
1117            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1118              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1119            else            else
1120              {              {
1121              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1122              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1123              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1124              if (i == 0)              if (i == 0)
1125                {                {
               start_offset = offsets[1];  
1126                if (do_showrest)                if (do_showrest)
1127                  {                  {
1128                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1129                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1130                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1131                  }                  }
1132                }                }
# Line 904  while (!done) Line 1138  while (!done)
1138            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1139              {              {
1140              char copybuffer[16];              char copybuffer[16];
1141              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1142                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1143              if (rc < 0)              if (rc < 0)
1144                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 918  while (!done) Line 1152  while (!done)
1152            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1153              {              {
1154              const char *substring;              const char *substring;
1155              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1156                i, &substring);                i, &substring);
1157              if (rc < 0)              if (rc < 0)
1158                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1159              else              else
1160                {                {
1161                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1162                free((void *)substring);                /* free((void *)substring); */
1163                  pcre_free_substring(substring);
1164                }                }
1165              }              }
1166            }            }
# Line 933  while (!done) Line 1168  while (!done)
1168          if (getlist)          if (getlist)
1169            {            {
1170            const char **stringlist;            const char **stringlist;
1171            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1172              &stringlist);              &stringlist);
1173            if (rc < 0)            if (rc < 0)
1174              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 943  while (!done) Line 1178  while (!done)
1178                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1179              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1180                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1181              free((void *)stringlist);              /* free((void *)stringlist); */
1182                pcre_free_substring_list(stringlist);
1183              }              }
1184            }            }
   
1185          }          }
1186    
1187          /* Failed to match. If this is a /g or /G loop and we previously set
1188          g_notempty after a null match, this is not necessarily the end.
1189          We want to advance the start offset, and continue. Fudge the offset
1190          values to achieve this. We won't be at the end of the string - that
1191          was checked before setting g_notempty. */
1192    
1193        else        else
1194          {          {
1195          if (start_offset == 0)          if (g_notempty != 0)
1196            {            {
1197            if (count == -1) fprintf(outfile, "No match\n");            use_offsets[0] = start_offset;
1198              else fprintf(outfile, "Error %d\n", count);            use_offsets[1] = start_offset + 1;
1199              }
1200            else
1201              {
1202              if (gmatched == 0)   /* Error if no previous matches */
1203                {
1204                if (count == -1) fprintf(outfile, "No match\n");
1205                  else fprintf(outfile, "Error %d\n", count);
1206                }
1207              break;  /* Out of the /g loop */
1208            }            }
         start_offset = -1;  
1209          }          }
1210    
1211        if ((!do_g && !do_G) || start_offset <= 0) break;        /* If not /g or /G we are done */
1212        if (do_G)  
1213          if (!do_g && !do_G) break;
1214    
1215          /* If we have matched an empty string, first check to see if we are at
1216          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1217          what Perl's /g option does. This turns out to be rather cunning. First
1218          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1219          same point. If this fails (picked up above) we advance to the next
1220          character. */
1221    
1222          g_notempty = 0;
1223          if (use_offsets[0] == use_offsets[1])
1224          {          {
1225          bptr += start_offset;          if (use_offsets[0] == len) break;
1226          len -= start_offset;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1227          }          }
1228        }  
1229      }        /* For /g, update the start offset, leaving the rest alone */
1230    
1231          if (do_g) start_offset = use_offsets[1];
1232    
1233          /* For /G, update the pointer and length */
1234    
1235          else
1236            {
1237            bptr += use_offsets[1];
1238            len -= use_offsets[1];
1239            }
1240          }  /* End of loop for /g and /G */
1241        }    /* End of loop for data lines */
1242    
1243    CONTINUE:    CONTINUE:
1244    

Legend:
Removed from v.37  
changed lines
  Added in v.59

  ViewVC Help
Powered by ViewVC 1.1.5