/[pcre]/code/tags/pcre-6.0/pcretest.c
ViewVC logotype

Diff of /code/tags/pcre-6.0/pcretest.c

Parent Directory | Revision Log | View Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC | revision 59 by nigel, Sat Feb 24 21:39:54 2007 UTC
# Line 73  for (i = 0; i < sizeof(utf8_table1)/size Line 73  for (i = 0; i < sizeof(utf8_table1)/size
73    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
74  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
75  if (cvalue < 0) return -1;  if (cvalue < 0) return -1;
76  *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
77  cvalue >>= 6 - i;  buffer += i;
78  for (j = 0; j < i; j++)  for (j = i; j > 0; j--)
79    {   {
80    *buffer++ = 0x80 | (cvalue & 0x3f);   *buffer-- = 0x80 | (cvalue & 0x3f);
81    cvalue >>= 6;   cvalue >>= 6;
82    }   }
83    *buffer = utf8_table2[i] | cvalue;
84  return i + 1;  return i + 1;
85  }  }
86    
# Line 117  if (i == 0 || i == 6) return 0;        / Line 118  if (i == 0 || i == 6) return 0;        /
118    
119  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
120    
121  d = c & utf8_table3[i];  s = 6*i;
122  s = 6 - i;  d = (c & utf8_table3[i]) << s;
123    
124  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
125    {    {
126    c = *buffer++;    c = *buffer++;
127    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
128      s -= 6;
129    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
130    }    }
131    
132  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
# Line 159  static const char *OP_names[] = { Line 160  static const char *OP_names[] = {
160    "class", "Ref", "Recurse",    "class", "Ref", "Recurse",
161    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
162    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
163    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Branumber", "Bra"
164  };  };
165    
166    
# Line 178  for(;;) Line 179  for(;;)
179    
180    if (*code >= OP_BRA)    if (*code >= OP_BRA)
181      {      {
182      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      if (*code - OP_BRA > EXTRACT_BASIC_MAX)
183          fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
184        else
185          fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
186      code += 2;      code += 2;
187      }      }
188    
# Line 194  for(;;) Line 198  for(;;)
198      code++;      code++;
199      break;      break;
200    
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
201      case OP_CHARS:      case OP_CHARS:
202      charlength = *(++code);      charlength = *(++code);
203      fprintf(outfile, "%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
# Line 221  for(;;) Line 215  for(;;)
215      case OP_ASSERTBACK:      case OP_ASSERTBACK:
216      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
217      case OP_ONCE:      case OP_ONCE:
218      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      case OP_COND:
219      code += 2;      case OP_BRANUMBER:
     break;  
   
220      case OP_REVERSE:      case OP_REVERSE:
221        case OP_CREF:
222      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
223      code += 2;      code += 2;
224      break;      break;
# Line 298  for(;;) Line 291  for(;;)
291      break;      break;
292    
293      case OP_REF:      case OP_REF:
294      fprintf(outfile, "    \\%d", *(++code));      fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);
295      code++;      code += 3;
296      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
297    
298      case OP_CLASS:      case OP_CLASS:
# Line 441  int op = 1; Line 434  int op = 1;
434  int timeit = 0;  int timeit = 0;
435  int showinfo = 0;  int showinfo = 0;
436  int showstore = 0;  int showstore = 0;
437    int size_offsets = 45;
438    int size_offsets_max;
439    int *offsets;
440    #if !defined NOPOSIX
441  int posix = 0;  int posix = 0;
442    #endif
443  int debug = 0;  int debug = 0;
444  int done = 0;  int done = 0;
445  unsigned char buffer[30000];  unsigned char buffer[30000];
# Line 455  outfile = stdout; Line 453  outfile = stdout;
453    
454  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
455    {    {
456      char *endptr;
457    
458    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459      showstore = 1;      showstore = 1;
460    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
464          ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
465        {
466        op++;
467        argc--;
468        }
469    #if !defined NOPOSIX
470    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
471    #endif
472    else    else
473      {      {
474      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
475      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
476      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
477             "  -i   show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
478             "  -p   use POSIX interface\n"             "  -o <n> set size of offsets vector to <n>\n");
479             "  -s   output store information\n"  #if !defined NOPOSIX
480             "  -t   time compilation and execution\n");      printf("  -p     use POSIX interface\n");
481    #endif
482        printf("  -s     output store information\n"
483               "  -t     time compilation and execution\n");
484      return 1;      return 1;
485      }      }
486    op++;    op++;
487    argc--;    argc--;
488    }    }
489    
490    /* Get the store for the offsets vector, and remember what it was */
491    
492    size_offsets_max = size_offsets;
493    offsets = malloc(size_offsets_max * sizeof(int));
494    if (offsets == NULL)
495      {
496      printf("** Failed to get %d bytes of memory for offsets vector\n",
497        size_offsets_max * sizeof(int));
498      return 1;
499      }
500    
501  /* Sort out the input and output files */  /* Sort out the input and output files */
502    
503  if (argc > 1)  if (argc > 1)
# Line 520  while (!done) Line 542  while (!done)
542    
543    const char *error;    const char *error;
544    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
545    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
546    int do_study = 0;    int do_study = 0;
547    int do_debug = debug;    int do_debug = debug;
548    int do_G = 0;    int do_G = 0;
# Line 720  while (!done) Line 742  while (!done)
742    
743      if (do_showinfo)      if (do_showinfo)
744        {        {
745          unsigned long int get_options;
746        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
747        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
748        size_t size;        size_t size;
749    
750        if (do_debug) print_internals(re);        if (do_debug) print_internals(re);
751    
752        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
753        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
754        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
755        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 746  while (!done) Line 769  while (!done)
769            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
770              first_char, old_first_char);              first_char, old_first_char);
771    
772          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
773            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
774              old_options);              get_options, old_options);
775          }          }
776    
777        if (size != gotten_store) fprintf(outfile,        if (size != gotten_store) fprintf(outfile,
# Line 758  while (!done) Line 781  while (!done)
781        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
782        if (backrefmax > 0)        if (backrefmax > 0)
783          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
784        if (options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
785          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
786            ((options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
787            ((options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
788            ((options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
789            ((options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
790            ((options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
791            ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
792            ((options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
793            ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
794            ((options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
795    
796        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
797          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 871  while (!done) Line 894  while (!done)
894      {      {
895      unsigned char *q;      unsigned char *q;
896      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
897        int *use_offsets = offsets;
898        int use_size_offsets = size_offsets;
899      int count, c;      int count, c;
900      int copystrings = 0;      int copystrings = 0;
901      int getstrings = 0;      int getstrings = 0;
# Line 878  while (!done) Line 903  while (!done)
903      int gmatched = 0;      int gmatched = 0;
904      int start_offset = 0;      int start_offset = 0;
905      int g_notempty = 0;      int g_notempty = 0;
     int offsets[45];  
     int size_offsets = sizeof(offsets)/sizeof(int);  
906    
907      options = 0;      options = 0;
908    
# Line 987  while (!done) Line 1010  while (!done)
1010    
1011          case 'O':          case 'O':
1012          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1014              {
1015              size_offsets_max = n;
1016              free(offsets);
1017              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1018              if (offsets == NULL)
1019                {
1020                printf("** Failed to get %d bytes of memory for offsets vector\n",
1021                  size_offsets_max * sizeof(int));
1022                return 1;
1023                }
1024              }
1025            use_size_offsets = n;
1026            if (n == 0) use_offsets = NULL;
1027          continue;          continue;
1028    
1029          case 'Z':          case 'Z':
# Line 1007  while (!done) Line 1043  while (!done)
1043        {        {
1044        int rc;        int rc;
1045        int eflags = 0;        int eflags = 0;
1046        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1047        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1048        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1049    
1050        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1051    
1052        if (rc != 0)        if (rc != 0)
1053          {          {
# Line 1021  while (!done) Line 1057  while (!done)
1057        else        else
1058          {          {
1059          size_t i;          size_t i;
1060          for (i = 0; i < size_offsets; i++)          for (i = 0; i < use_size_offsets; i++)
1061            {            {
1062            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1063              {              {
# Line 1038  while (!done) Line 1074  while (!done)
1074              }              }
1075            }            }
1076          }          }
1077          free(pmatch);
1078        }        }
1079    
1080      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 1054  while (!done) Line 1091  while (!done)
1091          clock_t start_time = clock();          clock_t start_time = clock();
1092          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1093            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1094              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1095          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1096          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1097            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
# Line 1062  while (!done) Line 1099  while (!done)
1099          }          }
1100    
1101        count = pcre_exec(re, extra, (char *)bptr, len,        count = pcre_exec(re, extra, (char *)bptr, len,
1102          start_offset, options | g_notempty, offsets, size_offsets);          start_offset, options | g_notempty, use_offsets, use_size_offsets);
1103    
1104        if (count == 0)        if (count == 0)
1105          {          {
1106          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1107          count = size_offsets/3;          count = use_size_offsets/3;
1108          }          }
1109    
1110        /* Matched */        /* Matched */
# Line 1077  while (!done) Line 1114  while (!done)
1114          int i;          int i;
1115          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1116            {            {
1117            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1118              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1119            else            else
1120              {              {
1121              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1122              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1123              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1124              if (i == 0)              if (i == 0)
1125                {                {
1126                if (do_showrest)                if (do_showrest)
1127                  {                  {
1128                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1129                  pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1130                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1131                  }                  }
1132                }                }
# Line 1101  while (!done) Line 1138  while (!done)
1138            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1139              {              {
1140              char copybuffer[16];              char copybuffer[16];
1141              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1142                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1143              if (rc < 0)              if (rc < 0)
1144                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 1115  while (!done) Line 1152  while (!done)
1152            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1153              {              {
1154              const char *substring;              const char *substring;
1155              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1156                i, &substring);                i, &substring);
1157              if (rc < 0)              if (rc < 0)
1158                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
# Line 1131  while (!done) Line 1168  while (!done)
1168          if (getlist)          if (getlist)
1169            {            {
1170            const char **stringlist;            const char **stringlist;
1171            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1172              &stringlist);              &stringlist);
1173            if (rc < 0)            if (rc < 0)
1174              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 1157  while (!done) Line 1194  while (!done)
1194          {          {
1195          if (g_notempty != 0)          if (g_notempty != 0)
1196            {            {
1197            offsets[0] = start_offset;            use_offsets[0] = start_offset;
1198            offsets[1] = start_offset + 1;            use_offsets[1] = start_offset + 1;
1199            }            }
1200          else          else
1201            {            {
# Line 1183  while (!done) Line 1220  while (!done)
1220        character. */        character. */
1221    
1222        g_notempty = 0;        g_notempty = 0;
1223        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1224          {          {
1225          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1226          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1227          }          }
1228    
1229        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1230    
1231        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1232    
1233        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1234    
1235        else        else
1236          {          {
1237          bptr += offsets[1];          bptr += use_offsets[1];
1238          len -= offsets[1];          len -= use_offsets[1];
1239          }          }
1240        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1241      }    /* End of loop for data lines */      }    /* End of loop for data lines */

Legend:
Removed from v.49  
changed lines
  Added in v.59

  ViewVC Help
Powered by ViewVC 1.1.5