/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC | revision 35 by nigel, Sat Feb 24 21:39:05 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
# Line 21  Line 22 
22  #endif  #endif
23  #endif  #endif
24    
25  #define LOOPREPEAT 10000  #define LOOPREPEAT 20000
26    
27    
28  static FILE *outfile;  static FILE *outfile;
# Line 273  compiled re. */ Line 274  compiled re. */
274    
275  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
276  {  {
277  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  if (log_store)
278      fprintf(outfile, "Memory allocation (code space): %d\n",
279        (int)((int)size - offsetof(real_pcre, code[0])));
280  return malloc(size);  return malloc(size);
281  }  }
282    
# Line 291  int study_options = 0; Line 294  int study_options = 0;
294  int op = 1;  int op = 1;
295  int timeit = 0;  int timeit = 0;
296  int showinfo = 0;  int showinfo = 0;
297    int showstore = 0;
298  int posix = 0;  int posix = 0;
299  int debug = 0;  int debug = 0;
300  int done = 0;  int done = 0;
# Line 305  outfile = stdout; Line 309  outfile = stdout;
309    
310  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
311    {    {
312    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
313        showstore = 1;
314    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
315    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
316    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 313  while (argc > 1 && argv[op][0] == '-') Line 318  while (argc > 1 && argv[op][0] == '-')
318    else    else
319      {      {
320      printf("*** Unknown option %s\n", argv[op]);      printf("*** Unknown option %s\n", argv[op]);
321        printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
322        printf("  -d   debug: show compiled code; implies -i\n"
323               "  -i   show information about compiled pattern\n"
324               "  -p   use POSIX interface\n"
325               "  -s   output store information\n"
326               "  -t   time compilation and execution\n");
327      return 1;      return 1;
328      }      }
329    op++;    op++;
# Line 357  while (!done) Line 368  while (!done)
368    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
369    regex_t preg;    regex_t preg;
370    const char *error;    const char *error;
371    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
372      unsigned const char *tables = NULL;
373    int do_study = 0;    int do_study = 0;
374    int do_debug = 0;    int do_debug = debug;
375      int do_G = 0;
376      int do_g = 0;
377      int do_showinfo = showinfo;
378      int do_showrest = 0;
379    int do_posix = 0;    int do_posix = 0;
380    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
381    
# Line 376  while (!done) Line 392  while (!done)
392    
393    delimiter = *p++;    delimiter = *p++;
394    
395    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
396      {      {
397      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
398      goto SKIP_DATA;      goto SKIP_DATA;
399      }      }
400    
# Line 386  while (!done) Line 402  while (!done)
402    
403    for(;;)    for(;;)
404      {      {
405      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
406          {
407          if (*pp == '\\' && pp[1] != 0) pp++;
408            else if (*pp == delimiter) break;
409          pp++;
410          }
411      if (*pp != 0) break;      if (*pp != 0) break;
412    
413      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 406  while (!done) Line 427  while (!done)
427      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
428      }      }
429    
430      /* If the first character after the delimiter is backslash, make
431      the pattern end with backslash. This is purely to provide a way
432      of testing for the error message when a pattern ends with backslash. */
433    
434      if (pp[1] == '\\') *pp++ = '\\';
435    
436    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
437    
438    *pp++ = 0;    *pp++ = 0;
# Line 414  while (!done) Line 441  while (!done)
441    
442    options = 0;    options = 0;
443    study_options = 0;    study_options = 0;
444      log_store = showstore;  /* default from command line */
445    
446    while (*pp != 0)    while (*pp != 0)
447      {      {
448      switch (*pp++)      switch (*pp++)
449        {        {
450          case 'g': do_g = 1; break;
451        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
452        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
453        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
454        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
455    
456          case '+': do_showrest = 1; break;
457        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
458        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
459        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
460          case 'G': do_G = 1; break;
461          case 'I': do_showinfo = 1; break;
462          case 'M': log_store = 1; break;
463        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
464        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
465        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
466        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
467    
468          case 'L':
469          ppp = pp;
470          while (*ppp != '\n' && *ppp != ' ') ppp++;
471          *ppp = 0;
472          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
473            {
474            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
475            goto SKIP_DATA;
476            }
477          tables = pcre_maketables();
478          pp = ppp;
479          break;
480    
481        case '\n': case ' ': break;        case '\n': case ' ': break;
482        default:        default:
483        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 437  while (!done) Line 486  while (!done)
486      }      }
487    
488    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
489    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
490      local character tables. */
491    
492    if (posix || do_posix)    if (posix || do_posix)
493      {      {
# Line 469  while (!done) Line 519  while (!done)
519        clock_t start_time = clock();        clock_t start_time = clock();
520        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < LOOPREPEAT; i++)
521          {          {
522          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
523          if (re != NULL) free(re);          if (re != NULL) free(re);
524          }          }
525        time_taken = clock() - start_time;        time_taken = clock() - start_time;
526        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
527          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
528            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
529        }        }
530    
531      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
532    
533      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
534      if non-interactive. */      if non-interactive. */
# Line 501  while (!done) Line 552  while (!done)
552            }            }
553          fprintf(outfile, "\n");          fprintf(outfile, "\n");
554          }          }
555        continue;        goto CONTINUE;
556        }        }
557    
558      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required */
559    
560      if (showinfo || do_debug)      if (do_showinfo)
561        {        {
562        int first_char, count;        int first_char, count;
563    
564        if (debug || do_debug) print_internals(re, outfile);        if (do_debug) print_internals(re, outfile);
565    
566        count = pcre_info(re, &options, &first_char);        count = pcre_info(re, &options, &first_char);
567        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 560  while (!done) Line 611  while (!done)
611            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
612          time_taken = clock() - start_time;          time_taken = clock() - start_time;
613          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
614          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
615            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
616              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
617          }          }
618    
619        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 573  while (!done) Line 625  while (!done)
625        /* This looks at internal information. A bit kludgy to do it this        /* This looks at internal information. A bit kludgy to do it this
626        way, but it is useful for testing. */        way, but it is useful for testing. */
627    
628        else if (showinfo || do_debug)        else if (do_showinfo)
629          {          {
630          real_pcre_extra *xx = (real_pcre_extra *)extra;          real_pcre_extra *xx = (real_pcre_extra *)extra;
631          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          if ((xx->options & PCRE_STUDY_MAPPED) == 0)
# Line 615  while (!done) Line 667  while (!done)
667    for (;;)    for (;;)
668      {      {
669      unsigned char *q;      unsigned char *q;
670        unsigned char *bptr = dbuffer;
671      int count, c;      int count, c;
672        int copystrings = 0;
673        int getstrings = 0;
674        int getlist = 0;
675        int start_offset = 0;
676      int offsets[45];      int offsets[45];
677      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
678    
679      options = 0;      options = 0;
680    
681      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
682      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
683        {        {
684        done = 1;        done = 1;
# Line 681  while (!done) Line 738  while (!done)
738          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
739          continue;          continue;
740    
741            case 'C':
742            while(isdigit(*p)) n = n * 10 + *p++ - '0';
743            copystrings |= 1 << n;
744            continue;
745    
746            case 'G':
747            while(isdigit(*p)) n = n * 10 + *p++ - '0';
748            getstrings |= 1 << n;
749            continue;
750    
751            case 'L':
752            getlist = 1;
753            continue;
754    
755          case 'O':          case 'O':
756          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
757          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
# Line 706  while (!done) Line 777  while (!done)
777        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
778        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
779    
780        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (unsigned char *)bptr,
781          pmatch, eflags);          sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);
782    
783        if (rc != 0)        if (rc != 0)
784          {          {
# Line 725  while (!done) Line 796  while (!done)
796              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
797                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so);
798              fprintf(outfile, "\n");              fprintf(outfile, "\n");
799                if (i == 0 && do_showrest)
800                  {
801                  fprintf(outfile, " 0+ ");
802                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
803                  fprintf(outfile, "\n");
804                  }
805              }              }
806            }            }
807          }          }
808        }        }
809    
810      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
811    
812      else      else for (;;)
813        {        {
814        if (timeit)        if (timeit)
815          {          {
816          register int i;          register int i;
817          clock_t time_taken;          clock_t time_taken;
818          clock_t start_time = clock();          clock_t start_time = clock();
819          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
820            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
821              size_offsets);              (do_g? start_offset : 0), options, offsets, size_offsets);
822          time_taken = clock() - start_time;          time_taken = clock() - start_time;
823          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
824            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
825              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
826          }          }
827    
828        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
829          size_offsets);          (do_g? start_offset : 0), options, offsets, size_offsets);
830    
831        if (count == 0)        if (count == 0)
832          {          {
# Line 759  while (!done) Line 837  while (!done)
837        if (count >= 0)        if (count >= 0)
838          {          {
839          int i;          int i;
840          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
841            {            {
842            if (offsets[i] < 0)            if (offsets[i] < 0)
843              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
844            else            else
845              {              {
846              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
847              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
848              fprintf(outfile, "\n");              fprintf(outfile, "\n");
849                if (i == 0)
850                  {
851                  start_offset = offsets[1];
852                  if (do_showrest)
853                    {
854                    fprintf(outfile, " 0+ ");
855                    pchars(bptr + offsets[i+1], len - offsets[i+1]);
856                    fprintf(outfile, "\n");
857                    }
858                  }
859                }
860              }
861    
862            for (i = 0; i < 32; i++)
863              {
864              if ((copystrings & (1 << i)) != 0)
865                {
866                char buffer[16];
867                int rc = pcre_copy_substring((char *)bptr, offsets, count,
868                  i, buffer, sizeof(buffer));
869                if (rc < 0)
870                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
871                else
872                  fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);
873              }              }
874            }            }
875    
876            for (i = 0; i < 32; i++)
877              {
878              if ((getstrings & (1 << i)) != 0)
879                {
880                const char *substring;
881                int rc = pcre_get_substring((char *)bptr, offsets, count,
882                  i, &substring);
883                if (rc < 0)
884                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
885                else
886                  {
887                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
888                  free((void *)substring);
889                  }
890                }
891              }
892    
893            if (getlist)
894              {
895              const char **stringlist;
896              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
897                &stringlist);
898              if (rc < 0)
899                fprintf(outfile, "get substring list failed %d\n", rc);
900              else
901                {
902                for (i = 0; i < count; i++)
903                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
904                if (stringlist[i] != NULL)
905                  fprintf(outfile, "string list not terminated by NULL\n");
906                free((void *)stringlist);
907                }
908              }
909    
910          }          }
911        else        else
912          {          {
913          if (count == -1) fprintf(outfile, "No match\n");          if (start_offset == 0)
914            else fprintf(outfile, "Error %d\n", count);            {
915              if (count == -1) fprintf(outfile, "No match\n");
916                else fprintf(outfile, "Error %d\n", count);
917              }
918            start_offset = -1;
919            }
920    
921          if ((!do_g && !do_G) || start_offset <= 0) break;
922          if (do_G)
923            {
924            bptr += start_offset;
925            len -= start_offset;
926          }          }
927        }        }
928      }      }
# Line 784  while (!done) Line 931  while (!done)
931    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
932    if (re != NULL) free(re);    if (re != NULL) free(re);
933    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
934      if (tables != NULL)
935        {
936        free((void *)tables);
937        setlocale(LC_CTYPE, "C");
938        }
939    }    }
940    
941  fprintf(outfile, "\n");  fprintf(outfile, "\n");

Legend:
Removed from v.23  
changed lines
  Added in v.35

  ViewVC Help
Powered by ViewVC 1.1.5