/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 39 by nigel, Sat Feb 24 21:39:13 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32  #define LOOPREPEAT 10000  #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
# Line 47  static const char *OP_names[] = { Line 55  static const char *OP_names[] = {
55  };  };
56    
57    
58  static void print_internals(pcre *re, FILE *outfile)  static void print_internals(pcre *re)
59  {  {
60  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
61    
# Line 273  compiled re. */ Line 281  compiled re. */
281    
282  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
283  {  {
284  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  if (log_store)
285      fprintf(outfile, "Memory allocation (code space): %d\n",
286        (int)((int)size - offsetof(real_pcre, code[0])));
287  return malloc(size);  return malloc(size);
288  }  }
289    
# Line 291  int study_options = 0; Line 301  int study_options = 0;
301  int op = 1;  int op = 1;
302  int timeit = 0;  int timeit = 0;
303  int showinfo = 0;  int showinfo = 0;
304    int showstore = 0;
305  int posix = 0;  int posix = 0;
306  int debug = 0;  int debug = 0;
307  int done = 0;  int done = 0;
# Line 305  outfile = stdout; Line 316  outfile = stdout;
316    
317  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
318    {    {
319    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
320        showstore = 1;
321    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
322    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
323    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 313  while (argc > 1 && argv[op][0] == '-') Line 325  while (argc > 1 && argv[op][0] == '-')
325    else    else
326      {      {
327      printf("*** Unknown option %s\n", argv[op]);      printf("*** Unknown option %s\n", argv[op]);
328        printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
329        printf("  -d   debug: show compiled code; implies -i\n"
330               "  -i   show information about compiled pattern\n"
331               "  -p   use POSIX interface\n"
332               "  -s   output store information\n"
333               "  -t   time compilation and execution\n");
334      return 1;      return 1;
335      }      }
336    op++;    op++;
# Line 355  while (!done) Line 373  while (!done)
373    {    {
374    pcre *re = NULL;    pcre *re = NULL;
375    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
376    
377    #if !defined NOPOSIX  /* There are still compilers that require no indent */
378    regex_t preg;    regex_t preg;
379    #endif
380    
381    const char *error;    const char *error;
382    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
383      unsigned const char *tables = NULL;
384    int do_study = 0;    int do_study = 0;
385    int do_debug = 0;    int do_debug = debug;
386      int do_G = 0;
387      int do_g = 0;
388      int do_showinfo = showinfo;
389      int do_showrest = 0;
390    int do_posix = 0;    int do_posix = 0;
391    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
392    
# Line 376  while (!done) Line 403  while (!done)
403    
404    delimiter = *p++;    delimiter = *p++;
405    
406    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
407      {      {
408      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
409      goto SKIP_DATA;      goto SKIP_DATA;
410      }      }
411    
# Line 386  while (!done) Line 413  while (!done)
413    
414    for(;;)    for(;;)
415      {      {
416      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
417          {
418          if (*pp == '\\' && pp[1] != 0) pp++;
419            else if (*pp == delimiter) break;
420          pp++;
421          }
422      if (*pp != 0) break;      if (*pp != 0) break;
423    
424      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 406  while (!done) Line 438  while (!done)
438      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
439      }      }
440    
441      /* If the first character after the delimiter is backslash, make
442      the pattern end with backslash. This is purely to provide a way
443      of testing for the error message when a pattern ends with backslash. */
444    
445      if (pp[1] == '\\') *pp++ = '\\';
446    
447    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
448    
449    *pp++ = 0;    *pp++ = 0;
# Line 414  while (!done) Line 452  while (!done)
452    
453    options = 0;    options = 0;
454    study_options = 0;    study_options = 0;
455      log_store = showstore;  /* default from command line */
456    
457    while (*pp != 0)    while (*pp != 0)
458      {      {
459      switch (*pp++)      switch (*pp++)
460        {        {
461          case 'g': do_g = 1; break;
462        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
463        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
464        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
465        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
466    
467          case '+': do_showrest = 1; break;
468        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
469        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
470        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
471          case 'G': do_G = 1; break;
472          case 'I': do_showinfo = 1; break;
473          case 'M': log_store = 1; break;
474    
475    #if !defined NOPOSIX
476        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
477    #endif
478    
479        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
480        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
481        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
482    
483          case 'L':
484          ppp = pp;
485          while (*ppp != '\n' && *ppp != ' ') ppp++;
486          *ppp = 0;
487          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
488            {
489            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
490            goto SKIP_DATA;
491            }
492          tables = pcre_maketables();
493          pp = ppp;
494          break;
495    
496        case '\n': case ' ': break;        case '\n': case ' ': break;
497        default:        default:
498        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 437  while (!done) Line 501  while (!done)
501      }      }
502    
503    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
504    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
505      local character tables. */
506    
507    #if !defined NOPOSIX
508    if (posix || do_posix)    if (posix || do_posix)
509      {      {
510      int rc;      int rc;
# Line 461  while (!done) Line 527  while (!done)
527    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
528    
529    else    else
530    #endif  /* !defined NOPOSIX */
531    
532      {      {
533      if (timeit)      if (timeit)
534        {        {
# Line 469  while (!done) Line 537  while (!done)
537        clock_t start_time = clock();        clock_t start_time = clock();
538        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < LOOPREPEAT; i++)
539          {          {
540          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
541          if (re != NULL) free(re);          if (re != NULL) free(re);
542          }          }
543        time_taken = clock() - start_time;        time_taken = clock() - start_time;
544        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
545          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
546            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
547        }        }
548    
549      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
550    
551      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
552      if non-interactive. */      if non-interactive. */
# Line 501  while (!done) Line 570  while (!done)
570            }            }
571          fprintf(outfile, "\n");          fprintf(outfile, "\n");
572          }          }
573        continue;        goto CONTINUE;
574        }        }
575    
576      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required */
577    
578      if (showinfo || do_debug)      if (do_showinfo)
579        {        {
580        int first_char, count;        int first_char, count;
581    
582        if (debug || do_debug) print_internals(re, outfile);        if (do_debug) print_internals(re);
583    
584        count = pcre_info(re, &options, &first_char);        count = pcre_info(re, &options, &first_char);
585        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 528  while (!done) Line 597  while (!done)
597              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
598              ((options & PCRE_EXTRA) != 0)? " extra" : "",              ((options & PCRE_EXTRA) != 0)? " extra" : "",
599              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
600    
601            if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
602              fprintf(outfile, "Case state changes\n");
603    
604          if (first_char == -1)          if (first_char == -1)
605            {            {
606            fprintf(outfile, "First char at start or follows \\n\n");            fprintf(outfile, "First char at start or follows \\n\n");
# Line 543  while (!done) Line 616  while (!done)
616            else            else
617              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "First char = %d\n", first_char);
618            }            }
619    
620            if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
621              {
622              int req_char = ((real_pcre *)re)->req_char;
623              if (isprint(req_char))
624                fprintf(outfile, "Req char = \'%c\'\n", req_char);
625              else
626                fprintf(outfile, "Req char = %d\n", req_char);
627              }
628            else fprintf(outfile, "No req char\n");
629          }          }
630        }        }
631    
# Line 560  while (!done) Line 643  while (!done)
643            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
644          time_taken = clock() - start_time;          time_taken = clock() - start_time;
645          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
646          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
647            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
648              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
649          }          }
650    
651        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 573  while (!done) Line 657  while (!done)
657        /* This looks at internal information. A bit kludgy to do it this        /* This looks at internal information. A bit kludgy to do it this
658        way, but it is useful for testing. */        way, but it is useful for testing. */
659    
660        else if (showinfo || do_debug)        else if (do_showinfo)
661          {          {
662          real_pcre_extra *xx = (real_pcre_extra *)extra;          real_pcre_extra *xx = (real_pcre_extra *)extra;
663          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          if ((xx->options & PCRE_STUDY_MAPPED) == 0)
# Line 615  while (!done) Line 699  while (!done)
699    for (;;)    for (;;)
700      {      {
701      unsigned char *q;      unsigned char *q;
702        unsigned char *bptr = dbuffer;
703      int count, c;      int count, c;
704        int copystrings = 0;
705        int getstrings = 0;
706        int getlist = 0;
707        int gmatched = 0;
708        int start_offset = 0;
709      int offsets[45];      int offsets[45];
710      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
711    
712      options = 0;      options = 0;
713    
714      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
715      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
716        {        {
717        done = 1;        done = 1;
# Line 681  while (!done) Line 771  while (!done)
771          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
772          continue;          continue;
773    
774            case 'C':
775            while(isdigit(*p)) n = n * 10 + *p++ - '0';
776            copystrings |= 1 << n;
777            continue;
778    
779            case 'G':
780            while(isdigit(*p)) n = n * 10 + *p++ - '0';
781            getstrings |= 1 << n;
782            continue;
783    
784            case 'L':
785            getlist = 1;
786            continue;
787    
788            case 'N':
789            options |= PCRE_NOTEMPTY;
790            continue;
791    
792          case 'O':          case 'O':
793          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
794          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
# Line 698  while (!done) Line 806  while (!done)
806      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
807      support timing. */      support timing. */
808    
809    #if !defined NOPOSIX
810      if (posix || do_posix)      if (posix || do_posix)
811        {        {
812        int rc;        int rc;
# Line 706  while (!done) Line 815  while (!done)
815        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
816        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
817    
818        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr,
819          pmatch, eflags);          sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);
820    
821        if (rc != 0)        if (rc != 0)
822          {          {
# Line 725  while (!done) Line 834  while (!done)
834              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
835                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so);
836              fprintf(outfile, "\n");              fprintf(outfile, "\n");
837                if (i == 0 && do_showrest)
838                  {
839                  fprintf(outfile, " 0+ ");
840                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
841                  fprintf(outfile, "\n");
842                  }
843              }              }
844            }            }
845          }          }
846        }        }
847    
848      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
849    
850      else      else
851    #endif  /* !defined NOPOSIX */
852    
853        for (;; gmatched++)    /* Loop for /g or /G */
854        {        {
855        if (timeit)        if (timeit)
856          {          {
857          register int i;          register int i;
858          clock_t time_taken;          clock_t time_taken;
859          clock_t start_time = clock();          clock_t start_time = clock();
860          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
861            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
862              size_offsets);              start_offset, options, offsets, size_offsets);
863          time_taken = clock() - start_time;          time_taken = clock() - start_time;
864          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
865            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
866              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
867          }          }
868    
869        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
870          size_offsets);          start_offset, options, offsets, size_offsets);
871    
872        if (count == 0)        if (count == 0)
873          {          {
# Line 756  while (!done) Line 875  while (!done)
875          count = size_offsets/3;          count = size_offsets/3;
876          }          }
877    
878          /* Matched */
879    
880        if (count >= 0)        if (count >= 0)
881          {          {
882          int i;          int i;
883          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
884            {            {
885            if (offsets[i] < 0)            if (offsets[i] < 0)
886              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
887            else            else
888              {              {
889              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
890              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
891              fprintf(outfile, "\n");              fprintf(outfile, "\n");
892                if (i == 0)
893                  {
894                  if (do_showrest)
895                    {
896                    fprintf(outfile, " 0+ ");
897                    pchars(bptr + offsets[i+1], len - offsets[i+1]);
898                    fprintf(outfile, "\n");
899                    }
900                  }
901                }
902              }
903    
904            for (i = 0; i < 32; i++)
905              {
906              if ((copystrings & (1 << i)) != 0)
907                {
908                char copybuffer[16];
909                int rc = pcre_copy_substring((char *)bptr, offsets, count,
910                  i, copybuffer, sizeof(copybuffer));
911                if (rc < 0)
912                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
913                else
914                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
915              }              }
916            }            }
917    
918            for (i = 0; i < 32; i++)
919              {
920              if ((getstrings & (1 << i)) != 0)
921                {
922                const char *substring;
923                int rc = pcre_get_substring((char *)bptr, offsets, count,
924                  i, &substring);
925                if (rc < 0)
926                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
927                else
928                  {
929                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
930                  free((void *)substring);
931                  }
932                }
933              }
934    
935            if (getlist)
936              {
937              const char **stringlist;
938              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
939                &stringlist);
940              if (rc < 0)
941                fprintf(outfile, "get substring list failed %d\n", rc);
942              else
943                {
944                for (i = 0; i < count; i++)
945                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
946                if (stringlist[i] != NULL)
947                  fprintf(outfile, "string list not terminated by NULL\n");
948                free((void *)stringlist);
949                }
950              }
951            }
952    
953          /* Failed to match */
954    
955          else
956            {
957            if (gmatched == 0)
958              {
959              if (count == -1) fprintf(outfile, "No match\n");
960                else fprintf(outfile, "Error %d\n", count);
961              }
962            break;  /* Out of the /g loop */
963          }          }
964    
965          /* If not /g or /G we are done */
966    
967          if (!do_g && !do_G) break;
968    
969          /* If we have matched an empty string, set PCRE_NOTEMPTY for the next
970          match. This mimics what Perl's /g option does. */
971    
972          if (offsets[1] == offsets[0])
973            options |= PCRE_NOTEMPTY;
974          else
975            options &= ~PCRE_NOTEMPTY;
976    
977          /* For /g, update the start offset, leaving the rest alone */
978    
979          if (do_g) start_offset = offsets[1];
980    
981          /* For /G, update the pointer and length */
982    
983        else        else
984          {          {
985          if (count == -1) fprintf(outfile, "No match\n");          bptr += offsets[1];
986            else fprintf(outfile, "Error %d\n", count);          len -= offsets[1];
987          }          }
988        }        }  /* End of loop for /g and /G */
989      }      }    /* End of loop for data lines */
990    
991    CONTINUE:    CONTINUE:
992    
993    #if !defined NOPOSIX
994    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
995    #endif
996    
997    if (re != NULL) free(re);    if (re != NULL) free(re);
998    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
999      if (tables != NULL)
1000        {
1001        free((void *)tables);
1002        setlocale(LC_CTYPE, "C");
1003        }
1004    }    }
1005    
1006  fprintf(outfile, "\n");  fprintf(outfile, "\n");

Legend:
Removed from v.23  
changed lines
  Added in v.39

  ViewVC Help
Powered by ViewVC 1.1.5