/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 37 by nigel, Sat Feb 24 21:39:09 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32  #define LOOPREPEAT 10000  #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
# Line 47  static const char *OP_names[] = { Line 55  static const char *OP_names[] = {
55  };  };
56    
57    
58  static void print_internals(pcre *re, FILE *outfile)  static void print_internals(pcre *re)
59  {  {
60  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
61    
# Line 273  compiled re. */ Line 281  compiled re. */
281    
282  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
283  {  {
284  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  if (log_store)
285      fprintf(outfile, "Memory allocation (code space): %d\n",
286        (int)((int)size - offsetof(real_pcre, code[0])));
287  return malloc(size);  return malloc(size);
288  }  }
289    
# Line 291  int study_options = 0; Line 301  int study_options = 0;
301  int op = 1;  int op = 1;
302  int timeit = 0;  int timeit = 0;
303  int showinfo = 0;  int showinfo = 0;
304    int showstore = 0;
305  int posix = 0;  int posix = 0;
306  int debug = 0;  int debug = 0;
307  int done = 0;  int done = 0;
# Line 305  outfile = stdout; Line 316  outfile = stdout;
316    
317  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
318    {    {
319    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
320        showstore = 1;
321    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
322    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
323    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 313  while (argc > 1 && argv[op][0] == '-') Line 325  while (argc > 1 && argv[op][0] == '-')
325    else    else
326      {      {
327      printf("*** Unknown option %s\n", argv[op]);      printf("*** Unknown option %s\n", argv[op]);
328        printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
329        printf("  -d   debug: show compiled code; implies -i\n"
330               "  -i   show information about compiled pattern\n"
331               "  -p   use POSIX interface\n"
332               "  -s   output store information\n"
333               "  -t   time compilation and execution\n");
334      return 1;      return 1;
335      }      }
336    op++;    op++;
# Line 355  while (!done) Line 373  while (!done)
373    {    {
374    pcre *re = NULL;    pcre *re = NULL;
375    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
376    
377    #if !defined NOPOSIX  /* There are still compilers that require no indent */
378    regex_t preg;    regex_t preg;
379    #endif
380    
381    const char *error;    const char *error;
382    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
383      unsigned const char *tables = NULL;
384    int do_study = 0;    int do_study = 0;
385    int do_debug = 0;    int do_debug = debug;
386      int do_G = 0;
387      int do_g = 0;
388      int do_showinfo = showinfo;
389      int do_showrest = 0;
390    int do_posix = 0;    int do_posix = 0;
391    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
392    
# Line 376  while (!done) Line 403  while (!done)
403    
404    delimiter = *p++;    delimiter = *p++;
405    
406    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
407      {      {
408      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
409      goto SKIP_DATA;      goto SKIP_DATA;
410      }      }
411    
# Line 386  while (!done) Line 413  while (!done)
413    
414    for(;;)    for(;;)
415      {      {
416      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
417          {
418          if (*pp == '\\' && pp[1] != 0) pp++;
419            else if (*pp == delimiter) break;
420          pp++;
421          }
422      if (*pp != 0) break;      if (*pp != 0) break;
423    
424      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 406  while (!done) Line 438  while (!done)
438      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
439      }      }
440    
441      /* If the first character after the delimiter is backslash, make
442      the pattern end with backslash. This is purely to provide a way
443      of testing for the error message when a pattern ends with backslash. */
444    
445      if (pp[1] == '\\') *pp++ = '\\';
446    
447    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
448    
449    *pp++ = 0;    *pp++ = 0;
# Line 414  while (!done) Line 452  while (!done)
452    
453    options = 0;    options = 0;
454    study_options = 0;    study_options = 0;
455      log_store = showstore;  /* default from command line */
456    
457    while (*pp != 0)    while (*pp != 0)
458      {      {
459      switch (*pp++)      switch (*pp++)
460        {        {
461          case 'g': do_g = 1; break;
462        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
463        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
464        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
465        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
466    
467          case '+': do_showrest = 1; break;
468        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
469        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
470        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
471          case 'G': do_G = 1; break;
472          case 'I': do_showinfo = 1; break;
473          case 'M': log_store = 1; break;
474    
475    #if !defined NOPOSIX
476        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
477    #endif
478    
479        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
480        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
481        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
482    
483          case 'L':
484          ppp = pp;
485          while (*ppp != '\n' && *ppp != ' ') ppp++;
486          *ppp = 0;
487          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
488            {
489            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
490            goto SKIP_DATA;
491            }
492          tables = pcre_maketables();
493          pp = ppp;
494          break;
495    
496        case '\n': case ' ': break;        case '\n': case ' ': break;
497        default:        default:
498        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 437  while (!done) Line 501  while (!done)
501      }      }
502    
503    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
504    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
505      local character tables. */
506    
507    #if !defined NOPOSIX
508    if (posix || do_posix)    if (posix || do_posix)
509      {      {
510      int rc;      int rc;
# Line 461  while (!done) Line 527  while (!done)
527    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
528    
529    else    else
530    #endif  /* !defined NOPOSIX */
531    
532      {      {
533      if (timeit)      if (timeit)
534        {        {
# Line 469  while (!done) Line 537  while (!done)
537        clock_t start_time = clock();        clock_t start_time = clock();
538        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < LOOPREPEAT; i++)
539          {          {
540          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
541          if (re != NULL) free(re);          if (re != NULL) free(re);
542          }          }
543        time_taken = clock() - start_time;        time_taken = clock() - start_time;
544        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
545          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
546            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
547        }        }
548    
549      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
550    
551      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
552      if non-interactive. */      if non-interactive. */
# Line 501  while (!done) Line 570  while (!done)
570            }            }
571          fprintf(outfile, "\n");          fprintf(outfile, "\n");
572          }          }
573        continue;        goto CONTINUE;
574        }        }
575    
576      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required */
577    
578      if (showinfo || do_debug)      if (do_showinfo)
579        {        {
580        int first_char, count;        int first_char, count;
581    
582        if (debug || do_debug) print_internals(re, outfile);        if (do_debug) print_internals(re);
583    
584        count = pcre_info(re, &options, &first_char);        count = pcre_info(re, &options, &first_char);
585        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 528  while (!done) Line 597  while (!done)
597              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
598              ((options & PCRE_EXTRA) != 0)? " extra" : "",              ((options & PCRE_EXTRA) != 0)? " extra" : "",
599              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
600    
601            if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
602              fprintf(outfile, "Case state changes\n");
603    
604          if (first_char == -1)          if (first_char == -1)
605            {            {
606            fprintf(outfile, "First char at start or follows \\n\n");            fprintf(outfile, "First char at start or follows \\n\n");
# Line 543  while (!done) Line 616  while (!done)
616            else            else
617              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "First char = %d\n", first_char);
618            }            }
619    
620            if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
621              {
622              int req_char = ((real_pcre *)re)->req_char;
623              if (isprint(req_char))
624                fprintf(outfile, "Req char = \'%c\'\n", req_char);
625              else
626                fprintf(outfile, "Req char = %d\n", req_char);
627              }
628            else fprintf(outfile, "No req char\n");
629          }          }
630        }        }
631    
# Line 560  while (!done) Line 643  while (!done)
643            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
644          time_taken = clock() - start_time;          time_taken = clock() - start_time;
645          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
646          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
647            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
648              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
649          }          }
650    
651        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 573  while (!done) Line 657  while (!done)
657        /* This looks at internal information. A bit kludgy to do it this        /* This looks at internal information. A bit kludgy to do it this
658        way, but it is useful for testing. */        way, but it is useful for testing. */
659    
660        else if (showinfo || do_debug)        else if (do_showinfo)
661          {          {
662          real_pcre_extra *xx = (real_pcre_extra *)extra;          real_pcre_extra *xx = (real_pcre_extra *)extra;
663          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          if ((xx->options & PCRE_STUDY_MAPPED) == 0)
# Line 615  while (!done) Line 699  while (!done)
699    for (;;)    for (;;)
700      {      {
701      unsigned char *q;      unsigned char *q;
702        unsigned char *bptr = dbuffer;
703      int count, c;      int count, c;
704        int copystrings = 0;
705        int getstrings = 0;
706        int getlist = 0;
707        int start_offset = 0;
708      int offsets[45];      int offsets[45];
709      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
710    
711      options = 0;      options = 0;
712    
713      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
714      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
715        {        {
716        done = 1;        done = 1;
# Line 681  while (!done) Line 770  while (!done)
770          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
771          continue;          continue;
772    
773            case 'C':
774            while(isdigit(*p)) n = n * 10 + *p++ - '0';
775            copystrings |= 1 << n;
776            continue;
777    
778            case 'G':
779            while(isdigit(*p)) n = n * 10 + *p++ - '0';
780            getstrings |= 1 << n;
781            continue;
782    
783            case 'L':
784            getlist = 1;
785            continue;
786    
787            case 'N':
788            options |= PCRE_NOTEMPTY;
789            continue;
790    
791          case 'O':          case 'O':
792          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
793          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
# Line 698  while (!done) Line 805  while (!done)
805      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
806      support timing. */      support timing. */
807    
808    #if !defined NOPOSIX
809      if (posix || do_posix)      if (posix || do_posix)
810        {        {
811        int rc;        int rc;
# Line 706  while (!done) Line 814  while (!done)
814        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
815        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
816    
817        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr,
818          pmatch, eflags);          sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);
819    
820        if (rc != 0)        if (rc != 0)
821          {          {
# Line 725  while (!done) Line 833  while (!done)
833              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
834                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so);
835              fprintf(outfile, "\n");              fprintf(outfile, "\n");
836                if (i == 0 && do_showrest)
837                  {
838                  fprintf(outfile, " 0+ ");
839                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
840                  fprintf(outfile, "\n");
841                  }
842              }              }
843            }            }
844          }          }
845        }        }
846    
847      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
848    
849      else      else
850    #endif  /* !defined NOPOSIX */
851    
852        for (;;)
853        {        {
854        if (timeit)        if (timeit)
855          {          {
856          register int i;          register int i;
857          clock_t time_taken;          clock_t time_taken;
858          clock_t start_time = clock();          clock_t start_time = clock();
859          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
860            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
861              size_offsets);              (do_g? start_offset : 0), options, offsets, size_offsets);
862          time_taken = clock() - start_time;          time_taken = clock() - start_time;
863          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
864            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
865              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
866          }          }
867    
868        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
869          size_offsets);          (do_g? start_offset : 0), options, offsets, size_offsets);
870    
871        if (count == 0)        if (count == 0)
872          {          {
# Line 759  while (!done) Line 877  while (!done)
877        if (count >= 0)        if (count >= 0)
878          {          {
879          int i;          int i;
880          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
881            {            {
882            if (offsets[i] < 0)            if (offsets[i] < 0)
883              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
884            else            else
885              {              {
886              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
887              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
888              fprintf(outfile, "\n");              fprintf(outfile, "\n");
889                if (i == 0)
890                  {
891                  start_offset = offsets[1];
892                  if (do_showrest)
893                    {
894                    fprintf(outfile, " 0+ ");
895                    pchars(bptr + offsets[i+1], len - offsets[i+1]);
896                    fprintf(outfile, "\n");
897                    }
898                  }
899                }
900              }
901    
902            for (i = 0; i < 32; i++)
903              {
904              if ((copystrings & (1 << i)) != 0)
905                {
906                char copybuffer[16];
907                int rc = pcre_copy_substring((char *)bptr, offsets, count,
908                  i, copybuffer, sizeof(copybuffer));
909                if (rc < 0)
910                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
911                else
912                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
913                }
914              }
915    
916            for (i = 0; i < 32; i++)
917              {
918              if ((getstrings & (1 << i)) != 0)
919                {
920                const char *substring;
921                int rc = pcre_get_substring((char *)bptr, offsets, count,
922                  i, &substring);
923                if (rc < 0)
924                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
925                else
926                  {
927                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
928                  free((void *)substring);
929                  }
930              }              }
931            }            }
932    
933            if (getlist)
934              {
935              const char **stringlist;
936              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
937                &stringlist);
938              if (rc < 0)
939                fprintf(outfile, "get substring list failed %d\n", rc);
940              else
941                {
942                for (i = 0; i < count; i++)
943                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
944                if (stringlist[i] != NULL)
945                  fprintf(outfile, "string list not terminated by NULL\n");
946                free((void *)stringlist);
947                }
948              }
949    
950          }          }
951        else        else
952          {          {
953          if (count == -1) fprintf(outfile, "No match\n");          if (start_offset == 0)
954            else fprintf(outfile, "Error %d\n", count);            {
955              if (count == -1) fprintf(outfile, "No match\n");
956                else fprintf(outfile, "Error %d\n", count);
957              }
958            start_offset = -1;
959            }
960    
961          if ((!do_g && !do_G) || start_offset <= 0) break;
962          if (do_G)
963            {
964            bptr += start_offset;
965            len -= start_offset;
966          }          }
967        }        }
968      }      }
969    
970    CONTINUE:    CONTINUE:
971    
972    #if !defined NOPOSIX
973    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
974    #endif
975    
976    if (re != NULL) free(re);    if (re != NULL) free(re);
977    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
978      if (tables != NULL)
979        {
980        free((void *)tables);
981        setlocale(LC_CTYPE, "C");
982        }
983    }    }
984    
985  fprintf(outfile, "\n");  fprintf(outfile, "\n");

Legend:
Removed from v.23  
changed lines
  Added in v.37

  ViewVC Help
Powered by ViewVC 1.1.5