/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 27 by nigel, Sat Feb 24 21:38:49 2007 UTC revision 41 by nigel, Sat Feb 24 21:39:17 2007 UTC
# Line 12  Line 12 
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 48  static const char *OP_names[] = { Line 55  static const char *OP_names[] = {
55  };  };
56    
57    
58  static void print_internals(pcre *re, FILE *outfile)  static void print_internals(pcre *re)
59  {  {
60  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
61    
# Line 274  compiled re. */ Line 281  compiled re. */
281    
282  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
283  {  {
284  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  if (log_store)
285      fprintf(outfile, "Memory allocation (code space): %d\n",
286        (int)((int)size - offsetof(real_pcre, code[0])));
287  return malloc(size);  return malloc(size);
288  }  }
289    
# Line 292  int study_options = 0; Line 301  int study_options = 0;
301  int op = 1;  int op = 1;
302  int timeit = 0;  int timeit = 0;
303  int showinfo = 0;  int showinfo = 0;
304    int showstore = 0;
305  int posix = 0;  int posix = 0;
306  int debug = 0;  int debug = 0;
307  int done = 0;  int done = 0;
# Line 306  outfile = stdout; Line 316  outfile = stdout;
316    
317  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
318    {    {
319    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
320        showstore = 1;
321    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
322    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
323    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 362  while (!done) Line 373  while (!done)
373    {    {
374    pcre *re = NULL;    pcre *re = NULL;
375    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
376    
377    #if !defined NOPOSIX  /* There are still compilers that require no indent */
378    regex_t preg;    regex_t preg;
379    #endif
380    
381    const char *error;    const char *error;
382    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
383    unsigned const char *tables = NULL;    unsigned const char *tables = NULL;
384    int do_study = 0;    int do_study = 0;
385    int do_debug = debug;    int do_debug = debug;
386      int do_G = 0;
387      int do_g = 0;
388    int do_showinfo = showinfo;    int do_showinfo = showinfo;
389      int do_showrest = 0;
390    int do_posix = 0;    int do_posix = 0;
391    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
392    
# Line 385  while (!done) Line 403  while (!done)
403    
404    delimiter = *p++;    delimiter = *p++;
405    
406    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
407      {      {
408      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
409      goto SKIP_DATA;      goto SKIP_DATA;
410      }      }
411    
# Line 395  while (!done) Line 413  while (!done)
413    
414    for(;;)    for(;;)
415      {      {
416      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
417          {
418          if (*pp == '\\' && pp[1] != 0) pp++;
419            else if (*pp == delimiter) break;
420          pp++;
421          }
422      if (*pp != 0) break;      if (*pp != 0) break;
423    
424      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 415  while (!done) Line 438  while (!done)
438      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
439      }      }
440    
441      /* If the first character after the delimiter is backslash, make
442      the pattern end with backslash. This is purely to provide a way
443      of testing for the error message when a pattern ends with backslash. */
444    
445      if (pp[1] == '\\') *pp++ = '\\';
446    
447    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
448    
449    *pp++ = 0;    *pp++ = 0;
# Line 423  while (!done) Line 452  while (!done)
452    
453    options = 0;    options = 0;
454    study_options = 0;    study_options = 0;
455      log_store = showstore;  /* default from command line */
456    
457    while (*pp != 0)    while (*pp != 0)
458      {      {
459      switch (*pp++)      switch (*pp++)
460        {        {
461          case 'g': do_g = 1; break;
462        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
463        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
464        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
465        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
466    
467          case '+': do_showrest = 1; break;
468        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
469        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
470        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
471          case 'G': do_G = 1; break;
472        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
473          case 'M': log_store = 1; break;
474    
475    #if !defined NOPOSIX
476        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
477    #endif
478    
479        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
480        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
481        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
# Line 465  while (!done) Line 504  while (!done)
504    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
505    local character tables. */    local character tables. */
506    
507    #if !defined NOPOSIX
508    if (posix || do_posix)    if (posix || do_posix)
509      {      {
510      int rc;      int rc;
# Line 487  while (!done) Line 527  while (!done)
527    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
528    
529    else    else
530    #endif  /* !defined NOPOSIX */
531    
532      {      {
533      if (timeit)      if (timeit)
534        {        {
# Line 537  while (!done) Line 579  while (!done)
579        {        {
580        int first_char, count;        int first_char, count;
581    
582        if (do_debug) print_internals(re, outfile);        if (do_debug) print_internals(re);
583    
584        count = pcre_info(re, &options, &first_char);        count = pcre_info(re, &options, &first_char);
585        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 555  while (!done) Line 597  while (!done)
597              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
598              ((options & PCRE_EXTRA) != 0)? " extra" : "",              ((options & PCRE_EXTRA) != 0)? " extra" : "",
599              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
600    
601            if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
602              fprintf(outfile, "Case state changes\n");
603    
604          if (first_char == -1)          if (first_char == -1)
605            {            {
606            fprintf(outfile, "First char at start or follows \\n\n");            fprintf(outfile, "First char at start or follows \\n\n");
# Line 570  while (!done) Line 616  while (!done)
616            else            else
617              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "First char = %d\n", first_char);
618            }            }
619    
620            if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
621              {
622              int req_char = ((real_pcre *)re)->req_char;
623              if (isprint(req_char))
624                fprintf(outfile, "Req char = \'%c\'\n", req_char);
625              else
626                fprintf(outfile, "Req char = %d\n", req_char);
627              }
628            else fprintf(outfile, "No req char\n");
629          }          }
630        }        }
631    
# Line 643  while (!done) Line 699  while (!done)
699    for (;;)    for (;;)
700      {      {
701      unsigned char *q;      unsigned char *q;
702        unsigned char *bptr = dbuffer;
703      int count, c;      int count, c;
704        int copystrings = 0;
705        int getstrings = 0;
706        int getlist = 0;
707        int gmatched = 0;
708        int start_offset = 0;
709        int g_notempty = 0;
710      int offsets[45];      int offsets[45];
711      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
712    
713      options = 0;      options = 0;
714    
715      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
716      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
717        {        {
718        done = 1;        done = 1;
# Line 709  while (!done) Line 772  while (!done)
772          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
773          continue;          continue;
774    
775            case 'C':
776            while(isdigit(*p)) n = n * 10 + *p++ - '0';
777            copystrings |= 1 << n;
778            continue;
779    
780            case 'G':
781            while(isdigit(*p)) n = n * 10 + *p++ - '0';
782            getstrings |= 1 << n;
783            continue;
784    
785            case 'L':
786            getlist = 1;
787            continue;
788    
789            case 'N':
790            options |= PCRE_NOTEMPTY;
791            continue;
792    
793          case 'O':          case 'O':
794          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
795          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
# Line 726  while (!done) Line 807  while (!done)
807      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
808      support timing. */      support timing. */
809    
810    #if !defined NOPOSIX
811      if (posix || do_posix)      if (posix || do_posix)
812        {        {
813        int rc;        int rc;
814        int eflags = 0;        int eflags = 0;
815        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
816        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
817        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
818    
819        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         pmatch, eflags);  
820    
821        if (rc != 0)        if (rc != 0)
822          {          {
# Line 745  while (!done) Line 826  while (!done)
826        else        else
827          {          {
828          size_t i;          size_t i;
829          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
830            {            {
831            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
832              {              {
# Line 753  while (!done) Line 834  while (!done)
834              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
835                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so);
836              fprintf(outfile, "\n");              fprintf(outfile, "\n");
837                if (i == 0 && do_showrest)
838                  {
839                  fprintf(outfile, " 0+ ");
840                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
841                  fprintf(outfile, "\n");
842                  }
843              }              }
844            }            }
845          }          }
846        }        }
847    
848      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
849    
850      else      else
851    #endif  /* !defined NOPOSIX */
852    
853        for (;; gmatched++)    /* Loop for /g or /G */
854        {        {
855        if (timeit)        if (timeit)
856          {          {
# Line 768  while (!done) Line 858  while (!done)
858          clock_t time_taken;          clock_t time_taken;
859          clock_t start_time = clock();          clock_t start_time = clock();
860          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
861            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
862              size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
863          time_taken = clock() - start_time;          time_taken = clock() - start_time;
864          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
865            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
866            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
867          }          }
868    
869        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
870          size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
871    
872        if (count == 0)        if (count == 0)
873          {          {
# Line 785  while (!done) Line 875  while (!done)
875          count = size_offsets/3;          count = size_offsets/3;
876          }          }
877    
878          /* Matched */
879    
880        if (count >= 0)        if (count >= 0)
881          {          {
882          int i;          int i;
883          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
884            {            {
885            if (offsets[i] < 0)            if (offsets[i] < 0)
886              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
887            else            else
888              {              {
889              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
890              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
891              fprintf(outfile, "\n");              fprintf(outfile, "\n");
892                if (i == 0)
893                  {
894                  if (do_showrest)
895                    {
896                    fprintf(outfile, " 0+ ");
897                    pchars(bptr + offsets[i+1], len - offsets[i+1]);
898                    fprintf(outfile, "\n");
899                    }
900                  }
901                }
902              }
903    
904            for (i = 0; i < 32; i++)
905              {
906              if ((copystrings & (1 << i)) != 0)
907                {
908                char copybuffer[16];
909                int rc = pcre_copy_substring((char *)bptr, offsets, count,
910                  i, copybuffer, sizeof(copybuffer));
911                if (rc < 0)
912                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
913                else
914                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
915                }
916              }
917    
918            for (i = 0; i < 32; i++)
919              {
920              if ((getstrings & (1 << i)) != 0)
921                {
922                const char *substring;
923                int rc = pcre_get_substring((char *)bptr, offsets, count,
924                  i, &substring);
925                if (rc < 0)
926                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
927                else
928                  {
929                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
930                  free((void *)substring);
931                  }
932                }
933              }
934    
935            if (getlist)
936              {
937              const char **stringlist;
938              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
939                &stringlist);
940              if (rc < 0)
941                fprintf(outfile, "get substring list failed %d\n", rc);
942              else
943                {
944                for (i = 0; i < count; i++)
945                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
946                if (stringlist[i] != NULL)
947                  fprintf(outfile, "string list not terminated by NULL\n");
948                free((void *)stringlist);
949                }
950              }
951            }
952    
953          /* Failed to match. If this is a /g or /G loop and we previously set
954          PCRE_NOTEMPTY after a null match, this is not necessarily the end.
955          We want to advance the start offset, and continue. Fudge the offset
956          values to achieve this. We won't be at the end of the string - that
957          was checked before setting PCRE_NOTEMPTY. */
958    
959          else
960            {
961            if (g_notempty != 0)
962              {
963              offsets[0] = start_offset;
964              offsets[1] = start_offset + 1;
965              }
966            else
967              {
968              if (gmatched == 0)   /* Error if no previous matches */
969                {
970                if (count == -1) fprintf(outfile, "No match\n");
971                  else fprintf(outfile, "Error %d\n", count);
972              }              }
973              break;  /* Out of the /g loop */
974            }            }
975          }          }
976    
977          /* If not /g or /G we are done */
978    
979          if (!do_g && !do_G) break;
980    
981          /* If we have matched an empty string, first check to see if we are at
982          the end of the subject. If so, the /g loop is over. Otherwise, mimic
983          what Perl's /g options does. This turns out to be rather cunning. First
984          we set PCRE_NOTEMPTY and try the match again at the same point. If this
985          fails (picked up above) we advance to the next character. */
986    
987          g_notempty = 0;
988          if (offsets[0] == offsets[1])
989            {
990            if (offsets[0] == len) break;
991            g_notempty = PCRE_NOTEMPTY;
992            }
993    
994          /* For /g, update the start offset, leaving the rest alone */
995    
996          if (do_g) start_offset = offsets[1];
997    
998          /* For /G, update the pointer and length */
999    
1000        else        else
1001          {          {
1002          if (count == -1) fprintf(outfile, "No match\n");          bptr += offsets[1];
1003            else fprintf(outfile, "Error %d\n", count);          len -= offsets[1];
1004          }          }
1005        }        }  /* End of loop for /g and /G */
1006      }      }    /* End of loop for data lines */
1007    
1008    CONTINUE:    CONTINUE:
1009    
1010    #if !defined NOPOSIX
1011    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1012    #endif
1013    
1014    if (re != NULL) free(re);    if (re != NULL) free(re);
1015    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1016    if (tables != NULL)    if (tables != NULL)

Legend:
Removed from v.27  
changed lines
  Added in v.41

  ViewVC Help
Powered by ViewVC 1.1.5