/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 35 by nigel, Sat Feb 24 21:39:05 2007 UTC revision 45 by nigel, Sat Feb 24 21:39:25 2007 UTC
# Line 12  Line 12 
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 27  Line 34 
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
# Line 41  static const char *OP_names[] = { Line 49  static const char *OP_names[] = {
49    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
50    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
51    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
52    "class", "Ref",    "class", "Ref", "Recurse",
53    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
54    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
55    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
56  };  };
57    
58    
59  static void print_internals(pcre *re, FILE *outfile)  static void print_internals(pcre *re)
60  {  {
61  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
62    
# Line 274  compiled re. */ Line 282  compiled re. */
282    
283  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
284  {  {
285    gotten_store = size;
286  if (log_store)  if (log_store)
287    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "Memory allocation (code space): %d\n",
288      (int)((int)size - offsetof(real_pcre, code[0])));      (int)((int)size - offsetof(real_pcre, code[0])));
# Line 282  return malloc(size); Line 291  return malloc(size);
291    
292    
293    
294    
295    /* Get one piece of information from the pcre_fullinfo() function */
296    
297    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
298    {
299    int rc;
300    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
301      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
302    }
303    
304    
305    
306    
307  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
308  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
309  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 366  while (!done) Line 388  while (!done)
388    {    {
389    pcre *re = NULL;    pcre *re = NULL;
390    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
391    
392    #if !defined NOPOSIX  /* There are still compilers that require no indent */
393    regex_t preg;    regex_t preg;
394      int do_posix = 0;
395    #endif
396    
397    const char *error;    const char *error;
398    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
399    unsigned const char *tables = NULL;    unsigned const char *tables = NULL;
# Line 376  while (!done) Line 403  while (!done)
403    int do_g = 0;    int do_g = 0;
404    int do_showinfo = showinfo;    int do_showinfo = showinfo;
405    int do_showrest = 0;    int do_showrest = 0;
   int do_posix = 0;  
406    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
407    
408    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
# Line 460  while (!done) Line 486  while (!done)
486        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
487        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
488        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
489    
490    #if !defined NOPOSIX
491        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
492    #endif
493    
494        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
495        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
496        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
# Line 489  while (!done) Line 519  while (!done)
519    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
520    local character tables. */    local character tables. */
521    
522    #if !defined NOPOSIX
523    if (posix || do_posix)    if (posix || do_posix)
524      {      {
525      int rc;      int rc;
# Line 511  while (!done) Line 542  while (!done)
542    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
543    
544    else    else
545    #endif  /* !defined NOPOSIX */
546    
547      {      {
548      if (timeit)      if (timeit)
549        {        {
# Line 555  while (!done) Line 588  while (!done)
588        goto CONTINUE;        goto CONTINUE;
589        }        }
590    
591      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
592        info-returning functions. The old one has a limited interface and
593        returns only limited data. Check that it agrees with the newer one. */
594    
595      if (do_showinfo)      if (do_showinfo)
596        {        {
597        int first_char, count;        int old_first_char, old_options, old_count;
598          int count, backrefmax, first_char, need_char;
599        if (do_debug) print_internals(re, outfile);        size_t size;
600    
601          if (do_debug) print_internals(re);
602    
603          new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
604          new_info(re, NULL, PCRE_INFO_SIZE, &size);
605          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
606          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
607          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
608          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
609    
610        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
611        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
612          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
613        else        else
614          {          {
615          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
616          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
617            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
618              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
619              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
620              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
621              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
622              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
623              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != options) fprintf(outfile,
624              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
625              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              old_options);
626          if (first_char == -1)          }
627            {  
628            fprintf(outfile, "First char at start or follows \\n\n");        if (size != gotten_store) fprintf(outfile,
629            }          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
630          else if (first_char < 0)          size, gotten_store);
631            {  
632            fprintf(outfile, "No first char\n");        fprintf(outfile, "Capturing subpattern count = %d\n", count);
633            }        if (backrefmax > 0)
634            fprintf(outfile, "Max back reference = %d\n", backrefmax);
635          if (options == 0) fprintf(outfile, "No options\n");
636            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
637              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
638              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
639              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
640              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
641              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
642              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
643              ((options & PCRE_EXTRA) != 0)? " extra" : "",
644              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
645    
646          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
647            fprintf(outfile, "Case state changes\n");
648    
649          if (first_char == -1)
650            {
651            fprintf(outfile, "First char at start or follows \\n\n");
652            }
653          else if (first_char < 0)
654            {
655            fprintf(outfile, "No first char\n");
656            }
657          else
658            {
659            if (isprint(first_char))
660              fprintf(outfile, "First char = \'%c\'\n", first_char);
661          else          else
662            {            fprintf(outfile, "First char = %d\n", first_char);
663            if (isprint(first_char))          }
664              fprintf(outfile, "First char = \'%c\'\n", first_char);  
665            else        if (need_char < 0)
666              fprintf(outfile, "First char = %d\n", first_char);          {
667            }          fprintf(outfile, "No need char\n");
668            }
669          else
670            {
671            if (isprint(need_char))
672              fprintf(outfile, "Need char = \'%c\'\n", need_char);
673            else
674              fprintf(outfile, "Need char = %d\n", need_char);
675          }          }
676        }        }
677    
# Line 622  while (!done) Line 700  while (!done)
700        else if (extra == NULL)        else if (extra == NULL)
701          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
702    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
703        else if (do_showinfo)        else if (do_showinfo)
704          {          {
705          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
706          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
707            if (start_bits == NULL)
708            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
709          else          else
710            {            {
# Line 637  while (!done) Line 713  while (!done)
713            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
714            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
715              {              {
716              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
717                {                {
718                if (c > 75)                if (c > 75)
719                  {                  {
# Line 672  while (!done) Line 748  while (!done)
748      int copystrings = 0;      int copystrings = 0;
749      int getstrings = 0;      int getstrings = 0;
750      int getlist = 0;      int getlist = 0;
751        int gmatched = 0;
752      int start_offset = 0;      int start_offset = 0;
753        int g_notempty = 0;
754      int offsets[45];      int offsets[45];
755      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
756    
# Line 752  while (!done) Line 830  while (!done)
830          getlist = 1;          getlist = 1;
831          continue;          continue;
832    
833            case 'N':
834            options |= PCRE_NOTEMPTY;
835            continue;
836    
837          case 'O':          case 'O':
838          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
839          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
# Line 769  while (!done) Line 851  while (!done)
851      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
852      support timing. */      support timing. */
853    
854    #if !defined NOPOSIX
855      if (posix || do_posix)      if (posix || do_posix)
856        {        {
857        int rc;        int rc;
858        int eflags = 0;        int eflags = 0;
859        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
860        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
861        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
862    
863        rc = regexec(&preg, (unsigned char *)bptr,        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);  
864    
865        if (rc != 0)        if (rc != 0)
866          {          {
# Line 788  while (!done) Line 870  while (!done)
870        else        else
871          {          {
872          size_t i;          size_t i;
873          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
874            {            {
875            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
876              {              {
# Line 809  while (!done) Line 891  while (!done)
891    
892      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
893    
894      else for (;;)      else
895    #endif  /* !defined NOPOSIX */
896    
897        for (;; gmatched++)    /* Loop for /g or /G */
898        {        {
899        if (timeit)        if (timeit)
900          {          {
# Line 818  while (!done) Line 903  while (!done)
903          clock_t start_time = clock();          clock_t start_time = clock();
904          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
905            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
906              (do_g? start_offset : 0), options, offsets, size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
907          time_taken = clock() - start_time;          time_taken = clock() - start_time;
908          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
909            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
# Line 826  while (!done) Line 911  while (!done)
911          }          }
912    
913        count = pcre_exec(re, extra, (char *)bptr, len,        count = pcre_exec(re, extra, (char *)bptr, len,
914          (do_g? start_offset : 0), options, offsets, size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
915    
916        if (count == 0)        if (count == 0)
917          {          {
# Line 834  while (!done) Line 919  while (!done)
919          count = size_offsets/3;          count = size_offsets/3;
920          }          }
921    
922          /* Matched */
923    
924        if (count >= 0)        if (count >= 0)
925          {          {
926          int i;          int i;
# Line 848  while (!done) Line 935  while (!done)
935              fprintf(outfile, "\n");              fprintf(outfile, "\n");
936              if (i == 0)              if (i == 0)
937                {                {
               start_offset = offsets[1];  
938                if (do_showrest)                if (do_showrest)
939                  {                  {
940                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
# Line 863  while (!done) Line 949  while (!done)
949            {            {
950            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
951              {              {
952              char buffer[16];              char copybuffer[16];
953              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, offsets, count,
954                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
955              if (rc < 0)              if (rc < 0)
956                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
957              else              else
958                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
959              }              }
960            }            }
961    
# Line 906  while (!done) Line 992  while (!done)
992              free((void *)stringlist);              free((void *)stringlist);
993              }              }
994            }            }
   
995          }          }
996    
997          /* Failed to match. If this is a /g or /G loop and we previously set
998          PCRE_NOTEMPTY after a null match, this is not necessarily the end.
999          We want to advance the start offset, and continue. Fudge the offset
1000          values to achieve this. We won't be at the end of the string - that
1001          was checked before setting PCRE_NOTEMPTY. */
1002    
1003        else        else
1004          {          {
1005          if (start_offset == 0)          if (g_notempty != 0)
1006              {
1007              offsets[0] = start_offset;
1008              offsets[1] = start_offset + 1;
1009              }
1010            else
1011            {            {
1012            if (count == -1) fprintf(outfile, "No match\n");            if (gmatched == 0)   /* Error if no previous matches */
1013              else fprintf(outfile, "Error %d\n", count);              {
1014                if (count == -1) fprintf(outfile, "No match\n");
1015                  else fprintf(outfile, "Error %d\n", count);
1016                }
1017              break;  /* Out of the /g loop */
1018            }            }
         start_offset = -1;  
1019          }          }
1020    
1021        if ((!do_g && !do_G) || start_offset <= 0) break;        /* If not /g or /G we are done */
1022        if (do_G)  
1023          if (!do_g && !do_G) break;
1024    
1025          /* If we have matched an empty string, first check to see if we are at
1026          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1027          what Perl's /g options does. This turns out to be rather cunning. First
1028          we set PCRE_NOTEMPTY and try the match again at the same point. If this
1029          fails (picked up above) we advance to the next character. */
1030    
1031          g_notempty = 0;
1032          if (offsets[0] == offsets[1])
1033          {          {
1034          bptr += start_offset;          if (offsets[0] == len) break;
1035          len -= start_offset;          g_notempty = PCRE_NOTEMPTY;
1036          }          }
1037        }  
1038      }        /* For /g, update the start offset, leaving the rest alone */
1039    
1040          if (do_g) start_offset = offsets[1];
1041    
1042          /* For /G, update the pointer and length */
1043    
1044          else
1045            {
1046            bptr += offsets[1];
1047            len -= offsets[1];
1048            }
1049          }  /* End of loop for /g and /G */
1050        }    /* End of loop for data lines */
1051    
1052    CONTINUE:    CONTINUE:
1053    
1054    #if !defined NOPOSIX
1055    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1056    #endif
1057    
1058    if (re != NULL) free(re);    if (re != NULL) free(re);
1059    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1060    if (tables != NULL)    if (tables != NULL)

Legend:
Removed from v.35  
changed lines
  Added in v.45

  ViewVC Help
Powered by ViewVC 1.1.5