/[pcre]/code/tags/pcre-6.0/pcretest.c
ViewVC logotype

Diff of /code/tags/pcre-6.0/pcretest.c

Parent Directory | Revision Log | View Patch

revision 39 by nigel, Sat Feb 24 21:39:13 2007 UTC revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC
# Line 34  Makefile. */ Line 34  Makefile. */
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
# Line 48  static const char *OP_names[] = { Line 49  static const char *OP_names[] = {
49    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
50    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
51    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
52    "class", "Ref",    "class", "Ref", "Recurse",
53    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
54    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
55    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
# Line 281  compiled re. */ Line 282  compiled re. */
282    
283  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
284  {  {
285    gotten_store = size;
286  if (log_store)  if (log_store)
287    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "Memory allocation (code space): %d\n",
288      (int)((int)size - offsetof(real_pcre, code[0])));      (int)((int)size - offsetof(real_pcre, code[0])));
# Line 289  return malloc(size); Line 291  return malloc(size);
291    
292    
293    
294    
295    /* Get one piece of information from the pcre_fullinfo() function */
296    
297    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
298    {
299    int rc;
300    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
301      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
302    }
303    
304    
305    
306    
307  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
308  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
309  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 573  while (!done) Line 588  while (!done)
588        goto CONTINUE;        goto CONTINUE;
589        }        }
590    
591      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
592        info-returning functions. The old one has a limited interface and
593        returns only limited data. Check that it agrees with the newer one. */
594    
595      if (do_showinfo)      if (do_showinfo)
596        {        {
597        int first_char, count;        int old_first_char, old_options, old_count;
598          int count, backrefmax, first_char, need_char;
599          size_t size;
600    
601        if (do_debug) print_internals(re);        if (do_debug) print_internals(re);
602    
603        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
604          new_info(re, NULL, PCRE_INFO_SIZE, &size);
605          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
606          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
607          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
608          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
609    
610          old_count = pcre_info(re, &old_options, &old_first_char);
611        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
612          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
613        else        else
614          {          {
615          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
616          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
617            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
618              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
619              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
620              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
621              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
622              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
623              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != options) fprintf(outfile,
624              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
625              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              old_options);
626            }
627          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)  
628            fprintf(outfile, "Case state changes\n");        if (size != gotten_store) fprintf(outfile,
629            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
630            size, gotten_store);
631    
632          fprintf(outfile, "Capturing subpattern count = %d\n", count);
633          if (backrefmax > 0)
634            fprintf(outfile, "Max back reference = %d\n", backrefmax);
635          if (options == 0) fprintf(outfile, "No options\n");
636            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
637              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
638              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
639              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
640              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
641              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
642              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
643              ((options & PCRE_EXTRA) != 0)? " extra" : "",
644              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
645    
646          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
647            fprintf(outfile, "Case state changes\n");
648    
649          if (first_char == -1)        if (first_char == -1)
650            {          {
651            fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows \\n\n");
652            }          }
653          else if (first_char < 0)        else if (first_char < 0)
654            {          {
655            fprintf(outfile, "No first char\n");          fprintf(outfile, "No first char\n");
656            }          }
657          else
658            {
659            if (isprint(first_char))
660              fprintf(outfile, "First char = \'%c\'\n", first_char);
661          else          else
662            {            fprintf(outfile, "First char = %d\n", first_char);
663            if (isprint(first_char))          }
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
664    
665          if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)        if (need_char < 0)
666            {          {
667            int req_char = ((real_pcre *)re)->req_char;          fprintf(outfile, "No need char\n");
668            if (isprint(req_char))          }
669              fprintf(outfile, "Req char = \'%c\'\n", req_char);        else
670            else          {
671              fprintf(outfile, "Req char = %d\n", req_char);          if (isprint(need_char))
672            }            fprintf(outfile, "Need char = \'%c\'\n", need_char);
673          else fprintf(outfile, "No req char\n");          else
674              fprintf(outfile, "Need char = %d\n", need_char);
675          }          }
676        }        }
677    
# Line 654  while (!done) Line 700  while (!done)
700        else if (extra == NULL)        else if (extra == NULL)
701          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
702    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
703        else if (do_showinfo)        else if (do_showinfo)
704          {          {
705          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
706          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
707            if (start_bits == NULL)
708            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
709          else          else
710            {            {
# Line 669  while (!done) Line 713  while (!done)
713            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
714            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
715              {              {
716              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
717                {                {
718                if (c > 75)                if (c > 75)
719                  {                  {
# Line 706  while (!done) Line 750  while (!done)
750      int getlist = 0;      int getlist = 0;
751      int gmatched = 0;      int gmatched = 0;
752      int start_offset = 0;      int start_offset = 0;
753        int g_notempty = 0;
754      int offsets[45];      int offsets[45];
755      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
756    
# Line 811  while (!done) Line 856  while (!done)
856        {        {
857        int rc;        int rc;
858        int eflags = 0;        int eflags = 0;
859        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
860        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
861        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
862    
863        rc = regexec(&preg, (const char *)bptr,        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);  
864    
865        if (rc != 0)        if (rc != 0)
866          {          {
# Line 826  while (!done) Line 870  while (!done)
870        else        else
871          {          {
872          size_t i;          size_t i;
873          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
874            {            {
875            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
876              {              {
# Line 859  while (!done) Line 903  while (!done)
903          clock_t start_time = clock();          clock_t start_time = clock();
904          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
905            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
906              start_offset, options, offsets, size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
907          time_taken = clock() - start_time;          time_taken = clock() - start_time;
908          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
909            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
# Line 867  while (!done) Line 911  while (!done)
911          }          }
912    
913        count = pcre_exec(re, extra, (char *)bptr, len,        count = pcre_exec(re, extra, (char *)bptr, len,
914          start_offset, options, offsets, size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
915    
916        if (count == 0)        if (count == 0)
917          {          {
# Line 950  while (!done) Line 994  while (!done)
994            }            }
995          }          }
996    
997        /* Failed to match */        /* Failed to match. If this is a /g or /G loop and we previously set
998          PCRE_NOTEMPTY after a null match, this is not necessarily the end.
999          We want to advance the start offset, and continue. Fudge the offset
1000          values to achieve this. We won't be at the end of the string - that
1001          was checked before setting PCRE_NOTEMPTY. */
1002    
1003        else        else
1004          {          {
1005          if (gmatched == 0)          if (g_notempty != 0)
1006            {            {
1007            if (count == -1) fprintf(outfile, "No match\n");            offsets[0] = start_offset;
1008              else fprintf(outfile, "Error %d\n", count);            offsets[1] = start_offset + 1;
1009              }
1010            else
1011              {
1012              if (gmatched == 0)   /* Error if no previous matches */
1013                {
1014                if (count == -1) fprintf(outfile, "No match\n");
1015                  else fprintf(outfile, "Error %d\n", count);
1016                }
1017              break;  /* Out of the /g loop */
1018            }            }
         break;  /* Out of the /g loop */  
1019          }          }
1020    
1021        /* If not /g or /G we are done */        /* If not /g or /G we are done */
1022    
1023        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
1024    
1025        /* If we have matched an empty string, set PCRE_NOTEMPTY for the next        /* If we have matched an empty string, first check to see if we are at
1026        match. This mimics what Perl's /g option does. */        the end of the subject. If so, the /g loop is over. Otherwise, mimic
1027          what Perl's /g option does. This turns out to be rather cunning. First
1028          we set PCRE_NOTEMPTY and try the match again at the same point. If this
1029          fails (picked up above) we advance to the next character. */
1030    
1031        if (offsets[1] == offsets[0])        g_notempty = 0;
1032          options |= PCRE_NOTEMPTY;        if (offsets[0] == offsets[1])
1033        else          {
1034          options &= ~PCRE_NOTEMPTY;          if (offsets[0] == len) break;
1035            g_notempty = PCRE_NOTEMPTY;
1036            }
1037    
1038        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1039    

Legend:
Removed from v.39  
changed lines
  Added in v.43

  ViewVC Help
Powered by ViewVC 1.1.5