/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC revision 507 by ph10, Wed Mar 10 16:08:01 2010 UTC
# Line 71  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74  #define isatty _isatty         /* This is what Windows calls them, I'm told */  #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79  #define fileno _fileno  #define fileno _fileno
80    #endif
81    
82  #else  #else
83  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
# Line 113  external symbols to prevent clashes. */ Line 118  external symbols to prevent clashes. */
118    
119  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
120  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
121  pcre_compile.c when that module is compiled with debugging enabled.  pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
 The definition of the macro PRINTABLE, which determines whether to print an  
 output character as-is or as a hex value when showing compiled patterns, is  
 contained in this file. We uses it here also, in cases when the locale has not  
 been explicitly changed, so as to get consistent output from systems that  
 differ in their output from isprint() even in the "C" locale. */  
123    
124    #define COMPILING_PCRETEST
125  #include "pcre_printint.src"  #include "pcre_printint.src"
126    
127  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 875  while (argc > 1 && argv[op][0] == '-') Line 881  while (argc > 1 && argv[op][0] == '-')
881    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
882    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
883    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
884    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
885  #if !defined NODFA  #if !defined NODFA
886    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
887  #endif  #endif
# Line 928  while (argc > 1 && argv[op][0] == '-') Line 934  while (argc > 1 && argv[op][0] == '-')
934    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
935      {      {
936      int rc;      int rc;
937      unsigned long int lrc;      unsigned long int lrc;
938      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
939      printf("Compiled with\n");      printf("Compiled with\n");
940      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 937  while (argc > 1 && argv[op][0] == '-') Line 943  while (argc > 1 && argv[op][0] == '-')
943      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
944      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
945      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
946      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
947      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
948        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
949        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
# Line 1300  while (!done) Line 1306  while (!done)
1306      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1307      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1308      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1309        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1310    
1311      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1312    
# Line 1320  while (!done) Line 1327  while (!done)
1327  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1328    
1329      {      {
1330        unsigned long int get_options;
1331    
1332      if (timeit > 0)      if (timeit > 0)
1333        {        {
1334        register int i;        register int i;
# Line 1363  while (!done) Line 1372  while (!done)
1372        goto CONTINUE;        goto CONTINUE;
1373        }        }
1374    
1375      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1376      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1377      returns only limited data. Check that it agrees with the newer one. */      lines. */
1378    
1379        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1380        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1381    
1382        /* Print information if required. There are now two info-returning
1383        functions. The old one has a limited interface and returns only limited
1384        data. Check that it agrees with the newer one. */
1385    
1386      if (log_store)      if (log_store)
1387        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1436  while (!done) Line 1452  while (!done)
1452          {          {
1453          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1454          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1455          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1456            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1457          }          }
1458        }        }
1459    
# Line 1450  while (!done) Line 1467  while (!done)
1467        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1468        }        }
1469    
1470        /* We already have the options in get_options (see above) */
1471    
1472      if (do_showinfo)      if (do_showinfo)
1473        {        {
1474        unsigned long int get_options, all_options;        unsigned long int all_options;
1475  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1476        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1477  #endif  #endif
# Line 1461  while (!done) Line 1480  while (!done)
1480        int nameentrysize, namecount;        int nameentrysize, namecount;
1481        const uschar *nametable;        const uschar *nametable;
1482    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1483        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1484        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1485        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1612  while (!done) Line 1630  while (!done)
1630          else          else
1631            {            {
1632            uschar *start_bits = NULL;            uschar *start_bits = NULL;
1633            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
1634    
1635              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1636              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1637    
1638              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1639            if (start_bits == NULL)            if (start_bits == NULL)
1640              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
1641            else            else
1642              {              {
1643              int i;              int i;
# Line 1954  while (!done) Line 1976  while (!done)
1976          continue;          continue;
1977    
1978          case 'N':          case 'N':
1979          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
1980              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1981            else
1982              options |= PCRE_NOTEMPTY;
1983          continue;          continue;
1984    
1985          case 'O':          case 'O':
# Line 1977  while (!done) Line 2002  while (!done)
2002          continue;          continue;
2003    
2004          case 'P':          case 'P':
2005          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2006              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2007          continue;          continue;
2008    
2009          case 'Q':          case 'Q':
# Line 2011  while (!done) Line 2037  while (!done)
2037          case 'S':          case 'S':
2038          show_malloc = 1;          show_malloc = 1;
2039          continue;          continue;
2040    
2041          case 'Y':          case 'Y':
2042          options |= PCRE_NO_START_OPTIMIZE;          options |= PCRE_NO_START_OPTIMIZE;
2043          continue;          continue;
2044    
2045          case 'Z':          case 'Z':
2046          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
# Line 2074  while (!done) Line 2100  while (!done)
2100          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2101        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2102        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2103        if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;        if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2104    
2105        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2106    
# Line 2130  while (!done) Line 2156  while (!done)
2156            {            {
2157            int workspace[1000];            int workspace[1000];
2158            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2159              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2160                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2161                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2162            }            }
# Line 2193  while (!done) Line 2219  while (!done)
2219        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2220          {          {
2221          int workspace[1000];          int workspace[1000];
2222          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2223            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2224            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2225          if (count == 0)          if (count == 0)
# Line 2348  while (!done) Line 2374  while (!done)
2374        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2375          {          {
2376          fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
2377  #if !defined NODFA          if (use_size_offsets > 1)
2378          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            {
2379            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, ": ");
2380              bptr + use_offsets[0]);            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2381  #endif              outfile);
2382              }
2383          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2384          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2385          }          }
# Line 2425  while (!done) Line 2452  while (!done)
2452        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2453    
2454        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2455        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2456        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
2457        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2458        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2459        character. */        character. */
2460    
# Line 2436  while (!done) Line 2463  while (!done)
2463        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2464          {          {
2465          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2466          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2467          }          }
2468    
2469        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */

Legend:
Removed from v.391  
changed lines
  Added in v.507

  ViewVC Help
Powered by ViewVC 1.1.5