/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 386 by ph10, Tue Mar 10 11:48:33 2009 UTC | revision 461 by ph10, Mon Oct 5 10:59:35 2009 UTC
# Line 71  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74  #define isatty _isatty         /* This is what Windows calls them, I'm told */  #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79  #define fileno _fileno  #define fileno _fileno
80    #endif
81    
82  #else  #else
83  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
# Line 875  while (argc > 1 && argv[op][0] == '-') Line 880  while (argc > 1 && argv[op][0] == '-')
880    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
881    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884  #if !defined NODFA  #if !defined NODFA
885    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886  #endif  #endif
# Line 928  while (argc > 1 && argv[op][0] == '-') Line 933  while (argc > 1 && argv[op][0] == '-')
933    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
934      {      {
935      int rc;      int rc;
936      unsigned long int lrc;      unsigned long int lrc;
937      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
938      printf("Compiled with\n");      printf("Compiled with\n");
939      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 936  while (argc > 1 && argv[op][0] == '-') Line 941  while (argc > 1 && argv[op][0] == '-')
941      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
943      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
945        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
946        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
947          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
949        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
950      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)pcre_config(PCRE_CONFIG_BSR, &rc);
# Line 1298  while (!done) Line 1305  while (!done)
1305      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1306      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1307      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1308        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1309    
1310      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1311    
# Line 1318  while (!done) Line 1326  while (!done)
1326  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1327    
1328      {      {
1329        unsigned long int get_options;
1330    
1331      if (timeit > 0)      if (timeit > 0)
1332        {        {
1333        register int i;        register int i;
# Line 1361  while (!done) Line 1371  while (!done)
1371        goto CONTINUE;        goto CONTINUE;
1372        }        }
1373    
1374      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1375      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1376      returns only limited data. Check that it agrees with the newer one. */      lines. */
1377    
1378        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1379        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1380    
1381        /* Print information if required. There are now two info-returning
1382        functions. The old one has a limited interface and returns only limited
1383        data. Check that it agrees with the newer one. */
1384    
1385      if (log_store)      if (log_store)
1386        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1434  while (!done) Line 1451  while (!done)
1451          {          {
1452          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1453          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1454          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1455            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1456          }          }
1457        }        }
1458    
# Line 1448  while (!done) Line 1466  while (!done)
1466        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1467        }        }
1468    
1469        /* We already have the options in get_options (see above) */
1470    
1471      if (do_showinfo)      if (do_showinfo)
1472        {        {
1473        unsigned long int get_options, all_options;        unsigned long int all_options;
1474  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1475        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1476  #endif  #endif
# Line 1459  while (!done) Line 1479  while (!done)
1479        int nameentrysize, namecount;        int nameentrysize, namecount;
1480        const uschar *nametable;        const uschar *nametable;
1481    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1482        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1483        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1484        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1610  while (!done) Line 1629  while (!done)
1629          else          else
1630            {            {
1631            uschar *start_bits = NULL;            uschar *start_bits = NULL;
1632            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
1633    
1634              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1635              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1636    
1637              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1638            if (start_bits == NULL)            if (start_bits == NULL)
1639              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
1640            else            else
1641              {              {
1642              int i;              int i;
# Line 1952  while (!done) Line 1975  while (!done)
1975          continue;          continue;
1976    
1977          case 'N':          case 'N':
1978          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
1979              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1980            else
1981              options |= PCRE_NOTEMPTY;
1982          continue;          continue;
1983    
1984          case 'O':          case 'O':
# Line 1975  while (!done) Line 2001  while (!done)
2001          continue;          continue;
2002    
2003          case 'P':          case 'P':
2004          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2005              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2006          continue;          continue;
2007    
2008          case 'Q':          case 'Q':
# Line 2010  while (!done) Line 2037  while (!done)
2037          show_malloc = 1;          show_malloc = 1;
2038          continue;          continue;
2039    
2040            case 'Y':
2041            options |= PCRE_NO_START_OPTIMIZE;
2042            continue;
2043    
2044          case 'Z':          case 'Z':
2045          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2046          continue;          continue;
# Line 2068  while (!done) Line 2099  while (!done)
2099          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2100        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2101        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2102          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2103    
2104        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2105    
# Line 2123  while (!done) Line 2155  while (!done)
2155            {            {
2156            int workspace[1000];            int workspace[1000];
2157            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
2158              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2159                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2160                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2161            }            }
# Line 2186  while (!done) Line 2218  while (!done)
2218        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2219          {          {
2220          int workspace[1000];          int workspace[1000];
2221          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2222            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2223            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2224          if (count == 0)          if (count == 0)
# Line 2341  while (!done) Line 2373  while (!done)
2373        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2374          {          {
2375          fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
2376  #if !defined NODFA          if (use_size_offsets > 1)
2377          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            {
2378            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, ": ");
2379              bptr + use_offsets[0]);            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2380  #endif              outfile);
2381              }
2382          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2383          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2384          }          }
# Line 2375  while (!done) Line 2408  while (!done)
2408              {              {
2409              int d;              int d;
2410              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2411              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
2412                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
2413                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
2414                        (d == 10)? PCRE_NEWLINE_LF :
2415                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2416                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
2417                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2418              }              }
# Line 2416  while (!done) Line 2451  while (!done)
2451        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2452    
2453        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2454        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2455        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2456        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2457        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2458        character. */        character. */
2459    
# Line 2427  while (!done) Line 2462  while (!done)
2462        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2463          {          {
2464          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2465          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2466          }          }
2467    
2468        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */

Legend:
Removed from v.386  
changed lines
  Added in v.461

  ViewVC Help
Powered by ViewVC 1.1.5