/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 343 by ph10, Mon Apr 21 16:30:37 2008 UTC | revision 432 by ph10, Wed Sep 2 16:02:56 2009 UTC
# Line 71  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74  #define isatty _isatty         /* This is what Windows calls them, I'm told */  #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79  #define fileno _fileno  #define fileno _fileno
80    #endif
81    
82  #else  #else
83  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
# Line 94  appropriately for an application, not fo Line 99  appropriately for an application, not fo
99  #include "pcre.h"  #include "pcre.h"
100  #include "pcre_internal.h"  #include "pcre_internal.h"
101    
102  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
103  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
104  symbols to prevent clashes. */  external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
108  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
109  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 784  printf("  -dfa     force DFA matching fo Line 790  printf("  -dfa     force DFA matching fo
790  #endif  #endif
791  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
792  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
793           "  -M       find MATCH_LIMIT minimum for each subject\n"
794         "  -m       output memory used information\n"         "  -m       output memory used information\n"
795         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
796  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 813  int main(int argc, char **argv) Line 820  int main(int argc, char **argv)
820  FILE *infile = stdin;  FILE *infile = stdin;
821  int options = 0;  int options = 0;
822  int study_options = 0;  int study_options = 0;
823    int default_find_match_limit = FALSE;
824  int op = 1;  int op = 1;
825  int timeit = 0;  int timeit = 0;
826  int timeitm = 0;  int timeitm = 0;
# Line 872  while (argc > 1 && argv[op][0] == '-') Line 880  while (argc > 1 && argv[op][0] == '-')
880    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
881    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884  #if !defined NODFA  #if !defined NODFA
885    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886  #endif  #endif
# Line 924  while (argc > 1 && argv[op][0] == '-') Line 933  while (argc > 1 && argv[op][0] == '-')
933    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
934      {      {
935      int rc;      int rc;
936        unsigned long int lrc;
937      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
938      printf("Compiled with\n");      printf("Compiled with\n");
939      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 931  while (argc > 1 && argv[op][0] == '-') Line 941  while (argc > 1 && argv[op][0] == '-')
941      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
943      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
945        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
946        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
947          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
949        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
950      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)pcre_config(PCRE_CONFIG_BSR, &rc);
# Line 942  while (argc > 1 && argv[op][0] == '-') Line 954  while (argc > 1 && argv[op][0] == '-')
954      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
955      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
957      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
959      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
961      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
963      goto EXIT;      goto EXIT;
# Line 1255  while (!done) Line 1267  while (!done)
1267          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmp((char *)pp, "JS>", 3) == 0)
1268            {            {
1269            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
1270            pp += 3;            pp += 3;
1271            }            }
1272          else          else
1273            {            {
1274            int x = check_newline(pp, outfile);            int x = check_newline(pp, outfile);
1275            if (x == 0) goto SKIP_DATA;            if (x == 0) goto SKIP_DATA;
1276            options |= x;            options |= x;
1277            while (*pp++ != '>');            while (*pp++ != '>');
1278            }            }
1279          }          }
1280        break;        break;
1281    
# Line 1293  while (!done) Line 1305  while (!done)
1305      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1306      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1307      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1308        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1309    
1310      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1311    
# Line 1313  while (!done) Line 1326  while (!done)
1326  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1327    
1328      {      {
1329        unsigned long int get_options;
1330    
1331      if (timeit > 0)      if (timeit > 0)
1332        {        {
1333        register int i;        register int i;
# Line 1356  while (!done) Line 1371  while (!done)
1371        goto CONTINUE;        goto CONTINUE;
1372        }        }
1373    
1374      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1375      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1376      returns only limited data. Check that it agrees with the newer one. */      lines. */
1377    
1378        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1379        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1380    
1381        /* Print information if required. There are now two info-returning
1382        functions. The old one has a limited interface and returns only limited
1383        data. Check that it agrees with the newer one. */
1384    
1385      if (log_store)      if (log_store)
1386        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1443  while (!done) Line 1465  while (!done)
1465        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1466        }        }
1467    
1468        /* We already have the options in get_options (see above) */
1469    
1470      if (do_showinfo)      if (do_showinfo)
1471        {        {
1472        unsigned long int get_options, all_options;        unsigned long int all_options;
1473  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1474        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1475  #endif  #endif
# Line 1454  while (!done) Line 1478  while (!done)
1478        int nameentrysize, namecount;        int nameentrysize, namecount;
1479        const uschar *nametable;        const uschar *nametable;
1480    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1481        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1482        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1483        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1707  while (!done) Line 1730  while (!done)
1730      int callout_data_set = 0;      int callout_data_set = 0;
1731      int count, c;      int count, c;
1732      int copystrings = 0;      int copystrings = 0;
1733      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
1734      int getstrings = 0;      int getstrings = 0;
1735      int getlist = 0;      int getlist = 0;
1736      int gmatched = 0;      int gmatched = 0;
# Line 1805  while (!done) Line 1828  while (!done)
1828              {              {
1829              unsigned char buff8[8];              unsigned char buff8[8];
1830              int ii, utn;              int ii, utn;
1831              utn = ord2utf8(c, buff8);              if (use_utf8)
1832              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
1833              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
1834                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1835                  c = buff8[ii];   /* Last byte */
1836                  }
1837                else
1838                 {
1839                 if (c > 255)
1840                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1841                     "UTF-8 mode is not enabled.\n"
1842                     "** Truncation will probably give the wrong result.\n", c);
1843                 }
1844              p = pt + 1;              p = pt + 1;
1845              break;              break;
1846              }              }
# Line 1960  while (!done) Line 1993  while (!done)
1993          continue;          continue;
1994    
1995          case 'P':          case 'P':
1996          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
1997              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
1998          continue;          continue;
1999    
2000          case 'Q':          case 'Q':
# Line 1995  while (!done) Line 2029  while (!done)
2029          show_malloc = 1;          show_malloc = 1;
2030          continue;          continue;
2031    
2032            case 'Y':
2033            options |= PCRE_NO_START_OPTIMIZE;
2034            continue;
2035    
2036          case 'Z':          case 'Z':
2037          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2038          continue;          continue;
# Line 2017  while (!done) Line 2055  while (!done)
2055      *q = 0;      *q = 0;
2056      len = q - dbuffer;      len = q - dbuffer;
2057    
2058        /* Move the data to the end of the buffer so that a read over the end of
2059        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2060        we are using the POSIX interface, we must include the terminating zero. */
2061    
2062    #if !defined NOPOSIX
2063        if (posix || do_posix)
2064          {
2065          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2066          bptr += buffer_size - len - 1;
2067          }
2068        else
2069    #endif
2070          {
2071          memmove(bptr + buffer_size - len, bptr, len);
2072          bptr += buffer_size - len;
2073          }
2074    
2075      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2076        {        {
2077        printf("**Match limit not relevant for DFA matching: ignored\n");        printf("**Match limit not relevant for DFA matching: ignored\n");
# Line 2036  while (!done) Line 2091  while (!done)
2091          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2092        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2093        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2094          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2095    
2096        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2097    
# Line 2309  while (!done) Line 2365  while (!done)
2365        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2366          {          {
2367          fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
2368  #if !defined NODFA          if (use_size_offsets > 1)
2369          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            {
2370            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, ": ");
2371              bptr + use_offsets[0]);            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2372  #endif              outfile);
2373              }
2374          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2375          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2376          }          }
# Line 2343  while (!done) Line 2400  while (!done)
2400              {              {
2401              int d;              int d;
2402              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2403              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
2404                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
2405                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
2406                        (d == 10)? PCRE_NEWLINE_LF :
2407                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2408                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
2409                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2410              }              }

Legend:
Removed from v.343  
changed lines
  Added in v.432

  ViewVC Help
Powered by ViewVC 1.1.5