/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC, compared with revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC
# Line 49  POSSIBILITY OF SUCH DAMAGE. Line 49  POSSIBILITY OF SUCH DAMAGE.
49  #include <errno.h>  #include <errno.h>
50    
51  #ifdef SUPPORT_LIBREADLINE  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53  #include <unistd.h>  #include <unistd.h>
54    #endif
55  #include <readline/readline.h>  #include <readline/readline.h>
56  #include <readline/history.h>  #include <readline/history.h>
57  #endif  #endif
# Line 69  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82  #else  #else
83  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
84  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 89  appropriately for an application, not fo Line 99  appropriately for an application, not fo
99  #include "pcre.h"  #include "pcre.h"
100  #include "pcre_internal.h"  #include "pcre_internal.h"
101    
102  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
103  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
104  symbols to prevent clashes. */  external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
108  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
109  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 779  printf("  -dfa     force DFA matching fo Line 790  printf("  -dfa     force DFA matching fo
790  #endif  #endif
791  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
792  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
793           "  -M       find MATCH_LIMIT minimum for each subject\n"
794         "  -m       output memory used information\n"         "  -m       output memory used information\n"
795         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
796  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 808  int main(int argc, char **argv) Line 820  int main(int argc, char **argv)
820  FILE *infile = stdin;  FILE *infile = stdin;
821  int options = 0;  int options = 0;
822  int study_options = 0;  int study_options = 0;
823    int default_find_match_limit = FALSE;
824  int op = 1;  int op = 1;
825  int timeit = 0;  int timeit = 0;
826  int timeitm = 0;  int timeitm = 0;
# Line 867  while (argc > 1 && argv[op][0] == '-') Line 880  while (argc > 1 && argv[op][0] == '-')
880    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
881    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884  #if !defined NODFA  #if !defined NODFA
885    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886  #endif  #endif
# Line 919  while (argc > 1 && argv[op][0] == '-') Line 933  while (argc > 1 && argv[op][0] == '-')
933    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
934      {      {
935      int rc;      int rc;
936        unsigned long int lrc;
937      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
938      printf("Compiled with\n");      printf("Compiled with\n");
939      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 926  while (argc > 1 && argv[op][0] == '-') Line 941  while (argc > 1 && argv[op][0] == '-')
941      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
943      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
945        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
946        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
947          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
949        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
950      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)pcre_config(PCRE_CONFIG_BSR, &rc);
# Line 937  while (argc > 1 && argv[op][0] == '-') Line 954  while (argc > 1 && argv[op][0] == '-')
954      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
955      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
957      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
959      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
961      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
963      goto EXIT;      goto EXIT;
# Line 1250  while (!done) Line 1267  while (!done)
1267          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmp((char *)pp, "JS>", 3) == 0)
1268            {            {
1269            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
1270            pp += 3;            pp += 3;
1271            }            }
1272          else          else
1273            {            {
1274            int x = check_newline(pp, outfile);            int x = check_newline(pp, outfile);
1275            if (x == 0) goto SKIP_DATA;            if (x == 0) goto SKIP_DATA;
1276            options |= x;            options |= x;
1277            while (*pp++ != '>');            while (*pp++ != '>');
1278            }            }
1279          }          }
1280        break;        break;
1281    
# Line 1308  while (!done) Line 1325  while (!done)
1325  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1326    
1327      {      {
1328        unsigned long int get_options;
1329    
1330      if (timeit > 0)      if (timeit > 0)
1331        {        {
1332        register int i;        register int i;
# Line 1351  while (!done) Line 1370  while (!done)
1370        goto CONTINUE;        goto CONTINUE;
1371        }        }
1372    
1373      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1374      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1375      returns only limited data. Check that it agrees with the newer one. */      lines. */
1376    
1377        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1378        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1379    
1380        /* Print information if required. There are now two info-returning
1381        functions. The old one has a limited interface and returns only limited
1382        data. Check that it agrees with the newer one. */
1383    
1384      if (log_store)      if (log_store)
1385        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1438  while (!done) Line 1464  while (!done)
1464        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
1465        }        }
1466    
1467        /* We already have the options in get_options (see above) */
1468    
1469      if (do_showinfo)      if (do_showinfo)
1470        {        {
1471        unsigned long int get_options, all_options;        unsigned long int all_options;
1472  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1473        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1474  #endif  #endif
# Line 1449  while (!done) Line 1477  while (!done)
1477        int nameentrysize, namecount;        int nameentrysize, namecount;
1478        const uschar *nametable;        const uschar *nametable;
1479    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1480        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1481        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1482        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1702  while (!done) Line 1729  while (!done)
1729      int callout_data_set = 0;      int callout_data_set = 0;
1730      int count, c;      int count, c;
1731      int copystrings = 0;      int copystrings = 0;
1732      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
1733      int getstrings = 0;      int getstrings = 0;
1734      int getlist = 0;      int getlist = 0;
1735      int gmatched = 0;      int gmatched = 0;
# Line 1800  while (!done) Line 1827  while (!done)
1827              {              {
1828              unsigned char buff8[8];              unsigned char buff8[8];
1829              int ii, utn;              int ii, utn;
1830              utn = ord2utf8(c, buff8);              if (use_utf8)
1831              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
1832              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
1833                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1834                  c = buff8[ii];   /* Last byte */
1835                  }
1836                else
1837                 {
1838                 if (c > 255)
1839                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1840                     "UTF-8 mode is not enabled.\n"
1841                     "** Truncation will probably give the wrong result.\n", c);
1842                 }
1843              p = pt + 1;              p = pt + 1;
1844              break;              break;
1845              }              }
# Line 1955  while (!done) Line 1992  while (!done)
1992          continue;          continue;
1993    
1994          case 'P':          case 'P':
1995          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
1996              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
1997          continue;          continue;
1998    
1999          case 'Q':          case 'Q':
# Line 1990  while (!done) Line 2028  while (!done)
2028          show_malloc = 1;          show_malloc = 1;
2029          continue;          continue;
2030    
2031            case 'Y':
2032            options |= PCRE_NO_START_OPTIMIZE;
2033            continue;
2034    
2035          case 'Z':          case 'Z':
2036          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2037          continue;          continue;
# Line 2012  while (!done) Line 2054  while (!done)
2054      *q = 0;      *q = 0;
2055      len = q - dbuffer;      len = q - dbuffer;
2056    
2057        /* Move the data to the end of the buffer so that a read over the end of
2058        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2059        we are using the POSIX interface, we must include the terminating zero. */
2060    
2061    #if !defined NOPOSIX
2062        if (posix || do_posix)
2063          {
2064          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2065          bptr += buffer_size - len - 1;
2066          }
2067        else
2068    #endif
2069          {
2070          memmove(bptr + buffer_size - len, bptr, len);
2071          bptr += buffer_size - len;
2072          }
2073    
2074      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2075        {        {
2076        printf("**Match limit not relevant for DFA matching: ignored\n");        printf("**Match limit not relevant for DFA matching: ignored\n");
# Line 2031  while (!done) Line 2090  while (!done)
2090          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2091        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2092        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2093          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2094    
2095        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2096    
# Line 2304  while (!done) Line 2364  while (!done)
2364        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2365          {          {
2366          fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
2367  #if !defined NODFA          if (use_size_offsets > 1)
2368          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            {
2369            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, ": ");
2370              bptr + use_offsets[0]);            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2371  #endif              outfile);
2372              }
2373          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2374          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2375          }          }
# Line 2338  while (!done) Line 2399  while (!done)
2399              {              {
2400              int d;              int d;
2401              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2402              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
2403                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
2404                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
2405                        (d == 10)? PCRE_NEWLINE_LF :
2406                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2407                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
2408                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2409              }              }

Legend:
Removed from v.336  
changed lines
  Added in v.427

  ViewVC Help
Powered by ViewVC 1.1.5