/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 804 by ph10, Mon Dec 12 16:23:37 2011 UTC revision 805 by ph10, Wed Dec 14 16:49:20 2011 UTC
# Line 107  appropriately for an application, not fo Line 107  appropriately for an application, not fo
107  #include "pcre.h"  #include "pcre.h"
108  #include "pcre_internal.h"  #include "pcre_internal.h"
109    
110    /* The pcre_printint() function, which prints the internal form of a compiled
111    regex, is held in a separate file so that (a) it can be compiled in either
112    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
113    when that is compiled in debug mode. */
114    
115    #ifdef SUPPORT_PCRE8
116    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
117    #endif
118    #ifdef SUPPORT_PCRE16
119    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
120    #endif
121    
122  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
123  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
124  external symbols to prevent clashes. */  external symbols to prevent clashes. */
# Line 125  external symbols to prevent clashes. */ Line 137  external symbols to prevent clashes. */
137    
138  #include "pcre_tables.c"  #include "pcre_tables.c"
139    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
140  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
141  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
142  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
143  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
144  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
145    
146    #ifdef EBCDIC
147    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
148    #else
149    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
150    #endif
151    
152  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
153    
# Line 193  static size_t gotten_store; Line 203  static size_t gotten_store;
203  static size_t first_gotten_store = 0;  static size_t first_gotten_store = 0;
204  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
205    
206    static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
207    
208  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
209    
210  static int buffer_size = 50000;  static int buffer_size = 50000;
# Line 200  static pcre_uint8 *buffer = NULL; Line 212  static pcre_uint8 *buffer = NULL;
212  static pcre_uint8 *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
213  static pcre_uint8 *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
214    
215    #ifdef SUPPORT_PCRE16
216    static int buffer16_size = 0;
217    static pcre_uint16 *buffer16 = NULL;
218    #endif
219    
220  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
221    
222  static const char *errtexts[] = {  static const char *errtexts[] = {
# Line 230  static const char *errtexts[] = { Line 247  static const char *errtexts[] = {
247    "bad offset value",    "bad offset value",
248    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8 is handled specially */
249    "nested recursion at the same subject position",    "nested recursion at the same subject position",
250    "JIT stack limit reached"    "JIT stack limit reached",
251      "pattern compiled in wrong mode (8-bit/16-bit error)"
252  };  };
253    
254    
# Line 592  return (pcre_jit_stack *)arg; Line 610  return (pcre_jit_stack *)arg;
610  }  }
611    
612    
613    #ifdef SUPPORT_PCRE16
614    /*************************************************
615    *         Convert a string to 16-bit             *
616    *************************************************/
617    
618    /* The result is always left in buffer16. */
619    
620    static int
621    to16(unsigned char *p, int utf)
622    {
623    pcre_uint16 *pp;
624    int len = (int)strlen((char *)p) + 1;
625    
626    if (buffer16_size < 2*len)
627      {
628      if (buffer16 != NULL) free(buffer16);
629      buffer16_size = 2*len;
630      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
631      if (buffer16 == NULL)
632        {
633        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
634        exit(1);
635        }
636      }
637    
638    pp = buffer16;
639    
640    if (!utf)
641      {
642      while (*p != 0) *pp++ = *p++;
643      *pp++ = 0;
644      }
645    
646    else
647      {
648    fprintf(stderr, "pcretest: no support yet for UTF-16\n");
649    exit(1);
650      }
651    
652    return pp - buffer16;
653    }
654    #endif
655    
656    
657  /*************************************************  /*************************************************
658  *        Read or extend an input line            *  *        Read or extend an input line            *
659  *************************************************/  *************************************************/
# Line 1046  free(block); Line 1108  free(block);
1108  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1109  {  {
1110  int rc;  int rc;
1111  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
1112    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1113  }  }
1114    
# Line 1191  printf("If input is a terminal, readline Line 1253  printf("If input is a terminal, readline
1253  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
1254  #endif  #endif
1255  printf("\nOptions:\n");  printf("\nOptions:\n");
1256    #ifdef SUPPORT_PCRE16
1257    printf("  -16      use 16-bit interface\n");
1258    #endif
1259  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1260  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1261  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 1248  int posix = 0; Line 1313  int posix = 0;
1313  int debug = 0;  int debug = 0;
1314  int done = 0;  int done = 0;
1315  int all_use_dfa = 0;  int all_use_dfa = 0;
1316    int use_pcre16 = 0;
1317  int yield = 0;  int yield = 0;
1318  int stack_size;  int stack_size;
1319    
1320  pcre_jit_stack *jit_stack = NULL;  pcre_jit_stack *jit_stack = NULL;
1321    
   
1322  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1323  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1324    
# Line 1263  pcre_uchar getnames[1024]; Line 1328  pcre_uchar getnames[1024];
1328  pcre_uchar *copynamesptr;  pcre_uchar *copynamesptr;
1329  pcre_uchar *getnamesptr;  pcre_uchar *getnamesptr;
1330    
1331  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that valgrind will check their misuse when
1332  when I am debugging. They grow automatically when very long lines are read. */  debugging. They grow automatically when very long lines are read. The 16-bit
1333    buffer (buffer16) is obtained only if needed. */
1334    
1335  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
1336  dbuffer = (pcre_uint8 *)malloc(buffer_size);  dbuffer = (pcre_uint8 *)malloc(buffer_size);
# Line 1289  while (argc > 1 && argv[op][0] == '-') Line 1355  while (argc > 1 && argv[op][0] == '-')
1355    {    {
1356    unsigned char *endptr;    unsigned char *endptr;
1357    
1358    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1359      else if (strcmp(argv[op], "-m") == 0) showstore = 1;
1360    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1361    else if (strcmp(argv[op], "-s+") == 0)    else if (strcmp(argv[op], "-s+") == 0)
1362      {      {
# Line 1356  while (argc > 1 && argv[op][0] == '-') Line 1423  while (argc > 1 && argv[op][0] == '-')
1423      unsigned long int lrc;      unsigned long int lrc;
1424      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1425      printf("Compiled with\n");      printf("Compiled with\n");
1426    
1427    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */
1428    
1429    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1430        printf("  8-bit and 16-bit support\n");
1431        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1432        printf("  %sUTF-8 support\n", rc? "" : "No ");
1433        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1434        printf("  %sUTF-16 support\n", rc? "" : "No ");
1435    #elif defined SUPPORT_PCRE8
1436        printf("  8-bit support only\n");
1437      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1438      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1439    #else
1440        printf("  16-bit support only\n");
1441        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1442        printf("  %sUTF-16 support\n", rc? "" : "No ");
1443    #endif
1444    
1445      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1446      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1447      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)pcre_config(PCRE_CONFIG_JIT, &rc);
# Line 1404  while (argc > 1 && argv[op][0] == '-') Line 1488  while (argc > 1 && argv[op][0] == '-')
1488    argc--;    argc--;
1489    }    }
1490    
1491    /* Select which fullinfo function to use. */
1492    
1493    fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
1494    
1495  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1496    
1497  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
# Line 1442  if (argc > 2) Line 1530  if (argc > 2)
1530    
1531  /* Set alternative malloc function */  /* Set alternative malloc function */
1532    
1533    #ifdef SUPPORT_PCRE8
1534  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1535  pcre_free = new_free;  pcre_free = new_free;
1536  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1537  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1538    #endif
1539    
1540    #ifdef SUPPORT_PCRE16
1541    pcre16_malloc = new_malloc;
1542    pcre16_free = new_free;
1543    pcre16_stack_malloc = stack_malloc;
1544    pcre16_stack_free = stack_free;
1545    #endif
1546    
1547  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1548    
# Line 1764  while (!done) Line 1861  while (!done)
1861    
1862    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1863    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1864    local character tables. */    local character tables. Neither does it have 16-bit support. */
1865    
1866  #if !defined NOPOSIX  #if !defined NOPOSIX
1867    if (posix || do_posix)    if (posix || do_posix)
# Line 1801  while (!done) Line 1898  while (!done)
1898    
1899      {      {
1900      unsigned long int get_options;      unsigned long int get_options;
1901    
1902        /* In 16-bit mode, convert the input. The space needed for a non-UTF string
1903        is exactly double the 8-bit size. For a UTF-8 string, the size needed for
1904        UTF-16 is no more than double, because a code point up to 0xffff uses 1 to
1905        3 bytes in UTF-8 and exactly 2 bytes in UTF-16, while higher code points use
1906        4 bytes in UTF-8 and 4 bytes (a surrogate pair) in UTF-16. */
1907    
1908    #ifdef SUPPORT_PCRE16
1909        if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
1910    #endif
1911    
1912        /* Compile many times when timing */
1913    
1914      if (timeit > 0)      if (timeit > 0)
1915        {        {
# Line 1809  while (!done) Line 1918  while (!done)
1918        clock_t start_time = clock();        clock_t start_time = clock();
1919        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
1920          {          {
1921          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  #ifdef SUPPORT_PCRE16
1922            if (use_pcre16)
1923              re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1924            else
1925    #endif
1926              re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1927          if (re != NULL) free(re);          if (re != NULL) free(re);
1928          }          }
1929        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1819  while (!done) Line 1933  while (!done)
1933        }        }
1934    
1935      first_gotten_store = 0;      first_gotten_store = 0;
1936      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  
1937    #ifdef SUPPORT_PCRE16
1938        if (use_pcre16)
1939          re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1940        else
1941    #endif
1942          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1943    
1944      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1945      if non-interactive. */      if non-interactive. */
# Line 1880  while (!done) Line 2000  while (!done)
2000          clock_t time_taken;          clock_t time_taken;
2001          clock_t start_time = clock();          clock_t start_time = clock();
2002          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2003            extra = pcre_study(re, study_options | force_study_options, &error);            {
2004              if (use_pcre16)
2005                extra = pcre16_study(re, study_options | force_study_options, &error);
2006              else
2007                extra = pcre_study(re, study_options | force_study_options, &error);
2008              }
2009          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2010          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL) pcre_free_study(extra);
2011          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2012            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2013              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2014          }          }
2015        extra = pcre_study(re, study_options | force_study_options, &error);        if (use_pcre16)
2016            extra = pcre16_study(re, study_options | force_study_options, &error);
2017          else
2018            extra = pcre_study(re, study_options | force_study_options, &error);
2019        if (error != NULL)        if (error != NULL)
2020          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2021        else if (extra != NULL)        else if (extra != NULL)
# Line 1953  while (!done) Line 2081  while (!done)
2081          }          }
2082        }        }
2083    
2084      /* Extract information from the compiled data if required. There are now      /* Extract and display information from the compiled data if required. */
     two info-returning functions. The old one has a limited interface and  
     returns only limited data. Check that it agrees with the newer one. */  
2085    
2086      SHOW_INFO:      SHOW_INFO:
2087    
2088      if (do_debug)      if (do_debug)
2089        {        {
2090        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2091        pcre_printint(re, outfile, debug_lengths);        if (use_pcre16)
2092            pcre16_printint(re, outfile, debug_lengths);
2093          else
2094            pcre_printint(re, outfile, debug_lengths);
2095        }        }
2096    
2097      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1990  while (!done) Line 2119  while (!done)
2119        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2120        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2121    
2122          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2123          that it gives the same results as the new function. */
2124    
2125  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2126        old_count = pcre_info(re, &old_options, &old_first_char);        if (!use_pcre16)
2127        if (count < 0) fprintf(outfile,          {
2128          "Error %d from pcre_info()\n", count);          old_count = pcre_info(re, &old_options, &old_first_char);
2129        else          if (count < 0) fprintf(outfile,
2130          {            "Error %d from pcre_info()\n", count);
2131          if (old_count != count) fprintf(outfile,          else
2132            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,            {
2133              old_count);            if (old_count != count) fprintf(outfile,
2134                "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2135          if (old_first_char != first_char) fprintf(outfile,                old_count);
2136            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
2137              first_char, old_first_char);            if (old_first_char != first_char) fprintf(outfile,
2138                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2139          if (old_options != (int)get_options) fprintf(outfile,                first_char, old_first_char);
2140            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
2141              get_options, old_options);            if (old_options != (int)get_options) fprintf(outfile,
2142          }              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2143                  get_options, old_options);
2144              }
2145            }
2146  #endif  #endif
2147    
2148        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
# Line 2712  while (!done) Line 2847  while (!done)
2847          register int i;          register int i;
2848          clock_t time_taken;          clock_t time_taken;
2849          clock_t start_time = clock();          clock_t start_time = clock();
2850    
2851    #ifdef SUPPORT_PCRE16
2852            if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
2853    #endif
2854    
2855    
2856  #if !defined NODFA  #if !defined NODFA
2857          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
# Line 2798  while (!done) Line 2938  while (!done)
2938    
2939        else        else
2940          {          {
2941          count = pcre_exec(re, extra, (char *)bptr, len,          if (use_pcre16)
2942            start_offset, options | g_notempty, use_offsets, use_size_offsets);            count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
2943                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2944            else
2945              count = pcre_exec(re, extra, (char *)bptr, len,
2946                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2947          if (count == 0)          if (count == 0)
2948            {            {
2949            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 3124  free(dbuffer); Line 3268  free(dbuffer);
3268  free(pbuffer);  free(pbuffer);
3269  free(offsets);  free(offsets);
3270    
3271    #ifdef SUPPORT_PCRE16
3272    if (buffer16 != NULL) free(buffer16);
3273    #endif
3274    
3275  return yield;  return yield;
3276  }  }
3277    

Legend:
Removed from v.804  
changed lines
  Added in v.805

  ViewVC Help
Powered by ViewVC 1.1.5