/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 46  POSSIBILITY OF SUCH DAMAGE.
46    
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49  /* We need the internal info for displaying the results of pcre_study() and  /* We include pcre_internal.h because we need the internal info for displaying
50  other internal data; pcretest also uses some of the fixed tables, and generally  the results of pcre_study() and we also need to know about the internal
51  has "inside information" compared to a program that strictly follows the PCRE  macros, structures, and other internal data values; pcretest has "inside
52  API. */  information" compared to a program that strictly follows the PCRE API. */
53    
54  #include "pcre_internal.h"  #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60    #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71    /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75    #include "pcre_printint.src"
76    
77    
78  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
79  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 83  Makefile. */
83  #include "pcreposix.h"  #include "pcreposix.h"
84  #endif  #endif
85    
86    /* It is also possible, for the benefit of the version imported into Exim, to
87    build pcretest without support for UTF8 (define NOUTF8), without the interface
88    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89    function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
96  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 77  Makefile. */ Line 106  Makefile. */
106  #define DBUFFER_SIZE BUFFER_SIZE  #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
113  static int callout_count;  static int callout_count;
# Line 135  Returns:   >  0 => the number of bytes c Line 166  Returns:   >  0 => the number of bytes c
166             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
167  */  */
168    
169    #if !defined NOUTF8
170    
171  static int  static int
172  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
173  {  {
# Line 154  if (i == 0 || i == 6) return 0;        / Line 187  if (i == 0 || i == 6) return 0;        /
187  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
188    
189  s = 6*i;  s = 6*i;
190  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
191    
192  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
193    {    {
# Line 166  for (j = 0; j < i; j++) Line 199  for (j = 0; j < i; j++)
199    
200  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
201    
202  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
203    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
204  if (j != i) return -(i+1);  if (j != i) return -(i+1);
205    
206  /* Valid value */  /* Valid value */
# Line 176  if (j != i) return -(i+1); Line 209  if (j != i) return -(i+1);
209  return i+1;  return i+1;
210  }  }
211    
212    #endif
213    
214    
215    
216    /*************************************************
217    *       Convert character value to UTF-8         *
218    *************************************************/
219    
220    /* This function takes an integer value in the range 0 - 0x7fffffff
221    and encodes it as a UTF-8 character in 1 to 6 bytes.
222    
223    Arguments:
224      cvalue     the character value
225      buffer     pointer to buffer for result - at least 6 bytes long
226    
227    Returns:     number of bytes placed in the buffer
228    */
229    
230    static int
231    ord2utf8(int cvalue, uschar *buffer)
232    {
233    register int i, j;
234    for (i = 0; i < utf8_table1_size; i++)   /* find the first table entry that is >= cvalue */
235      if (cvalue <= utf8_table1[i]) break;
236    buffer += i;   /* step forward to the last byte that will be written */
237    for (j = i; j > 0; j--)   /* emit i trailing bytes, working backwards from the last */
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);   /* low six bits of the value, tagged with 0x80 */
240     cvalue >>= 6;   /* drop the six bits just stored */
241     }
242    *buffer = utf8_table2[i] | cvalue;   /* first byte: remaining bits OR-ed with utf8_table2[i] */
243    return i + 1;   /* one leading byte plus i trailing bytes */
244    }
245    
246    
247    
248  /*************************************************  /*************************************************
# Line 188  chars without printing. */ Line 255  chars without printing. */
255    
256  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
257  {  {
258  int c;  int c = 0;
259  int yield = 0;  int yield = 0;
260    
261  while (length-- > 0)  while (length-- > 0)
262    {    {
263    #if !defined NOUTF8
264    if (use_utf8)    if (use_utf8)
265      {      {
266      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 215  while (length-- > 0) Line 283  while (length-- > 0)
283        continue;        continue;
284        }        }
285      }      }
286    #endif
287    
288     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
289    
# Line 464  while (argc > 1 && argv[op][0] == '-') Line 533  while (argc > 1 && argv[op][0] == '-')
533    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
534    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
535    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
536    #if !defined NODFA
537    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
538    #endif
539    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
540        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
541          *endptr == 0))          *endptr == 0))
# Line 502  while (argc > 1 && argv[op][0] == '-') Line 573  while (argc > 1 && argv[op][0] == '-')
573      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
574      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
575      printf("  -d     debug: show compiled code; implies -i\n");      printf("  -d     debug: show compiled code; implies -i\n");
576    #if !defined NODFA
577      printf("  -dfa   force DFA matching for all subjects\n");      printf("  -dfa   force DFA matching for all subjects\n");
578    #endif
579      printf("  -i     show information about compiled pattern\n"      printf("  -i     show information about compiled pattern\n"
580             "  -m     output memory used information\n"             "  -m     output memory used information\n"
581             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
# Line 962  while (!done) Line 1035  while (!done)
1035      if (do_showinfo)      if (do_showinfo)
1036        {        {
1037        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1038    #if !defined NOINFOCHECK
1039        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1040    #endif
1041        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1042        int nameentrysize, namecount;        int nameentrysize, namecount;
1043        const uschar *nametable;        const uschar *nametable;
# Line 970  while (!done) Line 1045  while (!done)
1045        if (do_debug)        if (do_debug)
1046          {          {
1047          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
1048          _pcre_printint(re, outfile);          pcre_printint(re, outfile);
1049          }          }
1050    
1051        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 983  while (!done) Line 1058  while (!done)
1058        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1059        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1060    
1061    #if !defined NOINFOCHECK
1062        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1063        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1064          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1000  while (!done) Line 1076  while (!done)
1076            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1077              get_options, old_options);              get_options, old_options);
1078          }          }
1079    #endif
1080    
1081        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1082          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1259  while (!done) Line 1336  while (!done)
1336    
1337          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1338    
1339    #if !defined NOUTF8
1340          if (*p == '{')          if (*p == '{')
1341            {            {
1342            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1269  while (!done) Line 1347  while (!done)
1347              {              {
1348              unsigned char buff8[8];              unsigned char buff8[8];
1349              int ii, utn;              int ii, utn;
1350              utn = _pcre_ord2utf8(c, buff8);              utn = ord2utf8(c, buff8);
1351              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1352              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1353              p = pt + 1;              p = pt + 1;
# Line 1277  while (!done) Line 1355  while (!done)
1355              }              }
1356            /* Not correct form; fall through */            /* Not correct form; fall through */
1357            }            }
1358    #endif
1359    
1360          /* Ordinary \x */          /* Ordinary \x */
1361    
# Line 1357  while (!done) Line 1436  while (!done)
1436            }            }
1437          continue;          continue;
1438    
1439    #if !defined NODFA
1440          case 'D':          case 'D':
1441    #if !defined NOPOSIX
1442          if (posix || do_posix)          if (posix || do_posix)
1443            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1444          else          else
1445    #endif
1446            use_dfa = 1;            use_dfa = 1;
1447          continue;          continue;
1448    
1449          case 'F':          case 'F':
1450          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
1451          continue;          continue;
1452    #endif
1453    
1454          case 'G':          case 'G':
1455          if (isdigit(*p))          if (isdigit(*p))
# Line 1422  while (!done) Line 1505  while (!done)
1505          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1506          continue;          continue;
1507    
1508    #if !defined NODFA
1509          case 'R':          case 'R':
1510          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
1511          continue;          continue;
1512    #endif
1513    
1514          case 'S':          case 'S':
1515          show_malloc = 1;          show_malloc = 1;
# Line 1507  while (!done) Line 1592  while (!done)
1592          clock_t time_taken;          clock_t time_taken;
1593          clock_t start_time = clock();          clock_t start_time = clock();
1594    
1595    #if !defined NODFA
1596          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
1597            {            {
1598            int workspace[1000];            int workspace[1000];
# Line 1516  while (!done) Line 1602  while (!done)
1602                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
1603            }            }
1604          else          else
1605    #endif
1606    
1607          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1608            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
# Line 1591  while (!done) Line 1678  while (!done)
1678        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1679        value of match_limit. */        value of match_limit. */
1680    
1681    #if !defined NODFA
1682        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
1683          {          {
1684          int workspace[1000];          int workspace[1000];
# Line 1603  while (!done) Line 1691  while (!done)
1691            count = use_size_offsets/2;            count = use_size_offsets/2;
1692            }            }
1693          }          }
1694    #endif
1695    
1696        else        else
1697          {          {
# Line 1699  while (!done) Line 1788  while (!done)
1788        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
1789          {          {
1790          fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
1791    #if !defined NODFA
1792          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1793            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1794              bptr + use_offsets[0]);              bptr + use_offsets[0]);
1795    #endif
1796          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1797          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
1798          }          }

Legend:
Removed from v.77  
changed lines
  Added in v.85

  ViewVC Help
Powered by ViewVC 1.1.5