/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 79 by nigel, Sat Feb 24 21:40:52 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 46  POSSIBILITY OF SUCH DAMAGE.
46    
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49  /* We need the internal info for displaying the results of pcre_study() and  /* We include pcre_internal.h because we need the internal info for displaying
50  other internal data; pcretest also uses some of the fixed tables, and generally  the results of pcre_study() and we also need to know about the internal
51  has "inside information" compared to a program that strictly follows the PCRE  macros, structures, and other internal data values; pcretest has "inside
52  API. */  information" compared to a program that strictly follows the PCRE API. */
53    
54  #include "pcre_internal.h"  #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60    #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71    /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75    #include "pcre_printint.src"
76    
77    
78  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
79  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 68  to the DFA matcher (NODFA), and without Line 89  to the DFA matcher (NODFA), and without
89  function (define NOINFOCHECK). */  function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
96  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 83  function (define NOINFOCHECK). */ Line 106  function (define NOINFOCHECK). */
106  #define DBUFFER_SIZE BUFFER_SIZE  #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
113  static int callout_count;  static int callout_count;
# Line 162  if (i == 0 || i == 6) return 0;        / Line 187  if (i == 0 || i == 6) return 0;        /
187  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
188    
189  s = 6*i;  s = 6*i;
190  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
191    
192  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
193    {    {
# Line 174  for (j = 0; j < i; j++) Line 199  for (j = 0; j < i; j++)
199    
200  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
201    
202  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
203    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
204  if (j != i) return -(i+1);  if (j != i) return -(i+1);
205    
206  /* Valid value */  /* Valid value */
# Line 189  return i+1; Line 214  return i+1;
214    
215    
216  /*************************************************  /*************************************************
217    *       Convert character value to UTF-8         *
218    *************************************************/
219    
220    /* This function takes an integer value in the range 0 - 0x7fffffff
221    and encodes it as a UTF-8 character in 1 to 6 bytes.
222    
223    Arguments:
224      cvalue     the character value
225      buffer     pointer to buffer for result - at least 6 bytes long
226    
227    Returns:     number of bytes placed in the buffer
228    */
229    
230    static int
231    ord2utf8(int cvalue, uschar *buffer)
232    {
233    register int i, j;
234    for (i = 0; i < utf8_table1_size; i++)
235      if (cvalue <= utf8_table1[i]) break;
236    buffer += i;
237    for (j = i; j > 0; j--)
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);
240     cvalue >>= 6;
241     }
242    *buffer = utf8_table2[i] | cvalue;
243    return i + 1;
244    }
245    
246    
247    
248    /*************************************************
249  *             Print character string             *  *             Print character string             *
250  *************************************************/  *************************************************/
251    
# Line 198  chars without printing. */ Line 255  chars without printing. */
255    
256  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
257  {  {
258  int c;  int c = 0;
259  int yield = 0;  int yield = 0;
260    
261  while (length-- > 0)  while (length-- > 0)
# Line 418  return ((value & 0x000000ff) << 24) | Line 475  return ((value & 0x000000ff) << 24) |
475    
476    
477  /*************************************************  /*************************************************
478    *        Check match or recursion limit          *
479    *************************************************/
480    
481    static int
482    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
483      int start_offset, int options, int *use_offsets, int use_size_offsets,
484      int flag, unsigned long int *limit, int errnumber, const char *msg)
485    {
486    int count;
487    int min = 0;
488    int mid = 64;
489    int max = -1;
490    
491    extra->flags |= flag;
492    
493    for (;;)
494      {
495      *limit = mid;
496    
497      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
498        use_offsets, use_size_offsets);
499    
500      if (count == errnumber)
501        {
502        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
503        min = mid;
504        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
505        }
506    
507      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
508                             count == PCRE_ERROR_PARTIAL)
509        {
510        if (mid == min + 1)
511          {
512          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
513          break;
514          }
515        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
516        max = mid;
517        mid = (min + mid)/2;
518        }
519      else break;    /* Some other error */
520      }
521    
522    extra->flags &= ~flag;
523    return count;
524    }
525    
526    
527    
528    /*************************************************
529  *                Main Program                    *  *                Main Program                    *
530  *************************************************/  *************************************************/
531    
# Line 434  int op = 1; Line 542  int op = 1;
542  int timeit = 0;  int timeit = 0;
543  int showinfo = 0;  int showinfo = 0;
544  int showstore = 0;  int showstore = 0;
545    int quiet = 0;
546  int size_offsets = 45;  int size_offsets = 45;
547  int size_offsets_max;  int size_offsets_max;
548  int *offsets = NULL;  int *offsets = NULL;
# Line 474  while (argc > 1 && argv[op][0] == '-') Line 583  while (argc > 1 && argv[op][0] == '-')
583    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
584      showstore = 1;      showstore = 1;
585    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
586      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
587    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
588    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
589  #if !defined NODFA  #if !defined NODFA
# Line 506  while (argc > 1 && argv[op][0] == '-') Line 616  while (argc > 1 && argv[op][0] == '-')
616      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
617      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
618      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
619        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
620        printf("  Default recursion depth limit = %d\n", rc);
621      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
622      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
623      exit(0);      exit(0);
# Line 577  pcre_free = new_free; Line 689  pcre_free = new_free;
689  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
690  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
691    
692  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
693    
694  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
695    
696  /* Main loop */  /* Main loop */
697    
# Line 839  while (!done) Line 951  while (!done)
951      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
952      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
953      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
954        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
955        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
956    
957      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
958    
959      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 988  while (!done) Line 1103  while (!done)
1103        if (do_debug)        if (do_debug)
1104          {          {
1105          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
1106          _pcre_printint(re, outfile);          pcre_printint(re, outfile);
1107          }          }
1108    
1109        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 1054  while (!done) Line 1169  while (!done)
1169          fprintf(outfile, "Partial matching not supported\n");          fprintf(outfile, "Partial matching not supported\n");
1170    
1171        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1172          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
1173            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1174            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1175            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1064  while (!done) Line 1179  while (!done)
1179            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1180            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1181            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1182              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1183            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1184            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1185    
# Line 1209  while (!done) Line 1325  while (!done)
1325    
1326    for (;;)    for (;;)
1327      {      {
1328      unsigned char *q;      uschar *q;
1329      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1330      int *use_offsets = offsets;      int *use_offsets = offsets;
1331      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1332      int callout_data = 0;      int callout_data = 0;
# Line 1290  while (!done) Line 1406  while (!done)
1406              {              {
1407              unsigned char buff8[8];              unsigned char buff8[8];
1408              int ii, utn;              int ii, utn;
1409              utn = _pcre_ord2utf8(c, buff8);              utn = ord2utf8(c, buff8);
1410              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1411              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1412              p = pt + 1;              p = pt + 1;
# Line 1498  while (!done) Line 1614  while (!done)
1614          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1615          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1616          }          }
1617          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1618                  != 0)
1619            {
1620            fprintf(outfile, "Matched with REG_NOSUB\n");
1621            }
1622        else        else
1623          {          {
1624          size_t i;          size_t i;
# Line 1558  while (!done) Line 1679  while (!done)
1679          }          }
1680    
1681        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
1682        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
1683          for the recursion limit. */
1684    
1685        if (find_match_limit)        if (find_match_limit)
1686          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
1687          if (extra == NULL)          if (extra == NULL)
1688            {            {
1689            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1690            extra->flags = 0;            extra->flags = 0;
1691            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
1692    
1693          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          count = check_match_limit(re, extra, bptr, len, start_offset,
1694              options|g_notempty, use_offsets, use_size_offsets,
1695              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1696              PCRE_ERROR_MATCHLIMIT, "match()");
1697    
1698            count = check_match_limit(re, extra, bptr, len, start_offset,
1699              options|g_notempty, use_offsets, use_size_offsets,
1700              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1701              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1702          }          }
1703    
1704        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */

Legend:
Removed from v.79  
changed lines
  Added in v.87

  ViewVC Help
Powered by ViewVC 1.1.5