/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 541 by ph10, Mon Jun 14 15:19:33 2010 UTC revision 606 by ph10, Mon Jun 6 17:46:22 2011 UTC
# Line 79  input mode under Windows. */ Line 79  input mode under Windows. */
79  #define fileno _fileno  #define fileno _fileno
80  #endif  #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  #else  #else
91  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
92  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 189  static uschar *buffer = NULL; Line 197  static uschar *buffer = NULL;
197  static uschar *dbuffer = NULL;  static uschar *dbuffer = NULL;
198  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
199    
200    /* Textual explanations for runtime error codes */
201    
202    static const char *errtexts[] = {
203      NULL,  /* 0 is no error */
204      NULL,  /* NOMATCH is handled specially */
205      "NULL argument passed",
206      "bad option value",
207      "magic number missing",
208      "unknown opcode - pattern overwritten?",
209      "no more memory",
210      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
211      "match limit exceeded",
212      "callout error code",
213      NULL,  /* BADUTF8 is handled specially */
214      "bad UTF-8 offset",
215      NULL,  /* PARTIAL is handled specially */
216      "not used - internal error",
217      "internal error - pattern overwritten?",
218      "bad count value",
219      "item unsupported for DFA matching",
220      "backreference condition or recursion test not supported for DFA matching",
221      "match limit not supported for DFA matching",
222      "workspace size exceeded in DFA matching",
223      "too much recursion for DFA matching",
224      "recursion limit exceeded",
225      "not used - internal error",
226      "invalid combination of newline options",
227      "bad offset value",
228      NULL  /* SHORTUTF8 is handled specially */
229    };
230    
231    
232  /*************************************************  /*************************************************
233  *         Alternate character tables             *  *         Alternate character tables             *
234  *************************************************/  *************************************************/
235    
236  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237  using the default tables of the library. However, the T option can be used to  using the default tables of the library. However, the T option can be used to
238  select alternate sets of tables, for different kinds of testing. Note also that  select alternate sets of tables, for different kinds of testing. Note also that
239  the L (locale) option also adjusts the tables. */  the L (locale) option also adjusts the tables. */
240    
241  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
242  only ASCII characters. */  only ASCII characters. */
243    
244  static const unsigned char tables0[] = {  static const unsigned char tables0[] = {
# Line 371  graph, print, punct, and cntrl. Other cl Line 410  graph, print, punct, and cntrl. Other cl
410    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412    
413  /* This is a set of tables that came originally from a Windows user. It seems to  /* This is a set of tables that came originally from a Windows user. It seems to
414  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
415  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
416    
417  static const unsigned char tables1[] = {  static const unsigned char tables1[] = {
# Line 516  static const unsigned char tables1[] = { Line 555  static const unsigned char tables1[] = {
555    
556    
557    
558    
559    #ifndef HAVE_STRERROR
560    /*************************************************
561    *     Provide strerror() for non-ANSI libraries  *
562    *************************************************/
563    
564    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
565    in their libraries, but can provide the same facility by this simple
566    alternative function. */
567    
568    extern int   sys_nerr;
569    extern char *sys_errlist[];
570    
571    char *
572    strerror(int n)
573    {
574    if (n < 0 || n >= sys_nerr) return "unknown error number";
575    return sys_errlist[n];
576    }
577    #endif /* HAVE_STRERROR */
578    
579    
580    
581    
582  /*************************************************  /*************************************************
583  *        Read or extend an input line            *  *        Read or extend an input line            *
584  *************************************************/  *************************************************/
# Line 1123  printf("  -p       use POSIX interface\n Line 1186  printf("  -p       use POSIX interface\n
1186  #endif  #endif
1187  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1188  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1189  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied\n"
1190         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1191  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1192  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1151  int timeit = 0; Line 1214  int timeit = 0;
1214  int timeitm = 0;  int timeitm = 0;
1215  int showinfo = 0;  int showinfo = 0;
1216  int showstore = 0;  int showstore = 0;
1217    int force_study = 0;
1218  int quiet = 0;  int quiet = 0;
1219  int size_offsets = 45;  int size_offsets = 45;
1220  int size_offsets_max;  int size_offsets_max;
# Line 1199  while (argc > 1 && argv[op][0] == '-') Line 1263  while (argc > 1 && argv[op][0] == '-')
1263    {    {
1264    unsigned char *endptr;    unsigned char *endptr;
1265    
1266    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 1564  while (!done) Line 1628  while (!done)
1628        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1629        case 'W': options |= PCRE_UCP; break;        case 'W': options |= PCRE_UCP; break;
1630        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1631          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1632        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
1633        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1634        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1635    
1636        case 'T':        case 'T':
1637        switch (*pp++)        switch (*pp++)
1638          {          {
1639          case '0': tables = tables0; break;          case '0': tables = tables0; break;
1640          case '1': tables = tables1; break;          case '1': tables = tables1; break;
1641    
1642          case '\r':          case '\r':
1643          case '\n':          case '\n':
1644          case ' ':          case ' ':
1645          case 0:          case 0:
1646          fprintf(outfile, "** Missing table number after /T\n");          fprintf(outfile, "** Missing table number after /T\n");
1647          goto SKIP_DATA;          goto SKIP_DATA;
1648    
1649          default:          default:
1650          fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);          fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1651          goto SKIP_DATA;          goto SKIP_DATA;
1652          }          }
1653        break;        break;
1654    
1655        case 'L':        case 'L':
1656        ppp = pp;        ppp = pp;
# Line 1743  while (!done) Line 1808  while (!done)
1808      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
1809      regex_gotten_store = gotten_store;      regex_gotten_store = gotten_store;
1810    
1811      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regexp to generate additional info to
1812      help with the matching. */      help with the matching. */
1813    
1814      if (do_study)      if (do_study || force_study)
1815        {        {
1816        if (timeit > 0)        if (timeit > 0)
1817          {          {
# Line 1900  while (!done) Line 1965  while (!done)
1965        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1966    
1967        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1968          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1969            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1970            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1971            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1916  while (!done) Line 1981  while (!done)
1981            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1982            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1983            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1984              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1985            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1986    
1987        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1985  while (!done) Line 2051  while (!done)
2051        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2052        flipped.) */        flipped.) */
2053    
2054        if (do_study)        if (do_study || force_study)
2055          {          {
2056          if (extra == NULL)          if (extra == NULL)
2057            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
# Line 2081  while (!done) Line 2147  while (!done)
2147    
2148        new_free(re);        new_free(re);
2149        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2150        if (locale_set)        if (locale_set)
2151          {          {
2152          new_free((void *)tables);          new_free((void *)tables);
2153          setlocale(LC_CTYPE, "C");          setlocale(LC_CTYPE, "C");
2154          locale_set = 0;          locale_set = 0;
2155          }          }
2156        continue;  /* With next regex */        continue;  /* With next regex */
2157        }        }
2158      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 2108  while (!done) Line 2174  while (!done)
2174      int getlist = 0;      int getlist = 0;
2175      int gmatched = 0;      int gmatched = 0;
2176      int start_offset = 0;      int start_offset = 0;
2177        int start_offset_sign = 1;
2178      int g_notempty = 0;      int g_notempty = 0;
2179      int use_dfa = 0;      int use_dfa = 0;
2180    
# Line 2137  while (!done) Line 2204  while (!done)
2204          {          {
2205          if (len > 0)    /* Reached EOF without hitting a newline */          if (len > 0)    /* Reached EOF without hitting a newline */
2206            {            {
2207            fprintf(outfile, "\n");            fprintf(outfile, "\n");
2208            break;            break;
2209            }            }
2210          done = 1;          done = 1;
2211          goto CONTINUE;          goto CONTINUE;
2212          }          }
# Line 2240  while (!done) Line 2307  while (!done)
2307          continue;          continue;
2308    
2309          case '>':          case '>':
2310            if (*p == '-')
2311              {
2312              start_offset_sign = -1;
2313              p++;
2314              }
2315          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2316            start_offset *= start_offset_sign;
2317          continue;          continue;
2318    
2319          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 2313  while (!done) Line 2386  while (!done)
2386  #endif  #endif
2387            use_dfa = 1;            use_dfa = 1;
2388          continue;          continue;
2389    #endif
2390    
2391    #if !defined NODFA
2392          case 'F':          case 'F':
2393          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2394          continue;          continue;
# Line 2434  while (!done) Line 2509  while (!done)
2509        }        }
2510      *q = 0;      *q = 0;
2511      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
2512    
2513      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
2514      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2515      we are using the POSIX interface, we must include the terminating zero. */      we are using the POSIX interface, we must include the terminating zero. */
# Line 2765  while (!done) Line 2840  while (!done)
2840        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2841        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2842    
2843        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
2844        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
2845        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
2846        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
2847        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
2848          newline setting in the pattern; if none was set, use pcre_config() to
2849          find the default.
2850    
2851        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2852        character, not one byte. */        character, not one byte. */
# Line 2794  while (!done) Line 2871  while (!done)
2871                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
2872              }              }
2873            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2874                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2875                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2876                &&                &&
2877                start_offset < len - 1 &&                start_offset < len - 1 &&
# Line 2804  while (!done) Line 2882  while (!done)
2882              {              {
2883              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2884                {                {
2885                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2886                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
2887                }                }
2888              }              }
2889            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
2890            }            }
2891          else          else
2892            {            {
2893            if (count == PCRE_ERROR_NOMATCH)            switch(count)
2894              {              {
2895                case PCRE_ERROR_NOMATCH:
2896              if (gmatched == 0)              if (gmatched == 0)
2897                {                {
2898                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL) fprintf(outfile, "No match\n");
2899                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  else fprintf(outfile, "No match, mark = %s\n", markptr);
2900                }                }
2901                break;
2902    
2903                case PCRE_ERROR_BADUTF8:
2904                case PCRE_ERROR_SHORTUTF8:
2905                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2906                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2907                if (use_size_offsets >= 2)
2908                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2909                    use_offsets[1]);
2910                fprintf(outfile, "\n");
2911                break;
2912    
2913                default:
2914                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2915                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2916                else
2917                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
2918                break;
2919              }              }
2920            else fprintf(outfile, "Error %d\n", count);  
2921            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2922            }            }
2923          }          }

Legend:
Removed from v.541  
changed lines
  Added in v.606

  ViewVC Help
Powered by ViewVC 1.1.5