/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 256 by ph10, Wed Sep 19 08:57:53 2007 UTC revision 356 by ph10, Tue Jul 8 14:14:34 2008 UTC
# Line 33  Line 33 
33  #include "config.h"  #include "config.h"
34  #endif  #endif
35    
 #ifdef HAVE_WINDOWS_H  
 #define HAVE_STRTOQ 1  
 #define strtoll     _strtoui64  
 #define strtoull    _strtoi64  
 #endif  
   
36  #include <stdlib.h>  #include <stdlib.h>
37  #include <stdio.h>  #include <stdio.h>
38  #include <ctype.h>  #include <ctype.h>
# Line 61  static const int kMaxArgs = 16; Line 55  static const int kMaxArgs = 16;
55  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace
56    
57  // Special object that stands in for no argument  // Special object that stands in for no argument
58  PCRECPP_EXP_DEFN Arg no_arg((void*)NULL);  Arg RE::no_arg((void*)NULL);
59    
60    // This is for ABI compatibility with old versions of pcre (pre-7.6),
61    // which defined a global no_arg variable instead of putting it in the
62    // RE class.  This works on GCC >= 3, at least.  It definitely works
63    // for ELF, but may not for other object formats (Mach-O, for
64    // instance, does not support aliases.)  We could probably have a more
65    // inclusive test if we ever needed it.  (Note that not only the
66    // __attribute__ syntax, but also __USER_LABEL_PREFIX__, are
67    // gnu-specific.)
68    #if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__)
69    # define ULP_AS_STRING(x)            ULP_AS_STRING_INTERNAL(x)
70    # define ULP_AS_STRING_INTERNAL(x)   #x
71    # define USER_LABEL_PREFIX_STR       ULP_AS_STRING(__USER_LABEL_PREFIX__)
72    extern Arg no_arg
73      __attribute__((alias(USER_LABEL_PREFIX_STR "_ZN7pcrecpp2RE6no_argE")));
74    #endif
75    
76  // If a regular expression has no error, its error_ field points here  // If a regular expression has no error, its error_ field points here
77  static const string empty_string;  static const string empty_string;
# Line 360  static int NewlineMode(int pcre_options) Line 370  static int NewlineMode(int pcre_options)
370      else if (newline == -1)      else if (newline == -1)
371        newline_mode = PCRE_NEWLINE_ANY;        newline_mode = PCRE_NEWLINE_ANY;
372      else if (newline == -2)      else if (newline == -2)
373        newline_mode = PCRE_NEWLINE_ANYCRLF;        newline_mode = PCRE_NEWLINE_ANYCRLF;
374      else      else
375        assert("" == "Unexpected return value from pcre_config(NEWLINE)");        assert(NULL == "Unexpected return value from pcre_config(NEWLINE)");
376    }    }
377    return newline_mode;    return newline_mode;
378  }  }
# Line 375  int RE::GlobalReplace(const StringPiece& Line 385  int RE::GlobalReplace(const StringPiece&
385    int start = 0;    int start = 0;
386    int lastend = -1;    int lastend = -1;
387    
388    for (; start <= static_cast<int>(str->length()); count++) {    while (start <= static_cast<int>(str->length())) {
389      int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);      int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
390      if (matches <= 0)      if (matches <= 0)
391        break;        break;
# Line 390  int RE::GlobalReplace(const StringPiece& Line 400  int RE::GlobalReplace(const StringPiece&
400        // Note it's better to call pcre_fullinfo() than to examine        // Note it's better to call pcre_fullinfo() than to examine
401        // all_options(), since options_ could have changed between        // all_options(), since options_ could have changed between
402        // compile-time and now, but this is simpler and safe enough.        // compile-time and now, but this is simpler and safe enough.
403        // Modified by PH to add ANY and ANYCRLF.        // Modified by PH to add ANY and ANYCRLF.
404        if (start+1 < static_cast<int>(str->length()) &&        if (start+1 < static_cast<int>(str->length()) &&
405            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
406            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
# Line 447  bool RE::Extract(const StringPiece& rewr Line 457  bool RE::Extract(const StringPiece& rewr
457    // Note that it's legal to escape a character even if it has no    // Note that it's legal to escape a character even if it has no
458    // special meaning in a regular expression -- so this function does    // special meaning in a regular expression -- so this function does
459    // that.  (This also makes it identical to the perl function of the    // that.  (This also makes it identical to the perl function of the
460    // same name; see `perldoc -f quotemeta`.)    // same name; see `perldoc -f quotemeta`.)  The one exception is
461      // escaping NUL: rather than doing backslash + NUL, like perl does,
462      // we do '\0', because pcre itself doesn't take embedded NUL chars.
463    for (int ii = 0; ii < unquoted.size(); ++ii) {    for (int ii = 0; ii < unquoted.size(); ++ii) {
464      // Note that using 'isalnum' here raises the benchmark time from      // Note that using 'isalnum' here raises the benchmark time from
465      // 32ns to 58ns:      // 32ns to 58ns:
466      if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&      if (unquoted[ii] == '\0') {
467          (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&        result += "\\0";
468          (unquoted[ii] < '0' || unquoted[ii] > '9') &&      } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
469          unquoted[ii] != '_' &&                 (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
470          // If this is the part of a UTF8 or Latin1 character, we need                 (unquoted[ii] < '0' || unquoted[ii] > '9') &&
471          // to copy this byte without escaping.  Experimentally this is                 unquoted[ii] != '_' &&
472          // what works correctly with the regexp library.                 // If this is the part of a UTF8 or Latin1 character, we need
473          !(unquoted[ii] & 128)) {                 // to copy this byte without escaping.  Experimentally this is
474                   // what works correctly with the regexp library.
475                   !(unquoted[ii] & 128)) {
476        result += '\\';        result += '\\';
477          result += unquoted[ii];
478        } else {
479          result += unquoted[ii];
480      }      }
     result += unquoted[ii];  
481    }    }
482    
483    return result;    return result;
# Line 589  bool RE::Rewrite(string *out, const Stri Line 605  bool RE::Rewrite(string *out, const Stri
605          if (start >= 0)          if (start >= 0)
606            out->append(text.data() + start, vec[2 * n + 1] - start);            out->append(text.data() + start, vec[2 * n + 1] - start);
607        } else if (c == '\\') {        } else if (c == '\\') {
608          out->push_back('\\');          *out += '\\';
609        } else {        } else {
610          //fprintf(stderr, "invalid rewrite pattern: %.*s\n",          //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
611          //        rewrite.size(), rewrite.data());          //        rewrite.size(), rewrite.data());
612          return false;          return false;
613        }        }
614      } else {      } else {
615        out->push_back(c);        *out += c;
616      }      }
617    }    }
618    return true;    return true;
# Line 624  bool Arg::parse_null(const char* str, in Line 640  bool Arg::parse_null(const char* str, in
640  }  }
641    
642  bool Arg::parse_string(const char* str, int n, void* dest) {  bool Arg::parse_string(const char* str, int n, void* dest) {
643      if (dest == NULL) return true;
644    reinterpret_cast<string*>(dest)->assign(str, n);    reinterpret_cast<string*>(dest)->assign(str, n);
645    return true;    return true;
646  }  }
647    
648  bool Arg::parse_stringpiece(const char* str, int n, void* dest) {  bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
649      if (dest == NULL) return true;
650    reinterpret_cast<StringPiece*>(dest)->set(str, n);    reinterpret_cast<StringPiece*>(dest)->set(str, n);
651    return true;    return true;
652  }  }
653    
654  bool Arg::parse_char(const char* str, int n, void* dest) {  bool Arg::parse_char(const char* str, int n, void* dest) {
655    if (n != 1) return false;    if (n != 1) return false;
656      if (dest == NULL) return true;
657    *(reinterpret_cast<char*>(dest)) = str[0];    *(reinterpret_cast<char*>(dest)) = str[0];
658    return true;    return true;
659  }  }
660    
661  bool Arg::parse_uchar(const char* str, int n, void* dest) {  bool Arg::parse_uchar(const char* str, int n, void* dest) {
662    if (n != 1) return false;    if (n != 1) return false;
663      if (dest == NULL) return true;
664    *(reinterpret_cast<unsigned char*>(dest)) = str[0];    *(reinterpret_cast<unsigned char*>(dest)) = str[0];
665    return true;    return true;
666  }  }
# Line 689  bool Arg::parse_long_radix(const char* s Line 709  bool Arg::parse_long_radix(const char* s
709    long r = strtol(str, &end, radix);    long r = strtol(str, &end, radix);
710    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
711    if (errno) return false;    if (errno) return false;
712      if (dest == NULL) return true;
713    *(reinterpret_cast<long*>(dest)) = r;    *(reinterpret_cast<long*>(dest)) = r;
714    return true;    return true;
715  }  }
# Line 706  bool Arg::parse_ulong_radix(const char* Line 727  bool Arg::parse_ulong_radix(const char*
727    unsigned long r = strtoul(str, &end, radix);    unsigned long r = strtoul(str, &end, radix);
728    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
729    if (errno) return false;    if (errno) return false;
730      if (dest == NULL) return true;
731    *(reinterpret_cast<unsigned long*>(dest)) = r;    *(reinterpret_cast<unsigned long*>(dest)) = r;
732    return true;    return true;
733  }  }
# Line 717  bool Arg::parse_short_radix(const char* Line 739  bool Arg::parse_short_radix(const char*
739    long r;    long r;
740    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
741    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range
742      if (dest == NULL) return true;
743    *(reinterpret_cast<short*>(dest)) = static_cast<short>(r);    *(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
744    return true;    return true;
745  }  }
# Line 728  bool Arg::parse_ushort_radix(const char* Line 751  bool Arg::parse_ushort_radix(const char*
751    unsigned long r;    unsigned long r;
752    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
753    if (r > USHRT_MAX) return false;                      // Out of range    if (r > USHRT_MAX) return false;                      // Out of range
754      if (dest == NULL) return true;
755    *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);    *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
756    return true;    return true;
757  }  }
# Line 739  bool Arg::parse_int_radix(const char* st Line 763  bool Arg::parse_int_radix(const char* st
763    long r;    long r;
764    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
765    if (r < INT_MIN || r > INT_MAX) return false;         // Out of range    if (r < INT_MIN || r > INT_MAX) return false;         // Out of range
766      if (dest == NULL) return true;
767    *(reinterpret_cast<int*>(dest)) = r;    *(reinterpret_cast<int*>(dest)) = r;
768    return true;    return true;
769  }  }
# Line 750  bool Arg::parse_uint_radix(const char* s Line 775  bool Arg::parse_uint_radix(const char* s
775    unsigned long r;    unsigned long r;
776    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
777    if (r > UINT_MAX) return false;                       // Out of range    if (r > UINT_MAX) return false;                       // Out of range
778      if (dest == NULL) return true;
779    *(reinterpret_cast<unsigned int*>(dest)) = r;    *(reinterpret_cast<unsigned int*>(dest)) = r;
780    return true;    return true;
781  }  }
# Line 770  bool Arg::parse_longlong_radix(const cha Line 796  bool Arg::parse_longlong_radix(const cha
796    long long r = strtoq(str, &end, radix);    long long r = strtoq(str, &end, radix);
797  #elif defined HAVE_STRTOLL  #elif defined HAVE_STRTOLL
798    long long r = strtoll(str, &end, radix);    long long r = strtoll(str, &end, radix);
799    #elif defined HAVE__STRTOI64
800      long long r = _strtoi64(str, &end, radix);
801  #else  #else
802  #error parse_longlong_radix: cannot convert input to a long-long  #error parse_longlong_radix: cannot convert input to a long-long
803  #endif  #endif
804    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
805    if (errno) return false;    if (errno) return false;
806      if (dest == NULL) return true;
807    *(reinterpret_cast<long long*>(dest)) = r;    *(reinterpret_cast<long long*>(dest)) = r;
808    return true;    return true;
809  #endif   /* HAVE_LONG_LONG */  #endif   /* HAVE_LONG_LONG */
# Line 797  bool Arg::parse_ulonglong_radix(const ch Line 826  bool Arg::parse_ulonglong_radix(const ch
826    unsigned long long r = strtouq(str, &end, radix);    unsigned long long r = strtouq(str, &end, radix);
827  #elif defined HAVE_STRTOLL  #elif defined HAVE_STRTOLL
828    unsigned long long r = strtoull(str, &end, radix);    unsigned long long r = strtoull(str, &end, radix);
829    #elif defined HAVE__STRTOI64
830      unsigned long long r = _strtoui64(str, &end, radix);
831  #else  #else
832  #error parse_ulonglong_radix: cannot convert input to a long-long  #error parse_ulonglong_radix: cannot convert input to a long-long
833  #endif  #endif
834    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
835    if (errno) return false;    if (errno) return false;
836      if (dest == NULL) return true;
837    *(reinterpret_cast<unsigned long long*>(dest)) = r;    *(reinterpret_cast<unsigned long long*>(dest)) = r;
838    return true;    return true;
839  #endif   /* HAVE_UNSIGNED_LONG_LONG */  #endif   /* HAVE_UNSIGNED_LONG_LONG */
# Line 819  bool Arg::parse_double(const char* str, Line 851  bool Arg::parse_double(const char* str,
851    double r = strtod(buf, &end);    double r = strtod(buf, &end);
852    if (end != buf + n) return false;   // Leftover junk    if (end != buf + n) return false;   // Leftover junk
853    if (errno) return false;    if (errno) return false;
854      if (dest == NULL) return true;
855    *(reinterpret_cast<double*>(dest)) = r;    *(reinterpret_cast<double*>(dest)) = r;
856    return true;    return true;
857  }  }
# Line 826  bool Arg::parse_double(const char* str, Line 859  bool Arg::parse_double(const char* str,
859  bool Arg::parse_float(const char* str, int n, void* dest) {  bool Arg::parse_float(const char* str, int n, void* dest) {
860    double r;    double r;
861    if (!parse_double(str, n, &r)) return false;    if (!parse_double(str, n, &r)) return false;
862      if (dest == NULL) return true;
863    *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);    *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
864    return true;    return true;
865  }  }

Legend:
Removed from v.256  
changed lines
  Added in v.356

  ViewVC Help
Powered by ViewVC 1.1.5