/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 253 by ph10, Mon Sep 17 10:51:30 2007 UTC revision 326 by ph10, Sat Mar 8 17:24:02 2008 UTC
# Line 33  Line 33 
33  #include "config.h"  #include "config.h"
34  #endif  #endif
35    
 #ifdef HAVE_WINDOWS_H  
 #define HAVE_STRTOQ 1  
 #define strtoll     _strtoui64  
 #define strtoull    _strtoi64  
 #endif  
   
36  #include <stdlib.h>  #include <stdlib.h>
37  #include <stdio.h>  #include <stdio.h>
38  #include <ctype.h>  #include <ctype.h>
# Line 61  static const int kMaxArgs = 16; Line 55  static const int kMaxArgs = 16;
55  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace
56    
57  // Special object that stands-in for no argument  // Special object that stands-in for no argument
58  PCRECPP_EXP_DEFN Arg no_arg((void*)NULL);  Arg RE::no_arg((void*)NULL);
59    
60    // This is for ABI compatibility with old versions of pcre (pre-7.6),
61    // which defined a global no_arg variable instead of putting it in the
62    // RE class.  This works on GCC >= 3, at least.  We could probably have
63    // a more inclusive test if we ever needed it.
64    #if defined(__GNUC__) && __GNUC__ >= 3
65    extern Arg no_arg __attribute__((alias("_ZN7pcrecpp2RE6no_argE")));
66    #endif
67    
68  // If a regular expression has no error, its error_ field points here  // If a regular expression has no error, its error_ field points here
69  static const string empty_string;  static const string empty_string;
# Line 360  static int NewlineMode(int pcre_options) Line 362  static int NewlineMode(int pcre_options)
362      else if (newline == -1)      else if (newline == -1)
363        newline_mode = PCRE_NEWLINE_ANY;        newline_mode = PCRE_NEWLINE_ANY;
364      else if (newline == -2)      else if (newline == -2)
365        newline_mode = PCRE_NEWLINE_ANYCRLF;        newline_mode = PCRE_NEWLINE_ANYCRLF;
366      else      else
367        assert("" == "Unexpected return value from pcre_config(NEWLINE)");        assert("" == "Unexpected return value from pcre_config(NEWLINE)");
368    }    }
# Line 375  int RE::GlobalReplace(const StringPiece& Line 377  int RE::GlobalReplace(const StringPiece&
377    int start = 0;    int start = 0;
378    int lastend = -1;    int lastend = -1;
379    
380    for (; start <= static_cast<int>(str->length()); count++) {    while (start <= static_cast<int>(str->length())) {
381      int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);      int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
382      if (matches <= 0)      if (matches <= 0)
383        break;        break;
# Line 390  int RE::GlobalReplace(const StringPiece& Line 392  int RE::GlobalReplace(const StringPiece&
392        // Note it's better to call pcre_fullinfo() than to examine        // Note it's better to call pcre_fullinfo() than to examine
393        // all_options(), since options_ could have changed between        // all_options(), since options_ could have changed between
394        // compile-time and now, but this is simpler and safe enough.        // compile-time and now, but this is simpler and safe enough.
395        // Modified by PH to add ANY and ANYCRLF.        // Modified by PH to add ANY and ANYCRLF.
396        if (start+1 < static_cast<int>(str->length()) &&        if (start+1 < static_cast<int>(str->length()) &&
397            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
398            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
# Line 447  bool RE::Extract(const StringPiece& rewr Line 449  bool RE::Extract(const StringPiece& rewr
449    // Note that it's legal to escape a character even if it has no    // Note that it's legal to escape a character even if it has no
450    // special meaning in a regular expression -- so this function does    // special meaning in a regular expression -- so this function does
451    // that.  (This also makes it identical to the perl function of the    // that.  (This also makes it identical to the perl function of the
452    // same name; see `perldoc -f quotemeta`.)    // same name; see `perldoc -f quotemeta`.)  The one exception is
453      // escaping NUL: rather than doing backslash + NUL, like perl does,
454      // we do '\0', because pcre itself doesn't take embedded NUL chars.
455    for (int ii = 0; ii < unquoted.size(); ++ii) {    for (int ii = 0; ii < unquoted.size(); ++ii) {
456      // Note that using 'isalnum' here raises the benchmark time from      // Note that using 'isalnum' here raises the benchmark time from
457      // 32ns to 58ns:      // 32ns to 58ns:
458      if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&      if (unquoted[ii] == '\0') {
459          (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&        result += "\\0";
460          (unquoted[ii] < '0' || unquoted[ii] > '9') &&      } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
461          unquoted[ii] != '_' &&                 (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
462          // If this is the part of a UTF8 or Latin1 character, we need                 (unquoted[ii] < '0' || unquoted[ii] > '9') &&
463          // to copy this byte without escaping.  Experimentally this is                 unquoted[ii] != '_' &&
464          // what works correctly with the regexp library.                 // If this is the part of a UTF8 or Latin1 character, we need
465          !(unquoted[ii] & 128)) {                 // to copy this byte without escaping.  Experimentally this is
466                   // what works correctly with the regexp library.
467                   !(unquoted[ii] & 128)) {
468        result += '\\';        result += '\\';
469          result += unquoted[ii];
470        } else {
471          result += unquoted[ii];
472      }      }
     result += unquoted[ii];  
473    }    }
474    
475    return result;    return result;
# Line 624  bool Arg::parse_null(const char* str, in Line 632  bool Arg::parse_null(const char* str, in
632  }  }
633    
634  bool Arg::parse_string(const char* str, int n, void* dest) {  bool Arg::parse_string(const char* str, int n, void* dest) {
635      if (dest == NULL) return true;
636    reinterpret_cast<string*>(dest)->assign(str, n);    reinterpret_cast<string*>(dest)->assign(str, n);
637    return true;    return true;
638  }  }
639    
640  bool Arg::parse_stringpiece(const char* str, int n, void* dest) {  bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
641      if (dest == NULL) return true;
642    reinterpret_cast<StringPiece*>(dest)->set(str, n);    reinterpret_cast<StringPiece*>(dest)->set(str, n);
643    return true;    return true;
644  }  }
645    
646  bool Arg::parse_char(const char* str, int n, void* dest) {  bool Arg::parse_char(const char* str, int n, void* dest) {
647    if (n != 1) return false;    if (n != 1) return false;
648      if (dest == NULL) return true;
649    *(reinterpret_cast<char*>(dest)) = str[0];    *(reinterpret_cast<char*>(dest)) = str[0];
650    return true;    return true;
651  }  }
652    
653  bool Arg::parse_uchar(const char* str, int n, void* dest) {  bool Arg::parse_uchar(const char* str, int n, void* dest) {
654    if (n != 1) return false;    if (n != 1) return false;
655      if (dest == NULL) return true;
656    *(reinterpret_cast<unsigned char*>(dest)) = str[0];    *(reinterpret_cast<unsigned char*>(dest)) = str[0];
657    return true;    return true;
658  }  }
# Line 689  bool Arg::parse_long_radix(const char* s Line 701  bool Arg::parse_long_radix(const char* s
701    long r = strtol(str, &end, radix);    long r = strtol(str, &end, radix);
702    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
703    if (errno) return false;    if (errno) return false;
704      if (dest == NULL) return true;
705    *(reinterpret_cast<long*>(dest)) = r;    *(reinterpret_cast<long*>(dest)) = r;
706    return true;    return true;
707  }  }
# Line 706  bool Arg::parse_ulong_radix(const char* Line 719  bool Arg::parse_ulong_radix(const char*
719    unsigned long r = strtoul(str, &end, radix);    unsigned long r = strtoul(str, &end, radix);
720    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
721    if (errno) return false;    if (errno) return false;
722      if (dest == NULL) return true;
723    *(reinterpret_cast<unsigned long*>(dest)) = r;    *(reinterpret_cast<unsigned long*>(dest)) = r;
724    return true;    return true;
725  }  }
# Line 717  bool Arg::parse_short_radix(const char* Line 731  bool Arg::parse_short_radix(const char*
731    long r;    long r;
732    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
733    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range
734    *(reinterpret_cast<short*>(dest)) = r;    if (dest == NULL) return true;
735      *(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
736    return true;    return true;
737  }  }
738    
# Line 728  bool Arg::parse_ushort_radix(const char* Line 743  bool Arg::parse_ushort_radix(const char*
743    unsigned long r;    unsigned long r;
744    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
745    if (r > USHRT_MAX) return false;                      // Out of range    if (r > USHRT_MAX) return false;                      // Out of range
746    *(reinterpret_cast<unsigned short*>(dest)) = r;    if (dest == NULL) return true;
747      *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
748    return true;    return true;
749  }  }
750    
# Line 739  bool Arg::parse_int_radix(const char* st Line 755  bool Arg::parse_int_radix(const char* st
755    long r;    long r;
756    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
757    if (r < INT_MIN || r > INT_MAX) return false;         // Out of range    if (r < INT_MIN || r > INT_MAX) return false;         // Out of range
758      if (dest == NULL) return true;
759    *(reinterpret_cast<int*>(dest)) = r;    *(reinterpret_cast<int*>(dest)) = r;
760    return true;    return true;
761  }  }
# Line 750  bool Arg::parse_uint_radix(const char* s Line 767  bool Arg::parse_uint_radix(const char* s
767    unsigned long r;    unsigned long r;
768    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
769    if (r > UINT_MAX) return false;                       // Out of range    if (r > UINT_MAX) return false;                       // Out of range
770      if (dest == NULL) return true;
771    *(reinterpret_cast<unsigned int*>(dest)) = r;    *(reinterpret_cast<unsigned int*>(dest)) = r;
772    return true;    return true;
773  }  }
# Line 770  bool Arg::parse_longlong_radix(const cha Line 788  bool Arg::parse_longlong_radix(const cha
788    long long r = strtoq(str, &end, radix);    long long r = strtoq(str, &end, radix);
789  #elif defined HAVE_STRTOLL  #elif defined HAVE_STRTOLL
790    long long r = strtoll(str, &end, radix);    long long r = strtoll(str, &end, radix);
791    #elif defined HAVE__STRTOI64
792      long long r = _strtoi64(str, &end, radix);
793  #else  #else
794  #error parse_longlong_radix: cannot convert input to a long-long  #error parse_longlong_radix: cannot convert input to a long-long
795  #endif  #endif
796    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
797    if (errno) return false;    if (errno) return false;
798      if (dest == NULL) return true;
799    *(reinterpret_cast<long long*>(dest)) = r;    *(reinterpret_cast<long long*>(dest)) = r;
800    return true;    return true;
801  #endif   /* HAVE_LONG_LONG */  #endif   /* HAVE_LONG_LONG */
# Line 797  bool Arg::parse_ulonglong_radix(const ch Line 818  bool Arg::parse_ulonglong_radix(const ch
818    unsigned long long r = strtouq(str, &end, radix);    unsigned long long r = strtouq(str, &end, radix);
819  #elif defined HAVE_STRTOLL  #elif defined HAVE_STRTOLL
820    unsigned long long r = strtoull(str, &end, radix);    unsigned long long r = strtoull(str, &end, radix);
821    #elif defined HAVE__STRTOI64
822      unsigned long long r = _strtoui64(str, &end, radix);
823  #else  #else
824  #error parse_ulonglong_radix: cannot convert input to a long-long  #error parse_ulonglong_radix: cannot convert input to a long-long
825  #endif  #endif
826    if (end != str + n) return false;   // Leftover junk    if (end != str + n) return false;   // Leftover junk
827    if (errno) return false;    if (errno) return false;
828      if (dest == NULL) return true;
829    *(reinterpret_cast<unsigned long long*>(dest)) = r;    *(reinterpret_cast<unsigned long long*>(dest)) = r;
830    return true;    return true;
831  #endif   /* HAVE_UNSIGNED_LONG_LONG */  #endif   /* HAVE_UNSIGNED_LONG_LONG */
# Line 819  bool Arg::parse_double(const char* str, Line 843  bool Arg::parse_double(const char* str,
843    double r = strtod(buf, &end);    double r = strtod(buf, &end);
844    if (end != buf + n) return false;   // Leftover junk    if (end != buf + n) return false;   // Leftover junk
845    if (errno) return false;    if (errno) return false;
846      if (dest == NULL) return true;
847    *(reinterpret_cast<double*>(dest)) = r;    *(reinterpret_cast<double*>(dest)) = r;
848    return true;    return true;
849  }  }
# Line 826  bool Arg::parse_double(const char* str, Line 851  bool Arg::parse_double(const char* str,
851  bool Arg::parse_float(const char* str, int n, void* dest) {  bool Arg::parse_float(const char* str, int n, void* dest) {
852    double r;    double r;
853    if (!parse_double(str, n, &r)) return false;    if (!parse_double(str, n, &r)) return false;
854      if (dest == NULL) return true;
855    *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);    *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
856    return true;    return true;
857  }  }

Legend:
Removed from v.253  
changed lines
  Added in v.326

  ViewVC Help
Powered by ViewVC 1.1.5