/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 308 by ph10, Mon Jan 21 14:57:19 2008 UTC | revision 326 by ph10, Sat Mar 8 17:24:02 2008 UTC
# Line 57  static const int kVecSize = (1 + kMaxArg Line 57  static const int kVecSize = (1 + kMaxArg
57  // Special object that stands-in for no argument  // Special object that stands-in for no argument
58  Arg RE::no_arg((void*)NULL);  Arg RE::no_arg((void*)NULL);
59    
60    // This is for ABI compatibility with old versions of pcre (pre-7.6),
61    // which defined a global no_arg variable instead of putting it in the
62    // RE class.  This works on GCC >= 3, at least.  We could probably have
63    // a more inclusive test if we ever needed it.
64    #if defined(__GNUC__) && __GNUC__ >= 3
65    extern Arg no_arg __attribute__((alias("_ZN7pcrecpp2RE6no_argE")));
66    #endif
67    
68  // If a regular expression has no error, its error_ field points here  // If a regular expression has no error, its error_ field points here
69  static const string empty_string;  static const string empty_string;
70    
# Line 441  bool RE::Extract(const StringPiece& rewr Line 449  bool RE::Extract(const StringPiece& rewr
449    // Note that it's legal to escape a character even if it has no    // Note that it's legal to escape a character even if it has no
450    // special meaning in a regular expression -- so this function does    // special meaning in a regular expression -- so this function does
451    // that.  (This also makes it identical to the perl function of the    // that.  (This also makes it identical to the perl function of the
452    // same name; see `perldoc -f quotemeta`.)    // same name; see `perldoc -f quotemeta`.)  The one exception is
453      // escaping NUL: rather than doing backslash + NUL, like perl does,
454      // we do '\0', because pcre itself doesn't take embedded NUL chars.
455    for (int ii = 0; ii < unquoted.size(); ++ii) {    for (int ii = 0; ii < unquoted.size(); ++ii) {
456      // Note that using 'isalnum' here raises the benchmark time from      // Note that using 'isalnum' here raises the benchmark time from
457      // 32ns to 58ns:      // 32ns to 58ns:
458      if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&      if (unquoted[ii] == '\0') {
459          (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&        result += "\\0";
460          (unquoted[ii] < '0' || unquoted[ii] > '9') &&      } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
461          unquoted[ii] != '_' &&                 (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
462          // If this is the part of a UTF8 or Latin1 character, we need                 (unquoted[ii] < '0' || unquoted[ii] > '9') &&
463          // to copy this byte without escaping.  Experimentally this is                 unquoted[ii] != '_' &&
464          // what works correctly with the regexp library.                 // If this is the part of a UTF8 or Latin1 character, we need
465          !(unquoted[ii] & 128)) {                 // to copy this byte without escaping.  Experimentally this is
466                   // what works correctly with the regexp library.
467                   !(unquoted[ii] & 128)) {
468        result += '\\';        result += '\\';
469          result += unquoted[ii];
470        } else {
471          result += unquoted[ii];
472      }      }
     result += unquoted[ii];  
473    }    }
474    
475    return result;    return result;

Legend:
Removed from v.308  
changed lines
  Added in v.326

  ViewVC Help
Powered by ViewVC 1.1.5