/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC
# Line 61  static const string empty_string; Line 61  static const string empty_string;
61  // If the user doesn't ask for any options, we just use this one  // If the user doesn't ask for any options, we just use this one
62  static RE_Options default_options;  static RE_Options default_options;
63    
64  void RE::Init(const char* pat, const RE_Options* options) {  void RE::Init(const string& pat, const RE_Options* options) {
65    pattern_ = pat;    pattern_ = pat;
66    if (options == NULL) {    if (options == NULL) {
67      options_ = default_options;      options_ = default_options;
# Line 78  void RE::Init(const char* pat, const RE_ Line 78  void RE::Init(const char* pat, const RE_
78      // conservative in that it may treat some "simple" patterns      // conservative in that it may treat some "simple" patterns
79      // as "complex" (e.g., if the vertical bar is in a character      // as "complex" (e.g., if the vertical bar is in a character
80      // class or is escaped).  But it seems good enough.      // class or is escaped).  But it seems good enough.
81      if (strchr(pat, '|') == NULL) {      if (strchr(pat.c_str(), '|') == NULL) {
82        // Simple pattern: we can use position-based checks to perform        // Simple pattern: we can use position-based checks to perform
83        // fully anchored matches        // fully anchored matches
84        re_full_ = re_partial_;        re_full_ = re_partial_;
# Line 89  void RE::Init(const char* pat, const RE_ Line 89  void RE::Init(const char* pat, const RE_
89    }    }
90  }  }
91    
92  RE::~RE() {  void RE::Cleanup() {
93    if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);    if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
94    if (re_partial_ != NULL)                         (*pcre_free)(re_partial_);    if (re_partial_ != NULL)                         (*pcre_free)(re_partial_);
95    if (error_ != &empty_string)                     delete error_;    if (error_ != &empty_string)                     delete error_;
96  }  }
97    
98    
99    RE::~RE() {
100      Cleanup();
101    }
102    
103    
104  pcre* RE::Compile(Anchor anchor) {  pcre* RE::Compile(Anchor anchor) {
105    // First, convert RE_Options into pcre options    // First, convert RE_Options into pcre options
106    int pcre_options = 0;    int pcre_options = 0;
# Line 332  bool RE::Replace(const StringPiece& rewr Line 338  bool RE::Replace(const StringPiece& rewr
338    return true;    return true;
339  }  }
340    
341    // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
342    // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
343    static int NewlineMode(int pcre_options) {
344      // TODO: if we can make it threadsafe, cache this var
345      int newline_mode = 0;
346      /* if (newline_mode) return newline_mode; */  // do this once it's cached
347      if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
348        newline_mode = (pcre_options &
349                        (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
350      } else {
351        int newline;
352        pcre_config(PCRE_CONFIG_NEWLINE, &newline);
353        if (newline == 10)
354          newline_mode = PCRE_NEWLINE_LF;
355        else if (newline == 13)
356          newline_mode = PCRE_NEWLINE_CR;
357        else if (newline == 3338)
358          newline_mode = PCRE_NEWLINE_CRLF;
359        else
360          assert("" == "Unexpected return value from pcre_config(NEWLINE)");
361      }
362      return newline_mode;
363    }
364    
365  int RE::GlobalReplace(const StringPiece& rewrite,  int RE::GlobalReplace(const StringPiece& rewrite,
366                        string *str) const {                        string *str) const {
367    int count = 0;    int count = 0;
# Line 350  int RE::GlobalReplace(const StringPiece& Line 380  int RE::GlobalReplace(const StringPiece&
380      if (matchstart == matchend && matchstart == lastend) {      if (matchstart == matchend && matchstart == lastend) {
381        // advance one character if we matched an empty string at the same        // advance one character if we matched an empty string at the same
382        // place as the last match occurred        // place as the last match occurred
383        if (start < static_cast<int>(str->length()))        matchend = start + 1;
384          out.push_back((*str)[start]);        // If the current char is CR and we're in CRLF mode, skip LF too.
385        start++;        // Note it's better to call pcre_fullinfo() than to examine
386          // all_options(), since options_ could have changed between
387          // compile-time and now, but this is simpler and safe enough.
388          if (start+1 < static_cast<int>(str->length()) &&
389              (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
390              NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
391            matchend++;
392          }
393          // We also need to advance more than one char if we're in utf8 mode.
394    #ifdef SUPPORT_UTF8
395          if (options_.utf8()) {
396            while (matchend < static_cast<int>(str->length()) &&
397                   ((*str)[matchend] & 0xc0) == 0x80)
398              matchend++;
399          }
400    #endif
401          if (matchend <= static_cast<int>(str->length()))
402            out.append(*str, start, matchend - start);
403          start = matchend;
404      } else {      } else {
405        out.append(*str, start, matchstart - start);        out.append(*str, start, matchstart - start);
406        Rewrite(&out, rewrite, *str, vec, matches);        Rewrite(&out, rewrite, *str, vec, matches);
# Line 382  bool RE::Extract(const StringPiece& rewr Line 430  bool RE::Extract(const StringPiece& rewr
430    return Rewrite(out, rewrite, text, vec, matches);    return Rewrite(out, rewrite, text, vec, matches);
431  }  }
432    
433    /*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
434      string result;
435    
436      // Escape any ascii character not in [A-Za-z_0-9].
437      //
438      // Note that it's legal to escape a character even if it has no
439      // special meaning in a regular expression -- so this function does
440      // that.  (This also makes it identical to the perl function of the
441      // same name; see `perldoc -f quotemeta`.)
442      for (int ii = 0; ii < unquoted.size(); ++ii) {
443        // Note that using 'isalnum' here raises the benchmark time from
444        // 32ns to 58ns:
445        if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
446            (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
447            (unquoted[ii] < '0' || unquoted[ii] > '9') &&
448            unquoted[ii] != '_' &&
449            // If this is the part of a UTF8 or Latin1 character, we need
450            // to copy this byte without escaping.  Experimentally this is
451            // what works correctly with the regexp library.
452            !(unquoted[ii] & 128)) {
453          result += '\\';
454        }
455        result += unquoted[ii];
456      }
457    
458      return result;
459    }
460    
461  /***** Actual matching and rewriting code *****/  /***** Actual matching and rewriting code *****/
462    
463  int RE::TryMatch(const StringPiece& text,  int RE::TryMatch(const StringPiece& text,
# Line 767  bool Arg::parse_float(const char* str, i Line 843  bool Arg::parse_float(const char* str, i
843      return parse_##name##_radix(str, n, dest, 0);                       \      return parse_##name##_radix(str, n, dest, 0);                       \
844    }    }
845    
846  DEFINE_INTEGER_PARSERS(short);  DEFINE_INTEGER_PARSERS(short)      /*                                   */
847  DEFINE_INTEGER_PARSERS(ushort);  DEFINE_INTEGER_PARSERS(ushort)     /*                                   */
848  DEFINE_INTEGER_PARSERS(int);  DEFINE_INTEGER_PARSERS(int)        /* Don't use semicolons after these  */
849  DEFINE_INTEGER_PARSERS(uint);  DEFINE_INTEGER_PARSERS(uint)       /* statements because they can cause */
850  DEFINE_INTEGER_PARSERS(long);  DEFINE_INTEGER_PARSERS(long)       /* compiler warnings if the checking */
851  DEFINE_INTEGER_PARSERS(ulong);  DEFINE_INTEGER_PARSERS(ulong)      /* level is turned up high enough.   */
852  DEFINE_INTEGER_PARSERS(longlong);  DEFINE_INTEGER_PARSERS(longlong)   /*                                   */
853  DEFINE_INTEGER_PARSERS(ulonglong);  DEFINE_INTEGER_PARSERS(ulonglong)  /*                                   */
854    
855  #undef DEFINE_INTEGER_PARSERS  #undef DEFINE_INTEGER_PARSERS
856    

Legend:
Removed from v.87  
changed lines
  Added in v.96

  ViewVC Help
Powered by ViewVC 1.1.5