/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC | revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC
# Line 29  Line 29 
29  //  //
30  // Author: Sanjay Ghemawat  // Author: Sanjay Ghemawat
31    
32    #ifdef HAVE_CONFIG_H
33    #include "config.h"
34    #endif
35    
36    #ifdef _WIN32
37    #define HAVE_STRTOQ 1
38    #define strtoll     _strtoui64
39    #define strtoull    _strtoi64
40    #endif
41    
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <stdio.h>  #include <stdio.h>
44  #include <ctype.h>  #include <ctype.h>
# Line 37  Line 47 
47  #include <errno.h>  #include <errno.h>
48  #include <string>  #include <string>
49  #include <algorithm>  #include <algorithm>
50  #include "config.h"  
51  // We need this to compile the proper dll on windows/msys.  This is copied  #include "pcrecpp_internal.h"
 // from pcre_internal.h.  It would probably be better just to include that.  
 #define PCRE_DEFINITION  /* Win32 __declspec(export) trigger for .dll */  
52  #include "pcre.h"  #include "pcre.h"
 #include "pcre_stringpiece.h"  
53  #include "pcrecpp.h"  #include "pcrecpp.h"
54    #include "pcre_stringpiece.h"
55    
56    
57  namespace pcrecpp {  namespace pcrecpp {
# Line 53  static const int kMaxArgs = 16; Line 61  static const int kMaxArgs = 16;
61  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace
62    
63  // Special object that stands-in for no argument  // Special object that stands-in for no argument
64  Arg no_arg((void*)NULL);  PCRECPP_EXP_DEFN Arg no_arg((void*)NULL);
65    
66  // If a regular expression has no error, its error_ field points here  // If a regular expression has no error, its error_ field points here
67  static const string empty_string;  static const string empty_string;
# Line 61  static const string empty_string; Line 69  static const string empty_string;
69  // If the user doesn't ask for any options, we just use this one  // If the user doesn't ask for any options, we just use this one
70  static RE_Options default_options;  static RE_Options default_options;
71    
72  void RE::Init(const char* pat, const RE_Options* options) {  void RE::Init(const string& pat, const RE_Options* options) {
73    pattern_ = pat;    pattern_ = pat;
74    if (options == NULL) {    if (options == NULL) {
75      options_ = default_options;      options_ = default_options;
# Line 74  void RE::Init(const char* pat, const RE_ Line 82  void RE::Init(const char* pat, const RE_
82    
83    re_partial_ = Compile(UNANCHORED);    re_partial_ = Compile(UNANCHORED);
84    if (re_partial_ != NULL) {    if (re_partial_ != NULL) {
85      // Check for complicated patterns.  The following change is      re_full_ = Compile(ANCHOR_BOTH);
     // conservative in that it may treat some "simple" patterns  
     // as "complex" (e.g., if the vertical bar is in a character  
     // class or is escaped).  But it seems good enough.  
     if (strchr(pat, '|') == NULL) {  
       // Simple pattern: we can use position-based checks to perform  
       // fully anchored matches  
       re_full_ = re_partial_;  
     } else {  
       // We need a special pattern for anchored matches  
       re_full_ = Compile(ANCHOR_BOTH);  
     }  
86    }    }
87  }  }
88    
89    void RE::Cleanup() {
90      if (re_full_ != NULL)         (*pcre_free)(re_full_);
91      if (re_partial_ != NULL)      (*pcre_free)(re_partial_);
92      if (error_ != &empty_string)  delete error_;
93    }
94    
95    
96  RE::~RE() {  RE::~RE() {
97    if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);    Cleanup();
   if (re_partial_ != NULL)                         (*pcre_free)(re_partial_);  
   if (error_ != &empty_string)                     delete error_;  
98  }  }
99    
100    
101  pcre* RE::Compile(Anchor anchor) {  pcre* RE::Compile(Anchor anchor) {
102    // First, convert RE_Options into pcre options    // First, convert RE_Options into pcre options
103    int pcre_options = 0;    int pcre_options = 0;
# Line 424  bool RE::Extract(const StringPiece& rewr Line 427  bool RE::Extract(const StringPiece& rewr
427    return Rewrite(out, rewrite, text, vec, matches);    return Rewrite(out, rewrite, text, vec, matches);
428  }  }
429    
430    /*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
431      string result;
432    
433      // Escape any ascii character not in [A-Za-z_0-9].
434      //
435      // Note that it's legal to escape a character even if it has no
436      // special meaning in a regular expression -- so this function does
437      // that.  (This also makes it identical to the perl function of the
438      // same name; see `perldoc -f quotemeta`.)
439      for (int ii = 0; ii < unquoted.size(); ++ii) {
440        // Note that using 'isalnum' here raises the benchmark time from
441        // 32ns to 58ns:
442        if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
443            (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
444            (unquoted[ii] < '0' || unquoted[ii] > '9') &&
445            unquoted[ii] != '_' &&
446            // If this is the part of a UTF8 or Latin1 character, we need
447            // to copy this byte without escaping.  Experimentally this is
448            // what works correctly with the regexp library.
449            !(unquoted[ii] & 128)) {
450          result += '\\';
451        }
452        result += unquoted[ii];
453      }
454    
455      return result;
456    }
457    
458  /***** Actual matching and rewriting code *****/  /***** Actual matching and rewriting code *****/
459    
460  int RE::TryMatch(const StringPiece& text,  int RE::TryMatch(const StringPiece& text,
# Line 437  int RE::TryMatch(const StringPiece& text Line 468  int RE::TryMatch(const StringPiece& text
468      return 0;      return 0;
469    }    }
470    
471    pcre_extra extra = { 0 };    pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
472    if (options_.match_limit() > 0) {    if (options_.match_limit() > 0) {
473      extra.flags |= PCRE_EXTRA_MATCH_LIMIT;      extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
474      extra.match_limit = options_.match_limit();      extra.match_limit = options_.match_limit();
# Line 470  int RE::TryMatch(const StringPiece& text Line 501  int RE::TryMatch(const StringPiece& text
501      rc = vecsize / 2;      rc = vecsize / 2;
502    }    }
503    
   if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {  
     // We need an extra check to make sure that the match extended  
     // to the end of the input string  
     assert(vec[0] == 0);                 // PCRE_ANCHORED forces starting match  
     if (vec[1] != text.size()) return 0; // Did not get ending match  
   }  
   
504    return rc;    return rc;
505  }  }
506    
# Line 809  bool Arg::parse_float(const char* str, i Line 833  bool Arg::parse_float(const char* str, i
833      return parse_##name##_radix(str, n, dest, 0);                       \      return parse_##name##_radix(str, n, dest, 0);                       \
834    }    }
835    
836  DEFINE_INTEGER_PARSERS(short);  DEFINE_INTEGER_PARSERS(short)      /*                                   */
837  DEFINE_INTEGER_PARSERS(ushort);  DEFINE_INTEGER_PARSERS(ushort)     /*                                   */
838  DEFINE_INTEGER_PARSERS(int);  DEFINE_INTEGER_PARSERS(int)        /* Don't use semicolons after these  */
839  DEFINE_INTEGER_PARSERS(uint);  DEFINE_INTEGER_PARSERS(uint)       /* statements because they can cause */
840  DEFINE_INTEGER_PARSERS(long);  DEFINE_INTEGER_PARSERS(long)       /* compiler warnings if the checking */
841  DEFINE_INTEGER_PARSERS(ulong);  DEFINE_INTEGER_PARSERS(ulong)      /* level is turned up high enough.   */
842  DEFINE_INTEGER_PARSERS(longlong);  DEFINE_INTEGER_PARSERS(longlong)   /*                                   */
843  DEFINE_INTEGER_PARSERS(ulonglong);  DEFINE_INTEGER_PARSERS(ulonglong)  /*                                   */
844    
845  #undef DEFINE_INTEGER_PARSERS  #undef DEFINE_INTEGER_PARSERS
846    

Legend:
Removed from v.91  
changed lines
  Added in v.236

  ViewVC Help
Powered by ViewVC 1.1.5