/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC revision 256 by ph10, Wed Sep 19 08:57:53 2007 UTC
# Line 29  Line 29 
29  //  //
30  // Author: Sanjay Ghemawat  // Author: Sanjay Ghemawat
31    
32    #ifdef HAVE_CONFIG_H
33    #include "config.h"
34    #endif
35    
36    #ifdef HAVE_WINDOWS_H
37    #define HAVE_STRTOQ 1
38    #define strtoll     _strtoui64
39    #define strtoull    _strtoi64
40    #endif
41    
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <stdio.h>  #include <stdio.h>
44  #include <ctype.h>  #include <ctype.h>
# Line 37  Line 47 
47  #include <errno.h>  #include <errno.h>
48  #include <string>  #include <string>
49  #include <algorithm>  #include <algorithm>
50  #include "config.h"  
51  // We need this to compile the proper dll on windows/msys.  This is copied  #include "pcrecpp_internal.h"
 // from pcre_internal.h.  It would probably be better just to include that.  
 #define PCRE_DEFINITION  /* Win32 __declspec(export) trigger for .dll */  
52  #include "pcre.h"  #include "pcre.h"
 #include "pcre_stringpiece.h"  
53  #include "pcrecpp.h"  #include "pcrecpp.h"
54    #include "pcre_stringpiece.h"
55    
56    
57  namespace pcrecpp {  namespace pcrecpp {
# Line 53  static const int kMaxArgs = 16; Line 61  static const int kMaxArgs = 16;
61  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace
62    
63  // Special object that stands in for no argument  // Special object that stands in for no argument
64  Arg no_arg((void*)NULL);  PCRECPP_EXP_DEFN Arg no_arg((void*)NULL);
65    
66  // If a regular expression has no error, its error_ field points here  // If a regular expression has no error, its error_ field points here
67  static const string empty_string;  static const string empty_string;
# Line 74  void RE::Init(const string& pat, const R Line 82  void RE::Init(const string& pat, const R
82    
83    re_partial_ = Compile(UNANCHORED);    re_partial_ = Compile(UNANCHORED);
84    if (re_partial_ != NULL) {    if (re_partial_ != NULL) {
85      // Check for complicated patterns.  The following change is      re_full_ = Compile(ANCHOR_BOTH);
     // conservative in that it may treat some "simple" patterns  
     // as "complex" (e.g., if the vertical bar is in a character  
     // class or is escaped).  But it seems good enough.  
     if (strchr(pat.c_str(), '|') == NULL) {  
       // Simple pattern: we can use position-based checks to perform  
       // fully anchored matches  
       re_full_ = re_partial_;  
     } else {  
       // We need a special pattern for anchored matches  
       re_full_ = Compile(ANCHOR_BOTH);  
     }  
86    }    }
87  }  }
88    
89  void RE::Cleanup() {  void RE::Cleanup() {
90    if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);    if (re_full_ != NULL)         (*pcre_free)(re_full_);
91    if (re_partial_ != NULL)                         (*pcre_free)(re_partial_);    if (re_partial_ != NULL)      (*pcre_free)(re_partial_);
92    if (error_ != &empty_string)                     delete error_;    if (error_ != &empty_string)  delete error_;
93  }  }
94    
95    
# Line 340  bool RE::Replace(const StringPiece& rewr Line 337  bool RE::Replace(const StringPiece& rewr
337    
338  // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.  // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
339  // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.  // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
340    // Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF.
341    
342  static int NewlineMode(int pcre_options) {  static int NewlineMode(int pcre_options) {
343    // TODO: if we can make it threadsafe, cache this var    // TODO: if we can make it threadsafe, cache this var
344    int newline_mode = 0;    int newline_mode = 0;
345    /* if (newline_mode) return newline_mode; */  // do this once it's cached    /* if (newline_mode) return newline_mode; */  // do this once it's cached
346    if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {    if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
347                          PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) {
348      newline_mode = (pcre_options &      newline_mode = (pcre_options &
349                      (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));                      (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
350                         PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF));
351    } else {    } else {
352      int newline;      int newline;
353      pcre_config(PCRE_CONFIG_NEWLINE, &newline);      pcre_config(PCRE_CONFIG_NEWLINE, &newline);
# Line 356  static int NewlineMode(int pcre_options) Line 357  static int NewlineMode(int pcre_options)
357        newline_mode = PCRE_NEWLINE_CR;        newline_mode = PCRE_NEWLINE_CR;
358      else if (newline == 3338)      else if (newline == 3338)
359        newline_mode = PCRE_NEWLINE_CRLF;        newline_mode = PCRE_NEWLINE_CRLF;
360        else if (newline == -1)
361          newline_mode = PCRE_NEWLINE_ANY;
362        else if (newline == -2)
363          newline_mode = PCRE_NEWLINE_ANYCRLF;
364      else      else
365        assert("" == "Unexpected return value from pcre_config(NEWLINE)");        assert("" == "Unexpected return value from pcre_config(NEWLINE)");
366    }    }
# Line 385  int RE::GlobalReplace(const StringPiece& Line 390  int RE::GlobalReplace(const StringPiece&
390        // Note it's better to call pcre_fullinfo() than to examine        // Note it's better to call pcre_fullinfo() than to examine
391        // all_options(), since options_ could have changed between        // all_options(), since options_ could have changed between
392        // compile-time and now, but this is simpler and safe enough.        // compile-time and now, but this is simpler and safe enough.
393          // Modified by PH to add ANY and ANYCRLF.
394        if (start+1 < static_cast<int>(str->length()) &&        if (start+1 < static_cast<int>(str->length()) &&
395            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
396            NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
397               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
398               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)
399              ) {
400          matchend++;          matchend++;
401        }        }
402        // We also need to advance more than one char if we're in utf8 mode.        // We also need to advance more than one char if we're in utf8 mode.
# Line 471  int RE::TryMatch(const StringPiece& text Line 480  int RE::TryMatch(const StringPiece& text
480      return 0;      return 0;
481    }    }
482    
483    pcre_extra extra = { 0 };    pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
484    if (options_.match_limit() > 0) {    if (options_.match_limit() > 0) {
485      extra.flags |= PCRE_EXTRA_MATCH_LIMIT;      extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
486      extra.match_limit = options_.match_limit();      extra.match_limit = options_.match_limit();
# Line 504  int RE::TryMatch(const StringPiece& text Line 513  int RE::TryMatch(const StringPiece& text
513      rc = vecsize / 2;      rc = vecsize / 2;
514    }    }
515    
   if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {  
     // We need an extra check to make sure that the match extended  
     // to the end of the input string  
     assert(vec[0] == 0);                 // PCRE_ANCHORED forces starting match  
     if (vec[1] != text.size()) return 0; // Did not get ending match  
   }  
   
516    return rc;    return rc;
517  }  }
518    
# Line 715  bool Arg::parse_short_radix(const char* Line 717  bool Arg::parse_short_radix(const char*
717    long r;    long r;
718    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
719    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range
720    *(reinterpret_cast<short*>(dest)) = r;    *(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
721    return true;    return true;
722  }  }
723    
# Line 726  bool Arg::parse_ushort_radix(const char* Line 728  bool Arg::parse_ushort_radix(const char*
728    unsigned long r;    unsigned long r;
729    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
730    if (r > USHRT_MAX) return false;                      // Out of range    if (r > USHRT_MAX) return false;                      // Out of range
731    *(reinterpret_cast<unsigned short*>(dest)) = r;    *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
732    return true;    return true;
733  }  }
734    

Legend:
Removed from v.96  
changed lines
  Added in v.256

  ViewVC Help
Powered by ViewVC 1.1.5