29 |
// |
// |
30 |
// Author: Sanjay Ghemawat |
// Author: Sanjay Ghemawat |
31 |
|
|
32 |
|
#ifdef HAVE_CONFIG_H |
33 |
|
#include <config.h> |
34 |
|
#endif |
35 |
|
|
36 |
#include <stdlib.h> |
#include <stdlib.h> |
37 |
#include <stdio.h> |
#include <stdio.h> |
38 |
#include <ctype.h> |
#include <ctype.h> |
41 |
#include <errno.h> |
#include <errno.h> |
42 |
#include <string> |
#include <string> |
43 |
#include <algorithm> |
#include <algorithm> |
44 |
#include "config.h" |
|
45 |
// We need this to compile the proper dll on windows/msys. This is copied |
#include "pcrecpp_internal.h" |
46 |
// from pcre_internal.h. It would probably be better just to include that. |
#include <pcre.h> |
|
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ |
|
|
#include "pcre.h" |
|
|
#include "pcre_stringpiece.h" |
|
47 |
#include "pcrecpp.h" |
#include "pcrecpp.h" |
48 |
|
#include "pcre_stringpiece.h" |
49 |
|
|
50 |
|
|
51 |
namespace pcrecpp { |
namespace pcrecpp { |
55 |
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace |
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace |
56 |
|
|
57 |
// Special object that stands-in for no argument |
// Special object that stands-in for no argument |
58 |
Arg no_arg((void*)NULL); |
PCRECPP_EXP_DEFN Arg no_arg((void*)NULL); |
59 |
|
|
60 |
// If a regular expression has no error, its error_ field points here |
// If a regular expression has no error, its error_ field points here |
61 |
static const string empty_string; |
static const string empty_string; |
63 |
// If the user doesn't ask for any options, we just use this one |
// If the user doesn't ask for any options, we just use this one |
64 |
static RE_Options default_options; |
static RE_Options default_options; |
65 |
|
|
66 |
void RE::Init(const char* pat, const RE_Options* options) { |
void RE::Init(const string& pat, const RE_Options* options) { |
67 |
pattern_ = pat; |
pattern_ = pat; |
68 |
if (options == NULL) { |
if (options == NULL) { |
69 |
options_ = default_options; |
options_ = default_options; |
76 |
|
|
77 |
re_partial_ = Compile(UNANCHORED); |
re_partial_ = Compile(UNANCHORED); |
78 |
if (re_partial_ != NULL) { |
if (re_partial_ != NULL) { |
79 |
// Check for complicated patterns. The following change is |
re_full_ = Compile(ANCHOR_BOTH); |
|
// conservative in that it may treat some "simple" patterns |
|
|
// as "complex" (e.g., if the vertical bar is in a character |
|
|
// class or is escaped). But it seems good enough. |
|
|
if (strchr(pat, '|') == NULL) { |
|
|
// Simple pattern: we can use position-based checks to perform |
|
|
// fully anchored matches |
|
|
re_full_ = re_partial_; |
|
|
} else { |
|
|
// We need a special pattern for anchored matches |
|
|
re_full_ = Compile(ANCHOR_BOTH); |
|
|
} |
|
80 |
} |
} |
81 |
} |
} |
82 |
|
|
83 |
|
void RE::Cleanup() { |
84 |
|
if (re_full_ != NULL) (*pcre_free)(re_full_); |
85 |
|
if (re_partial_ != NULL) (*pcre_free)(re_partial_); |
86 |
|
if (error_ != &empty_string) delete error_; |
87 |
|
} |
88 |
|
|
89 |
|
|
90 |
// Destructor. Releasing the compiled patterns and the error string is
// delegated to Cleanup() so that there is exactly one release path instead of
// a second, hand-copied sequence of frees here.
RE::~RE() {
  Cleanup();
}
93 |
|
|
94 |
|
|
95 |
pcre* RE::Compile(Anchor anchor) { |
pcre* RE::Compile(Anchor anchor) { |
96 |
// First, convert RE_Options into pcre options |
// First, convert RE_Options into pcre options |
97 |
int pcre_options = 0; |
int pcre_options = 0; |
421 |
return Rewrite(out, rewrite, text, vec, matches); |
return Rewrite(out, rewrite, text, vec, matches); |
422 |
} |
} |
423 |
|
|
424 |
|
// Returns a copy of "unquoted" in which every ASCII character outside
// [A-Za-z_0-9] is preceded by a backslash, so the result matches the input
// literally when used as a pattern.
//
// Note that it's legal to escape a character even if it has no special
// meaning in a regular expression -- so this function does that.  (This
// also mirrors the perl function of the same name; see
// `perldoc -f quotemeta`.)
//
// Bytes with the high bit set are copied unescaped, and an embedded NUL is
// written as the escape sequence \x00 (see below).
/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
  string result;
  // Usual worst case is one backslash per input byte.  This is only a
  // capacity hint; the string still grows if NUL bytes expand further.
  result.reserve(unquoted.size() * 2);

  for (int ii = 0; ii < unquoted.size(); ++ii) {
    const char ch = unquoted[ii];

    if (ch == '\0') {
      // A backslash followed by a raw NUL would cut the pattern short once
      // it is handed to the regexp library as a C string.  Spell the byte
      // out instead.  "\x00" is used rather than "\0" because a digit
      // following "\0" would be read as part of an octal escape.
      result += "\\x00";
      continue;
    }

    // Note that using 'isalnum' here raises the benchmark time from
    // 32ns to 58ns, hence the explicit range checks.
    const bool is_word_char = (ch >= 'a' && ch <= 'z') ||
                              (ch >= 'A' && ch <= 'Z') ||
                              (ch >= '0' && ch <= '9') ||
                              ch == '_';

    // If this is part of a UTF8 or Latin1 character, we need to copy this
    // byte without escaping.  Experimentally this is what works correctly
    // with the regexp library.
    const bool is_high_byte = (ch & 128) != 0;

    if (!is_word_char && !is_high_byte) {
      result += '\\';
    }
    result += ch;
  }

  return result;
}
451 |
|
|
452 |
/***** Actual matching and rewriting code *****/ |
/***** Actual matching and rewriting code *****/ |
453 |
|
|
454 |
int RE::TryMatch(const StringPiece& text, |
int RE::TryMatch(const StringPiece& text, |
462 |
return 0; |
return 0; |
463 |
} |
} |
464 |
|
|
465 |
pcre_extra extra = { 0 }; |
pcre_extra extra = { 0, 0, 0, 0, 0, 0 }; |
466 |
if (options_.match_limit() > 0) { |
if (options_.match_limit() > 0) { |
467 |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT; |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT; |
468 |
extra.match_limit = options_.match_limit(); |
extra.match_limit = options_.match_limit(); |
495 |
rc = vecsize / 2; |
rc = vecsize / 2; |
496 |
} |
} |
497 |
|
|
|
if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) { |
|
|
// We need an extra check to make sure that the match extended |
|
|
// to the end of the input string |
|
|
assert(vec[0] == 0); // PCRE_ANCHORED forces starting match |
|
|
if (vec[1] != text.size()) return 0; // Did not get ending match |
|
|
} |
|
|
|
|
498 |
return rc; |
return rc; |
499 |
} |
} |
500 |
|
|
827 |
return parse_##name##_radix(str, n, dest, 0); \ |
return parse_##name##_radix(str, n, dest, 0); \ |
828 |
} |
} |
829 |
|
|
830 |
DEFINE_INTEGER_PARSERS(short); |
DEFINE_INTEGER_PARSERS(short) /* */ |
831 |
DEFINE_INTEGER_PARSERS(ushort); |
DEFINE_INTEGER_PARSERS(ushort) /* */ |
832 |
DEFINE_INTEGER_PARSERS(int); |
DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */ |
833 |
DEFINE_INTEGER_PARSERS(uint); |
DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */ |
834 |
DEFINE_INTEGER_PARSERS(long); |
DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */ |
835 |
DEFINE_INTEGER_PARSERS(ulong); |
DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */ |
836 |
DEFINE_INTEGER_PARSERS(longlong); |
DEFINE_INTEGER_PARSERS(longlong) /* */ |
837 |
DEFINE_INTEGER_PARSERS(ulonglong); |
DEFINE_INTEGER_PARSERS(ulonglong) /* */ |
838 |
|
|
839 |
#undef DEFINE_INTEGER_PARSERS |
#undef DEFINE_INTEGER_PARSERS |
840 |
|
|