61 |
// If the user doesn't ask for any options, we just use this one |
// If the user doesn't ask for any options, we just use this one |
62 |
// File-local fallback options: RE::Init() copies this object into options_
// when it is called with a NULL RE_Options pointer.
static RE_Options default_options; |
static RE_Options default_options; |
63 |
|
|
64 |
void RE::Init(const char* pat, const RE_Options* options) { |
void RE::Init(const string& pat, const RE_Options* options) { |
65 |
pattern_ = pat; |
pattern_ = pat; |
66 |
if (options == NULL) { |
if (options == NULL) { |
67 |
options_ = default_options; |
options_ = default_options; |
78 |
// conservative in that it may treat some "simple" patterns |
// conservative in that it may treat some "simple" patterns |
79 |
// as "complex" (e.g., if the vertical bar is in a character |
// as "complex" (e.g., if the vertical bar is in a character |
80 |
// class or is escaped). But it seems good enough. |
// class or is escaped). But it seems good enough. |
81 |
if (strchr(pat, '|') == NULL) { |
if (strchr(pat.c_str(), '|') == NULL) { |
82 |
// Simple pattern: we can use position-based checks to perform |
// Simple pattern: we can use position-based checks to perform |
83 |
// fully anchored matches |
// fully anchored matches |
84 |
re_full_ = re_partial_; |
re_full_ = re_partial_; |
89 |
} |
} |
90 |
} |
} |
91 |
|
|
92 |
RE::~RE() { |
void RE::Cleanup() { |
93 |
if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_); |
if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_); |
94 |
if (re_partial_ != NULL) (*pcre_free)(re_partial_); |
if (re_partial_ != NULL) (*pcre_free)(re_partial_); |
95 |
if (error_ != &empty_string) delete error_; |
if (error_ != &empty_string) delete error_; |
96 |
} |
} |
97 |
|
|
98 |
|
|
99 |
|
RE::~RE() { |
100 |
|
Cleanup(); |
101 |
|
} |
102 |
|
|
103 |
|
|
104 |
pcre* RE::Compile(Anchor anchor) { |
pcre* RE::Compile(Anchor anchor) { |
105 |
// First, convert RE_Options into pcre options |
// First, convert RE_Options into pcre options |
106 |
int pcre_options = 0; |
int pcre_options = 0; |
430 |
return Rewrite(out, rewrite, text, vec, matches); |
return Rewrite(out, rewrite, text, vec, matches); |
431 |
} |
} |
432 |
|
|
433 |
|
// Returns a copy of "unquoted" in which every ASCII byte outside
// [A-Za-z0-9_] is escaped, so that the result can be used inside a pattern
// to match the original text literally.
//
//  - Bytes with the high bit set (part of a UTF8 or Latin1 character) are
//    copied through unescaped.
//  - A NUL byte is written as the four characters \x00 rather than as a
//    backslash followed by a raw NUL.
//  - Every other non-word ASCII byte is preceded by a backslash.
/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
  string result;

  // Escape any ascii character not in [A-Za-z_0-9].
  //
  // Note that it's legal to escape a character even if it has no
  // special meaning in a regular expression -- so this function does
  // that.  (Apart from the NUL handling below, this matches the perl
  // function of the same name; see `perldoc -f quotemeta`.)
  for (int ii = 0; ii < unquoted.size(); ++ii) {
    const char c = unquoted[ii];

    if (c == '\0') {
      // The pattern is later handled as a C string (RE::Init scans it via
      // pat.c_str()), so an embedded raw NUL would cut it off right after a
      // dangling backslash.  Use a hex escape instead; "\\0" is avoided
      // because a following digit would be read as part of an octal escape.
      result += "\\x00";
      continue;
    }

    // Note that using 'isalnum' here raises the benchmark time from
    // 32ns to 58ns:
    if ((c < 'a' || c > 'z') &&
        (c < 'A' || c > 'Z') &&
        (c < '0' || c > '9') &&
        c != '_' &&
        // If this is the part of a UTF8 or Latin1 character, we need
        // to copy this byte without escaping.  Experimentally this is
        // what works correctly with the regexp library.
        !(c & 128)) {
      result += '\\';
    }
    result += c;
  }

  return result;
}
460 |
|
|
461 |
/***** Actual matching and rewriting code *****/ |
/***** Actual matching and rewriting code *****/ |
462 |
|
|
463 |
int RE::TryMatch(const StringPiece& text, |
int RE::TryMatch(const StringPiece& text, |
843 |
return parse_##name##_radix(str, n, dest, 0); \ |
return parse_##name##_radix(str, n, dest, 0); \ |
844 |
} |
} |
845 |
|
|
846 |
DEFINE_INTEGER_PARSERS(short); |
DEFINE_INTEGER_PARSERS(short) /* */ |
847 |
DEFINE_INTEGER_PARSERS(ushort); |
DEFINE_INTEGER_PARSERS(ushort) /* */ |
848 |
DEFINE_INTEGER_PARSERS(int); |
DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */ |
849 |
DEFINE_INTEGER_PARSERS(uint); |
DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */ |
850 |
DEFINE_INTEGER_PARSERS(long); |
DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */ |
851 |
DEFINE_INTEGER_PARSERS(ulong); |
DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */ |
852 |
DEFINE_INTEGER_PARSERS(longlong); |
DEFINE_INTEGER_PARSERS(longlong) /* */ |
853 |
DEFINE_INTEGER_PARSERS(ulonglong); |
DEFINE_INTEGER_PARSERS(ulonglong) /* */ |
854 |
|
|
855 |
#undef DEFINE_INTEGER_PARSERS |
#undef DEFINE_INTEGER_PARSERS |
856 |
|
|