112 |
// T (where "bool T::ParseFrom(const char*, int)" exists) |
// T (where "bool T::ParseFrom(const char*, int)" exists) |
113 |
// NULL (the corresponding matched sub-pattern is not copied) |
// NULL (the corresponding matched sub-pattern is not copied) |
114 |
// |
// |
115 |
|
// CAVEAT: An optional sub-pattern that does not exist in the matched |
116 |
|
// string is assigned the empty string. Therefore, the following will |
117 |
|
// return false (because the empty string is not a valid number): |
118 |
|
// int number; |
119 |
|
// pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number); |
120 |
|
// |
121 |
// ----------------------------------------------------------------------- |
// ----------------------------------------------------------------------- |
122 |
// DO_MATCH |
// DO_MATCH |
123 |
// |
// |
331 |
|
|
332 |
|
|
333 |
#include <string> |
#include <string> |
|
#include <pcrecpparg.h> // defines the Arg class |
|
|
// These aren't technically needed here, but we include them |
|
|
// anyway so folks who include pcrecpp.h don't have to include |
|
|
// all these other header files as well. |
|
334 |
#include <pcre.h> |
#include <pcre.h> |
335 |
|
#include <pcrecpparg.h> // defines the Arg class |
336 |
|
// This isn't technically needed here, but we include it |
337 |
|
// anyway so folks who include pcrecpp.h don't have to. |
338 |
#include <pcre_stringpiece.h> |
#include <pcre_stringpiece.h> |
339 |
|
|
340 |
namespace pcrecpp { |
namespace pcrecpp { |
346 |
#define PCRE_IS_SET(o) \ |
#define PCRE_IS_SET(o) \ |
347 |
(all_options_ & o) == o |
(all_options_ & o) == o |
348 |
|
|
|
// We convert user-passed pointers into special Arg objects |
|
|
extern Arg no_arg; |
|
|
|
|
349 |
/***** Compiling regular expressions: the RE class *****/ |
/***** Compiling regular expressions: the RE class *****/ |
350 |
|
|
351 |
// RE_Options allow you to set options to be passed along to pcre, |
// RE_Options allow you to set options to be passed along to pcre, |
352 |
// along with other options we put on top of pcre. |
// along with other options we put on top of pcre. |
353 |
// Only 9 modifiers, plus match_limit and match_limit_recursion, |
// Only 9 modifiers, plus match_limit and match_limit_recursion, |
354 |
// are supported now. |
// are supported now. |
355 |
class RE_Options { |
class PCRECPP_EXP_DEFN RE_Options { |
356 |
public: |
public: |
357 |
// constructor |
// constructor |
358 |
RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {} |
RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {} |
400 |
return PCRE_IS_SET(PCRE_DOTALL); |
return PCRE_IS_SET(PCRE_DOTALL); |
401 |
} |
} |
402 |
RE_Options &set_dotall(bool x) { |
RE_Options &set_dotall(bool x) { |
403 |
PCRE_SET_OR_CLEAR(x,PCRE_DOTALL); |
PCRE_SET_OR_CLEAR(x, PCRE_DOTALL); |
404 |
} |
} |
405 |
|
|
406 |
bool extended() const { |
bool extended() const { |
407 |
return PCRE_IS_SET(PCRE_EXTENDED); |
return PCRE_IS_SET(PCRE_EXTENDED); |
408 |
} |
} |
409 |
RE_Options &set_extended(bool x) { |
RE_Options &set_extended(bool x) { |
410 |
PCRE_SET_OR_CLEAR(x,PCRE_EXTENDED); |
PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED); |
411 |
} |
} |
412 |
|
|
413 |
bool dollar_endonly() const { |
bool dollar_endonly() const { |
414 |
return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY); |
return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY); |
415 |
} |
} |
416 |
RE_Options &set_dollar_endonly(bool x) { |
RE_Options &set_dollar_endonly(bool x) { |
417 |
PCRE_SET_OR_CLEAR(x,PCRE_DOLLAR_ENDONLY); |
PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY); |
418 |
} |
} |
419 |
|
|
420 |
bool extra() const { |
bool extra() const { |
421 |
return PCRE_IS_SET( PCRE_EXTRA); |
return PCRE_IS_SET(PCRE_EXTRA); |
422 |
} |
} |
423 |
RE_Options &set_extra(bool x) { |
RE_Options &set_extra(bool x) { |
424 |
PCRE_SET_OR_CLEAR(x, PCRE_EXTRA); |
PCRE_SET_OR_CLEAR(x, PCRE_EXTRA); |
484 |
// Interface for regular expression matching. Also corresponds to a |
// Interface for regular expression matching. Also corresponds to a |
485 |
// pre-compiled regular expression. An "RE" object is safe for |
// pre-compiled regular expression. An "RE" object is safe for |
486 |
// concurrent use by multiple threads. |
// concurrent use by multiple threads. |
487 |
class RE { |
class PCRECPP_EXP_DEFN RE { |
488 |
public: |
public: |
489 |
// We provide implicit conversions from strings so that users can |
// We provide implicit conversions from strings so that users can |
490 |
// pass in a string or a "const char*" wherever an "RE" is expected. |
// pass in a string or a "const char*" wherever an "RE" is expected. |
491 |
|
RE(const string& pat) { Init(pat, NULL); } |
492 |
|
RE(const string& pat, const RE_Options& option) { Init(pat, &option); } |
493 |
RE(const char* pat) { Init(pat, NULL); } |
RE(const char* pat) { Init(pat, NULL); } |
494 |
RE(const char *pat, const RE_Options& option) { Init(pat, &option); } |
RE(const char* pat, const RE_Options& option) { Init(pat, &option); } |
495 |
RE(const string& pat) { Init(pat.c_str(), NULL); } |
RE(const unsigned char* pat) { |
496 |
RE(const string& pat, const RE_Options& option) { Init(pat.c_str(), &option); } |
Init(reinterpret_cast<const char*>(pat), NULL); |
497 |
|
} |
498 |
|
RE(const unsigned char* pat, const RE_Options& option) { |
499 |
|
Init(reinterpret_cast<const char*>(pat), &option); |
500 |
|
} |
501 |
|
|
502 |
|
// Copy constructor & assignment - note that these are expensive |
503 |
|
// because they recompile the expression. |
504 |
|
RE(const RE& re) { Init(re.pattern_, &re.options_); } |
505 |
|
const RE& operator=(const RE& re) { |
506 |
|
if (this != &re) { |
507 |
|
Cleanup(); |
508 |
|
|
509 |
|
// This is the code that originally came from Google |
510 |
|
// Init(re.pattern_.c_str(), &re.options_); |
511 |
|
|
512 |
|
// This is the replacement from Ari Pollak |
513 |
|
Init(re.pattern_, &re.options_); |
514 |
|
} |
515 |
|
return *this; |
516 |
|
} |
517 |
|
|
518 |
|
|
519 |
~RE(); |
~RE(); |
520 |
|
|
614 |
const StringPiece &text, |
const StringPiece &text, |
615 |
string *out) const; |
string *out) const; |
616 |
|
|
617 |
|
// Escapes all potentially meaningful regexp characters in |
618 |
|
// 'unquoted'. The returned string, used as a regular expression, |
619 |
|
// will exactly match the original string. For example, |
620 |
|
// 1.5-2.0? |
621 |
|
// may become: |
622 |
|
// 1\.5\-2\.0\? |
623 |
|
// Note QuoteMeta behaves the same as perl's QuoteMeta function, |
624 |
|
// *except* that it escapes the NUL character (\0) as backslash + 0, |
625 |
|
// rather than backslash + NUL. |
626 |
|
static string QuoteMeta(const StringPiece& unquoted); |
627 |
|
|
628 |
|
|
629 |
/***** Generic matching interface *****/ |
/***** Generic matching interface *****/ |
630 |
|
|
631 |
// Type of match (TODO: Should be restructured as part of RE_Options) |
// Type of match (TODO: Should be restructured as part of RE_Options) |
646 |
// regexp wasn't valid on construction. |
// regexp wasn't valid on construction. |
647 |
int NumberOfCapturingGroups() const; |
int NumberOfCapturingGroups() const; |
648 |
|
|
649 |
|
// The default value for an argument, to indicate no arg was passed in |
650 |
|
static Arg no_arg; |
651 |
|
|
652 |
private: |
private: |
653 |
|
|
654 |
void Init(const char* pattern, const RE_Options* options); |
void Init(const string& pattern, const RE_Options* options); |
655 |
|
void Cleanup(); |
656 |
|
|
657 |
// Match against "text", filling in "vec" (up to "vecsize" * 2/3) with |
// Match against "text", filling in "vec" (up to "vecsize" * 2/3) with |
658 |
// pairs of integers for the beginning and end positions of matched |
// pairs of integers for the beginning and end positions of matched |
696 |
pcre* re_full_; // For full matches |
pcre* re_full_; // For full matches |
697 |
pcre* re_partial_; // For partial matches |
pcre* re_partial_; // For partial matches |
698 |
const string* error_; // Error indicator (or points to empty string) |
const string* error_; // Error indicator (or points to empty string) |
|
|
|
|
// Don't allow the default copy or assignment constructors -- |
|
|
// they're expensive and too easy to do by accident. |
|
|
RE(const RE&); |
|
|
void operator=(const RE&); |
|
699 |
}; |
}; |
700 |
|
|
701 |
} // namespace pcrecpp |
} // namespace pcrecpp |