1 |
// Copyright (c) 2005, Google Inc. |
// -*- coding: utf-8 -*- |
2 |
|
// |
3 |
|
// Copyright (c) 2005 - 2006, Google Inc. |
4 |
// All rights reserved. |
// All rights reserved. |
5 |
// |
// |
6 |
// Redistribution and use in source and binary forms, with or without |
// Redistribution and use in source and binary forms, with or without |
33 |
// |
// |
34 |
// TODO: Test extractions for PartialMatch/Consume |
// TODO: Test extractions for PartialMatch/Consume |
35 |
|
|
36 |
|
#ifdef HAVE_CONFIG_H |
37 |
|
#include <config.h> |
38 |
|
#endif |
39 |
|
|
40 |
#include <stdio.h> |
#include <stdio.h> |
41 |
|
#include <cassert> |
42 |
#include <vector> |
#include <vector> |
|
#include "config.h" |
|
43 |
#include "pcrecpp.h" |
#include "pcrecpp.h" |
44 |
|
|
45 |
using pcrecpp::StringPiece; |
using pcrecpp::StringPiece; |
265 |
"aaaaa", |
"aaaaa", |
266 |
"bbaaaaa", |
"bbaaaaa", |
267 |
"bbabbabbabbabbabb" }, |
"bbabbabbabbabbabb" }, |
268 |
|
{ "b*", |
269 |
|
"bb", |
270 |
|
"aa\naa\n", |
271 |
|
"bbaa\naa\n", |
272 |
|
"bbabbabb\nbbabbabb\nbb" }, |
273 |
|
{ "b*", |
274 |
|
"bb", |
275 |
|
"aa\raa\r", |
276 |
|
"bbaa\raa\r", |
277 |
|
"bbabbabb\rbbabbabb\rbb" }, |
278 |
|
{ "b*", |
279 |
|
"bb", |
280 |
|
"aa\r\naa\r\n", |
281 |
|
"bbaa\r\naa\r\n", |
282 |
|
"bbabbabb\r\nbbabbabb\r\nbb" }, |
283 |
|
#ifdef SUPPORT_UTF8 |
284 |
|
{ "b*", |
285 |
|
"bb", |
286 |
|
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
287 |
|
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
288 |
|
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" }, |
289 |
|
{ "b*", |
290 |
|
"bb", |
291 |
|
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
292 |
|
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
293 |
|
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
294 |
|
"bb\nbb""\xE3\x81\xB8""bb\r\nbb") }, |
295 |
|
#endif |
296 |
{ "", NULL, NULL, NULL, NULL } |
{ "", NULL, NULL, NULL, NULL } |
297 |
}; |
}; |
298 |
|
|
299 |
|
#ifdef SUPPORT_UTF8 |
300 |
|
const bool support_utf8 = true; |
301 |
|
#else |
302 |
|
const bool support_utf8 = false; |
303 |
|
#endif |
304 |
|
|
305 |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
306 |
|
RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); |
307 |
|
assert(re.error().empty()); |
308 |
string one(t->original); |
string one(t->original); |
309 |
CHECK(RE(t->regexp).Replace(t->rewrite, &one)); |
CHECK(re.Replace(t->rewrite, &one)); |
310 |
CHECK_EQ(one, t->single); |
CHECK_EQ(one, t->single); |
311 |
string all(t->original); |
string all(t->original); |
312 |
CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0); |
CHECK(re.GlobalReplace(t->rewrite, &all) > 0); |
313 |
CHECK_EQ(all, t->global); |
CHECK_EQ(all, t->global); |
314 |
} |
} |
315 |
|
|
316 |
|
// One final test: test \r\n replacement when we're not in CRLF mode |
317 |
|
{ |
318 |
|
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
319 |
|
assert(re.error().empty()); |
320 |
|
string all("aa\r\naa\r\n"); |
321 |
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
322 |
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
323 |
|
} |
324 |
|
{ |
325 |
|
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
326 |
|
assert(re.error().empty()); |
327 |
|
string all("aa\r\naa\r\n"); |
328 |
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
329 |
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
330 |
|
} |
331 |
|
// TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
332 |
|
// Alas, the answer depends on how pcre was compiled. |
333 |
} |
} |
334 |
|
|
335 |
static void TestExtract() { |
static void TestExtract() { |
450 |
CHECK(re4.FullMatch(text_bad) == false); |
CHECK(re4.FullMatch(text_bad) == false); |
451 |
} |
} |
452 |
|
|
453 |
|
// A meta-quoted string, interpreted as a pattern, should always match |
454 |
|
// the original unquoted string. |
455 |
|
static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { |
456 |
|
string quoted = RE::QuoteMeta(unquoted); |
457 |
|
RE re(quoted, options); |
458 |
|
CHECK(re.FullMatch(unquoted)); |
459 |
|
} |
460 |
|
|
461 |
|
// A string containing meaningful regexp characters, which is then meta- |
462 |
|
// quoted, should not generally match a string the unquoted string does. |
463 |
|
static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
464 |
|
RE_Options options = RE_Options()) { |
465 |
|
string quoted = RE::QuoteMeta(unquoted); |
466 |
|
RE re(quoted, options); |
467 |
|
CHECK(!re.FullMatch(should_not_match)); |
468 |
|
} |
469 |
|
|
470 |
|
// Tests that quoted meta characters match their original strings, |
471 |
|
// and that a few things that shouldn't match indeed do not. |
472 |
|
static void TestQuotaMetaSimple() { |
473 |
|
TestQuoteMeta("foo"); |
474 |
|
TestQuoteMeta("foo.bar"); |
475 |
|
TestQuoteMeta("foo\\.bar"); |
476 |
|
TestQuoteMeta("[1-9]"); |
477 |
|
TestQuoteMeta("1.5-2.0?"); |
478 |
|
TestQuoteMeta("\\d"); |
479 |
|
TestQuoteMeta("Who doesn't like ice cream?"); |
480 |
|
TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); |
481 |
|
TestQuoteMeta("((?!)xxx).*yyy"); |
482 |
|
TestQuoteMeta("(["); |
483 |
|
} |
484 |
|
|
485 |
|
static void TestQuoteMetaSimpleNegative() { |
486 |
|
NegativeTestQuoteMeta("foo", "bar"); |
487 |
|
NegativeTestQuoteMeta("...", "bar"); |
488 |
|
NegativeTestQuoteMeta("\\.", "."); |
489 |
|
NegativeTestQuoteMeta("\\.", ".."); |
490 |
|
NegativeTestQuoteMeta("(a)", "a"); |
491 |
|
NegativeTestQuoteMeta("(a|b)", "a"); |
492 |
|
NegativeTestQuoteMeta("(a|b)", "(a)"); |
493 |
|
NegativeTestQuoteMeta("(a|b)", "a|b"); |
494 |
|
NegativeTestQuoteMeta("[0-9]", "0"); |
495 |
|
NegativeTestQuoteMeta("[0-9]", "0-9"); |
496 |
|
NegativeTestQuoteMeta("[0-9]", "[9]"); |
497 |
|
NegativeTestQuoteMeta("((?!)xxx)", "xxx"); |
498 |
|
} |
499 |
|
|
500 |
|
static void TestQuoteMetaLatin1() { |
501 |
|
TestQuoteMeta("3\xb2 = 9"); |
502 |
|
} |
503 |
|
|
504 |
|
static void TestQuoteMetaUtf8() { |
505 |
|
#ifdef SUPPORT_UTF8 |
506 |
|
TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); |
507 |
|
TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 |
508 |
|
TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) |
509 |
|
TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character |
510 |
|
TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) |
511 |
|
TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) |
512 |
|
TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work |
513 |
|
NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) |
514 |
|
"27\\\xc2\\\xb0", |
515 |
|
pcrecpp::UTF8()); |
516 |
|
#endif |
517 |
|
} |
518 |
|
|
519 |
|
static void TestQuoteMetaAll() { |
520 |
|
printf("Testing QuoteMeta\n"); |
521 |
|
TestQuotaMetaSimple(); |
522 |
|
TestQuoteMetaSimpleNegative(); |
523 |
|
TestQuoteMetaLatin1(); |
524 |
|
TestQuoteMetaUtf8(); |
525 |
|
} |
526 |
|
|
527 |
// |
// |
528 |
// Options tests contributed by |
// Options tests contributed by |
529 |
// Giuseppe Maxia, CTO, Stardata s.r.l. |
// Giuseppe Maxia, CTO, Stardata s.r.l. |
746 |
Test_all_options(); |
Test_all_options(); |
747 |
} |
} |
748 |
|
|
749 |
|
static void TestConstructors() { |
750 |
|
printf("Testing constructors\n"); |
751 |
|
|
752 |
|
RE_Options options; |
753 |
|
options.set_dotall(true); |
754 |
|
const char *str = "HELLO\n" "cruel\n" "world"; |
755 |
|
|
756 |
|
RE orig("HELLO.*world", options); |
757 |
|
CHECK(orig.FullMatch(str)); |
758 |
|
|
759 |
|
RE copy1(orig); |
760 |
|
CHECK(copy1.FullMatch(str)); |
761 |
|
|
762 |
|
RE copy2("not a match"); |
763 |
|
CHECK(!copy2.FullMatch(str)); |
764 |
|
copy2 = copy1; |
765 |
|
CHECK(copy2.FullMatch(str)); |
766 |
|
copy2 = orig; |
767 |
|
CHECK(copy2.FullMatch(str)); |
768 |
|
|
769 |
|
// Make sure when we assign to ourselves, nothing bad happens |
770 |
|
orig = orig; |
771 |
|
copy1 = copy1; |
772 |
|
copy2 = copy2; |
773 |
|
CHECK(orig.FullMatch(str)); |
774 |
|
CHECK(copy1.FullMatch(str)); |
775 |
|
CHECK(copy2.FullMatch(str)); |
776 |
|
} |
777 |
|
|
778 |
int main(int argc, char** argv) { |
int main(int argc, char** argv) { |
779 |
// Treat any flag as --help |
// Treat any flag as --help |
780 |
if (argc > 1 && argv[1][0] == '-') { |
if (argc > 1 && argv[1][0] == '-') { |
809 |
/***** FullMatch with no args *****/ |
/***** FullMatch with no args *****/ |
810 |
|
|
811 |
CHECK(RE("h.*o").FullMatch("hello")); |
CHECK(RE("h.*o").FullMatch("hello")); |
812 |
CHECK(!RE("h.*o").FullMatch("othello")); |
CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front |
813 |
CHECK(!RE("h.*o").FullMatch("hello!")); |
CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end |
814 |
|
CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op |
815 |
|
CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op |
816 |
|
CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops |
817 |
|
|
818 |
/***** FullMatch with args *****/ |
/***** FullMatch with args *****/ |
819 |
|
|
908 |
CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); |
CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); |
909 |
} |
} |
910 |
#ifdef HAVE_LONG_LONG |
#ifdef HAVE_LONG_LONG |
911 |
|
# if defined(__MINGW__) || defined(__MINGW32__) |
912 |
|
# define LLD "%I64d" |
913 |
|
# define LLU "%I64u" |
914 |
|
# else |
915 |
|
# define LLD "%lld" |
916 |
|
# define LLU "%llu" |
917 |
|
# endif |
918 |
{ |
{ |
919 |
long long v; |
long long v; |
920 |
static const long long max_value = 0x7fffffffffffffffLL; |
static const long long max_value = 0x7fffffffffffffffLL; |
924 |
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
925 |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); |
926 |
|
|
927 |
snprintf(buf, sizeof(buf), "%lld", max_value); |
snprintf(buf, sizeof(buf), LLD, max_value); |
928 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
929 |
|
|
930 |
snprintf(buf, sizeof(buf), "%lld", min_value); |
snprintf(buf, sizeof(buf), LLD, min_value); |
931 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); |
932 |
|
|
933 |
snprintf(buf, sizeof(buf), "%lld", max_value); |
snprintf(buf, sizeof(buf), LLD, max_value); |
934 |
assert(buf[strlen(buf)-1] != '9'); |
assert(buf[strlen(buf)-1] != '9'); |
935 |
buf[strlen(buf)-1]++; |
buf[strlen(buf)-1]++; |
936 |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
937 |
|
|
938 |
snprintf(buf, sizeof(buf), "%lld", min_value); |
snprintf(buf, sizeof(buf), LLD, min_value); |
939 |
assert(buf[strlen(buf)-1] != '9'); |
assert(buf[strlen(buf)-1] != '9'); |
940 |
buf[strlen(buf)-1]++; |
buf[strlen(buf)-1]++; |
941 |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
951 |
CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); |
CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); |
952 |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); |
953 |
|
|
954 |
snprintf(buf, sizeof(buf), "%llu", max_value); |
snprintf(buf, sizeof(buf), LLU, max_value); |
955 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
956 |
|
|
957 |
assert(buf[strlen(buf)-1] != '9'); |
assert(buf[strlen(buf)-1] != '9'); |
1103 |
CHECK(RE("h.*o").PartialMatch("hello!")); |
CHECK(RE("h.*o").PartialMatch("hello!")); |
1104 |
CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); |
CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); |
1105 |
|
|
1106 |
|
/***** other tests *****/ |
1107 |
|
|
1108 |
RadixTests(); |
RadixTests(); |
1109 |
TestReplace(); |
TestReplace(); |
1110 |
TestExtract(); |
TestExtract(); |
1111 |
TestConsume(); |
TestConsume(); |
1112 |
TestFindAndConsume(); |
TestFindAndConsume(); |
1113 |
|
TestQuoteMetaAll(); |
1114 |
TestMatchNumberPeculiarity(); |
TestMatchNumberPeculiarity(); |
1115 |
|
|
1116 |
// Check the pattern() accessor |
// Check the pattern() accessor |
1230 |
VERBOSE_TEST = true; |
VERBOSE_TEST = true; |
1231 |
TestOptions(); |
TestOptions(); |
1232 |
|
|
1233 |
|
// Test the constructors |
1234 |
|
TestConstructors(); |
1235 |
|
|
1236 |
// Done |
// Done |
1237 |
printf("OK\n"); |
printf("OK\n"); |
1238 |
|
|