--- code/trunk/pcrecpp_unittest.cc 2007/09/17 10:09:22 248 +++ code/trunk/pcrecpp_unittest.cc 2008/03/08 17:24:02 326 @@ -37,10 +37,6 @@ #include "config.h" #endif -#ifdef HAVE_WINDOWS_H -#define snprintf _snprintf -#endif - #include #include #include @@ -114,8 +110,8 @@ initial_size = VirtualProcessSize(); printf("Size after 50000: %llu\n", initial_size); } - char buf[100]; - snprintf(buf, sizeof(buf), "pat%09d", i); + char buf[100]; // definitely big enough + sprintf(buf, "pat%09d", i); RE newre(buf); } uint64 final_size = VirtualProcessSize(); @@ -217,87 +213,103 @@ const char *original; const char *single; const char *global; + int global_count; // the expected return value from ReplaceAll }; static const ReplaceTest tests[] = { { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", "\\2\\1ay", "the quick brown fox jumps over the lazy dogs.", "ethay quick brown fox jumps over the lazy dogs.", - "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." }, + "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", + 9 }, { "\\w+", "\\0-NOSPAM", "paul.haahr@google.com", "paul-NOSPAM.haahr@google.com", - "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" }, + "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", + 4 }, { "^", "(START)", "foo", "(START)foo", - "(START)foo" }, + "(START)foo", + 1 }, { "^", "(START)", "", "(START)", - "(START)" }, + "(START)", + 1 }, { "$", "(END)", "", "(END)", - "(END)" }, + "(END)", + 1 }, { "b", "bb", "ababababab", "abbabababab", - "abbabbabbabbabb" }, + "abbabbabbabbabb", + 5 }, { "b", "bb", "bbbbbb", "bbbbbbb", - "bbbbbbbbbbbb" }, + "bbbbbbbbbbbb", + 6 }, { "b+", "bb", "bbbbbb", "bb", - "bb" }, + "bb", + 1 }, { "b*", "bb", "bbbbbb", "bb", - "bb" }, + "bb", + 1 }, { "b*", "bb", "aaaaa", "bbaaaaa", - "bbabbabbabbabbabb" }, + "bbabbabbabbabbabb", + 6 }, { "b*", "bb", "aa\naa\n", "bbaa\naa\n", - "bbabbabb\nbbabbabb\nbb" }, + "bbabbabb\nbbabbabb\nbb", + 7 }, { "b*", "bb", "aa\raa\r", "bbaa\raa\r", - "bbabbabb\rbbabbabb\rbb" }, + "bbabbabb\rbbabbabb\rbb", + 7 }, { "b*", "bb", "aa\r\naa\r\n", "bbaa\r\naa\r\n", - "bbabbabb\r\nbbabbabb\r\nbb" }, + "bbabbabb\r\nbbabbabb\r\nbb", + 7 }, #ifdef SUPPORT_UTF8 { "b*", "bb", "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", - "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" }, + "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", + 5 }, { "b*", "bb", "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" - "bb\nbb""\xE3\x81\xB8""bb\r\nbb") }, + "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), + 9 }, #endif - { "", NULL, NULL, NULL, NULL } + { "", NULL, NULL, NULL, NULL, 0 } }; #ifdef SUPPORT_UTF8 @@ -313,8 +325,9 @@ CHECK(re.Replace(t->rewrite, &one)); CHECK_EQ(one, t->single); string all(t->original); - CHECK(re.GlobalReplace(t->rewrite, &all) > 0); + const int replace_count = re.GlobalReplace(t->rewrite, &all); CHECK_EQ(all, t->global); + CHECK_EQ(replace_count, t->global_count); } // One final test: test \r\n replacement when we're not in CRLF mode @@ -322,14 +335,14 @@ RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); assert(re.error().empty()); string all("aa\r\naa\r\n"); - CHECK(re.GlobalReplace("bb", &all) > 0); + CHECK_EQ(re.GlobalReplace("bb", &all), 9); CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); } { RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); assert(re.error().empty()); string all("aa\r\naa\r\n"); - CHECK(re.GlobalReplace("bb", &all) > 0); + CHECK_EQ(re.GlobalReplace("bb", &all), 9); CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); } // TODO: test what happens when no PCRE_NEWLINE_* flag is set. @@ -484,6 +497,7 @@ TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); TestQuoteMeta("((?!)xxx).*yyy"); TestQuoteMeta("(["); + TestQuoteMeta(string("foo\0bar", 7)); } static void TestQuoteMetaSimpleNegative() { @@ -861,6 +875,24 @@ CHECK_EQ(s, string("ruby")); CHECK_EQ(i, 1234); + // Ignore non-void* NULL arg + CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); + CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); + CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); + CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); +#ifdef HAVE_LONG_LONG + CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); +#endif + CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); + CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); + + // Fail on non-void* NULL arg if the match doesn't parse for the given type. + CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); + CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); + CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); + CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); + CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); + // Ignored arg CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); CHECK_EQ(s, string("ruby")); @@ -923,23 +955,23 @@ long long v; static const long long max_value = 0x7fffffffffffffffLL; static const long long min_value = -max_value - 1; - char buf[32]; + char buf[32]; // definitely big enough for a long long CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); - snprintf(buf, sizeof(buf), LLD, max_value); + sprintf(buf, LLD, max_value); CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); - snprintf(buf, sizeof(buf), LLD, min_value); + sprintf(buf, LLD, min_value); CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); - snprintf(buf, sizeof(buf), LLD, max_value); + sprintf(buf, LLD, max_value); assert(buf[strlen(buf)-1] != '9'); buf[strlen(buf)-1]++; CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); - snprintf(buf, sizeof(buf), LLD, min_value); + sprintf(buf, LLD, min_value); assert(buf[strlen(buf)-1] != '9'); buf[strlen(buf)-1]++; CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); @@ -950,12 +982,12 @@ unsigned long long v; long long v2; static const unsigned long long max_value = 0xffffffffffffffffULL; - char buf[32]; + char buf[32]; // definitely big enough for a unsigned long long CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); - snprintf(buf, sizeof(buf), LLU, max_value); + sprintf(buf, LLU, max_value); CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); assert(buf[strlen(buf)-1] != '9'); @@ -1136,13 +1168,13 @@ printf("Testing UTF-8 handling\n"); // Three Japanese characters (nihongo) - const char utf8_string[] = { + const unsigned char utf8_string[] = { 0xe6, 0x97, 0xa5, // 65e5 0xe6, 0x9c, 0xac, // 627c 0xe8, 0xaa, 0x9e, // 8a9e 0 }; - const char utf8_pattern[] = { + const unsigned char utf8_pattern[] = { '.', 0xe6, 0x9c, 0xac, // 627c '.',