/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 263 by ph10, Mon Nov 12 16:53:25 2007 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // -*- coding: utf-8 -*-
2    //
3    // Copyright (c) 2005 - 2006, Google Inc.
4  // All rights reserved.  // All rights reserved.
5  //  //
6  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 31  Line 33 
33  //  //
34  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
35    
36    #ifdef HAVE_CONFIG_H
37    #include "config.h"
38    #endif
39    
40  #include <stdio.h>  #include <stdio.h>
41    #include <cassert>
42  #include <vector>  #include <vector>
 #include "config.h"  
43  #include "pcrecpp.h"  #include "pcrecpp.h"
44    
45  using pcrecpp::StringPiece;  using pcrecpp::StringPiece;
# Line 104  static void LeakTest() { Line 110  static void LeakTest() {
110        initial_size = VirtualProcessSize();        initial_size = VirtualProcessSize();
111        printf("Size after 50000: %llu\n", initial_size);        printf("Size after 50000: %llu\n", initial_size);
112      }      }
113      char buf[100];      char buf[100];  // definitely big enough
114      snprintf(buf, sizeof(buf), "pat%09d", i);      sprintf(buf, "pat%09d", i);
115      RE newre(buf);      RE newre(buf);
116    }    }
117    uint64 final_size = VirtualProcessSize();    uint64 final_size = VirtualProcessSize();
# Line 259  static void TestReplace() { Line 265  static void TestReplace() {
265        "aaaaa",        "aaaaa",
266        "bbaaaaa",        "bbaaaaa",
267        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb" },
268        { "b*",
269          "bb",
270          "aa\naa\n",
271          "bbaa\naa\n",
272          "bbabbabb\nbbabbabb\nbb" },
273        { "b*",
274          "bb",
275          "aa\raa\r",
276          "bbaa\raa\r",
277          "bbabbabb\rbbabbabb\rbb" },
278        { "b*",
279          "bb",
280          "aa\r\naa\r\n",
281          "bbaa\r\naa\r\n",
282          "bbabbabb\r\nbbabbabb\r\nbb" },
283    #ifdef SUPPORT_UTF8
284        { "b*",
285          "bb",
286          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
287          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
288          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
289        { "b*",
290          "bb",
291          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
292          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
293          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
294           "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
295    #endif
296      { "", NULL, NULL, NULL, NULL }      { "", NULL, NULL, NULL, NULL }
297    };    };
298    
299    #ifdef SUPPORT_UTF8
300      const bool support_utf8 = true;
301    #else
302      const bool support_utf8 = false;
303    #endif
304    
305    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
306        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
307        assert(re.error().empty());
308      string one(t->original);      string one(t->original);
309      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
310      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
311      string all(t->original);      string all(t->original);
312      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
313      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
314    }    }
315    
316      // One final test: test \r\n replacement when we're not in CRLF mode
317      {
318        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
319        assert(re.error().empty());
320        string all("aa\r\naa\r\n");
321        CHECK(re.GlobalReplace("bb", &all) > 0);
322        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
323      }
324      {
325        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
326        assert(re.error().empty());
327        string all("aa\r\naa\r\n");
328        CHECK(re.GlobalReplace("bb", &all) > 0);
329        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
330      }
331      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
332      //       Alas, the answer depends on how pcre was compiled.
333  }  }
334    
335  static void TestExtract() {  static void TestExtract() {
# Line 390  static void TestRecursion() { Line 450  static void TestRecursion() {
450    CHECK(re4.FullMatch(text_bad) == false);    CHECK(re4.FullMatch(text_bad) == false);
451  }  }
452    
453    // A meta-quoted string, interpreted as a pattern, should always match
454    // the original unquoted string.
455    static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
456      string quoted = RE::QuoteMeta(unquoted);
457      RE re(quoted, options);
458      CHECK(re.FullMatch(unquoted));
459    }
460    
461    // A string containing meaningful regexp characters, once meta-quoted,
462    // should not generally match what the unquoted pattern matches.
463    static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
464                                      RE_Options options = RE_Options()) {
465      string quoted = RE::QuoteMeta(unquoted);
466      RE re(quoted, options);
467      CHECK(!re.FullMatch(should_not_match));
468    }
469    
470    // Tests that quoted meta characters match their original strings,
471    // and that a few things that shouldn't match indeed do not.
472    static void TestQuotaMetaSimple() {
473      TestQuoteMeta("foo");
474      TestQuoteMeta("foo.bar");
475      TestQuoteMeta("foo\\.bar");
476      TestQuoteMeta("[1-9]");
477      TestQuoteMeta("1.5-2.0?");
478      TestQuoteMeta("\\d");
479      TestQuoteMeta("Who doesn't like ice cream?");
480      TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
481      TestQuoteMeta("((?!)xxx).*yyy");
482      TestQuoteMeta("([");
483    }
484    
485    static void TestQuoteMetaSimpleNegative() {
486      NegativeTestQuoteMeta("foo", "bar");
487      NegativeTestQuoteMeta("...", "bar");
488      NegativeTestQuoteMeta("\\.", ".");
489      NegativeTestQuoteMeta("\\.", "..");
490      NegativeTestQuoteMeta("(a)", "a");
491      NegativeTestQuoteMeta("(a|b)", "a");
492      NegativeTestQuoteMeta("(a|b)", "(a)");
493      NegativeTestQuoteMeta("(a|b)", "a|b");
494      NegativeTestQuoteMeta("[0-9]", "0");
495      NegativeTestQuoteMeta("[0-9]", "0-9");
496      NegativeTestQuoteMeta("[0-9]", "[9]");
497      NegativeTestQuoteMeta("((?!)xxx)", "xxx");
498    }
499    
500    static void TestQuoteMetaLatin1() {
501      TestQuoteMeta("3\xb2 = 9");
502    }
503    
504    static void TestQuoteMetaUtf8() {
505    #ifdef SUPPORT_UTF8
506      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
507      TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
508      TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
509      TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
510      TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
511      TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
512      TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
513      NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
514                            "27\\\xc2\\\xb0",
515                            pcrecpp::UTF8());
516    #endif
517    }
518    
519    static void TestQuoteMetaAll() {
520      printf("Testing QuoteMeta\n");
521      TestQuotaMetaSimple();
522      TestQuoteMetaSimpleNegative();
523      TestQuoteMetaLatin1();
524      TestQuoteMetaUtf8();
525    }
526    
527  //  //
528  // Options tests contributed by  // Options tests contributed by
529  // Giuseppe Maxia, CTO, Stardata s.r.l.  // Giuseppe Maxia, CTO, Stardata s.r.l.
# Line 612  static void TestOptions() { Line 746  static void TestOptions() {
746    Test_all_options();    Test_all_options();
747  }  }
748    
749    static void TestConstructors() {
750      printf("Testing constructors\n");
751    
752      RE_Options options;
753      options.set_dotall(true);
754      const char *str = "HELLO\n" "cruel\n" "world";
755    
756      RE orig("HELLO.*world", options);
757      CHECK(orig.FullMatch(str));
758    
759      RE copy1(orig);
760      CHECK(copy1.FullMatch(str));
761    
762      RE copy2("not a match");
763      CHECK(!copy2.FullMatch(str));
764      copy2 = copy1;
765      CHECK(copy2.FullMatch(str));
766      copy2 = orig;
767      CHECK(copy2.FullMatch(str));
768    
769      // Make sure when we assign to ourselves, nothing bad happens
770      orig = orig;
771      copy1 = copy1;
772      copy2 = copy2;
773      CHECK(orig.FullMatch(str));
774      CHECK(copy1.FullMatch(str));
775      CHECK(copy2.FullMatch(str));
776    }
777    
778  int main(int argc, char** argv) {  int main(int argc, char** argv) {
779    // Treat any flag as --help    // Treat any flag as --help
780    if (argc > 1 && argv[1][0] == '-') {    if (argc > 1 && argv[1][0] == '-') {
# Line 646  int main(int argc, char** argv) { Line 809  int main(int argc, char** argv) {
809    /***** FullMatch with no args *****/    /***** FullMatch with no args *****/
810    
811    CHECK(RE("h.*o").FullMatch("hello"));    CHECK(RE("h.*o").FullMatch("hello"));
812    CHECK(!RE("h.*o").FullMatch("othello"));    CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
813    CHECK(!RE("h.*o").FullMatch("hello!"));    CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
814      CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
815      CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
816      CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
817    
818    /***** FullMatch with args *****/    /***** FullMatch with args *****/
819    
# Line 691  int main(int argc, char** argv) { Line 857  int main(int argc, char** argv) {
857    CHECK_EQ(s, string("ruby"));    CHECK_EQ(s, string("ruby"));
858    CHECK_EQ(i, 1234);    CHECK_EQ(i, 1234);
859    
860      // Ignore non-void* NULL arg
861      CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
862      CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
863      CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
864      CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
865      CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
866      CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
867      CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
868    
869      // Fail on non-void* NULL arg if the match doesn't parse for the given type.
870      CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
871      CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
872      CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
873      CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
874      CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
875    
876    // Ignored arg    // Ignored arg
877    CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));    CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
878    CHECK_EQ(s, string("ruby"));    CHECK_EQ(s, string("ruby"));
# Line 742  int main(int argc, char** argv) { Line 924  int main(int argc, char** argv) {
924      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
925    }    }
926  #ifdef HAVE_LONG_LONG  #ifdef HAVE_LONG_LONG
927    # if defined(__MINGW__) || defined(__MINGW32__)
928    #   define LLD "%I64d"
929    #   define LLU "%I64u"
930    # else
931    #   define LLD "%lld"
932    #   define LLU "%llu"
933    # endif
934    {    {
935      long long v;      long long v;
936      static const long long max_value = 0x7fffffffffffffffLL;      static const long long max_value = 0x7fffffffffffffffLL;
937      static const long long min_value = -max_value - 1;      static const long long min_value = -max_value - 1;
938      char buf[32];      char buf[32];  // definitely big enough for a long long
939    
940      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
941      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
942    
943      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
944      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
945    
946      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
947      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
948    
949      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
950      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
951      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
952      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
953    
954      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
955      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
956      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
957      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
# Line 773  int main(int argc, char** argv) { Line 962  int main(int argc, char** argv) {
962      unsigned long long v;      unsigned long long v;
963      long long v2;      long long v2;
964      static const unsigned long long max_value = 0xffffffffffffffffULL;      static const unsigned long long max_value = 0xffffffffffffffffULL;
965      char buf[32];      char buf[32];  // definitely big enough for an unsigned long long
966    
967      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
968      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
969    
970      snprintf(buf, sizeof(buf), "%llu", max_value);      sprintf(buf, LLU, max_value);
971      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
972    
973      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
# Line 930  int main(int argc, char** argv) { Line 1119  int main(int argc, char** argv) {
1119    CHECK(RE("h.*o").PartialMatch("hello!"));    CHECK(RE("h.*o").PartialMatch("hello!"));
1120    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1121    
1122      /***** other tests *****/
1123    
1124    RadixTests();    RadixTests();
1125    TestReplace();    TestReplace();
1126    TestExtract();    TestExtract();
1127    TestConsume();    TestConsume();
1128    TestFindAndConsume();    TestFindAndConsume();
1129      TestQuoteMetaAll();
1130    TestMatchNumberPeculiarity();    TestMatchNumberPeculiarity();
1131    
1132    // Check the pattern() accessor    // Check the pattern() accessor
# Line 956  int main(int argc, char** argv) { Line 1148  int main(int argc, char** argv) {
1148      printf("Testing UTF-8 handling\n");      printf("Testing UTF-8 handling\n");
1149    
1150      // Three Japanese characters (nihongo)      // Three Japanese characters (nihongo)
1151      const char utf8_string[] = {      const unsigned char utf8_string[] = {
1152           0xe6, 0x97, 0xa5, // 65e5           0xe6, 0x97, 0xa5, // 65e5
1153           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1154           0xe8, 0xaa, 0x9e, // 8a9e           0xe8, 0xaa, 0x9e, // 8a9e
1155           0           0
1156      };      };
1157      const char utf8_pattern[] = {      const unsigned char utf8_pattern[] = {
1158           '.',           '.',
1159           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1160           '.',           '.',
# Line 1054  int main(int argc, char** argv) { Line 1246  int main(int argc, char** argv) {
1246      VERBOSE_TEST  = true;      VERBOSE_TEST  = true;
1247    TestOptions();    TestOptions();
1248    
1249      // Test the constructors
1250      TestConstructors();
1251    
1252    // Done    // Done
1253    printf("OK\n");    printf("OK\n");
1254    

Legend:
Removed from v.87  
changed lines
  Added in v.263

  ViewVC Help
Powered by ViewVC 1.1.5