/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 81 by nigel, Sat Feb 24 21:40:59 2007 UTC revision 257 by ph10, Wed Sep 19 09:11:19 2007 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // -*- coding: utf-8 -*-
2    //
3    // Copyright (c) 2005 - 2006, Google Inc.
4  // All rights reserved.  // All rights reserved.
5  //  //
6  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 31  Line 33 
33  //  //
34  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
35    
36    #ifdef HAVE_CONFIG_H
37    #include "config.h"
38    #endif
39    
40  #include <stdio.h>  #include <stdio.h>
41    #include <cassert>
42  #include <vector>  #include <vector>
 #include "config.h"  
43  #include "pcrecpp.h"  #include "pcrecpp.h"
44    
45  using pcrecpp::StringPiece;  using pcrecpp::StringPiece;
# Line 104  static void LeakTest() { Line 110  static void LeakTest() {
110        initial_size = VirtualProcessSize();        initial_size = VirtualProcessSize();
111        printf("Size after 50000: %llu\n", initial_size);        printf("Size after 50000: %llu\n", initial_size);
112      }      }
113      char buf[100];      char buf[100];  // definitely big enough
114      snprintf(buf, sizeof(buf), "pat%09d", i);      sprintf(buf, "pat%09d", i);
115      RE newre(buf);      RE newre(buf);
116    }    }
117    uint64 final_size = VirtualProcessSize();    uint64 final_size = VirtualProcessSize();
# Line 259  static void TestReplace() { Line 265  static void TestReplace() {
265        "aaaaa",        "aaaaa",
266        "bbaaaaa",        "bbaaaaa",
267        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb" },
268        { "b*",
269          "bb",
270          "aa\naa\n",
271          "bbaa\naa\n",
272          "bbabbabb\nbbabbabb\nbb" },
273        { "b*",
274          "bb",
275          "aa\raa\r",
276          "bbaa\raa\r",
277          "bbabbabb\rbbabbabb\rbb" },
278        { "b*",
279          "bb",
280          "aa\r\naa\r\n",
281          "bbaa\r\naa\r\n",
282          "bbabbabb\r\nbbabbabb\r\nbb" },
283    #ifdef SUPPORT_UTF8
284        { "b*",
285          "bb",
286          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
287          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
288          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
289        { "b*",
290          "bb",
291          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
292          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
293          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
294           "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
295    #endif
296      { "", NULL, NULL, NULL, NULL }      { "", NULL, NULL, NULL, NULL }
297    };    };
298    
299    #ifdef SUPPORT_UTF8
300      const bool support_utf8 = true;
301    #else
302      const bool support_utf8 = false;
303    #endif
304    
305    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
306        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
307        assert(re.error().empty());
308      string one(t->original);      string one(t->original);
309      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
310      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
311      string all(t->original);      string all(t->original);
312      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
313      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
314    }    }
315    
316      // One final test: test \r\n replacement when we're not in CRLF mode
317      {
318        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
319        assert(re.error().empty());
320        string all("aa\r\naa\r\n");
321        CHECK(re.GlobalReplace("bb", &all) > 0);
322        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
323      }
324      {
325        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
326        assert(re.error().empty());
327        string all("aa\r\naa\r\n");
328        CHECK(re.GlobalReplace("bb", &all) > 0);
329        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
330      }
331      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
332      //       Alas, the answer depends on how pcre was compiled.
333  }  }
334    
335  static void TestExtract() {  static void TestExtract() {
# Line 348  static void TestMatchNumberPeculiarity() Line 408  static void TestMatchNumberPeculiarity()
408    CHECK_EQ(a, "");    CHECK_EQ(a, "");
409  }  }
410    
411  static void TestRecursion(int size, const char *pattern, int match_limit) {  static void TestRecursion() {
412    printf("Testing recursion\n");    printf("Testing recursion\n");
413    
414    // Fill up a string repeating the pattern given    // Get one string that passes (sometimes), one that never does.
415    string domain;    string text_good("abcdefghijk");
416    domain.resize(size);    string text_bad("acdefghijkl");
417    int patlen = strlen(pattern);  
418    for (int i = 0; i < size; ++i) {    // According to pcretest, matching text_good against (\w+)*b
419      domain[i] = pattern[i % patlen];    // requires match_limit of at least 8192, and match_recursion_limit
420    }    // of at least 37.
421    // Just make sure it doesn't crash due to too much recursion.  
422    RE_Options options;    RE_Options options_ml;
423    options.set_match_limit(match_limit);    options_ml.set_match_limit(8192);  // exactly the minimum quoted above
424    RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);    RE re("(\\w+)*b", options_ml);
425    re.FullMatch(domain);    CHECK(re.PartialMatch(text_good) == true);
426      CHECK(re.PartialMatch(text_bad) == false);  // text_bad contains no 'b' at all
427      CHECK(re.FullMatch(text_good) == false);  // text_good does not end in 'b'
428      CHECK(re.FullMatch(text_bad) == false);
429    
430      options_ml.set_match_limit(1024);  // below the 8192 minimum quoted above
431      RE re2("(\\w+)*b", options_ml);
432      CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
433      CHECK(re2.PartialMatch(text_bad) == false);
434      CHECK(re2.FullMatch(text_good) == false);
435      CHECK(re2.FullMatch(text_bad) == false);
436    
437      RE_Options options_mlr;
438      options_mlr.set_match_limit_recursion(50);  // above the minimum of 37 quoted above
439      RE re3("(\\w+)*b", options_mlr);
440      CHECK(re3.PartialMatch(text_good) == true);
441      CHECK(re3.PartialMatch(text_bad) == false);
442      CHECK(re3.FullMatch(text_good) == false);
443      CHECK(re3.FullMatch(text_bad) == false);
444    
445      options_mlr.set_match_limit_recursion(10);  // below the minimum of 37 quoted above
446      RE re4("(\\w+)*b", options_mlr);
447      CHECK(re4.PartialMatch(text_good) == false);  // expected to fail only because of the recursion limit
448      CHECK(re4.PartialMatch(text_bad) == false);
449      CHECK(re4.FullMatch(text_good) == false);
450      CHECK(re4.FullMatch(text_bad) == false);
451    }
452    
453    // A meta-quoted string, interpreted as a pattern, should always match
454    // the original unquoted string.  |options| is handed to the RE constructor.
455    static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
456      string quoted = RE::QuoteMeta(unquoted);
457      RE re(quoted, options);  // compile the quoted text as a pattern
458      CHECK(re.FullMatch(unquoted));  // the entire original string must match
459    }
460    
461    // A string containing meaningful regexp characters, once meta-quoted,
462    // should generally not match a string that the unquoted pattern matches.
463    static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
464                                      RE_Options options = RE_Options()) {
465      string quoted = RE::QuoteMeta(unquoted);
466      RE re(quoted, options);
467      CHECK(!re.FullMatch(should_not_match));  // the quoted pattern must reject this input
468    }
469    
470    // Tests that quoted meta characters match their original strings.
471    // NOTE(review): the name is spelled "Quota" (sic); TestQuoteMetaAll calls it by this spelling.
472    static void TestQuotaMetaSimple() {
473      TestQuoteMeta("foo");  // no metacharacters at all
474      TestQuoteMeta("foo.bar");
475      TestQuoteMeta("foo\\.bar");  // input that already contains a backslash
476      TestQuoteMeta("[1-9]");
477      TestQuoteMeta("1.5-2.0?");
478      TestQuoteMeta("\\d");
479      TestQuoteMeta("Who doesn't like ice cream?");
480      TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
481      TestQuoteMeta("((?!)xxx).*yyy");
482      TestQuoteMeta("([");  // unbalanced bracket characters
483    }
484    
485    static void TestQuoteMetaSimpleNegative() {  // each call: (text to quote, string the quoted pattern must NOT fully match)
486      NegativeTestQuoteMeta("foo", "bar");
487      NegativeTestQuoteMeta("...", "bar");  // quoted dots must not act as wildcards
488      NegativeTestQuoteMeta("\\.", ".");
489      NegativeTestQuoteMeta("\\.", "..");
490      NegativeTestQuoteMeta("(a)", "a");  // quoted parentheses must not act as grouping
491      NegativeTestQuoteMeta("(a|b)", "a");
492      NegativeTestQuoteMeta("(a|b)", "(a)");
493      NegativeTestQuoteMeta("(a|b)", "a|b");
494      NegativeTestQuoteMeta("[0-9]", "0");  // quoted brackets must not act as a character class
495      NegativeTestQuoteMeta("[0-9]", "0-9");
496      NegativeTestQuoteMeta("[0-9]", "[9]");
497      NegativeTestQuoteMeta("((?!)xxx)", "xxx");
498    }
499    
500    static void TestQuoteMetaLatin1() {  // quoting a string that holds one non-ASCII byte, with default options
501      TestQuoteMeta("3\xb2 = 9");  // 0xb2 is SUPERSCRIPT TWO in Latin-1
502    }
503    
504    static void TestQuoteMetaUtf8() {  // body is compiled only when SUPPORT_UTF8 is defined; otherwise this is a no-op
505    #ifdef SUPPORT_UTF8
506      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());  // \x63 is 'c', escaped so it is not absorbed into the preceding \xa1 hex escape
507      TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
508      TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
509      TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
510      TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
511      TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
512      TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
513      NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf8 (degree symbol)
514                            "27\\\xc2\\\xb0",           // same bytes, each preceded by a literal backslash
515                            pcrecpp::UTF8());
516    #endif
517    }
518    
519    static void TestQuoteMetaAll() {  // runs each QuoteMeta test group in turn
520      printf("Testing QuoteMeta\n");
521      TestQuotaMetaSimple();  // "Quota" (sic) -- matches the helper's name as defined
522      TestQuoteMetaSimpleNegative();
523      TestQuoteMetaLatin1();
524      TestQuoteMetaUtf8();
525  }  }
526    
527  //  //
# Line 587  static void TestOptions() { Line 746  static void TestOptions() {
746    Test_all_options();    Test_all_options();
747  }  }
748    
749    static void TestConstructors() {  // checks RE objects after construction, copy construction, assignment and self-assignment
750      printf("Testing constructors\n");
751    
752      RE_Options options;
753      options.set_dotall(true);  // presumably so ".*" can span the '\n's in str -- confirm against the RE_Options docs
754      const char *str = "HELLO\n" "cruel\n" "world";
755    
756      RE orig("HELLO.*world", options);
757      CHECK(orig.FullMatch(str));
758    
759      RE copy1(orig);  // copy construction
760      CHECK(copy1.FullMatch(str));
761    
762      RE copy2("not a match");  // starts out with a pattern that does not match str
763      CHECK(!copy2.FullMatch(str));
764      copy2 = copy1;  // after assignment copy2 must match like copy1 does
765      CHECK(copy2.FullMatch(str));
766      copy2 = orig;
767      CHECK(copy2.FullMatch(str));
768    
769      // Make sure when we assign to ourselves, nothing bad happens
770      orig = orig;
771      copy1 = copy1;
772      copy2 = copy2;
773      CHECK(orig.FullMatch(str));
774      CHECK(copy1.FullMatch(str));
775      CHECK(copy2.FullMatch(str));
776    }
777    
778  int main(int argc, char** argv) {  int main(int argc, char** argv) {
779    // Treat any flag as --help    // Treat any flag as --help
780    if (argc > 1 && argv[1][0] == '-') {    if (argc > 1 && argv[1][0] == '-') {
# Line 621  int main(int argc, char** argv) { Line 809  int main(int argc, char** argv) {
809    /***** FullMatch with no args *****/    /***** FullMatch with no args *****/
810    
811    CHECK(RE("h.*o").FullMatch("hello"));    CHECK(RE("h.*o").FullMatch("hello"));
812    CHECK(!RE("h.*o").FullMatch("othello"));    CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
813    CHECK(!RE("h.*o").FullMatch("hello!"));    CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
814      CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
815      CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
816      CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
817    
818    /***** FullMatch with args *****/    /***** FullMatch with args *****/
819    
# Line 717  int main(int argc, char** argv) { Line 908  int main(int argc, char** argv) {
908      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
909    }    }
910  #ifdef HAVE_LONG_LONG  #ifdef HAVE_LONG_LONG
911    # if defined(__MINGW__) || defined(__MINGW32__)
912    #   define LLD "%I64d"
913    #   define LLU "%I64u"
914    # else
915    #   define LLD "%lld"
916    #   define LLU "%llu"
917    # endif
918    {    {
919      long long v;      long long v;
920      static const long long max_value = 0x7fffffffffffffffLL;      static const long long max_value = 0x7fffffffffffffffLL;
921      static const long long min_value = -max_value - 1;      static const long long min_value = -max_value - 1;
922      char buf[32];      char buf[32];  // definitely big enough for a long long
923    
924      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
925      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
926    
927      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
928      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
929    
930      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
931      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
932    
933      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
934      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
935      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
936      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
937    
938      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
939      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
940      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
941      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
# Line 748  int main(int argc, char** argv) { Line 946  int main(int argc, char** argv) {
946      unsigned long long v;      unsigned long long v;
947      long long v2;      long long v2;
948      static const unsigned long long max_value = 0xffffffffffffffffULL;      static const unsigned long long max_value = 0xffffffffffffffffULL;
949      char buf[32];      char buf[32];  // definitely big enough for an unsigned long long
950    
951      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
952      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
953    
954      snprintf(buf, sizeof(buf), "%llu", max_value);      sprintf(buf, LLU, max_value);
955      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
956    
957      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
# Line 905  int main(int argc, char** argv) { Line 1103  int main(int argc, char** argv) {
1103    CHECK(RE("h.*o").PartialMatch("hello!"));    CHECK(RE("h.*o").PartialMatch("hello!"));
1104    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1105    
1106      /***** other tests *****/
1107    
1108    RadixTests();    RadixTests();
1109    TestReplace();    TestReplace();
1110    TestExtract();    TestExtract();
1111    TestConsume();    TestConsume();
1112    TestFindAndConsume();    TestFindAndConsume();
1113      TestQuoteMetaAll();
1114    TestMatchNumberPeculiarity();    TestMatchNumberPeculiarity();
1115    
1116    // Check the pattern() accessor    // Check the pattern() accessor
# Line 931  int main(int argc, char** argv) { Line 1132  int main(int argc, char** argv) {
1132      printf("Testing UTF-8 handling\n");      printf("Testing UTF-8 handling\n");
1133    
1134      // Three Japanese characters (nihongo)      // Three Japanese characters (nihongo)
1135      const char utf8_string[] = {      const unsigned char utf8_string[] = {
1136           0xe6, 0x97, 0xa5, // 65e5           0xe6, 0x97, 0xa5, // 65e5
1137           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1138           0xe8, 0xaa, 0x9e, // 8a9e           0xe8, 0xaa, 0x9e, // 8a9e
1139           0           0
1140      };      };
1141      const char utf8_pattern[] = {      const unsigned char utf8_pattern[] = {
1142           '.',           '.',
1143           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1144           '.',           '.',
# Line 1021  int main(int argc, char** argv) { Line 1222  int main(int argc, char** argv) {
1222      CHECK(!re.error().empty());      CHECK(!re.error().empty());
1223    }    }
1224    
1225    // Test that recursion is stopped: there will be some errors reported    // Test that recursion is stopped
1226    int matchlimit = 5000;    TestRecursion();
   int bytes = 15 * 1024;  // enough to crash if there was no match limit  
   TestRecursion(bytes, ".", matchlimit);  
   TestRecursion(bytes, "a", matchlimit);  
   TestRecursion(bytes, "a.", matchlimit);  
   TestRecursion(bytes, "ab.", matchlimit);  
   TestRecursion(bytes, "abc.", matchlimit);  
1227    
1228    // Test Options    // Test Options
1229    if (getenv("VERBOSE_TEST") != NULL)    if (getenv("VERBOSE_TEST") != NULL)
1230      VERBOSE_TEST  = true;      VERBOSE_TEST  = true;
1231    TestOptions();    TestOptions();
1232    
1233      // Test the constructors
1234      TestConstructors();
1235    
1236    // Done    // Done
1237    printf("OK\n");    printf("OK\n");
1238    

Legend:
Removed from v.81  
changed lines
  Added in v.257

  ViewVC Help
Powered by ViewVC 1.1.5