/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 234 by ph10, Tue Sep 11 11:36:27 2007 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // -*- coding: utf-8 -*-
2    //
3    // Copyright (c) 2005 - 2006, Google Inc.
4  // All rights reserved.  // All rights reserved.
5  //  //
6  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 31  Line 33 
33  //  //
34  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
35    
36    #ifdef HAVE_CONFIG_H
37    #include <config.h>
38    #endif
39    
40    #ifdef _WIN32
41    #define snprintf _snprintf
42    #endif
43    
44  #include <stdio.h>  #include <stdio.h>
45    #include <cassert>
46  #include <vector>  #include <vector>
 #include "config.h"  
47  #include "pcrecpp.h"  #include "pcrecpp.h"
48    
49  using pcrecpp::StringPiece;  using pcrecpp::StringPiece;
# Line 259  static void TestReplace() { Line 269  static void TestReplace() {
269        "aaaaa",        "aaaaa",
270        "bbaaaaa",        "bbaaaaa",
271        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb" },
272        { "b*",
273          "bb",
274          "aa\naa\n",
275          "bbaa\naa\n",
276          "bbabbabb\nbbabbabb\nbb" },
277        { "b*",
278          "bb",
279          "aa\raa\r",
280          "bbaa\raa\r",
281          "bbabbabb\rbbabbabb\rbb" },
282        { "b*",
283          "bb",
284          "aa\r\naa\r\n",
285          "bbaa\r\naa\r\n",
286          "bbabbabb\r\nbbabbabb\r\nbb" },
287    #ifdef SUPPORT_UTF8
288        { "b*",
289          "bb",
290          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
291          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
292          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
293        { "b*",
294          "bb",
295          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
296          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
297          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
298           "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
299    #endif
300      { "", NULL, NULL, NULL, NULL }      { "", NULL, NULL, NULL, NULL }
301    };    };
302    
303    #ifdef SUPPORT_UTF8
304      const bool support_utf8 = true;
305    #else
306      const bool support_utf8 = false;
307    #endif
308    
309    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
310        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
311        assert(re.error().empty());
312      string one(t->original);      string one(t->original);
313      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
314      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
315      string all(t->original);      string all(t->original);
316      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
317      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
318    }    }
319    
320      // One final test: test \r\n replacement when we're not in CRLF mode
321      {
322        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
323        assert(re.error().empty());
324        string all("aa\r\naa\r\n");
325        CHECK(re.GlobalReplace("bb", &all) > 0);
326        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327      }
328      {
329        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
330        assert(re.error().empty());
331        string all("aa\r\naa\r\n");
332        CHECK(re.GlobalReplace("bb", &all) > 0);
333        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
334      }
335      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
336      //       Alas, the answer depends on how pcre was compiled.
337  }  }
338    
339  static void TestExtract() {  static void TestExtract() {
# Line 390  static void TestRecursion() { Line 454  static void TestRecursion() {
454    CHECK(re4.FullMatch(text_bad) == false);    CHECK(re4.FullMatch(text_bad) == false);
455  }  }
456    
457    // A meta-quoted string, interpreted as a pattern, should always match
458    // the original unquoted string.
459    static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
460      string quoted = RE::QuoteMeta(unquoted);
461      RE re(quoted, options);
462      CHECK(re.FullMatch(unquoted));
463    }
464    
465    // A string containing meaningful regexp characters, once meta-quoted,
466    // should generally not match the strings that the unquoted pattern matches.
467    static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
468                                      RE_Options options = RE_Options()) {
469      string quoted = RE::QuoteMeta(unquoted);
470      RE re(quoted, options);
471      CHECK(!re.FullMatch(should_not_match));
472    }
473    
474    // Tests that quoted meta characters match their original strings,
475    // and that a few things that shouldn't match indeed do not.
476    static void TestQuotaMetaSimple() {
477      TestQuoteMeta("foo");
478      TestQuoteMeta("foo.bar");
479      TestQuoteMeta("foo\\.bar");
480      TestQuoteMeta("[1-9]");
481      TestQuoteMeta("1.5-2.0?");
482      TestQuoteMeta("\\d");
483      TestQuoteMeta("Who doesn't like ice cream?");
484      TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
485      TestQuoteMeta("((?!)xxx).*yyy");
486      TestQuoteMeta("([");
487    }
488    
489    static void TestQuoteMetaSimpleNegative() {
490      NegativeTestQuoteMeta("foo", "bar");
491      NegativeTestQuoteMeta("...", "bar");
492      NegativeTestQuoteMeta("\\.", ".");
493      NegativeTestQuoteMeta("\\.", "..");
494      NegativeTestQuoteMeta("(a)", "a");
495      NegativeTestQuoteMeta("(a|b)", "a");
496      NegativeTestQuoteMeta("(a|b)", "(a)");
497      NegativeTestQuoteMeta("(a|b)", "a|b");
498      NegativeTestQuoteMeta("[0-9]", "0");
499      NegativeTestQuoteMeta("[0-9]", "0-9");
500      NegativeTestQuoteMeta("[0-9]", "[9]");
501      NegativeTestQuoteMeta("((?!)xxx)", "xxx");
502    }
503    
504    static void TestQuoteMetaLatin1() {
505      TestQuoteMeta("3\xb2 = 9");
506    }
507    
508    static void TestQuoteMetaUtf8() {
509    #ifdef SUPPORT_UTF8
510      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
511      TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
512      TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
513      TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
514      TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
515      TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
516      TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
517      NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
518                            "27\\\xc2\\\xb0",
519                            pcrecpp::UTF8());
520    #endif
521    }
522    
523    static void TestQuoteMetaAll() {
524      printf("Testing QuoteMeta\n");
525      TestQuotaMetaSimple();
526      TestQuoteMetaSimpleNegative();
527      TestQuoteMetaLatin1();
528      TestQuoteMetaUtf8();
529    }
530    
531  //  //
532  // Options tests contributed by  // Options tests contributed by
533  // Giuseppe Maxia, CTO, Stardata s.r.l.  // Giuseppe Maxia, CTO, Stardata s.r.l.
# Line 612  static void TestOptions() { Line 750  static void TestOptions() {
750    Test_all_options();    Test_all_options();
751  }  }
752    
753    static void TestConstructors() {
754      printf("Testing constructors\n");
755    
756      RE_Options options;
757      options.set_dotall(true);
758      const char *str = "HELLO\n" "cruel\n" "world";
759    
760      RE orig("HELLO.*world", options);
761      CHECK(orig.FullMatch(str));
762    
763      RE copy1(orig);
764      CHECK(copy1.FullMatch(str));
765    
766      RE copy2("not a match");
767      CHECK(!copy2.FullMatch(str));
768      copy2 = copy1;
769      CHECK(copy2.FullMatch(str));
770      copy2 = orig;
771      CHECK(copy2.FullMatch(str));
772    
773      // Make sure when we assign to ourselves, nothing bad happens
774      orig = orig;
775      copy1 = copy1;
776      copy2 = copy2;
777      CHECK(orig.FullMatch(str));
778      CHECK(copy1.FullMatch(str));
779      CHECK(copy2.FullMatch(str));
780    }
781    
782  int main(int argc, char** argv) {  int main(int argc, char** argv) {
783    // Treat any flag as --help    // Treat any flag as --help
784    if (argc > 1 && argv[1][0] == '-') {    if (argc > 1 && argv[1][0] == '-') {
# Line 646  int main(int argc, char** argv) { Line 813  int main(int argc, char** argv) {
813    /***** FullMatch with no args *****/    /***** FullMatch with no args *****/
814    
815    CHECK(RE("h.*o").FullMatch("hello"));    CHECK(RE("h.*o").FullMatch("hello"));
816    CHECK(!RE("h.*o").FullMatch("othello"));    CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
817    CHECK(!RE("h.*o").FullMatch("hello!"));    CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
818      CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
819      CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
820      CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
821    
822    /***** FullMatch with args *****/    /***** FullMatch with args *****/
823    
# Line 742  int main(int argc, char** argv) { Line 912  int main(int argc, char** argv) {
912      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
913    }    }
914  #ifdef HAVE_LONG_LONG  #ifdef HAVE_LONG_LONG
915    # if defined(__MINGW__) || defined(__MINGW32__)
916    #   define LLD "%I64d"
917    #   define LLU "%I64u"
918    # else
919    #   define LLD "%lld"
920    #   define LLU "%llu"
921    # endif
922    {    {
923      long long v;      long long v;
924      static const long long max_value = 0x7fffffffffffffffLL;      static const long long max_value = 0x7fffffffffffffffLL;
# Line 751  int main(int argc, char** argv) { Line 928  int main(int argc, char** argv) {
928      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
929      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
930    
931      snprintf(buf, sizeof(buf), "%lld", max_value);      snprintf(buf, sizeof(buf), LLD, max_value);
932      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
933    
934      snprintf(buf, sizeof(buf), "%lld", min_value);      snprintf(buf, sizeof(buf), LLD, min_value);
935      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
936    
937      snprintf(buf, sizeof(buf), "%lld", max_value);      snprintf(buf, sizeof(buf), LLD, max_value);
938      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
939      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
940      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
941    
942      snprintf(buf, sizeof(buf), "%lld", min_value);      snprintf(buf, sizeof(buf), LLD, min_value);
943      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
944      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
945      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
# Line 778  int main(int argc, char** argv) { Line 955  int main(int argc, char** argv) {
955      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
956      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
957    
958      snprintf(buf, sizeof(buf), "%llu", max_value);      snprintf(buf, sizeof(buf), LLU, max_value);
959      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
960    
961      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
# Line 930  int main(int argc, char** argv) { Line 1107  int main(int argc, char** argv) {
1107    CHECK(RE("h.*o").PartialMatch("hello!"));    CHECK(RE("h.*o").PartialMatch("hello!"));
1108    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1109    
1110      /***** other tests *****/
1111    
1112    RadixTests();    RadixTests();
1113    TestReplace();    TestReplace();
1114    TestExtract();    TestExtract();
1115    TestConsume();    TestConsume();
1116    TestFindAndConsume();    TestFindAndConsume();
1117      TestQuoteMetaAll();
1118    TestMatchNumberPeculiarity();    TestMatchNumberPeculiarity();
1119    
1120    // Check the pattern() accessor    // Check the pattern() accessor
# Line 1054  int main(int argc, char** argv) { Line 1234  int main(int argc, char** argv) {
1234      VERBOSE_TEST  = true;      VERBOSE_TEST  = true;
1235    TestOptions();    TestOptions();
1236    
1237      // Test the constructors
1238      TestConstructors();
1239    
1240    // Done    // Done
1241    printf("OK\n");    printf("OK\n");
1242    

Legend:
Removed from v.87  
changed lines
  Added in v.234

  ViewVC Help
Powered by ViewVC 1.1.5