/[pcre]/code/tags/pcre-7.2/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/tags/pcre-7.2/pcrecpp_unittest.cc

Parent Directory | Revision Log | View Patch

code/trunk/pcrecpp_unittest.cc revision 81 by nigel, Sat Feb 24 21:40:59 2007 UTC code/tags/pcre-7.2/pcrecpp_unittest.cc revision 186 by ph10, Tue Jun 19 13:41:40 2007 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // -*- coding: utf-8 -*-
2    //
3    // Copyright (c) 2005 - 2006, Google Inc.
4  // All rights reserved.  // All rights reserved.
5  //  //
6  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 32  Line 34 
34  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
35    
36  #include <stdio.h>  #include <stdio.h>
37    #include <cassert>
38  #include <vector>  #include <vector>
39  #include "config.h"  #include "config.h"
40  #include "pcrecpp.h"  #include "pcrecpp.h"
# Line 259  static void TestReplace() { Line 262  static void TestReplace() {
262        "aaaaa",        "aaaaa",
263        "bbaaaaa",        "bbaaaaa",
264        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb" },
265        { "b*",
266          "bb",
267          "aa\naa\n",
268          "bbaa\naa\n",
269          "bbabbabb\nbbabbabb\nbb" },
270        { "b*",
271          "bb",
272          "aa\raa\r",
273          "bbaa\raa\r",
274          "bbabbabb\rbbabbabb\rbb" },
275        { "b*",
276          "bb",
277          "aa\r\naa\r\n",
278          "bbaa\r\naa\r\n",
279          "bbabbabb\r\nbbabbabb\r\nbb" },
280    #ifdef SUPPORT_UTF8
281        { "b*",
282          "bb",
283          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
284          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286        { "b*",
287          "bb",
288          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
289          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291           "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292    #endif
293      { "", NULL, NULL, NULL, NULL }      { "", NULL, NULL, NULL, NULL }
294    };    };
295    
296    #ifdef SUPPORT_UTF8
297      const bool support_utf8 = true;
298    #else
299      const bool support_utf8 = false;
300    #endif
301    
302    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304        assert(re.error().empty());
305      string one(t->original);      string one(t->original);
306      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
307      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
308      string all(t->original);      string all(t->original);
309      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
311    }    }
312    
313      // One final test: test \r\n replacement when we're not in CRLF mode
314      {
315        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316        assert(re.error().empty());
317        string all("aa\r\naa\r\n");
318        CHECK(re.GlobalReplace("bb", &all) > 0);
319        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320      }
321      {
322        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323        assert(re.error().empty());
324        string all("aa\r\naa\r\n");
325        CHECK(re.GlobalReplace("bb", &all) > 0);
326        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327      }
328      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329      //       Alas, the answer depends on how pcre was compiled.
330  }  }
331    
332  static void TestExtract() {  static void TestExtract() {
# Line 348  static void TestMatchNumberPeculiarity() Line 405  static void TestMatchNumberPeculiarity()
405    CHECK_EQ(a, "");    CHECK_EQ(a, "");
406  }  }
407    
408  static void TestRecursion(int size, const char *pattern, int match_limit) {  static void TestRecursion() {  // New version: checks that match_limit and match_limit_recursion turn an otherwise successful match into a failure.
409    printf("Testing recursion\n");    printf("Testing recursion\n");
410    
411    // Fill up a string repeating the pattern given    // Get one string that passes (sometimes), one that never does.
412    string domain;    string text_good("abcdefghijk");
413    domain.resize(size);    string text_bad("acdefghijkl");
414    int patlen = strlen(pattern);  
415    for (int i = 0; i < size; ++i) {    // According to pcretest, matching text_good against (\w+)*b
416      domain[i] = pattern[i % patlen];    // requires match_limit of at least 8192, and match_recursion_limit
417    }    // of at least 37.
418    // Just make sure it doesn't crash due to too much recursion.  
419    RE_Options options;    RE_Options options_ml;
420    options.set_match_limit(match_limit);    options_ml.set_match_limit(8192);
421    RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);    RE re("(\\w+)*b", options_ml);
422    re.FullMatch(domain);    CHECK(re.PartialMatch(text_good) == true);
423      CHECK(re.PartialMatch(text_bad) == false);  // text_bad contains no 'b', which the pattern requires
424      CHECK(re.FullMatch(text_good) == false);  // pattern ends in a literal 'b'; text_good ends in 'k'
425      CHECK(re.FullMatch(text_bad) == false);
426    
427      options_ml.set_match_limit(1024);  // below the 8192 minimum quoted above
428      RE re2("(\\w+)*b", options_ml);
429      CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
430      CHECK(re2.PartialMatch(text_bad) == false);
431      CHECK(re2.FullMatch(text_good) == false);
432      CHECK(re2.FullMatch(text_bad) == false);
433    
434      RE_Options options_mlr;  // separate options object: only the recursion limit is set on it
435      options_mlr.set_match_limit_recursion(50);  // above the minimum of 37 quoted above
436      RE re3("(\\w+)*b", options_mlr);
437      CHECK(re3.PartialMatch(text_good) == true);
438      CHECK(re3.PartialMatch(text_bad) == false);
439      CHECK(re3.FullMatch(text_good) == false);
440      CHECK(re3.FullMatch(text_bad) == false);
441    
442      options_mlr.set_match_limit_recursion(10);  // below that minimum, so text_good is expected to fail as well
443      RE re4("(\\w+)*b", options_mlr);
444      CHECK(re4.PartialMatch(text_good) == false);
445      CHECK(re4.PartialMatch(text_bad) == false);
446      CHECK(re4.FullMatch(text_good) == false);
447      CHECK(re4.FullMatch(text_bad) == false);
448    }
449    
450    // A meta-quoted string, interpreted as a pattern, should always match
451    // the original unquoted string.
452    static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {  // Asserts that QuoteMeta(unquoted), compiled with `options`, fully matches `unquoted`.
453      string quoted = RE::QuoteMeta(unquoted);  // meta-quoted form of the input
454      RE re(quoted, options);  // use the quoted text as the pattern
455      CHECK(re.FullMatch(unquoted));  // the pattern must match the original text in full
456    }
457    
458    // A string containing meaningful regexp characters, which is then meta-
459    // quoted, should not generally match a string the unquoted string does.
460    static void NegativeTestQuoteMeta(string unquoted, string should_not_match,  // Asserts that QuoteMeta(unquoted), used as a pattern, does NOT fully match should_not_match.
461                                      RE_Options options = RE_Options()) {
462      string quoted = RE::QuoteMeta(unquoted);  // meta-quoted form of the first argument
463      RE re(quoted, options);  // use the quoted text as the pattern
464      CHECK(!re.FullMatch(should_not_match));  // passes only when the full match fails
465    }
466    
467    // Tests that quoted meta characters match their original strings,
468    // and that a few things that shouldn't match indeed do not.
469    static void TestQuotaMetaSimple() {  // Positive cases: each literal is passed to TestQuoteMeta with default options. NOTE(review): the name is spelled "Quota" both here and at its call in TestQuoteMetaAll.
470      TestQuoteMeta("foo");  // no metacharacters at all
471      TestQuoteMeta("foo.bar");
472      TestQuoteMeta("foo\\.bar");  // input already contains a backslash
473      TestQuoteMeta("[1-9]");
474      TestQuoteMeta("1.5-2.0?");
475      TestQuoteMeta("\\d");  // backslash followed by a letter
476      TestQuoteMeta("Who doesn't like ice cream?");
477      TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478      TestQuoteMeta("((?!)xxx).*yyy");
479      TestQuoteMeta("([");  // unbalanced opening bracket characters
480    }
481    
482    static void TestQuoteMetaSimpleNegative() {  // Negative cases: the quoted form of argument 1 must not fully match argument 2.
483      NegativeTestQuoteMeta("foo", "bar");
484      NegativeTestQuoteMeta("...", "bar");  // quoted dots must not act as wildcards
485      NegativeTestQuoteMeta("\\.", ".");
486      NegativeTestQuoteMeta("\\.", "..");
487      NegativeTestQuoteMeta("(a)", "a");  // quoted parentheses must not act as a group
488      NegativeTestQuoteMeta("(a|b)", "a");
489      NegativeTestQuoteMeta("(a|b)", "(a)");
490      NegativeTestQuoteMeta("(a|b)", "a|b");
491      NegativeTestQuoteMeta("[0-9]", "0");  // quoted brackets must not act as a character class
492      NegativeTestQuoteMeta("[0-9]", "0-9");
493      NegativeTestQuoteMeta("[0-9]", "[9]");
494      NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495    }
496    
497    static void TestQuoteMetaLatin1() {  // Round-trips a string containing a single byte above 0x7F, using default options.
498      TestQuoteMeta("3\xb2 = 9");  // 0xB2 is the non-ASCII byte under test
499    }
500    
501    static void TestQuoteMetaUtf8() {  // UTF-8 cases; the body is empty unless SUPPORT_UTF8 is defined.
502    #ifdef SUPPORT_UTF8
503      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());  // 2-byte sequence inside a word
504      TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
505      TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
506      TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
507      TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
508      TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
509      TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
510      NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
511                            "27\\\xc2\\\xb0",  // same bytes, each preceded by a backslash
512                            pcrecpp::UTF8());
513    #endif
514    }
515    
516    static void TestQuoteMetaAll() {  // Prints a banner, then runs the four QuoteMeta test groups in order.
517      printf("Testing QuoteMeta\n");
518      TestQuotaMetaSimple();  // sic: this is the spelling used by the definition
519      TestQuoteMetaSimpleNegative();
520      TestQuoteMetaLatin1();
521      TestQuoteMetaUtf8();
522  }  }
523    
524  //  //
# Line 587  static void TestOptions() { Line 743  static void TestOptions() {
743    Test_all_options();    Test_all_options();
744  }  }
745    
746    static void TestConstructors() {  // Exercises RE copy construction and assignment, including self-assignment.
747      printf("Testing constructors\n");
748    
749      RE_Options options;
750      options.set_dotall(true);  // str below contains '\n'; presumably dotall lets ".*" span them (confirm against pcrecpp docs)
751      const char *str = "HELLO\n" "cruel\n" "world";
752    
753      RE orig("HELLO.*world", options);
754      CHECK(orig.FullMatch(str));  // baseline: the original object matches
755    
756      RE copy1(orig);  // copy construction
757      CHECK(copy1.FullMatch(str));
758    
759      RE copy2("not a match");
760      CHECK(!copy2.FullMatch(str));  // starts out with a pattern that does not match str
761      copy2 = copy1;  // assignment from a copy
762      CHECK(copy2.FullMatch(str));
763      copy2 = orig;  // assignment from the original
764      CHECK(copy2.FullMatch(str));
765    
766      // Make sure when we assign to ourselves, nothing bad happens
767      orig = orig;
768      copy1 = copy1;
769      copy2 = copy2;
770      CHECK(orig.FullMatch(str));  // all three must still match after self-assignment
771      CHECK(copy1.FullMatch(str));
772      CHECK(copy2.FullMatch(str));
773    }
774    
775  int main(int argc, char** argv) {  int main(int argc, char** argv) {
776    // Treat any flag as --help    // Treat any flag as --help
777    if (argc > 1 && argv[1][0] == '-') {    if (argc > 1 && argv[1][0] == '-') {
# Line 621  int main(int argc, char** argv) { Line 806  int main(int argc, char** argv) {
806    /***** FullMatch with no args *****/    /***** FullMatch with no args *****/
807    
808    CHECK(RE("h.*o").FullMatch("hello"));    CHECK(RE("h.*o").FullMatch("hello"));
809    CHECK(!RE("h.*o").FullMatch("othello"));    CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
810    CHECK(!RE("h.*o").FullMatch("hello!"));    CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
811      CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
812      CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
813      CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
814    
815    /***** FullMatch with args *****/    /***** FullMatch with args *****/
816    
# Line 905  int main(int argc, char** argv) { Line 1093  int main(int argc, char** argv) {
1093    CHECK(RE("h.*o").PartialMatch("hello!"));    CHECK(RE("h.*o").PartialMatch("hello!"));
1094    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1095    
1096      /***** other tests *****/
1097    
1098    RadixTests();    RadixTests();
1099    TestReplace();    TestReplace();
1100    TestExtract();    TestExtract();
1101    TestConsume();    TestConsume();
1102    TestFindAndConsume();    TestFindAndConsume();
1103      TestQuoteMetaAll();
1104    TestMatchNumberPeculiarity();    TestMatchNumberPeculiarity();
1105    
1106    // Check the pattern() accessor    // Check the pattern() accessor
# Line 1021  int main(int argc, char** argv) { Line 1212  int main(int argc, char** argv) {
1212      CHECK(!re.error().empty());      CHECK(!re.error().empty());
1213    }    }
1214    
1215    // Test that recursion is stopped: there will be some errors reported    // Test that recursion is stopped
1216    int matchlimit = 5000;    TestRecursion();
   int bytes = 15 * 1024;  // enough to crash if there was no match limit  
   TestRecursion(bytes, ".", matchlimit);  
   TestRecursion(bytes, "a", matchlimit);  
   TestRecursion(bytes, "a.", matchlimit);  
   TestRecursion(bytes, "ab.", matchlimit);  
   TestRecursion(bytes, "abc.", matchlimit);  
1217    
1218    // Test Options    // Test Options
1219    if (getenv("VERBOSE_TEST") != NULL)    if (getenv("VERBOSE_TEST") != NULL)
1220      VERBOSE_TEST  = true;      VERBOSE_TEST  = true;
1221    TestOptions();    TestOptions();
1222    
1223      // Test the constructors
1224      TestConstructors();
1225    
1226    // Done    // Done
1227    printf("OK\n");    printf("OK\n");
1228    

Legend:
Removed from v.81  
changed lines
  Added in v.186

  ViewVC Help
Powered by ViewVC 1.1.5