/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 326 by ph10, Sat Mar 8 17:24:02 2008 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // -*- coding: utf-8 -*-
2    //
3    // Copyright (c) 2005 - 2006, Google Inc.
4  // All rights reserved.  // All rights reserved.
5  //  //
6  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 31  Line 33 
33  //  //
34  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
35    
36    #ifdef HAVE_CONFIG_H
37    #include "config.h"
38    #endif
39    
40  #include <stdio.h>  #include <stdio.h>
41    #include <cassert>
42  #include <vector>  #include <vector>
 #include "config.h"  
43  #include "pcrecpp.h"  #include "pcrecpp.h"
44    
45  using pcrecpp::StringPiece;  using pcrecpp::StringPiece;
# Line 43  using pcrecpp::Hex; Line 49  using pcrecpp::Hex;
49  using pcrecpp::Octal;  using pcrecpp::Octal;
50  using pcrecpp::CRadix;  using pcrecpp::CRadix;
51    
52    static bool VERBOSE_TEST  = false;
53    
54  // CHECK dies with a fatal error if condition is not true.  It is *not*  // CHECK dies with a fatal error if condition is not true.  It is *not*
55  // controlled by NDEBUG, so the check will be executed regardless of  // controlled by NDEBUG, so the check will be executed regardless of
56  // compilation mode.  Therefore, it is safe to do things like:  // compilation mode.  Therefore, it is safe to do things like:
# Line 102  static void LeakTest() { Line 110  static void LeakTest() {
110        initial_size = VirtualProcessSize();        initial_size = VirtualProcessSize();
111        printf("Size after 50000: %llu\n", initial_size);        printf("Size after 50000: %llu\n", initial_size);
112      }      }
113      char buf[100];      char buf[100];  // definitely big enough
114      snprintf(buf, sizeof(buf), "pat%09d", i);      sprintf(buf, "pat%09d", i);
115      RE newre(buf);      RE newre(buf);
116    }    }
117    uint64 final_size = VirtualProcessSize();    uint64 final_size = VirtualProcessSize();
# Line 205  static void TestReplace() { Line 213  static void TestReplace() {
213      const char *original;      const char *original;
214      const char *single;      const char *single;
215      const char *global;      const char *global;
216        int global_count;         // the expected return value from ReplaceAll
217    };    };
218    static const ReplaceTest tests[] = {    static const ReplaceTest tests[] = {
219      { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",      { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220        "\\2\\1ay",        "\\2\\1ay",
221        "the quick brown fox jumps over the lazy dogs.",        "the quick brown fox jumps over the lazy dogs.",
222        "ethay quick brown fox jumps over the lazy dogs.",        "ethay quick brown fox jumps over the lazy dogs.",
223        "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },        "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224          9 },
225      { "\\w+",      { "\\w+",
226        "\\0-NOSPAM",        "\\0-NOSPAM",
227        "paul.haahr@google.com",        "paul.haahr@google.com",
228        "paul-NOSPAM.haahr@google.com",        "paul-NOSPAM.haahr@google.com",
229        "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },        "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230          4 },
231      { "^",      { "^",
232        "(START)",        "(START)",
233        "foo",        "foo",
234        "(START)foo",        "(START)foo",
235        "(START)foo" },        "(START)foo",
236          1 },
237      { "^",      { "^",
238        "(START)",        "(START)",
239        "",        "",
240        "(START)",        "(START)",
241        "(START)" },        "(START)",
242          1 },
243      { "$",      { "$",
244        "(END)",        "(END)",
245        "",        "",
246        "(END)",        "(END)",
247        "(END)" },        "(END)",
248          1 },
249      { "b",      { "b",
250        "bb",        "bb",
251        "ababababab",        "ababababab",
252        "abbabababab",        "abbabababab",
253        "abbabbabbabbabb" },        "abbabbabbabbabb",
254           5 },
255      { "b",      { "b",
256        "bb",        "bb",
257        "bbbbbb",        "bbbbbb",
258        "bbbbbbb",        "bbbbbbb",
259        "bbbbbbbbbbbb" },        "bbbbbbbbbbbb",
260          6 },
261      { "b+",      { "b+",
262        "bb",        "bb",
263        "bbbbbb",        "bbbbbb",
264        "bb",        "bb",
265        "bb" },        "bb",
266          1 },
267      { "b*",      { "b*",
268        "bb",        "bb",
269        "bbbbbb",        "bbbbbb",
270        "bb",        "bb",
271        "bb" },        "bb",
272          1 },
273      { "b*",      { "b*",
274        "bb",        "bb",
275        "aaaaa",        "aaaaa",
276        "bbaaaaa",        "bbaaaaa",
277        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb",
278      { "", NULL, NULL, NULL, NULL }        6 },
279        { "b*",
280          "bb",
281          "aa\naa\n",
282          "bbaa\naa\n",
283          "bbabbabb\nbbabbabb\nbb",
284          7 },
285        { "b*",
286          "bb",
287          "aa\raa\r",
288          "bbaa\raa\r",
289          "bbabbabb\rbbabbabb\rbb",
290          7 },
291        { "b*",
292          "bb",
293          "aa\r\naa\r\n",
294          "bbaa\r\naa\r\n",
295          "bbabbabb\r\nbbabbabb\r\nbb",
296          7 },
297    #ifdef SUPPORT_UTF8
298        { "b*",
299          "bb",
300          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
301          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
303          5 },
304        { "b*",
305          "bb",
306          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
307          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309           "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
310          9 },
311    #endif
312        { "", NULL, NULL, NULL, NULL, 0 }
313    };    };
314    
315    #ifdef SUPPORT_UTF8
316      const bool support_utf8 = true;
317    #else
318      const bool support_utf8 = false;
319    #endif
320    
321    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323        assert(re.error().empty());
324      string one(t->original);      string one(t->original);
325      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
326      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
327      string all(t->original);      string all(t->original);
328      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      const int replace_count = re.GlobalReplace(t->rewrite, &all);
329      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
330        CHECK_EQ(replace_count, t->global_count);
331      }
332    
333      // One final test: test \r\n replacement when we're not in CRLF mode
334      {
335        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336        assert(re.error().empty());
337        string all("aa\r\naa\r\n");
338        CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
340      }
341      {
342        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343        assert(re.error().empty());
344        string all("aa\r\naa\r\n");
345        CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
347    }    }
348      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349      //       Alas, the answer depends on how pcre was compiled.
350  }  }
351    
352  static void TestExtract() {  static void TestExtract() {
# Line 346  static void TestMatchNumberPeculiarity() Line 425  static void TestMatchNumberPeculiarity()
425    CHECK_EQ(a, "");    CHECK_EQ(a, "");
426  }  }
427    
428  static void TestRecursion(int size, const char *pattern, int match_limit) {  static void TestRecursion() {
429    printf("Testing recursion\n");    printf("Testing recursion\n");
430    
431    // Fill up a string repeating the pattern given    // Get one string that passes (sometimes), one that never does.
432    string domain;    string text_good("abcdefghijk");
433    domain.resize(size);    string text_bad("acdefghijkl");
434    int patlen = strlen(pattern);  
435    for (int i = 0; i < size; ++i) {    // According to pcretest, matching text_good against (\w+)*b
436      domain[i] = pattern[i % patlen];    // requires match_limit of at least 8192, and match_recursion_limit
437      // of at least 37.
438    
439      RE_Options options_ml;
440      options_ml.set_match_limit(8192);
441      RE re("(\\w+)*b", options_ml);
442      CHECK(re.PartialMatch(text_good) == true);
443      CHECK(re.PartialMatch(text_bad) == false);
444      CHECK(re.FullMatch(text_good) == false);
445      CHECK(re.FullMatch(text_bad) == false);
446    
447      options_ml.set_match_limit(1024);
448      RE re2("(\\w+)*b", options_ml);
449      CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
450      CHECK(re2.PartialMatch(text_bad) == false);
451      CHECK(re2.FullMatch(text_good) == false);
452      CHECK(re2.FullMatch(text_bad) == false);
453    
454      RE_Options options_mlr;
455      options_mlr.set_match_limit_recursion(50);
456      RE re3("(\\w+)*b", options_mlr);
457      CHECK(re3.PartialMatch(text_good) == true);
458      CHECK(re3.PartialMatch(text_bad) == false);
459      CHECK(re3.FullMatch(text_good) == false);
460      CHECK(re3.FullMatch(text_bad) == false);
461    
462      options_mlr.set_match_limit_recursion(10);
463      RE re4("(\\w+)*b", options_mlr);
464      CHECK(re4.PartialMatch(text_good) == false);
465      CHECK(re4.PartialMatch(text_bad) == false);
466      CHECK(re4.FullMatch(text_good) == false);
467      CHECK(re4.FullMatch(text_bad) == false);
468    }
469    
470    // A meta-quoted string, interpreted as a pattern, should always match
471    // the original unquoted string.
472    static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473      string quoted = RE::QuoteMeta(unquoted);
474      RE re(quoted, options);
475      CHECK(re.FullMatch(unquoted));
476    }
477    
478    // A string containing meaningful regexp characters, once meta-quoted,
479    // should generally not match a string that the unquoted pattern matches.
480    static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481                                      RE_Options options = RE_Options()) {
482      string quoted = RE::QuoteMeta(unquoted);
483      RE re(quoted, options);
484      CHECK(!re.FullMatch(should_not_match));
485    }
486    
487    // Tests that quoted meta characters match their original strings,
488    // and that a few things that shouldn't match indeed do not.
489    static void TestQuotaMetaSimple() {
490      TestQuoteMeta("foo");
491      TestQuoteMeta("foo.bar");
492      TestQuoteMeta("foo\\.bar");
493      TestQuoteMeta("[1-9]");
494      TestQuoteMeta("1.5-2.0?");
495      TestQuoteMeta("\\d");
496      TestQuoteMeta("Who doesn't like ice cream?");
497      TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498      TestQuoteMeta("((?!)xxx).*yyy");
499      TestQuoteMeta("([");
500      TestQuoteMeta(string("foo\0bar", 7));
501    }
502    
503    static void TestQuoteMetaSimpleNegative() {
504      NegativeTestQuoteMeta("foo", "bar");
505      NegativeTestQuoteMeta("...", "bar");
506      NegativeTestQuoteMeta("\\.", ".");
507      NegativeTestQuoteMeta("\\.", "..");
508      NegativeTestQuoteMeta("(a)", "a");
509      NegativeTestQuoteMeta("(a|b)", "a");
510      NegativeTestQuoteMeta("(a|b)", "(a)");
511      NegativeTestQuoteMeta("(a|b)", "a|b");
512      NegativeTestQuoteMeta("[0-9]", "0");
513      NegativeTestQuoteMeta("[0-9]", "0-9");
514      NegativeTestQuoteMeta("[0-9]", "[9]");
515      NegativeTestQuoteMeta("((?!)xxx)", "xxx");
516    }
517    
518    static void TestQuoteMetaLatin1() {
519      TestQuoteMeta("3\xb2 = 9");
520    }
521    
522    static void TestQuoteMetaUtf8() {
523    #ifdef SUPPORT_UTF8
524      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
525      TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
526      TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
527      TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
528      TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
529      TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
530      TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
531      NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
532                            "27\\\xc2\\\xb0",
533                            pcrecpp::UTF8());
534    #endif
535    }
536    
537    static void TestQuoteMetaAll() {
538      printf("Testing QuoteMeta\n");
539      TestQuotaMetaSimple();
540      TestQuoteMetaSimpleNegative();
541      TestQuoteMetaLatin1();
542      TestQuoteMetaUtf8();
543    }
544    
545    //
546    // Options tests contributed by
547    // Giuseppe Maxia, CTO, Stardata s.r.l.
548    // July 2005
549    //
550    static void GetOneOptionResult(
551                    const char *option_name,
552                    const char *regex,
553                    const char *str,
554                    RE_Options options,
555                    bool full,
556                    string expected) {
557    
558      printf("Testing Option <%s>\n", option_name);
559      if(VERBOSE_TEST)
560        printf("/%s/ finds \"%s\" within \"%s\" \n",
561                        regex,
562                        expected.c_str(),
563                        str);
564      string captured("");
565      if (full)
566        RE(regex,options).FullMatch(str, &captured);
567      else
568        RE(regex,options).PartialMatch(str, &captured);
569      CHECK_EQ(captured, expected);
570    }
571    
572    static void TestOneOption(
573                    const char *option_name,
574                    const char *regex,
575                    const char *str,
576                    RE_Options options,
577                    bool full,
578                    bool assertive = true) {
579    
580      printf("Testing Option <%s>\n", option_name);
581      if (VERBOSE_TEST)
582        printf("'%s' %s /%s/ \n",
583                      str,
584                      (assertive? "matches" : "doesn't match"),
585                      regex);
586      if (assertive) {
587        if (full)
588          CHECK(RE(regex,options).FullMatch(str));
589        else
590          CHECK(RE(regex,options).PartialMatch(str));
591      } else {
592        if (full)
593          CHECK(!RE(regex,options).FullMatch(str));
594        else
595          CHECK(!RE(regex,options).PartialMatch(str));
596    }    }
597    // Just make sure it doesn't crash due to too much recursion.  }
598    
599    static void Test_CASELESS() {
600      RE_Options options;
601      RE_Options options2;
602    
603      options.set_caseless(true);
604      TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
605      TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
606      TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
607    
608      TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
609      TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
610      options.set_caseless(false);
611      TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
612    }
613    
614    static void Test_MULTILINE() {
615      RE_Options options;
616      RE_Options options2;
617      const char *str = "HELLO\n" "cruel\n" "world\n";
618    
619      options.set_multiline(true);
620      TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
621      TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
622      TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
623      options.set_multiline(false);
624      TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
625    }
626    
627    static void Test_DOTALL() {
628      RE_Options options;
629      RE_Options options2;
630      const char *str = "HELLO\n" "cruel\n" "world";
631    
632      options.set_dotall(true);
633      TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
634      TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
635      TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
636      options.set_dotall(false);
637      TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
638    }
639    
640    static void Test_DOLLAR_ENDONLY() {
641      RE_Options options;
642      RE_Options options2;
643      const char *str = "HELLO world\n";
644    
645      TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
646      options.set_dollar_endonly(true);
647      TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
648      TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
649    }
650    
651    static void Test_EXTRA() {
652      RE_Options options;
653      const char *str = "HELLO";
654    
655      options.set_extra(true);
656      TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
657      TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
658      options.set_extra(false);
659      TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
660    }
661    
662    static void Test_EXTENDED() {
663      RE_Options options;
664      RE_Options options2;
665      const char *str = "HELLO world";
666    
667      options.set_extended(true);
668      TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
669      TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
670      TestOneOption("EXTENDED (class)",
671                        "^ HE L{2} O "
672                        "\\s+        "
673                        "\\w+ $      ",
674                        str,
675                        options,
676                        false);
677    
678      TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
679      TestOneOption("EXTENDED (function)",
680                        "^ HE L{2} O "
681                        "\\s+        "
682                        "\\w+ $      ",
683                        str,
684                        pcrecpp::EXTENDED(),
685                        false);
686    
687      options.set_extended(false);
688      TestOneOption("no EXTENDED", "HELLO world", str, options, false);
689    }
690    
691    static void Test_NO_AUTO_CAPTURE() {
692      RE_Options options;
693      const char *str = "HELLO world";
694      string captured;
695    
696      printf("Testing Option <no NO_AUTO_CAPTURE>\n");
697      if (VERBOSE_TEST)
698        printf("parentheses capture text\n");
699      RE re("(world|universe)$", options);
700      CHECK(re.Extract("\\1", str , &captured));
701      CHECK_EQ(captured, "world");
702      options.set_no_auto_capture(true);
703      printf("testing Option <NO_AUTO_CAPTURE>\n");
704      if (VERBOSE_TEST)
705        printf("parentheses do not capture text\n");
706      re.Extract("\\1",str, &captured );
707      CHECK_EQ(captured, "world");
708    }
709    
710    static void Test_UNGREEDY() {
711      RE_Options options;
712      const char *str = "HELLO, 'this' is the 'world'";
713    
714      options.set_ungreedy(true);
715      GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
716      GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
717      GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
718    
719      options.set_ungreedy(false);
720      GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
721      GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
722    }
723    
724    static void Test_all_options() {
725      const char *str = "HELLO\n" "cruel\n" "world";
726    RE_Options options;    RE_Options options;
727    options.set_match_limit(match_limit);    options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
728    RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);  
729    re.FullMatch(domain);    TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
730      options.set_all_options(0);
731      TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
732      options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
733    
734      TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
735      TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
736                      " ^ c r u e l $ ",
737                      str,
738                      RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
739                      false);
740    
741      TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
742                      " ^ c r u e l $ ",
743                      str,
744                      RE_Options()
745                           .set_multiline(true)
746                           .set_extended(true),
747                      false);
748    
749      options.set_all_options(0);
750      TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
751    
752    }
753    
754    static void TestOptions() {
755      printf("Testing Options\n");
756      Test_CASELESS();
757      Test_MULTILINE();
758      Test_DOTALL();
759      Test_DOLLAR_ENDONLY();
760      Test_EXTENDED();
761      Test_NO_AUTO_CAPTURE();
762      Test_UNGREEDY();
763      Test_EXTRA();
764      Test_all_options();
765  }  }
766    
767    static void TestConstructors() {
768      printf("Testing constructors\n");
769    
770      RE_Options options;
771      options.set_dotall(true);
772      const char *str = "HELLO\n" "cruel\n" "world";
773    
774      RE orig("HELLO.*world", options);
775      CHECK(orig.FullMatch(str));
776    
777      RE copy1(orig);
778      CHECK(copy1.FullMatch(str));
779    
780      RE copy2("not a match");
781      CHECK(!copy2.FullMatch(str));
782      copy2 = copy1;
783      CHECK(copy2.FullMatch(str));
784      copy2 = orig;
785      CHECK(copy2.FullMatch(str));
786    
787      // Make sure when we assign to ourselves, nothing bad happens
788      orig = orig;
789      copy1 = copy1;
790      copy2 = copy2;
791      CHECK(orig.FullMatch(str));
792      CHECK(copy1.FullMatch(str));
793      CHECK(copy2.FullMatch(str));
794    }
795    
796  int main(int argc, char** argv) {  int main(int argc, char** argv) {
797    // Treat any flag as --help    // Treat any flag as --help
# Line 398  int main(int argc, char** argv) { Line 827  int main(int argc, char** argv) {
827    /***** FullMatch with no args *****/    /***** FullMatch with no args *****/
828    
829    CHECK(RE("h.*o").FullMatch("hello"));    CHECK(RE("h.*o").FullMatch("hello"));
830    CHECK(!RE("h.*o").FullMatch("othello"));    CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
831    CHECK(!RE("h.*o").FullMatch("hello!"));    CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
832      CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
833      CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
834      CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
835    
836    /***** FullMatch with args *****/    /***** FullMatch with args *****/
837    
# Line 443  int main(int argc, char** argv) { Line 875  int main(int argc, char** argv) {
875    CHECK_EQ(s, string("ruby"));    CHECK_EQ(s, string("ruby"));
876    CHECK_EQ(i, 1234);    CHECK_EQ(i, 1234);
877    
878      // Ignore non-void* NULL arg
879      CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
880      CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
881      CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
882      CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
883    #ifdef HAVE_LONG_LONG
884      CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
885    #endif
886      CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
887      CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
888    
889      // Fail on non-void* NULL arg if the match doesn't parse for the given type.
890      CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
891      CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
892      CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
893      CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
894      CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
895    
896    // Ignored arg    // Ignored arg
897    CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));    CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
898    CHECK_EQ(s, string("ruby"));    CHECK_EQ(s, string("ruby"));
# Line 494  int main(int argc, char** argv) { Line 944  int main(int argc, char** argv) {
944      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
945    }    }
946  #ifdef HAVE_LONG_LONG  #ifdef HAVE_LONG_LONG
947    # if defined(__MINGW__) || defined(__MINGW32__)
948    #   define LLD "%I64d"
949    #   define LLU "%I64u"
950    # else
951    #   define LLD "%lld"
952    #   define LLU "%llu"
953    # endif
954    {    {
955      long long v;      long long v;
956      static const long long max_value = 0x7fffffffffffffffLL;      static const long long max_value = 0x7fffffffffffffffLL;
957      static const long long min_value = -max_value - 1;      static const long long min_value = -max_value - 1;
958      char buf[32];      char buf[32];  // definitely big enough for a long long
959    
960      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
961      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
962    
963      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
964      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
965    
966      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
967      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
968    
969      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
970      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
971      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
972      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
973    
974      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
975      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
976      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
977      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
# Line 525  int main(int argc, char** argv) { Line 982  int main(int argc, char** argv) {
982      unsigned long long v;      unsigned long long v;
983      long long v2;      long long v2;
984      static const unsigned long long max_value = 0xffffffffffffffffULL;      static const unsigned long long max_value = 0xffffffffffffffffULL;
985      char buf[32];      char buf[32];  // definitely big enough for an unsigned long long
986    
987      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
988      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
989    
990      snprintf(buf, sizeof(buf), "%llu", max_value);      sprintf(buf, LLU, max_value);
991      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
992    
993      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
# Line 682  int main(int argc, char** argv) { Line 1139  int main(int argc, char** argv) {
1139    CHECK(RE("h.*o").PartialMatch("hello!"));    CHECK(RE("h.*o").PartialMatch("hello!"));
1140    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1141    
1142      /***** other tests *****/
1143    
1144    RadixTests();    RadixTests();
1145    TestReplace();    TestReplace();
1146    TestExtract();    TestExtract();
1147    TestConsume();    TestConsume();
1148    TestFindAndConsume();    TestFindAndConsume();
1149      TestQuoteMetaAll();
1150    TestMatchNumberPeculiarity();    TestMatchNumberPeculiarity();
1151    
1152    // Check the pattern() accessor    // Check the pattern() accessor
# Line 708  int main(int argc, char** argv) { Line 1168  int main(int argc, char** argv) {
1168      printf("Testing UTF-8 handling\n");      printf("Testing UTF-8 handling\n");
1169    
1170      // Three Japanese characters (nihongo)      // Three Japanese characters (nihongo)
1171      const char utf8_string[] = {      const unsigned char utf8_string[] = {
1172           0xe6, 0x97, 0xa5, // 65e5           0xe6, 0x97, 0xa5, // 65e5
1173           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1174           0xe8, 0xaa, 0x9e, // 8a9e           0xe8, 0xaa, 0x9e, // 8a9e
1175           0           0
1176      };      };
1177      const char utf8_pattern[] = {      const unsigned char utf8_pattern[] = {
1178           '.',           '.',
1179           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1180           '.',           '.',
# Line 798  int main(int argc, char** argv) { Line 1258  int main(int argc, char** argv) {
1258      CHECK(!re.error().empty());      CHECK(!re.error().empty());
1259    }    }
1260    
1261    // Test that recursion is stopped: there will be some errors reported    // Test that recursion is stopped
1262    int matchlimit = 5000;    TestRecursion();
1263    int bytes = 15 * 1024;  // enough to crash if there was no match limit  
1264    TestRecursion(bytes, ".", matchlimit);    // Test Options
1265    TestRecursion(bytes, "a", matchlimit);    if (getenv("VERBOSE_TEST") != NULL)
1266    TestRecursion(bytes, "a.", matchlimit);      VERBOSE_TEST  = true;
1267    TestRecursion(bytes, "ab.", matchlimit);    TestOptions();
1268    TestRecursion(bytes, "abc.", matchlimit);  
1269      // Test the constructors
1270      TestConstructors();
1271    
1272    // Done    // Done
1273    printf("OK\n");    printf("OK\n");

Legend:
Removed from v.77  
changed lines
  Added in v.326

  ViewVC Help
Powered by ViewVC 1.1.5