/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 297 by ph10, Fri Jan 4 19:44:00 2008 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // -*- coding: utf-8 -*-
2    //
3    // Copyright (c) 2005 - 2006, Google Inc.
4  // All rights reserved.  // All rights reserved.
5  //  //
6  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 31  Line 33 
33  //  //
34  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
35    
36    #ifdef HAVE_CONFIG_H
37    #include "config.h"
38    #endif
39    
40  #include <stdio.h>  #include <stdio.h>
41    #include <cassert>
42  #include <vector>  #include <vector>
 #include "config.h"  
43  #include "pcrecpp.h"  #include "pcrecpp.h"
44    
45  using pcrecpp::StringPiece;  using pcrecpp::StringPiece;
# Line 104  static void LeakTest() { Line 110  static void LeakTest() {
110        initial_size = VirtualProcessSize();        initial_size = VirtualProcessSize();
111        printf("Size after 50000: %llu\n", initial_size);        printf("Size after 50000: %llu\n", initial_size);
112      }      }
113      char buf[100];      char buf[100];  // definitely big enough
114      snprintf(buf, sizeof(buf), "pat%09d", i);      sprintf(buf, "pat%09d", i);
115      RE newre(buf);      RE newre(buf);
116    }    }
117    uint64 final_size = VirtualProcessSize();    uint64 final_size = VirtualProcessSize();
# Line 207  static void TestReplace() { Line 213  static void TestReplace() {
213      const char *original;      const char *original;
214      const char *single;      const char *single;
215      const char *global;      const char *global;
216        int global_count;         // the expected return value from ReplaceAll
217    };    };
218    static const ReplaceTest tests[] = {    static const ReplaceTest tests[] = {
219      { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",      { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220        "\\2\\1ay",        "\\2\\1ay",
221        "the quick brown fox jumps over the lazy dogs.",        "the quick brown fox jumps over the lazy dogs.",
222        "ethay quick brown fox jumps over the lazy dogs.",        "ethay quick brown fox jumps over the lazy dogs.",
223        "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },        "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224          9 },
225      { "\\w+",      { "\\w+",
226        "\\0-NOSPAM",        "\\0-NOSPAM",
227        "paul.haahr@google.com",        "paul.haahr@google.com",
228        "paul-NOSPAM.haahr@google.com",        "paul-NOSPAM.haahr@google.com",
229        "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },        "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230          4 },
231      { "^",      { "^",
232        "(START)",        "(START)",
233        "foo",        "foo",
234        "(START)foo",        "(START)foo",
235        "(START)foo" },        "(START)foo",
236          1 },
237      { "^",      { "^",
238        "(START)",        "(START)",
239        "",        "",
240        "(START)",        "(START)",
241        "(START)" },        "(START)",
242          1 },
243      { "$",      { "$",
244        "(END)",        "(END)",
245        "",        "",
246        "(END)",        "(END)",
247        "(END)" },        "(END)",
248          1 },
249      { "b",      { "b",
250        "bb",        "bb",
251        "ababababab",        "ababababab",
252        "abbabababab",        "abbabababab",
253        "abbabbabbabbabb" },        "abbabbabbabbabb",
254           5 },
255      { "b",      { "b",
256        "bb",        "bb",
257        "bbbbbb",        "bbbbbb",
258        "bbbbbbb",        "bbbbbbb",
259        "bbbbbbbbbbbb" },        "bbbbbbbbbbbb",
260          6 },
261      { "b+",      { "b+",
262        "bb",        "bb",
263        "bbbbbb",        "bbbbbb",
264        "bb",        "bb",
265        "bb" },        "bb",
266          1 },
267      { "b*",      { "b*",
268        "bb",        "bb",
269        "bbbbbb",        "bbbbbb",
270        "bb",        "bb",
271        "bb" },        "bb",
272          1 },
273      { "b*",      { "b*",
274        "bb",        "bb",
275        "aaaaa",        "aaaaa",
276        "bbaaaaa",        "bbaaaaa",
277        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb",
278      { "", NULL, NULL, NULL, NULL }        6 },
279        { "b*",
280          "bb",
281          "aa\naa\n",
282          "bbaa\naa\n",
283          "bbabbabb\nbbabbabb\nbb",
284          7 },
285        { "b*",
286          "bb",
287          "aa\raa\r",
288          "bbaa\raa\r",
289          "bbabbabb\rbbabbabb\rbb",
290          7 },
291        { "b*",
292          "bb",
293          "aa\r\naa\r\n",
294          "bbaa\r\naa\r\n",
295          "bbabbabb\r\nbbabbabb\r\nbb",
296          7 },
297    #ifdef SUPPORT_UTF8
298        { "b*",
299          "bb",
300          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
301          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
303          5 },
304        { "b*",
305          "bb",
306          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
307          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309           "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
310          9 },
311    #endif
312        { "", NULL, NULL, NULL, NULL, 0 }
313    };    };
314    
315    #ifdef SUPPORT_UTF8
316      const bool support_utf8 = true;
317    #else
318      const bool support_utf8 = false;
319    #endif
320    
321    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323        assert(re.error().empty());
324      string one(t->original);      string one(t->original);
325      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
326      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
327      string all(t->original);      string all(t->original);
328      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      const int replace_count = re.GlobalReplace(t->rewrite, &all);
329      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
330        CHECK_EQ(replace_count, t->global_count);
331      }
332    
333      // One final test: test \r\n replacement when we're not in CRLF mode
334      {
335        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336        assert(re.error().empty());
337        string all("aa\r\naa\r\n");
338        CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
340      }
341      {
342        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343        assert(re.error().empty());
344        string all("aa\r\naa\r\n");
345        CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
347    }    }
348      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349      //       Alas, the answer depends on how pcre was compiled.
350  }  }
351    
352  static void TestExtract() {  static void TestExtract() {
# Line 390  static void TestRecursion() { Line 467  static void TestRecursion() {
467    CHECK(re4.FullMatch(text_bad) == false);    CHECK(re4.FullMatch(text_bad) == false);
468  }  }
469    
470    // A meta-quoted string, interpreted as a pattern, should always match
471    // the original unquoted string.
472    static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473      string quoted = RE::QuoteMeta(unquoted);
474      RE re(quoted, options);
475      CHECK(re.FullMatch(unquoted));
476    }
477    
478    // A string containing meaningful regexp characters, which is then meta-
479    // quoted, should not generally match a string the unquoted string does.
480    static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481                                      RE_Options options = RE_Options()) {
482      string quoted = RE::QuoteMeta(unquoted);
483      RE re(quoted, options);
484      CHECK(!re.FullMatch(should_not_match));
485    }
486    
487    // Tests that quoted meta characters match their original strings,
488    // and that a few things that shouldn't match indeed do not.
489    static void TestQuotaMetaSimple() {
490      TestQuoteMeta("foo");
491      TestQuoteMeta("foo.bar");
492      TestQuoteMeta("foo\\.bar");
493      TestQuoteMeta("[1-9]");
494      TestQuoteMeta("1.5-2.0?");
495      TestQuoteMeta("\\d");
496      TestQuoteMeta("Who doesn't like ice cream?");
497      TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498      TestQuoteMeta("((?!)xxx).*yyy");
499      TestQuoteMeta("([");
500    }
501    
502    static void TestQuoteMetaSimpleNegative() {
503      NegativeTestQuoteMeta("foo", "bar");
504      NegativeTestQuoteMeta("...", "bar");
505      NegativeTestQuoteMeta("\\.", ".");
506      NegativeTestQuoteMeta("\\.", "..");
507      NegativeTestQuoteMeta("(a)", "a");
508      NegativeTestQuoteMeta("(a|b)", "a");
509      NegativeTestQuoteMeta("(a|b)", "(a)");
510      NegativeTestQuoteMeta("(a|b)", "a|b");
511      NegativeTestQuoteMeta("[0-9]", "0");
512      NegativeTestQuoteMeta("[0-9]", "0-9");
513      NegativeTestQuoteMeta("[0-9]", "[9]");
514      NegativeTestQuoteMeta("((?!)xxx)", "xxx");
515    }
516    
517    static void TestQuoteMetaLatin1() {
518      TestQuoteMeta("3\xb2 = 9");
519    }
520    
521    static void TestQuoteMetaUtf8() {
522    #ifdef SUPPORT_UTF8
523      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
524      TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
525      TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
526      TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
527      TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
528      TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
529      TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
530      NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf8 (degree symbol)
531                            "27\\\xc2\\\xb0",
532                            pcrecpp::UTF8());
533    #endif
534    }
535    
536    static void TestQuoteMetaAll() {
537      printf("Testing QuoteMeta\n");
538      TestQuotaMetaSimple();
539      TestQuoteMetaSimpleNegative();
540      TestQuoteMetaLatin1();
541      TestQuoteMetaUtf8();
542    }
543    
544  //  //
545  // Options tests contributed by  // Options tests contributed by
546  // Giuseppe Maxia, CTO, Stardata s.r.l.  // Giuseppe Maxia, CTO, Stardata s.r.l.
# Line 612  static void TestOptions() { Line 763  static void TestOptions() {
763    Test_all_options();    Test_all_options();
764  }  }
765    
766    static void TestConstructors() {
767      printf("Testing constructors\n");
768    
769      RE_Options options;
770      options.set_dotall(true);
771      const char *str = "HELLO\n" "cruel\n" "world";
772    
773      RE orig("HELLO.*world", options);
774      CHECK(orig.FullMatch(str));
775    
776      RE copy1(orig);
777      CHECK(copy1.FullMatch(str));
778    
779      RE copy2("not a match");
780      CHECK(!copy2.FullMatch(str));
781      copy2 = copy1;
782      CHECK(copy2.FullMatch(str));
783      copy2 = orig;
784      CHECK(copy2.FullMatch(str));
785    
786      // Make sure when we assign to ourselves, nothing bad happens
787      orig = orig;
788      copy1 = copy1;
789      copy2 = copy2;
790      CHECK(orig.FullMatch(str));
791      CHECK(copy1.FullMatch(str));
792      CHECK(copy2.FullMatch(str));
793    }
794    
795  int main(int argc, char** argv) {  int main(int argc, char** argv) {
796    // Treat any flag as --help    // Treat any flag as --help
797    if (argc > 1 && argv[1][0] == '-') {    if (argc > 1 && argv[1][0] == '-') {
# Line 646  int main(int argc, char** argv) { Line 826  int main(int argc, char** argv) {
826    /***** FullMatch with no args *****/    /***** FullMatch with no args *****/
827    
828    CHECK(RE("h.*o").FullMatch("hello"));    CHECK(RE("h.*o").FullMatch("hello"));
829    CHECK(!RE("h.*o").FullMatch("othello"));    CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
830    CHECK(!RE("h.*o").FullMatch("hello!"));    CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
831      CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
832      CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
833      CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
834    
835    /***** FullMatch with args *****/    /***** FullMatch with args *****/
836    
# Line 691  int main(int argc, char** argv) { Line 874  int main(int argc, char** argv) {
874    CHECK_EQ(s, string("ruby"));    CHECK_EQ(s, string("ruby"));
875    CHECK_EQ(i, 1234);    CHECK_EQ(i, 1234);
876    
877      // Ignore non-void* NULL arg
878      CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
879      CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
880      CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
881      CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
882      CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
883      CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
884      CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
885    
886      // Fail on non-void* NULL arg if the match doesn't parse for the given type.
887      CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
888      CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
889      CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
890      CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
891      CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
892    
893    // Ignored arg    // Ignored arg
894    CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));    CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
895    CHECK_EQ(s, string("ruby"));    CHECK_EQ(s, string("ruby"));
# Line 742  int main(int argc, char** argv) { Line 941  int main(int argc, char** argv) {
941      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));      CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
942    }    }
943  #ifdef HAVE_LONG_LONG  #ifdef HAVE_LONG_LONG
944    # if defined(__MINGW__) || defined(__MINGW32__)
945    #   define LLD "%I64d"
946    #   define LLU "%I64u"
947    # else
948    #   define LLD "%lld"
949    #   define LLU "%llu"
950    # endif
951    {    {
952      long long v;      long long v;
953      static const long long max_value = 0x7fffffffffffffffLL;      static const long long max_value = 0x7fffffffffffffffLL;
954      static const long long min_value = -max_value - 1;      static const long long min_value = -max_value - 1;
955      char buf[32];      char buf[32];  // definitely big enough for a long long
956    
957      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
958      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
959    
960      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
961      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
962    
963      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
964      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
965    
966      snprintf(buf, sizeof(buf), "%lld", max_value);      sprintf(buf, LLD, max_value);
967      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
968      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
969      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
970    
971      snprintf(buf, sizeof(buf), "%lld", min_value);      sprintf(buf, LLD, min_value);
972      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
973      buf[strlen(buf)-1]++;      buf[strlen(buf)-1]++;
974      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));      CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
# Line 773  int main(int argc, char** argv) { Line 979  int main(int argc, char** argv) {
979      unsigned long long v;      unsigned long long v;
980      long long v2;      long long v2;
981      static const unsigned long long max_value = 0xffffffffffffffffULL;      static const unsigned long long max_value = 0xffffffffffffffffULL;
982      char buf[32];      char buf[32];  // definitely big enough for an unsigned long long
983    
984      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);      CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
985      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);      CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
986    
987      snprintf(buf, sizeof(buf), "%llu", max_value);      sprintf(buf, LLU, max_value);
988      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);      CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
989    
990      assert(buf[strlen(buf)-1] != '9');      assert(buf[strlen(buf)-1] != '9');
# Line 930  int main(int argc, char** argv) { Line 1136  int main(int argc, char** argv) {
1136    CHECK(RE("h.*o").PartialMatch("hello!"));    CHECK(RE("h.*o").PartialMatch("hello!"));
1137    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1138    
1139      /***** other tests *****/
1140    
1141    RadixTests();    RadixTests();
1142    TestReplace();    TestReplace();
1143    TestExtract();    TestExtract();
1144    TestConsume();    TestConsume();
1145    TestFindAndConsume();    TestFindAndConsume();
1146      TestQuoteMetaAll();
1147    TestMatchNumberPeculiarity();    TestMatchNumberPeculiarity();
1148    
1149    // Check the pattern() accessor    // Check the pattern() accessor
# Line 956  int main(int argc, char** argv) { Line 1165  int main(int argc, char** argv) {
1165      printf("Testing UTF-8 handling\n");      printf("Testing UTF-8 handling\n");
1166    
1167      // Three Japanese characters (nihongo)      // Three Japanese characters (nihongo)
1168      const char utf8_string[] = {      const unsigned char utf8_string[] = {
1169           0xe6, 0x97, 0xa5, // 65e5           0xe6, 0x97, 0xa5, // 65e5
1170           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1171           0xe8, 0xaa, 0x9e, // 8a9e           0xe8, 0xaa, 0x9e, // 8a9e
1172           0           0
1173      };      };
1174      const char utf8_pattern[] = {      const unsigned char utf8_pattern[] = {
1175           '.',           '.',
1176           0xe6, 0x9c, 0xac, // 672c           0xe6, 0x9c, 0xac, // 672c
1177           '.',           '.',
# Line 1054  int main(int argc, char** argv) { Line 1263  int main(int argc, char** argv) {
1263      VERBOSE_TEST  = true;      VERBOSE_TEST  = true;
1264    TestOptions();    TestOptions();
1265    
1266      // Test the constructors
1267      TestConstructors();
1268    
1269    // Done    // Done
1270    printf("OK\n");    printf("OK\n");
1271    

Legend:
Removed from v.87  
changed lines
  Added in v.297

  ViewVC Help
Powered by ViewVC 1.1.5