/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 594 - (show annotations)
Sun May 1 08:22:12 2011 UTC (8 years, 5 months ago) by ph10
File size: 39168 byte(s)
Error occurred while calculating annotation data.
Added #include <string.h> to some .cc files (needed in some environments).
1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
40 #include <stdio.h>
41 #include <string.h> /* for memset and strcmp */
42 #include <cassert>
43 #include <vector>
44 #include "pcrecpp.h"
45
46 using pcrecpp::StringPiece;
47 using pcrecpp::RE;
48 using pcrecpp::RE_Options;
49 using pcrecpp::Hex;
50 using pcrecpp::Octal;
51 using pcrecpp::CRadix;
52
53 static bool VERBOSE_TEST = false;
54
// CHECK terminates the program with exit(1) after printing the failing
// expression (with file and line) to stderr when `condition` is false.
// It is *not* controlled by NDEBUG, so the condition is always evaluated
// regardless of compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                \
    if (!(condition)) {                                      \
      fprintf(stderr, "%s:%d: Check failed: %s\n",           \
              __FILE__, __LINE__, #condition);               \
      exit(1);                                               \
    }                                                        \
  } while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesised so that
// arguments containing operators of lower precedence than == (for example
// `x & mask` or `c ? p : q`) are compared as a whole rather than being
// re-associated by the expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
68
69 static void Timing1(int num_iters) {
70 // Same pattern lots of times
71 RE pattern("ruby:\\d+");
72 StringPiece p("ruby:1234");
73 for (int j = num_iters; j > 0; j--) {
74 CHECK(pattern.FullMatch(p));
75 }
76 }
77
78 static void Timing2(int num_iters) {
79 // Same pattern lots of times
80 RE pattern("ruby:(\\d+)");
81 int i;
82 for (int j = num_iters; j > 0; j--) {
83 CHECK(pattern.FullMatch("ruby:1234", &i));
84 CHECK_EQ(i, 1234);
85 }
86 }
87
88 static void Timing3(int num_iters) {
89 string text_string;
90 for (int j = num_iters; j > 0; j--) {
91 text_string += "this is another line\n";
92 }
93
94 RE line_matcher(".*\n");
95 string line;
96 StringPiece text(text_string);
97 int counter = 0;
98 while (line_matcher.Consume(&text)) {
99 counter++;
100 }
101 printf("Matched %d lines\n", counter);
102 }
103
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks by compiling 100000 distinct patterns and
// comparing the process size at the halfway point with the size at the end.
// Fails if the second half grew the process by 2% or more.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Convert before subtracting: an unsigned difference would wrap around to
  // a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
126
127 static void RadixTests() {
128 printf("Testing hex\n");
129
130 #define CHECK_HEX(type, value) \
131 do { \
132 type v; \
133 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134 CHECK_EQ(v, 0x ## value); \
135 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136 CHECK_EQ(v, 0x ## value); \
137 } while(0)
138
139 CHECK_HEX(short, 2bad);
140 CHECK_HEX(unsigned short, 2badU);
141 CHECK_HEX(int, dead);
142 CHECK_HEX(unsigned int, deadU);
143 CHECK_HEX(long, 7eadbeefL);
144 CHECK_HEX(unsigned long, deadbeefUL);
145 #ifdef HAVE_LONG_LONG
146 CHECK_HEX(long long, 12345678deadbeefLL);
147 #endif
148 #ifdef HAVE_UNSIGNED_LONG_LONG
149 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150 #endif
151
152 #undef CHECK_HEX
153
154 printf("Testing octal\n");
155
156 #define CHECK_OCTAL(type, value) \
157 do { \
158 type v; \
159 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160 CHECK_EQ(v, 0 ## value); \
161 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162 CHECK_EQ(v, 0 ## value); \
163 } while(0)
164
165 CHECK_OCTAL(short, 77777);
166 CHECK_OCTAL(unsigned short, 177777U);
167 CHECK_OCTAL(int, 17777777777);
168 CHECK_OCTAL(unsigned int, 37777777777U);
169 CHECK_OCTAL(long, 17777777777L);
170 CHECK_OCTAL(unsigned long, 37777777777UL);
171 #ifdef HAVE_LONG_LONG
172 CHECK_OCTAL(long long, 777777777777777777777LL);
173 #endif
174 #ifdef HAVE_UNSIGNED_LONG_LONG
175 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176 #endif
177
178 #undef CHECK_OCTAL
179
180 printf("Testing decimal\n");
181
182 #define CHECK_DECIMAL(type, value) \
183 do { \
184 type v; \
185 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186 CHECK_EQ(v, value); \
187 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188 CHECK_EQ(v, value); \
189 } while(0)
190
191 CHECK_DECIMAL(short, -1);
192 CHECK_DECIMAL(unsigned short, 9999);
193 CHECK_DECIMAL(int, -1000);
194 CHECK_DECIMAL(unsigned int, 12345U);
195 CHECK_DECIMAL(long, -10000000L);
196 CHECK_DECIMAL(unsigned long, 3083324652U);
197 #ifdef HAVE_LONG_LONG
198 CHECK_DECIMAL(long long, -100000000000000LL);
199 #endif
200 #ifdef HAVE_UNSIGNED_LONG_LONG
201 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202 #endif
203
204 #undef CHECK_DECIMAL
205
206 }
207
208 static void TestReplace() {
209 printf("Testing Replace\n");
210
211 struct ReplaceTest {
212 const char *regexp;
213 const char *rewrite;
214 const char *original;
215 const char *single;
216 const char *global;
217 int global_count; // the expected return value from ReplaceAll
218 };
219 static const ReplaceTest tests[] = {
220 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221 "\\2\\1ay",
222 "the quick brown fox jumps over the lazy dogs.",
223 "ethay quick brown fox jumps over the lazy dogs.",
224 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225 9 },
226 { "\\w+",
227 "\\0-NOSPAM",
228 "paul.haahr@google.com",
229 "paul-NOSPAM.haahr@google.com",
230 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231 4 },
232 { "^",
233 "(START)",
234 "foo",
235 "(START)foo",
236 "(START)foo",
237 1 },
238 { "^",
239 "(START)",
240 "",
241 "(START)",
242 "(START)",
243 1 },
244 { "$",
245 "(END)",
246 "",
247 "(END)",
248 "(END)",
249 1 },
250 { "b",
251 "bb",
252 "ababababab",
253 "abbabababab",
254 "abbabbabbabbabb",
255 5 },
256 { "b",
257 "bb",
258 "bbbbbb",
259 "bbbbbbb",
260 "bbbbbbbbbbbb",
261 6 },
262 { "b+",
263 "bb",
264 "bbbbbb",
265 "bb",
266 "bb",
267 1 },
268 { "b*",
269 "bb",
270 "bbbbbb",
271 "bb",
272 "bbbb",
273 2 },
274 { "b*",
275 "bb",
276 "aaaaa",
277 "bbaaaaa",
278 "bbabbabbabbabbabb",
279 6 },
280 { "b*",
281 "bb",
282 "aa\naa\n",
283 "bbaa\naa\n",
284 "bbabbabb\nbbabbabb\nbb",
285 7 },
286 { "b*",
287 "bb",
288 "aa\raa\r",
289 "bbaa\raa\r",
290 "bbabbabb\rbbabbabb\rbb",
291 7 },
292 { "b*",
293 "bb",
294 "aa\r\naa\r\n",
295 "bbaa\r\naa\r\n",
296 "bbabbabb\r\nbbabbabb\r\nbb",
297 7 },
298 // Check empty-string matching (it's tricky!)
299 { "aa|b*",
300 "@",
301 "aa",
302 "@",
303 "@@",
304 2 },
305 { "b*|aa",
306 "@",
307 "aa",
308 "@aa",
309 "@@@",
310 3 },
311 #ifdef SUPPORT_UTF8
312 { "b*",
313 "bb",
314 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317 5 },
318 { "b*",
319 "bb",
320 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324 9 },
325 #endif
326 { "", NULL, NULL, NULL, NULL, 0 }
327 };
328
329 #ifdef SUPPORT_UTF8
330 const bool support_utf8 = true;
331 #else
332 const bool support_utf8 = false;
333 #endif
334
335 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337 assert(re.error().empty());
338 string one(t->original);
339 CHECK(re.Replace(t->rewrite, &one));
340 CHECK_EQ(one, t->single);
341 string all(t->original);
342 const int replace_count = re.GlobalReplace(t->rewrite, &all);
343 CHECK_EQ(all, t->global);
344 CHECK_EQ(replace_count, t->global_count);
345 }
346
347 // One final test: test \r\n replacement when we're not in CRLF mode
348 {
349 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350 assert(re.error().empty());
351 string all("aa\r\naa\r\n");
352 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
354 }
355 {
356 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357 assert(re.error().empty());
358 string all("aa\r\naa\r\n");
359 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361 }
362 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363 // Alas, the answer depends on how pcre was compiled.
364 }
365
366 static void TestExtract() {
367 printf("Testing Extract\n");
368
369 string s;
370
371 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372 CHECK_EQ(s, "kremvax!boris");
373
374 // check the RE interface as well
375 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376 CHECK_EQ(s, "'foo'");
377 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378 CHECK_EQ(s, "'foo'");
379 }
380
381 static void TestConsume() {
382 printf("Testing Consume\n");
383
384 string word;
385
386 string s(" aaa b!@#$@#$cccc");
387 StringPiece input(s);
388
389 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
390 CHECK(r.Consume(&input, &word));
391 CHECK_EQ(word, "aaa");
392 CHECK(r.Consume(&input, &word));
393 CHECK_EQ(word, "b");
394 CHECK(! r.Consume(&input, &word));
395 }
396
397 static void TestFindAndConsume() {
398 printf("Testing FindAndConsume\n");
399
400 string word;
401
402 string s(" aaa b!@#$@#$cccc");
403 StringPiece input(s);
404
405 RE r("(\\w+)"); // matches a word
406 CHECK(r.FindAndConsume(&input, &word));
407 CHECK_EQ(word, "aaa");
408 CHECK(r.FindAndConsume(&input, &word));
409 CHECK_EQ(word, "b");
410 CHECK(r.FindAndConsume(&input, &word));
411 CHECK_EQ(word, "cccc");
412 CHECK(! r.FindAndConsume(&input, &word));
413 }
414
415 static void TestMatchNumberPeculiarity() {
416 printf("Testing match-number peculiaraity\n");
417
418 string word1;
419 string word2;
420 string word3;
421
422 RE r("(foo)|(bar)|(baz)");
423 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424 CHECK_EQ(word1, "foo");
425 CHECK_EQ(word2, "");
426 CHECK_EQ(word3, "");
427 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428 CHECK_EQ(word1, "");
429 CHECK_EQ(word2, "bar");
430 CHECK_EQ(word3, "");
431 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432 CHECK_EQ(word1, "");
433 CHECK_EQ(word2, "");
434 CHECK_EQ(word3, "baz");
435 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
436
437 string a;
438 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439 CHECK_EQ(a, "");
440 }
441
442 static void TestRecursion() {
443 printf("Testing recursion\n");
444
445 // Get one string that passes (sometimes), one that never does.
446 string text_good("abcdefghijk");
447 string text_bad("acdefghijkl");
448
449 // According to pcretest, matching text_good against (\w+)*b
450 // requires match_limit of at least 8192, and match_recursion_limit
451 // of at least 37.
452
453 RE_Options options_ml;
454 options_ml.set_match_limit(8192);
455 RE re("(\\w+)*b", options_ml);
456 CHECK(re.PartialMatch(text_good) == true);
457 CHECK(re.PartialMatch(text_bad) == false);
458 CHECK(re.FullMatch(text_good) == false);
459 CHECK(re.FullMatch(text_bad) == false);
460
461 options_ml.set_match_limit(1024);
462 RE re2("(\\w+)*b", options_ml);
463 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
464 CHECK(re2.PartialMatch(text_bad) == false);
465 CHECK(re2.FullMatch(text_good) == false);
466 CHECK(re2.FullMatch(text_bad) == false);
467
468 RE_Options options_mlr;
469 options_mlr.set_match_limit_recursion(50);
470 RE re3("(\\w+)*b", options_mlr);
471 CHECK(re3.PartialMatch(text_good) == true);
472 CHECK(re3.PartialMatch(text_bad) == false);
473 CHECK(re3.FullMatch(text_good) == false);
474 CHECK(re3.FullMatch(text_bad) == false);
475
476 options_mlr.set_match_limit_recursion(10);
477 RE re4("(\\w+)*b", options_mlr);
478 CHECK(re4.PartialMatch(text_good) == false);
479 CHECK(re4.PartialMatch(text_bad) == false);
480 CHECK(re4.FullMatch(text_good) == false);
481 CHECK(re4.FullMatch(text_bad) == false);
482 }
483
484 // A meta-quoted string, interpreted as a pattern, should always match
485 // the original unquoted string.
486 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487 string quoted = RE::QuoteMeta(unquoted);
488 RE re(quoted, options);
489 CHECK(re.FullMatch(unquoted));
490 }
491
492 // A string containing meaningful regexp characters, which is then meta-
493 // quoted, should not generally match a string the unquoted string does.
494 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495 RE_Options options = RE_Options()) {
496 string quoted = RE::QuoteMeta(unquoted);
497 RE re(quoted, options);
498 CHECK(!re.FullMatch(should_not_match));
499 }
500
501 // Tests that quoted meta characters match their original strings,
502 // and that a few things that shouldn't match indeed do not.
503 static void TestQuotaMetaSimple() {
504 TestQuoteMeta("foo");
505 TestQuoteMeta("foo.bar");
506 TestQuoteMeta("foo\\.bar");
507 TestQuoteMeta("[1-9]");
508 TestQuoteMeta("1.5-2.0?");
509 TestQuoteMeta("\\d");
510 TestQuoteMeta("Who doesn't like ice cream?");
511 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512 TestQuoteMeta("((?!)xxx).*yyy");
513 TestQuoteMeta("([");
514 TestQuoteMeta(string("foo\0bar", 7));
515 }
516
517 static void TestQuoteMetaSimpleNegative() {
518 NegativeTestQuoteMeta("foo", "bar");
519 NegativeTestQuoteMeta("...", "bar");
520 NegativeTestQuoteMeta("\\.", ".");
521 NegativeTestQuoteMeta("\\.", "..");
522 NegativeTestQuoteMeta("(a)", "a");
523 NegativeTestQuoteMeta("(a|b)", "a");
524 NegativeTestQuoteMeta("(a|b)", "(a)");
525 NegativeTestQuoteMeta("(a|b)", "a|b");
526 NegativeTestQuoteMeta("[0-9]", "0");
527 NegativeTestQuoteMeta("[0-9]", "0-9");
528 NegativeTestQuoteMeta("[0-9]", "[9]");
529 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
530 }
531
532 static void TestQuoteMetaLatin1() {
533 TestQuoteMeta("3\xb2 = 9");
534 }
535
// QuoteMeta checks on multi-byte UTF-8 input.  Compiled to an empty function
// unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);   // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // The subject has a literal backslash before each byte of the 2-byte
  // degree symbol, so the quoted pattern must not match it.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8);
#endif
}
550
551 static void TestQuoteMetaAll() {
552 printf("Testing QuoteMeta\n");
553 TestQuotaMetaSimple();
554 TestQuoteMetaSimpleNegative();
555 TestQuoteMetaLatin1();
556 TestQuoteMetaUtf8();
557 }
558
559 //
560 // Options tests contributed by
561 // Giuseppe Maxia, CTO, Stardata s.r.l.
562 // July 2005
563 //
564 static void GetOneOptionResult(
565 const char *option_name,
566 const char *regex,
567 const char *str,
568 RE_Options options,
569 bool full,
570 string expected) {
571
572 printf("Testing Option <%s>\n", option_name);
573 if(VERBOSE_TEST)
574 printf("/%s/ finds \"%s\" within \"%s\" \n",
575 regex,
576 expected.c_str(),
577 str);
578 string captured("");
579 if (full)
580 RE(regex,options).FullMatch(str, &captured);
581 else
582 RE(regex,options).PartialMatch(str, &captured);
583 CHECK_EQ(captured, expected);
584 }
585
586 static void TestOneOption(
587 const char *option_name,
588 const char *regex,
589 const char *str,
590 RE_Options options,
591 bool full,
592 bool assertive = true) {
593
594 printf("Testing Option <%s>\n", option_name);
595 if (VERBOSE_TEST)
596 printf("'%s' %s /%s/ \n",
597 str,
598 (assertive? "matches" : "doesn't match"),
599 regex);
600 if (assertive) {
601 if (full)
602 CHECK(RE(regex,options).FullMatch(str));
603 else
604 CHECK(RE(regex,options).PartialMatch(str));
605 } else {
606 if (full)
607 CHECK(!RE(regex,options).FullMatch(str));
608 else
609 CHECK(!RE(regex,options).PartialMatch(str));
610 }
611 }
612
613 static void Test_CASELESS() {
614 RE_Options options;
615 RE_Options options2;
616
617 options.set_caseless(true);
618 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
619 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
620 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
621
622 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
623 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624 options.set_caseless(false);
625 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
626 }
627
628 static void Test_MULTILINE() {
629 RE_Options options;
630 RE_Options options2;
631 const char *str = "HELLO\n" "cruel\n" "world\n";
632
633 options.set_multiline(true);
634 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
635 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
636 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637 options.set_multiline(false);
638 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
639 }
640
641 static void Test_DOTALL() {
642 RE_Options options;
643 RE_Options options2;
644 const char *str = "HELLO\n" "cruel\n" "world";
645
646 options.set_dotall(true);
647 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
648 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
649 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
650 options.set_dotall(false);
651 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
652 }
653
654 static void Test_DOLLAR_ENDONLY() {
655 RE_Options options;
656 RE_Options options2;
657 const char *str = "HELLO world\n";
658
659 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660 options.set_dollar_endonly(true);
661 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
662 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
663 }
664
665 static void Test_EXTRA() {
666 RE_Options options;
667 const char *str = "HELLO";
668
669 options.set_extra(true);
670 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672 options.set_extra(false);
673 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
674 }
675
676 static void Test_EXTENDED() {
677 RE_Options options;
678 RE_Options options2;
679 const char *str = "HELLO world";
680
681 options.set_extended(true);
682 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
683 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
684 TestOneOption("EXTENDED (class)",
685 "^ HE L{2} O "
686 "\\s+ "
687 "\\w+ $ ",
688 str,
689 options,
690 false);
691
692 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693 TestOneOption("EXTENDED (function)",
694 "^ HE L{2} O "
695 "\\s+ "
696 "\\w+ $ ",
697 str,
698 pcrecpp::EXTENDED(),
699 false);
700
701 options.set_extended(false);
702 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
703 }
704
705 static void Test_NO_AUTO_CAPTURE() {
706 RE_Options options;
707 const char *str = "HELLO world";
708 string captured;
709
710 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711 if (VERBOSE_TEST)
712 printf("parentheses capture text\n");
713 RE re("(world|universe)$", options);
714 CHECK(re.Extract("\\1", str , &captured));
715 CHECK_EQ(captured, "world");
716 options.set_no_auto_capture(true);
717 printf("testing Option <NO_AUTO_CAPTURE>\n");
718 if (VERBOSE_TEST)
719 printf("parentheses do not capture text\n");
720 re.Extract("\\1",str, &captured );
721 CHECK_EQ(captured, "world");
722 }
723
724 static void Test_UNGREEDY() {
725 RE_Options options;
726 const char *str = "HELLO, 'this' is the 'world'";
727
728 options.set_ungreedy(true);
729 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732
733 options.set_ungreedy(false);
734 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
736 }
737
738 static void Test_all_options() {
739 const char *str = "HELLO\n" "cruel\n" "world";
740 RE_Options options;
741 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742
743 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744 options.set_all_options(0);
745 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747
748 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750 " ^ c r u e l $ ",
751 str,
752 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753 false);
754
755 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756 " ^ c r u e l $ ",
757 str,
758 RE_Options()
759 .set_multiline(true)
760 .set_extended(true),
761 false);
762
763 options.set_all_options(0);
764 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
765
766 }
767
768 static void TestOptions() {
769 printf("Testing Options\n");
770 Test_CASELESS();
771 Test_MULTILINE();
772 Test_DOTALL();
773 Test_DOLLAR_ENDONLY();
774 Test_EXTENDED();
775 Test_NO_AUTO_CAPTURE();
776 Test_UNGREEDY();
777 Test_EXTRA();
778 Test_all_options();
779 }
780
781 static void TestConstructors() {
782 printf("Testing constructors\n");
783
784 RE_Options options;
785 options.set_dotall(true);
786 const char *str = "HELLO\n" "cruel\n" "world";
787
788 RE orig("HELLO.*world", options);
789 CHECK(orig.FullMatch(str));
790
791 RE copy1(orig);
792 CHECK(copy1.FullMatch(str));
793
794 RE copy2("not a match");
795 CHECK(!copy2.FullMatch(str));
796 copy2 = copy1;
797 CHECK(copy2.FullMatch(str));
798 copy2 = orig;
799 CHECK(copy2.FullMatch(str));
800
801 // Make sure when we assign to ourselves, nothing bad happens
802 orig = orig;
803 copy1 = copy1;
804 copy2 = copy2;
805 CHECK(orig.FullMatch(str));
806 CHECK(copy1.FullMatch(str));
807 CHECK(copy2.FullMatch(str));
808 }
809
810 int main(int argc, char** argv) {
811 // Treat any flag as --help
812 if (argc > 1 && argv[1][0] == '-') {
813 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814 " If 'timingX ###' is specified, run the given timing test\n"
815 " with the given number of iterations, rather than running\n"
816 " the default corectness test.\n", argv[0]);
817 return 0;
818 }
819
820 if (argc > 1) {
821 if ( argc == 2 || atoi(argv[2]) == 0) {
822 printf("timing mode needs a num-iters argument\n");
823 return 1;
824 }
825 if (!strcmp(argv[1], "timing1"))
826 Timing1(atoi(argv[2]));
827 else if (!strcmp(argv[1], "timing2"))
828 Timing2(atoi(argv[2]));
829 else if (!strcmp(argv[1], "timing3"))
830 Timing3(atoi(argv[2]));
831 else
832 printf("Unknown argument '%s'\n", argv[1]);
833 return 0;
834 }
835
836 printf("Testing FullMatch\n");
837
838 int i;
839 string s;
840
841 /***** FullMatch with no args *****/
842
843 CHECK(RE("h.*o").FullMatch("hello"));
844 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
845 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
846 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
847 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
848 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
849
850 /***** FullMatch with args *****/
851
852 // Zero-arg
853 CHECK(RE("\\d+").FullMatch("1001"));
854
855 // Single-arg
856 CHECK(RE("(\\d+)").FullMatch("1001", &i));
857 CHECK_EQ(i, 1001);
858 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
859 CHECK_EQ(i, -123);
860 CHECK(!RE("()\\d+").FullMatch("10", &i));
861 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
862 &i));
863
864 // Digits surrounding integer-arg
865 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
866 CHECK_EQ(i, 23);
867 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
868 CHECK_EQ(i, 1);
869 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
870 CHECK_EQ(i, -1);
871 CHECK(RE("(\\d)").PartialMatch("1234", &i));
872 CHECK_EQ(i, 1);
873 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
874 CHECK_EQ(i, -1);
875
876 // String-arg
877 CHECK(RE("h(.*)o").FullMatch("hello", &s));
878 CHECK_EQ(s, string("ell"));
879
880 // StringPiece-arg
881 StringPiece sp;
882 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
883 CHECK_EQ(sp.size(), 4);
884 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
885 CHECK_EQ(i, 1234);
886
887 // Multi-arg
888 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
889 CHECK_EQ(s, string("ruby"));
890 CHECK_EQ(i, 1234);
891
892 // Ignore non-void* NULL arg
893 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
894 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
895 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
896 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
897 #ifdef HAVE_LONG_LONG
898 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
899 #endif
900 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
901 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
902
903 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
904 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
905 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
906 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
907 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
908 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
909
910 // Ignored arg
911 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
912 CHECK_EQ(s, string("ruby"));
913 CHECK_EQ(i, 1234);
914
915 // Type tests
916 {
917 char c;
918 CHECK(RE("(H)ello").FullMatch("Hello", &c));
919 CHECK_EQ(c, 'H');
920 }
921 {
922 unsigned char c;
923 CHECK(RE("(H)ello").FullMatch("Hello", &c));
924 CHECK_EQ(c, static_cast<unsigned char>('H'));
925 }
926 {
927 short v;
928 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
929 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
930 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
931 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
932 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
933 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
934 }
935 {
936 unsigned short v;
937 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
938 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
939 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
940 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
941 }
942 {
943 int v;
944 static const int max_value = 0x7fffffff;
945 static const int min_value = -max_value - 1;
946 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
947 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
948 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
949 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
950 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
951 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
952 }
953 {
954 unsigned int v;
955 static const unsigned int max_value = 0xfffffffful;
956 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
957 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
958 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
959 }
960 #ifdef HAVE_LONG_LONG
961 # if defined(__MINGW__) || defined(__MINGW32__)
962 # define LLD "%I64d"
963 # define LLU "%I64u"
964 # else
965 # define LLD "%lld"
966 # define LLU "%llu"
967 # endif
968 {
969 long long v;
970 static const long long max_value = 0x7fffffffffffffffLL;
971 static const long long min_value = -max_value - 1;
972 char buf[32]; // definitely big enough for a long long
973
974 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
975 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
976
977 sprintf(buf, LLD, max_value);
978 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
979
980 sprintf(buf, LLD, min_value);
981 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
982
983 sprintf(buf, LLD, max_value);
984 assert(buf[strlen(buf)-1] != '9');
985 buf[strlen(buf)-1]++;
986 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
987
988 sprintf(buf, LLD, min_value);
989 assert(buf[strlen(buf)-1] != '9');
990 buf[strlen(buf)-1]++;
991 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
992 }
993 #endif
994 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
995 {
996 unsigned long long v;
997 long long v2;
998 static const unsigned long long max_value = 0xffffffffffffffffULL;
999 char buf[32]; // definitely big enough for a unsigned long long
1000
1001 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1002 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1003
1004 sprintf(buf, LLU, max_value);
1005 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1006
1007 assert(buf[strlen(buf)-1] != '9');
1008 buf[strlen(buf)-1]++;
1009 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1010 }
1011 #endif
1012 {
1013 float v;
1014 CHECK(RE("(.*)").FullMatch("100", &v));
1015 CHECK(RE("(.*)").FullMatch("-100.", &v));
1016 CHECK(RE("(.*)").FullMatch("1e23", &v));
1017 }
1018 {
1019 double v;
1020 CHECK(RE("(.*)").FullMatch("100", &v));
1021 CHECK(RE("(.*)").FullMatch("-100.", &v));
1022 CHECK(RE("(.*)").FullMatch("1e23", &v));
1023 }
1024
1025 // Check that matching is fully anchored
1026 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1027 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1028 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1029 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1030
1031 // Braces
1032 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1033 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1034 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1035
1036 // Complicated RE
1037 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1039 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1040 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1041
1042 // Check full-match handling (needs '$' tacked on internally)
1043 CHECK(RE("fo|foo").FullMatch("fo"));
1044 CHECK(RE("fo|foo").FullMatch("foo"));
1045 CHECK(RE("fo|foo$").FullMatch("fo"));
1046 CHECK(RE("fo|foo$").FullMatch("foo"));
1047 CHECK(RE("foo$").FullMatch("foo"));
1048 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1049 CHECK(!RE("fo|bar").FullMatch("fox"));
1050
1051 // Enable the following (change 'if (false)' to 'if (true)') if we change
1052 // the handling of '$' to prevent it from matching a trailing newline
1053 if (false) {
1054 // Check that we don't get bitten by pcre's special handling of a
1055 // '\n' at the end of the string matching '$'
1056 CHECK(!RE("foo$").PartialMatch("foo\n"));
1057 }
1058
1059 // Number of args
1060 int a[16];
1061 CHECK(RE("").FullMatch(""));
1062
1063 memset(a, 0, sizeof(0));
1064 CHECK(RE("(\\d){1}").FullMatch("1",
1065 &a[0]));
1066 CHECK_EQ(a[0], 1);
1067
1068 memset(a, 0, sizeof(0));
1069 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1070 &a[0], &a[1]));
1071 CHECK_EQ(a[0], 1);
1072 CHECK_EQ(a[1], 2);
1073
1074 memset(a, 0, sizeof(0));
1075 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1076 &a[0], &a[1], &a[2]));
1077 CHECK_EQ(a[0], 1);
1078 CHECK_EQ(a[1], 2);
1079 CHECK_EQ(a[2], 3);
1080
1081 memset(a, 0, sizeof(0));
1082 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1083 &a[0], &a[1], &a[2], &a[3]));
1084 CHECK_EQ(a[0], 1);
1085 CHECK_EQ(a[1], 2);
1086 CHECK_EQ(a[2], 3);
1087 CHECK_EQ(a[3], 4);
1088
1089 memset(a, 0, sizeof(0));
1090 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1091 &a[0], &a[1], &a[2],
1092 &a[3], &a[4]));
1093 CHECK_EQ(a[0], 1);
1094 CHECK_EQ(a[1], 2);
1095 CHECK_EQ(a[2], 3);
1096 CHECK_EQ(a[3], 4);
1097 CHECK_EQ(a[4], 5);
1098
1099 memset(a, 0, sizeof(0));
1100 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1101 &a[0], &a[1], &a[2],
1102 &a[3], &a[4], &a[5]));
1103 CHECK_EQ(a[0], 1);
1104 CHECK_EQ(a[1], 2);
1105 CHECK_EQ(a[2], 3);
1106 CHECK_EQ(a[3], 4);
1107 CHECK_EQ(a[4], 5);
1108 CHECK_EQ(a[5], 6);
1109
1110 memset(a, 0, sizeof(0));
1111 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1112 &a[0], &a[1], &a[2], &a[3],
1113 &a[4], &a[5], &a[6]));
1114 CHECK_EQ(a[0], 1);
1115 CHECK_EQ(a[1], 2);
1116 CHECK_EQ(a[2], 3);
1117 CHECK_EQ(a[3], 4);
1118 CHECK_EQ(a[4], 5);
1119 CHECK_EQ(a[5], 6);
1120 CHECK_EQ(a[6], 7);
1121
1122 memset(a, 0, sizeof(0));
1123 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1124 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1125 "1234567890123456",
1126 &a[0], &a[1], &a[2], &a[3],
1127 &a[4], &a[5], &a[6], &a[7],
1128 &a[8], &a[9], &a[10], &a[11],
1129 &a[12], &a[13], &a[14], &a[15]));
1130 CHECK_EQ(a[0], 1);
1131 CHECK_EQ(a[1], 2);
1132 CHECK_EQ(a[2], 3);
1133 CHECK_EQ(a[3], 4);
1134 CHECK_EQ(a[4], 5);
1135 CHECK_EQ(a[5], 6);
1136 CHECK_EQ(a[6], 7);
1137 CHECK_EQ(a[7], 8);
1138 CHECK_EQ(a[8], 9);
1139 CHECK_EQ(a[9], 0);
1140 CHECK_EQ(a[10], 1);
1141 CHECK_EQ(a[11], 2);
1142 CHECK_EQ(a[12], 3);
1143 CHECK_EQ(a[13], 4);
1144 CHECK_EQ(a[14], 5);
1145 CHECK_EQ(a[15], 6);
1146
1147 /***** PartialMatch *****/
1148
1149 printf("Testing PartialMatch\n");
1150
1151 CHECK(RE("h.*o").PartialMatch("hello"));
1152 CHECK(RE("h.*o").PartialMatch("othello"));
1153 CHECK(RE("h.*o").PartialMatch("hello!"));
1154 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1155
1156 /***** other tests *****/
1157
1158 RadixTests();
1159 TestReplace();
1160 TestExtract();
1161 TestConsume();
1162 TestFindAndConsume();
1163 TestQuoteMetaAll();
1164 TestMatchNumberPeculiarity();
1165
1166 // Check the pattern() accessor
1167 {
1168 const string kPattern = "http://([^/]+)/.*";
1169 const RE re(kPattern);
1170 CHECK_EQ(kPattern, re.pattern());
1171 }
1172
1173 // Check RE error field.
1174 {
1175 RE re("foo");
1176 CHECK(re.error().empty()); // Must have no error
1177 }
1178
1179 #ifdef SUPPORT_UTF8
1180 // Check UTF-8 handling
1181 {
1182 printf("Testing UTF-8 handling\n");
1183
1184 // Three Japanese characters (nihongo)
1185 const unsigned char utf8_string[] = {
1186 0xe6, 0x97, 0xa5, // 65e5
1187 0xe6, 0x9c, 0xac, // 672c
1188 0xe8, 0xaa, 0x9e, // 8a9e
1189 0
1190 };
1191 const unsigned char utf8_pattern[] = {
1192 '.',
1193 0xe6, 0x9c, 0xac, // 672c
1194 '.',
1195 0
1196 };
1197
1198 // Both should match in either mode, bytes or UTF-8
1199 RE re_test1(".........");
1200 CHECK(re_test1.FullMatch(utf8_string));
1201 RE re_test2("...", pcrecpp::UTF8());
1202 CHECK(re_test2.FullMatch(utf8_string));
1203
1204 // Check that '.' matches one byte or UTF-8 character
1205 // according to the mode.
1206 string ss;
1207 RE re_test3("(.)");
1208 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1209 CHECK_EQ(ss, string("\xe6"));
1210 RE re_test4("(.)", pcrecpp::UTF8());
1211 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1212 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1213
1214 // Check that string matches itself in either mode
1215 RE re_test5(utf8_string);
1216 CHECK(re_test5.FullMatch(utf8_string));
1217 RE re_test6(utf8_string, pcrecpp::UTF8());
1218 CHECK(re_test6.FullMatch(utf8_string));
1219
1220 // Check that pattern matches string only in UTF8 mode
1221 RE re_test7(utf8_pattern);
1222 CHECK(!re_test7.FullMatch(utf8_string));
1223 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1224 CHECK(re_test8.FullMatch(utf8_string));
1225 }
1226
1227 // Check that ungreedy, UTF8 regular expressions don't match when they
1228 // oughtn't -- see bug 82246.
1229 {
1230 // This code always worked.
1231 const char* pattern = "\\w+X";
1232 const string target = "a aX";
1233 RE match_sentence(pattern);
1234 RE match_sentence_re(pattern, pcrecpp::UTF8());
1235
1236 CHECK(!match_sentence.FullMatch(target));
1237 CHECK(!match_sentence_re.FullMatch(target));
1238 }
1239
1240 {
1241 const char* pattern = "(?U)\\w+X";
1242 const string target = "a aX";
1243 RE match_sentence(pattern);
1244 RE match_sentence_re(pattern, pcrecpp::UTF8());
1245
1246 CHECK(!match_sentence.FullMatch(target));
1247 CHECK(!match_sentence_re.FullMatch(target));
1248 }
1249 #endif /* def SUPPORT_UTF8 */
1250
1251 printf("Testing error reporting\n");
1252
1253 { RE re("a\\1"); CHECK(!re.error().empty()); }
1254 {
1255 RE re("a[x");
1256 CHECK(!re.error().empty());
1257 }
1258 {
1259 RE re("a[z-a]");
1260 CHECK(!re.error().empty());
1261 }
1262 {
1263 RE re("a[[:foobar:]]");
1264 CHECK(!re.error().empty());
1265 }
1266 {
1267 RE re("a(b");
1268 CHECK(!re.error().empty());
1269 }
1270 {
1271 RE re("a\\");
1272 CHECK(!re.error().empty());
1273 }
1274
1275 // Test that recursion is stopped
1276 TestRecursion();
1277
1278 // Test Options
1279 if (getenv("VERBOSE_TEST") != NULL)
1280 VERBOSE_TEST = true;
1281 TestOptions();
1282
1283 // Test the constructors
1284 TestConstructors();
1285
1286 // Done
1287 printf("OK\n");
1288
1289 return 0;
1290 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5