1 |
// Copyright (c) 2005, Google Inc.
|
2 |
// All rights reserved.
|
3 |
//
|
4 |
// Redistribution and use in source and binary forms, with or without
|
5 |
// modification, are permitted provided that the following conditions are
|
6 |
// met:
|
7 |
//
|
8 |
// * Redistributions of source code must retain the above copyright
|
9 |
// notice, this list of conditions and the following disclaimer.
|
10 |
// * Redistributions in binary form must reproduce the above
|
11 |
// copyright notice, this list of conditions and the following disclaimer
|
12 |
// in the documentation and/or other materials provided with the
|
13 |
// distribution.
|
14 |
// * Neither the name of Google Inc. nor the names of its
|
15 |
// contributors may be used to endorse or promote products derived from
|
16 |
// this software without specific prior written permission.
|
17 |
//
|
18 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19 |
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20 |
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21 |
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22 |
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23 |
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24 |
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25 |
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26 |
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27 |
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28 |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29 |
//
|
30 |
// Author: Sanjay Ghemawat
|
31 |
//
|
32 |
// TODO: Test extractions for PartialMatch/Consume
|
33 |
|
34 |
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>
#include "config.h"
#include "pcrecpp.h"
|
38 |
|
39 |
using pcrecpp::StringPiece;
|
40 |
using pcrecpp::RE;
|
41 |
using pcrecpp::RE_Options;
|
42 |
using pcrecpp::Hex;
|
43 |
using pcrecpp::Octal;
|
44 |
using pcrecpp::CRadix;
|
45 |
|
46 |
// When true, the option tests print a short description of each case in
// addition to its name.  main() turns it on when the VERBOSE_TEST
// environment variable is set.
static bool VERBOSE_TEST = false;
|
47 |
|
48 |
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure the file name, line number and the stringified condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than == (for example
// `x & mask` or `c ? a : b`) is compared as a whole expression.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
|
61 |
|
62 |
static void Timing1(int num_iters) {
|
63 |
// Same pattern lots of times
|
64 |
RE pattern("ruby:\\d+");
|
65 |
StringPiece p("ruby:1234");
|
66 |
for (int j = num_iters; j > 0; j--) {
|
67 |
CHECK(pattern.FullMatch(p));
|
68 |
}
|
69 |
}
|
70 |
|
71 |
static void Timing2(int num_iters) {
|
72 |
// Same pattern lots of times
|
73 |
RE pattern("ruby:(\\d+)");
|
74 |
int i;
|
75 |
for (int j = num_iters; j > 0; j--) {
|
76 |
CHECK(pattern.FullMatch("ruby:1234", &i));
|
77 |
CHECK_EQ(i, 1234);
|
78 |
}
|
79 |
}
|
80 |
|
81 |
static void Timing3(int num_iters) {
|
82 |
string text_string;
|
83 |
for (int j = num_iters; j > 0; j--) {
|
84 |
text_string += "this is another line\n";
|
85 |
}
|
86 |
|
87 |
RE line_matcher(".*\n");
|
88 |
string line;
|
89 |
StringPiece text(text_string);
|
90 |
int counter = 0;
|
91 |
while (line_matcher.Consume(&text)) {
|
92 |
counter++;
|
93 |
}
|
94 |
printf("Matched %d lines\n", counter);
|
95 |
}
|
96 |
|
97 |
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: constructs and destroys 100000 RE objects, each
// with a different pattern, and requires that the value reported by
// VirtualProcessSize() grows by less than 2% between iteration 50000 and the
// end of the run.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      // Record the baseline halfway through the run.
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);  // compiled and destroyed on every iteration
  }
  // Same type as initial_size so that it matches the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
|
119 |
|
120 |
static void RadixTests() {
|
121 |
printf("Testing hex\n");
|
122 |
|
123 |
#define CHECK_HEX(type, value) \
|
124 |
do { \
|
125 |
type v; \
|
126 |
CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
|
127 |
CHECK_EQ(v, 0x ## value); \
|
128 |
CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
|
129 |
CHECK_EQ(v, 0x ## value); \
|
130 |
} while(0)
|
131 |
|
132 |
CHECK_HEX(short, 2bad);
|
133 |
CHECK_HEX(unsigned short, 2badU);
|
134 |
CHECK_HEX(int, dead);
|
135 |
CHECK_HEX(unsigned int, deadU);
|
136 |
CHECK_HEX(long, 7eadbeefL);
|
137 |
CHECK_HEX(unsigned long, deadbeefUL);
|
138 |
#ifdef HAVE_LONG_LONG
|
139 |
CHECK_HEX(long long, 12345678deadbeefLL);
|
140 |
#endif
|
141 |
#ifdef HAVE_UNSIGNED_LONG_LONG
|
142 |
CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
|
143 |
#endif
|
144 |
|
145 |
#undef CHECK_HEX
|
146 |
|
147 |
printf("Testing octal\n");
|
148 |
|
149 |
#define CHECK_OCTAL(type, value) \
|
150 |
do { \
|
151 |
type v; \
|
152 |
CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
|
153 |
CHECK_EQ(v, 0 ## value); \
|
154 |
CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
|
155 |
CHECK_EQ(v, 0 ## value); \
|
156 |
} while(0)
|
157 |
|
158 |
CHECK_OCTAL(short, 77777);
|
159 |
CHECK_OCTAL(unsigned short, 177777U);
|
160 |
CHECK_OCTAL(int, 17777777777);
|
161 |
CHECK_OCTAL(unsigned int, 37777777777U);
|
162 |
CHECK_OCTAL(long, 17777777777L);
|
163 |
CHECK_OCTAL(unsigned long, 37777777777UL);
|
164 |
#ifdef HAVE_LONG_LONG
|
165 |
CHECK_OCTAL(long long, 777777777777777777777LL);
|
166 |
#endif
|
167 |
#ifdef HAVE_UNSIGNED_LONG_LONG
|
168 |
CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
|
169 |
#endif
|
170 |
|
171 |
#undef CHECK_OCTAL
|
172 |
|
173 |
printf("Testing decimal\n");
|
174 |
|
175 |
#define CHECK_DECIMAL(type, value) \
|
176 |
do { \
|
177 |
type v; \
|
178 |
CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
|
179 |
CHECK_EQ(v, value); \
|
180 |
CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
|
181 |
CHECK_EQ(v, value); \
|
182 |
} while(0)
|
183 |
|
184 |
CHECK_DECIMAL(short, -1);
|
185 |
CHECK_DECIMAL(unsigned short, 9999);
|
186 |
CHECK_DECIMAL(int, -1000);
|
187 |
CHECK_DECIMAL(unsigned int, 12345U);
|
188 |
CHECK_DECIMAL(long, -10000000L);
|
189 |
CHECK_DECIMAL(unsigned long, 3083324652U);
|
190 |
#ifdef HAVE_LONG_LONG
|
191 |
CHECK_DECIMAL(long long, -100000000000000LL);
|
192 |
#endif
|
193 |
#ifdef HAVE_UNSIGNED_LONG_LONG
|
194 |
CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
|
195 |
#endif
|
196 |
|
197 |
#undef CHECK_DECIMAL
|
198 |
|
199 |
}
|
200 |
|
201 |
static void TestReplace() {
|
202 |
printf("Testing Replace\n");
|
203 |
|
204 |
struct ReplaceTest {
|
205 |
const char *regexp;
|
206 |
const char *rewrite;
|
207 |
const char *original;
|
208 |
const char *single;
|
209 |
const char *global;
|
210 |
};
|
211 |
static const ReplaceTest tests[] = {
|
212 |
{ "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
|
213 |
"\\2\\1ay",
|
214 |
"the quick brown fox jumps over the lazy dogs.",
|
215 |
"ethay quick brown fox jumps over the lazy dogs.",
|
216 |
"ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
|
217 |
{ "\\w+",
|
218 |
"\\0-NOSPAM",
|
219 |
"paul.haahr@google.com",
|
220 |
"paul-NOSPAM.haahr@google.com",
|
221 |
"paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
|
222 |
{ "^",
|
223 |
"(START)",
|
224 |
"foo",
|
225 |
"(START)foo",
|
226 |
"(START)foo" },
|
227 |
{ "^",
|
228 |
"(START)",
|
229 |
"",
|
230 |
"(START)",
|
231 |
"(START)" },
|
232 |
{ "$",
|
233 |
"(END)",
|
234 |
"",
|
235 |
"(END)",
|
236 |
"(END)" },
|
237 |
{ "b",
|
238 |
"bb",
|
239 |
"ababababab",
|
240 |
"abbabababab",
|
241 |
"abbabbabbabbabb" },
|
242 |
{ "b",
|
243 |
"bb",
|
244 |
"bbbbbb",
|
245 |
"bbbbbbb",
|
246 |
"bbbbbbbbbbbb" },
|
247 |
{ "b+",
|
248 |
"bb",
|
249 |
"bbbbbb",
|
250 |
"bb",
|
251 |
"bb" },
|
252 |
{ "b*",
|
253 |
"bb",
|
254 |
"bbbbbb",
|
255 |
"bb",
|
256 |
"bb" },
|
257 |
{ "b*",
|
258 |
"bb",
|
259 |
"aaaaa",
|
260 |
"bbaaaaa",
|
261 |
"bbabbabbabbabbabb" },
|
262 |
{ "", NULL, NULL, NULL, NULL }
|
263 |
};
|
264 |
|
265 |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
|
266 |
string one(t->original);
|
267 |
CHECK(RE(t->regexp).Replace(t->rewrite, &one));
|
268 |
CHECK_EQ(one, t->single);
|
269 |
string all(t->original);
|
270 |
CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
|
271 |
CHECK_EQ(all, t->global);
|
272 |
}
|
273 |
}
|
274 |
|
275 |
// Tests Extract(): a successful call stores the rewritten match in the
// output string, and a failed call is expected to leave the previous
// contents in place.
static void TestExtract() {
  printf("Testing Extract\n");

  string s;

  // Swap the two captured groups around a '!'.
  CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
  CHECK_EQ(s, "kremvax!boris");

  // check the RE interface as well
  CHECK(RE(".*").Extract("'\\0'", "foo", &s));
  CHECK_EQ(s, "'foo'");

  // No match: Extract must report failure and `s` must still hold the value
  // produced by the previous call.
  CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
  CHECK_EQ(s, "'foo'");
}
|
289 |
|
290 |
static void TestConsume() {
|
291 |
printf("Testing Consume\n");
|
292 |
|
293 |
string word;
|
294 |
|
295 |
string s(" aaa b!@#$@#$cccc");
|
296 |
StringPiece input(s);
|
297 |
|
298 |
RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
|
299 |
CHECK(r.Consume(&input, &word));
|
300 |
CHECK_EQ(word, "aaa");
|
301 |
CHECK(r.Consume(&input, &word));
|
302 |
CHECK_EQ(word, "b");
|
303 |
CHECK(! r.Consume(&input, &word));
|
304 |
}
|
305 |
|
306 |
static void TestFindAndConsume() {
|
307 |
printf("Testing FindAndConsume\n");
|
308 |
|
309 |
string word;
|
310 |
|
311 |
string s(" aaa b!@#$@#$cccc");
|
312 |
StringPiece input(s);
|
313 |
|
314 |
RE r("(\\w+)"); // matches a word
|
315 |
CHECK(r.FindAndConsume(&input, &word));
|
316 |
CHECK_EQ(word, "aaa");
|
317 |
CHECK(r.FindAndConsume(&input, &word));
|
318 |
CHECK_EQ(word, "b");
|
319 |
CHECK(r.FindAndConsume(&input, &word));
|
320 |
CHECK_EQ(word, "cccc");
|
321 |
CHECK(! r.FindAndConsume(&input, &word));
|
322 |
}
|
323 |
|
324 |
static void TestMatchNumberPeculiarity() {
|
325 |
printf("Testing match-number peculiaraity\n");
|
326 |
|
327 |
string word1;
|
328 |
string word2;
|
329 |
string word3;
|
330 |
|
331 |
RE r("(foo)|(bar)|(baz)");
|
332 |
CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
|
333 |
CHECK_EQ(word1, "foo");
|
334 |
CHECK_EQ(word2, "");
|
335 |
CHECK_EQ(word3, "");
|
336 |
CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
|
337 |
CHECK_EQ(word1, "");
|
338 |
CHECK_EQ(word2, "bar");
|
339 |
CHECK_EQ(word3, "");
|
340 |
CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
|
341 |
CHECK_EQ(word1, "");
|
342 |
CHECK_EQ(word2, "");
|
343 |
CHECK_EQ(word3, "baz");
|
344 |
CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
|
345 |
|
346 |
string a;
|
347 |
CHECK(RE("(foo)|hello").FullMatch("hello", &a));
|
348 |
CHECK_EQ(a, "");
|
349 |
}
|
350 |
|
351 |
static void TestRecursion(int size, const char *pattern, int match_limit) {
|
352 |
printf("Testing recursion\n");
|
353 |
|
354 |
// Fill up a string repeating the pattern given
|
355 |
string domain;
|
356 |
domain.resize(size);
|
357 |
int patlen = strlen(pattern);
|
358 |
for (int i = 0; i < size; ++i) {
|
359 |
domain[i] = pattern[i % patlen];
|
360 |
}
|
361 |
// Just make sure it doesn't crash due to too much recursion.
|
362 |
RE_Options options;
|
363 |
options.set_match_limit(match_limit);
|
364 |
RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);
|
365 |
re.FullMatch(domain);
|
366 |
}
|
367 |
|
368 |
//
|
369 |
// Options tests contributed by
|
370 |
// Giuseppe Maxia, CTO, Stardata s.r.l.
|
371 |
// July 2005
|
372 |
//
|
373 |
static void GetOneOptionResult(
|
374 |
const char *option_name,
|
375 |
const char *regex,
|
376 |
const char *str,
|
377 |
RE_Options options,
|
378 |
bool full,
|
379 |
string expected) {
|
380 |
|
381 |
printf("Testing Option <%s>\n", option_name);
|
382 |
if(VERBOSE_TEST)
|
383 |
printf("/%s/ finds \"%s\" within \"%s\" \n",
|
384 |
regex,
|
385 |
expected.c_str(),
|
386 |
str);
|
387 |
string captured("");
|
388 |
if (full)
|
389 |
RE(regex,options).FullMatch(str, &captured);
|
390 |
else
|
391 |
RE(regex,options).PartialMatch(str, &captured);
|
392 |
CHECK_EQ(captured, expected);
|
393 |
}
|
394 |
|
395 |
static void TestOneOption(
|
396 |
const char *option_name,
|
397 |
const char *regex,
|
398 |
const char *str,
|
399 |
RE_Options options,
|
400 |
bool full,
|
401 |
bool assertive = true) {
|
402 |
|
403 |
printf("Testing Option <%s>\n", option_name);
|
404 |
if (VERBOSE_TEST)
|
405 |
printf("'%s' %s /%s/ \n",
|
406 |
str,
|
407 |
(assertive? "matches" : "doesn't match"),
|
408 |
regex);
|
409 |
if (assertive) {
|
410 |
if (full)
|
411 |
CHECK(RE(regex,options).FullMatch(str));
|
412 |
else
|
413 |
CHECK(RE(regex,options).PartialMatch(str));
|
414 |
} else {
|
415 |
if (full)
|
416 |
CHECK(!RE(regex,options).FullMatch(str));
|
417 |
else
|
418 |
CHECK(!RE(regex,options).PartialMatch(str));
|
419 |
}
|
420 |
}
|
421 |
|
422 |
static void Test_CASELESS() {
|
423 |
RE_Options options;
|
424 |
RE_Options options2;
|
425 |
|
426 |
options.set_caseless(true);
|
427 |
TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
|
428 |
TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
|
429 |
TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
|
430 |
|
431 |
TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
|
432 |
TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
|
433 |
options.set_caseless(false);
|
434 |
TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
|
435 |
}
|
436 |
|
437 |
static void Test_MULTILINE() {
|
438 |
RE_Options options;
|
439 |
RE_Options options2;
|
440 |
const char *str = "HELLO\n" "cruel\n" "world\n";
|
441 |
|
442 |
options.set_multiline(true);
|
443 |
TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
|
444 |
TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
|
445 |
TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
|
446 |
options.set_multiline(false);
|
447 |
TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
|
448 |
}
|
449 |
|
450 |
static void Test_DOTALL() {
|
451 |
RE_Options options;
|
452 |
RE_Options options2;
|
453 |
const char *str = "HELLO\n" "cruel\n" "world";
|
454 |
|
455 |
options.set_dotall(true);
|
456 |
TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
|
457 |
TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
|
458 |
TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
|
459 |
options.set_dotall(false);
|
460 |
TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
|
461 |
}
|
462 |
|
463 |
static void Test_DOLLAR_ENDONLY() {
|
464 |
RE_Options options;
|
465 |
RE_Options options2;
|
466 |
const char *str = "HELLO world\n";
|
467 |
|
468 |
TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
|
469 |
options.set_dollar_endonly(true);
|
470 |
TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
|
471 |
TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
|
472 |
}
|
473 |
|
474 |
// Tests the extra option using a pattern that contains the escapes \H and
// \O: with the option set the full match against "HELLO" is expected to
// fail, and with it cleared the match is expected to succeed.
static void Test_EXTRA() {
  const char *str = "HELLO";
  const char *escaped = "\\HELL\\O";

  RE_Options options;
  options.set_extra(true);
  TestOneOption("EXTRA 1", escaped, str, options, true, false);
  TestOneOption("EXTRA 2", escaped, str,
                RE_Options().set_extra(true), true, false);

  options.set_extra(false);
  TestOneOption("no EXTRA", escaped, str, options, true);
}
|
484 |
|
485 |
static void Test_EXTENDED() {
|
486 |
RE_Options options;
|
487 |
RE_Options options2;
|
488 |
const char *str = "HELLO world";
|
489 |
|
490 |
options.set_extended(true);
|
491 |
TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
|
492 |
TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
|
493 |
TestOneOption("EXTENDED (class)",
|
494 |
"^ HE L{2} O "
|
495 |
"\\s+ "
|
496 |
"\\w+ $ ",
|
497 |
str,
|
498 |
options,
|
499 |
false);
|
500 |
|
501 |
TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
|
502 |
TestOneOption("EXTENDED (function)",
|
503 |
"^ HE L{2} O "
|
504 |
"\\s+ "
|
505 |
"\\w+ $ ",
|
506 |
str,
|
507 |
pcrecpp::EXTENDED(),
|
508 |
false);
|
509 |
|
510 |
options.set_extended(false);
|
511 |
TestOneOption("no EXTENDED", "HELLO world", str, options, false);
|
512 |
}
|
513 |
|
514 |
static void Test_NO_AUTO_CAPTURE() {
|
515 |
RE_Options options;
|
516 |
const char *str = "HELLO world";
|
517 |
string captured;
|
518 |
|
519 |
printf("Testing Option <no NO_AUTO_CAPTURE>\n");
|
520 |
if (VERBOSE_TEST)
|
521 |
printf("parentheses capture text\n");
|
522 |
RE re("(world|universe)$", options);
|
523 |
CHECK(re.Extract("\\1", str , &captured));
|
524 |
CHECK_EQ(captured, "world");
|
525 |
options.set_no_auto_capture(true);
|
526 |
printf("testing Option <NO_AUTO_CAPTURE>\n");
|
527 |
if (VERBOSE_TEST)
|
528 |
printf("parentheses do not capture text\n");
|
529 |
re.Extract("\\1",str, &captured );
|
530 |
CHECK_EQ(captured, "world");
|
531 |
}
|
532 |
|
533 |
// Tests the ungreedy option on a subject holding two quoted words.  With the
// option set, '.*' is expected to capture only the first quoted word and
// '.*?' the whole quoted span; with it cleared the expectations swap.
static void Test_UNGREEDY() {
  const char *str = "HELLO, 'this' is the 'world'";
  const char *first_word = "'this'";
  const char *whole_span = "'this' is the 'world'";

  RE_Options options;
  options.set_ungreedy(true);
  GetOneOptionResult("UNGREEDY 1", "('.*')",  str, options, false, first_word);
  GetOneOptionResult("UNGREEDY 2", "('.*')",  str,
                     RE_Options().set_ungreedy(true), false, first_word);
  GetOneOptionResult("UNGREEDY",   "('.*?')", str, options, false, whole_span);

  options.set_ungreedy(false);
  GetOneOptionResult("no UNGREEDY", "('.*')",  str, options, false, whole_span);
  GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, first_word);
}
|
546 |
|
547 |
// Tests setting several options at once from PCRE_* flag bits, through
// set_all_options(), through the RE_Options constructor, and through chained
// setters, and checks that set_all_options(0) clears them again.
static void Test_all_options() {
  const char *str = "HELLO\n" "cruel\n" "world";
  const char *spaced_cruel = " ^ c r u e l $ ";

  RE_Options options;

  // CASELESS | DOTALL: matches across newlines regardless of case.
  options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
  TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str,
                options, false);

  options.set_all_options(0);
  TestOneOption("all_options (0)", "^hello.*WORLD", str,
                options, false, false);

  // MULTILINE | EXTENDED, set in three different ways.
  options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
  TestOneOption("all_options (MULTILINE|EXTENDED)", spaced_cruel, str,
                options, false);

  TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
                spaced_cruel, str,
                RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), false);

  TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
                spaced_cruel, str,
                RE_Options().set_multiline(true).set_extended(true), false);

  options.set_all_options(0);
  TestOneOption("all_options (0)", "^ c r u e l $", str,
                options, false, false);

}
|
576 |
|
577 |
// Runs every option test, in a fixed order.
static void TestOptions() {
  printf("Testing Options\n");

  typedef void (*OptionTest)();
  static const OptionTest kOptionTests[] = {
    Test_CASELESS,
    Test_MULTILINE,
    Test_DOTALL,
    Test_DOLLAR_ENDONLY,
    Test_EXTENDED,
    Test_NO_AUTO_CAPTURE,
    Test_UNGREEDY,
    Test_EXTRA,
    Test_all_options
  };

  const size_t count = sizeof(kOptionTests) / sizeof(kOptionTests[0]);
  for (size_t k = 0; k < count; ++k) {
    kOptionTests[k]();
  }
}
|
589 |
|
590 |
int main(int argc, char** argv) {
|
591 |
// Treat any flag as --help
|
592 |
if (argc > 1 && argv[1][0] == '-') {
|
593 |
printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
|
594 |
" If 'timingX ###' is specified, run the given timing test\n"
|
595 |
" with the given number of iterations, rather than running\n"
|
596 |
" the default corectness test.\n", argv[0]);
|
597 |
return 0;
|
598 |
}
|
599 |
|
600 |
if (argc > 1) {
|
601 |
if ( argc == 2 || atoi(argv[2]) == 0) {
|
602 |
printf("timing mode needs a num-iters argument\n");
|
603 |
return 1;
|
604 |
}
|
605 |
if (!strcmp(argv[1], "timing1"))
|
606 |
Timing1(atoi(argv[2]));
|
607 |
else if (!strcmp(argv[1], "timing2"))
|
608 |
Timing2(atoi(argv[2]));
|
609 |
else if (!strcmp(argv[1], "timing3"))
|
610 |
Timing3(atoi(argv[2]));
|
611 |
else
|
612 |
printf("Unknown argument '%s'\n", argv[1]);
|
613 |
return 0;
|
614 |
}
|
615 |
|
616 |
printf("Testing FullMatch\n");
|
617 |
|
618 |
int i;
|
619 |
string s;
|
620 |
|
621 |
/***** FullMatch with no args *****/
|
622 |
|
623 |
CHECK(RE("h.*o").FullMatch("hello"));
|
624 |
CHECK(!RE("h.*o").FullMatch("othello"));
|
625 |
CHECK(!RE("h.*o").FullMatch("hello!"));
|
626 |
|
627 |
/***** FullMatch with args *****/
|
628 |
|
629 |
// Zero-arg
|
630 |
CHECK(RE("\\d+").FullMatch("1001"));
|
631 |
|
632 |
// Single-arg
|
633 |
CHECK(RE("(\\d+)").FullMatch("1001", &i));
|
634 |
CHECK_EQ(i, 1001);
|
635 |
CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
|
636 |
CHECK_EQ(i, -123);
|
637 |
CHECK(!RE("()\\d+").FullMatch("10", &i));
|
638 |
CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
|
639 |
&i));
|
640 |
|
641 |
// Digits surrounding integer-arg
|
642 |
CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
|
643 |
CHECK_EQ(i, 23);
|
644 |
CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
|
645 |
CHECK_EQ(i, 1);
|
646 |
CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
|
647 |
CHECK_EQ(i, -1);
|
648 |
CHECK(RE("(\\d)").PartialMatch("1234", &i));
|
649 |
CHECK_EQ(i, 1);
|
650 |
CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
|
651 |
CHECK_EQ(i, -1);
|
652 |
|
653 |
// String-arg
|
654 |
CHECK(RE("h(.*)o").FullMatch("hello", &s));
|
655 |
CHECK_EQ(s, string("ell"));
|
656 |
|
657 |
// StringPiece-arg
|
658 |
StringPiece sp;
|
659 |
CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
|
660 |
CHECK_EQ(sp.size(), 4);
|
661 |
CHECK(memcmp(sp.data(), "ruby", 4) == 0);
|
662 |
CHECK_EQ(i, 1234);
|
663 |
|
664 |
// Multi-arg
|
665 |
CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
|
666 |
CHECK_EQ(s, string("ruby"));
|
667 |
CHECK_EQ(i, 1234);
|
668 |
|
669 |
// Ignored arg
|
670 |
CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
|
671 |
CHECK_EQ(s, string("ruby"));
|
672 |
CHECK_EQ(i, 1234);
|
673 |
|
674 |
// Type tests
|
675 |
{
|
676 |
char c;
|
677 |
CHECK(RE("(H)ello").FullMatch("Hello", &c));
|
678 |
CHECK_EQ(c, 'H');
|
679 |
}
|
680 |
{
|
681 |
unsigned char c;
|
682 |
CHECK(RE("(H)ello").FullMatch("Hello", &c));
|
683 |
CHECK_EQ(c, static_cast<unsigned char>('H'));
|
684 |
}
|
685 |
{
|
686 |
short v;
|
687 |
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
|
688 |
CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
|
689 |
CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
|
690 |
CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
|
691 |
CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
|
692 |
CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
|
693 |
}
|
694 |
{
|
695 |
unsigned short v;
|
696 |
CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
|
697 |
CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
|
698 |
CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
|
699 |
CHECK(!RE("(\\d+)").FullMatch("65536", &v));
|
700 |
}
|
701 |
{
|
702 |
int v;
|
703 |
static const int max_value = 0x7fffffff;
|
704 |
static const int min_value = -max_value - 1;
|
705 |
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
|
706 |
CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
|
707 |
CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
|
708 |
CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
|
709 |
CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
|
710 |
CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
|
711 |
}
|
712 |
{
|
713 |
unsigned int v;
|
714 |
static const unsigned int max_value = 0xfffffffful;
|
715 |
CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
|
716 |
CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
|
717 |
CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
|
718 |
}
|
719 |
#ifdef HAVE_LONG_LONG
|
720 |
{
|
721 |
long long v;
|
722 |
static const long long max_value = 0x7fffffffffffffffLL;
|
723 |
static const long long min_value = -max_value - 1;
|
724 |
char buf[32];
|
725 |
|
726 |
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
|
727 |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
|
728 |
|
729 |
snprintf(buf, sizeof(buf), "%lld", max_value);
|
730 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
|
731 |
|
732 |
snprintf(buf, sizeof(buf), "%lld", min_value);
|
733 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
|
734 |
|
735 |
snprintf(buf, sizeof(buf), "%lld", max_value);
|
736 |
assert(buf[strlen(buf)-1] != '9');
|
737 |
buf[strlen(buf)-1]++;
|
738 |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
|
739 |
|
740 |
snprintf(buf, sizeof(buf), "%lld", min_value);
|
741 |
assert(buf[strlen(buf)-1] != '9');
|
742 |
buf[strlen(buf)-1]++;
|
743 |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
|
744 |
}
|
745 |
#endif
|
746 |
#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
|
747 |
{
|
748 |
unsigned long long v;
|
749 |
long long v2;
|
750 |
static const unsigned long long max_value = 0xffffffffffffffffULL;
|
751 |
char buf[32];
|
752 |
|
753 |
CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
|
754 |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
|
755 |
|
756 |
snprintf(buf, sizeof(buf), "%llu", max_value);
|
757 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
|
758 |
|
759 |
assert(buf[strlen(buf)-1] != '9');
|
760 |
buf[strlen(buf)-1]++;
|
761 |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
|
762 |
}
|
763 |
#endif
|
764 |
{
|
765 |
float v;
|
766 |
CHECK(RE("(.*)").FullMatch("100", &v));
|
767 |
CHECK(RE("(.*)").FullMatch("-100.", &v));
|
768 |
CHECK(RE("(.*)").FullMatch("1e23", &v));
|
769 |
}
|
770 |
{
|
771 |
double v;
|
772 |
CHECK(RE("(.*)").FullMatch("100", &v));
|
773 |
CHECK(RE("(.*)").FullMatch("-100.", &v));
|
774 |
CHECK(RE("(.*)").FullMatch("1e23", &v));
|
775 |
}
|
776 |
|
777 |
// Check that matching is fully anchored
|
778 |
CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
|
779 |
CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
|
780 |
CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
|
781 |
CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
|
782 |
|
783 |
// Braces
|
784 |
CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
|
785 |
CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
|
786 |
CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
|
787 |
|
788 |
// Complicated RE
|
789 |
CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
|
790 |
CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
|
791 |
CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
|
792 |
CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
|
793 |
|
794 |
// Check full-match handling (needs '$' tacked on internally)
|
795 |
CHECK(RE("fo|foo").FullMatch("fo"));
|
796 |
CHECK(RE("fo|foo").FullMatch("foo"));
|
797 |
CHECK(RE("fo|foo$").FullMatch("fo"));
|
798 |
CHECK(RE("fo|foo$").FullMatch("foo"));
|
799 |
CHECK(RE("foo$").FullMatch("foo"));
|
800 |
CHECK(!RE("foo\\$").FullMatch("foo$bar"));
|
801 |
CHECK(!RE("fo|bar").FullMatch("fox"));
|
802 |
|
803 |
// Uncomment the following if we change the handling of '$' to
|
804 |
// prevent it from matching a trailing newline
|
805 |
if (false) {
|
806 |
// Check that we don't get bitten by pcre's special handling of a
|
807 |
// '\n' at the end of the string matching '$'
|
808 |
CHECK(!RE("foo$").PartialMatch("foo\n"));
|
809 |
}
|
810 |
|
811 |
// Number of args
|
812 |
int a[16];
|
813 |
CHECK(RE("").FullMatch(""));
|
814 |
|
815 |
memset(a, 0, sizeof(0));
|
816 |
CHECK(RE("(\\d){1}").FullMatch("1",
|
817 |
&a[0]));
|
818 |
CHECK_EQ(a[0], 1);
|
819 |
|
820 |
memset(a, 0, sizeof(0));
|
821 |
CHECK(RE("(\\d)(\\d)").FullMatch("12",
|
822 |
&a[0], &a[1]));
|
823 |
CHECK_EQ(a[0], 1);
|
824 |
CHECK_EQ(a[1], 2);
|
825 |
|
826 |
memset(a, 0, sizeof(0));
|
827 |
CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
|
828 |
&a[0], &a[1], &a[2]));
|
829 |
CHECK_EQ(a[0], 1);
|
830 |
CHECK_EQ(a[1], 2);
|
831 |
CHECK_EQ(a[2], 3);
|
832 |
|
833 |
memset(a, 0, sizeof(0));
|
834 |
CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
|
835 |
&a[0], &a[1], &a[2], &a[3]));
|
836 |
CHECK_EQ(a[0], 1);
|
837 |
CHECK_EQ(a[1], 2);
|
838 |
CHECK_EQ(a[2], 3);
|
839 |
CHECK_EQ(a[3], 4);
|
840 |
|
841 |
memset(a, 0, sizeof(0));
|
842 |
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
|
843 |
&a[0], &a[1], &a[2],
|
844 |
&a[3], &a[4]));
|
845 |
CHECK_EQ(a[0], 1);
|
846 |
CHECK_EQ(a[1], 2);
|
847 |
CHECK_EQ(a[2], 3);
|
848 |
CHECK_EQ(a[3], 4);
|
849 |
CHECK_EQ(a[4], 5);
|
850 |
|
851 |
memset(a, 0, sizeof(0));
|
852 |
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
|
853 |
&a[0], &a[1], &a[2],
|
854 |
&a[3], &a[4], &a[5]));
|
855 |
CHECK_EQ(a[0], 1);
|
856 |
CHECK_EQ(a[1], 2);
|
857 |
CHECK_EQ(a[2], 3);
|
858 |
CHECK_EQ(a[3], 4);
|
859 |
CHECK_EQ(a[4], 5);
|
860 |
CHECK_EQ(a[5], 6);
|
861 |
|
862 |
memset(a, 0, sizeof(0));
|
863 |
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
|
864 |
&a[0], &a[1], &a[2], &a[3],
|
865 |
&a[4], &a[5], &a[6]));
|
866 |
CHECK_EQ(a[0], 1);
|
867 |
CHECK_EQ(a[1], 2);
|
868 |
CHECK_EQ(a[2], 3);
|
869 |
CHECK_EQ(a[3], 4);
|
870 |
CHECK_EQ(a[4], 5);
|
871 |
CHECK_EQ(a[5], 6);
|
872 |
CHECK_EQ(a[6], 7);
|
873 |
|
874 |
memset(a, 0, sizeof(0));
|
875 |
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
|
876 |
"(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
|
877 |
"1234567890123456",
|
878 |
&a[0], &a[1], &a[2], &a[3],
|
879 |
&a[4], &a[5], &a[6], &a[7],
|
880 |
&a[8], &a[9], &a[10], &a[11],
|
881 |
&a[12], &a[13], &a[14], &a[15]));
|
882 |
CHECK_EQ(a[0], 1);
|
883 |
CHECK_EQ(a[1], 2);
|
884 |
CHECK_EQ(a[2], 3);
|
885 |
CHECK_EQ(a[3], 4);
|
886 |
CHECK_EQ(a[4], 5);
|
887 |
CHECK_EQ(a[5], 6);
|
888 |
CHECK_EQ(a[6], 7);
|
889 |
CHECK_EQ(a[7], 8);
|
890 |
CHECK_EQ(a[8], 9);
|
891 |
CHECK_EQ(a[9], 0);
|
892 |
CHECK_EQ(a[10], 1);
|
893 |
CHECK_EQ(a[11], 2);
|
894 |
CHECK_EQ(a[12], 3);
|
895 |
CHECK_EQ(a[13], 4);
|
896 |
CHECK_EQ(a[14], 5);
|
897 |
CHECK_EQ(a[15], 6);
|
898 |
|
899 |
/***** PartialMatch *****/
|
900 |
|
901 |
printf("Testing PartialMatch\n");
|
902 |
|
903 |
CHECK(RE("h.*o").PartialMatch("hello"));
|
904 |
CHECK(RE("h.*o").PartialMatch("othello"));
|
905 |
CHECK(RE("h.*o").PartialMatch("hello!"));
|
906 |
CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
|
907 |
|
908 |
RadixTests();
|
909 |
TestReplace();
|
910 |
TestExtract();
|
911 |
TestConsume();
|
912 |
TestFindAndConsume();
|
913 |
TestMatchNumberPeculiarity();
|
914 |
|
915 |
// Check the pattern() accessor
|
916 |
{
|
917 |
const string kPattern = "http://([^/]+)/.*";
|
918 |
const RE re(kPattern);
|
919 |
CHECK_EQ(kPattern, re.pattern());
|
920 |
}
|
921 |
|
922 |
// Check RE error field.
|
923 |
{
|
924 |
RE re("foo");
|
925 |
CHECK(re.error().empty()); // Must have no error
|
926 |
}
|
927 |
|
928 |
#ifdef SUPPORT_UTF8
|
929 |
// Check UTF-8 handling
|
930 |
{
|
931 |
printf("Testing UTF-8 handling\n");
|
932 |
|
933 |
// Three Japanese characters (nihongo)
|
934 |
const char utf8_string[] = {
|
935 |
0xe6, 0x97, 0xa5, // 65e5
|
936 |
0xe6, 0x9c, 0xac, // 627c
|
937 |
0xe8, 0xaa, 0x9e, // 8a9e
|
938 |
0
|
939 |
};
|
940 |
const char utf8_pattern[] = {
|
941 |
'.',
|
942 |
0xe6, 0x9c, 0xac, // 627c
|
943 |
'.',
|
944 |
0
|
945 |
};
|
946 |
|
947 |
// Both should match in either mode, bytes or UTF-8
|
948 |
RE re_test1(".........");
|
949 |
CHECK(re_test1.FullMatch(utf8_string));
|
950 |
RE re_test2("...", pcrecpp::UTF8());
|
951 |
CHECK(re_test2.FullMatch(utf8_string));
|
952 |
|
953 |
// Check that '.' matches one byte or UTF-8 character
|
954 |
// according to the mode.
|
955 |
string ss;
|
956 |
RE re_test3("(.)");
|
957 |
CHECK(re_test3.PartialMatch(utf8_string, &ss));
|
958 |
CHECK_EQ(ss, string("\xe6"));
|
959 |
RE re_test4("(.)", pcrecpp::UTF8());
|
960 |
CHECK(re_test4.PartialMatch(utf8_string, &ss));
|
961 |
CHECK_EQ(ss, string("\xe6\x97\xa5"));
|
962 |
|
963 |
// Check that string matches itself in either mode
|
964 |
RE re_test5(utf8_string);
|
965 |
CHECK(re_test5.FullMatch(utf8_string));
|
966 |
RE re_test6(utf8_string, pcrecpp::UTF8());
|
967 |
CHECK(re_test6.FullMatch(utf8_string));
|
968 |
|
969 |
// Check that pattern matches string only in UTF8 mode
|
970 |
RE re_test7(utf8_pattern);
|
971 |
CHECK(!re_test7.FullMatch(utf8_string));
|
972 |
RE re_test8(utf8_pattern, pcrecpp::UTF8());
|
973 |
CHECK(re_test8.FullMatch(utf8_string));
|
974 |
}
|
975 |
|
976 |
// Check that ungreedy, UTF8 regular expressions don't match when they
|
977 |
// oughtn't -- see bug 82246.
|
978 |
{
|
979 |
// This code always worked.
|
980 |
const char* pattern = "\\w+X";
|
981 |
const string target = "a aX";
|
982 |
RE match_sentence(pattern);
|
983 |
RE match_sentence_re(pattern, pcrecpp::UTF8());
|
984 |
|
985 |
CHECK(!match_sentence.FullMatch(target));
|
986 |
CHECK(!match_sentence_re.FullMatch(target));
|
987 |
}
|
988 |
|
989 |
{
|
990 |
const char* pattern = "(?U)\\w+X";
|
991 |
const string target = "a aX";
|
992 |
RE match_sentence(pattern);
|
993 |
RE match_sentence_re(pattern, pcrecpp::UTF8());
|
994 |
|
995 |
CHECK(!match_sentence.FullMatch(target));
|
996 |
CHECK(!match_sentence_re.FullMatch(target));
|
997 |
}
|
998 |
#endif /* def SUPPORT_UTF8 */
|
999 |
|
1000 |
printf("Testing error reporting\n");
|
1001 |
|
1002 |
{ RE re("a\\1"); CHECK(!re.error().empty()); }
|
1003 |
{
|
1004 |
RE re("a[x");
|
1005 |
CHECK(!re.error().empty());
|
1006 |
}
|
1007 |
{
|
1008 |
RE re("a[z-a]");
|
1009 |
CHECK(!re.error().empty());
|
1010 |
}
|
1011 |
{
|
1012 |
RE re("a[[:foobar:]]");
|
1013 |
CHECK(!re.error().empty());
|
1014 |
}
|
1015 |
{
|
1016 |
RE re("a(b");
|
1017 |
CHECK(!re.error().empty());
|
1018 |
}
|
1019 |
{
|
1020 |
RE re("a\\");
|
1021 |
CHECK(!re.error().empty());
|
1022 |
}
|
1023 |
|
1024 |
// Test that recursion is stopped: there will be some errors reported
|
1025 |
int matchlimit = 5000;
|
1026 |
int bytes = 15 * 1024; // enough to crash if there was no match limit
|
1027 |
TestRecursion(bytes, ".", matchlimit);
|
1028 |
TestRecursion(bytes, "a", matchlimit);
|
1029 |
TestRecursion(bytes, "a.", matchlimit);
|
1030 |
TestRecursion(bytes, "ab.", matchlimit);
|
1031 |
TestRecursion(bytes, "abc.", matchlimit);
|
1032 |
|
1033 |
// Test Options
|
1034 |
if (getenv("VERBOSE_TEST") != NULL)
|
1035 |
VERBOSE_TEST = true;
|
1036 |
TestOptions();
|
1037 |
|
1038 |
// Done
|
1039 |
printf("OK\n");
|
1040 |
|
1041 |
return 0;
|
1042 |
}
|