1 |
// Copyright (c) 2005, Google Inc. |
// Copyright (c) 2010, Google Inc. |
2 |
// All rights reserved. |
// All rights reserved. |
3 |
// |
// |
4 |
// Redistribution and use in source and binary forms, with or without |
// Redistribution and use in source and binary forms, with or without |
331 |
bool RE::Replace(const StringPiece& rewrite, |
bool RE::Replace(const StringPiece& rewrite, |
332 |
string *str) const { |
string *str) const { |
333 |
int vec[kVecSize]; |
int vec[kVecSize]; |
334 |
int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize); |
int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize); |
335 |
if (matches == 0) |
if (matches == 0) |
336 |
return false; |
return false; |
337 |
|
|
384 |
string out; |
string out; |
385 |
int start = 0; |
int start = 0; |
386 |
int lastend = -1; |
int lastend = -1; |
387 |
|
bool last_match_was_empty_string = false; |
388 |
|
|
389 |
while (start <= static_cast<int>(str->length())) { |
while (start <= static_cast<int>(str->length())) { |
390 |
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize); |
// If the previous match was for the empty string, we shouldn't |
391 |
if (matches <= 0) |
// just match again: we'll match in the same way and get an |
392 |
break; |
// infinite loop. Instead, we do the match in a special way: |
393 |
int matchstart = vec[0], matchend = vec[1]; |
// anchored -- to force another try at the same position -- |
394 |
assert(matchstart >= start); |
// and with a flag saying that this time, ignore empty matches. |
395 |
assert(matchend >= matchstart); |
// If this special match returns, that means there's a non-empty |
396 |
if (matchstart == matchend && matchstart == lastend) { |
// match at this position as well, and we can continue. If not, |
397 |
// advance one character if we matched an empty string at the same |
// we do what perl does, and just advance by one. |
398 |
// place as the last match occurred |
// Notice that perl prints '@@@' for this; |
399 |
matchend = start + 1; |
// perl -le '$_ = "aa"; s/b*|aa/@/g; print' |
400 |
// If the current char is CR and we're in CRLF mode, skip LF too. |
int matches; |
401 |
// Note it's better to call pcre_fullinfo() than to examine |
if (last_match_was_empty_string) { |
402 |
// all_options(), since options_ could have changed between |
matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize); |
403 |
// compile-time and now, but this is simpler and safe enough. |
if (matches <= 0) { |
404 |
// Modified by PH to add ANY and ANYCRLF. |
int matchend = start + 1; // advance one character. |
405 |
if (start+1 < static_cast<int>(str->length()) && |
// If the current char is CR and we're in CRLF mode, skip LF too. |
406 |
(*str)[start] == '\r' && (*str)[start+1] == '\n' && |
// Note it's better to call pcre_fullinfo() than to examine |
407 |
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || |
// all_options(), since options_ could have changed between |
408 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || |
// compile-time and now, but this is simpler and safe enough. |
409 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF) |
// Modified by PH to add ANY and ANYCRLF. |
410 |
) { |
if (matchend < static_cast<int>(str->length()) && |
411 |
matchend++; |
(*str)[start] == '\r' && (*str)[matchend] == '\n' && |
412 |
} |
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || |
413 |
// We also need to advance more than one char if we're in utf8 mode. |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || |
414 |
#ifdef SUPPORT_UTF8 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) { |
|
if (options_.utf8()) { |
|
|
while (matchend < static_cast<int>(str->length()) && |
|
|
((*str)[matchend] & 0xc0) == 0x80) |
|
415 |
matchend++; |
matchend++; |
416 |
} |
} |
417 |
|
// We also need to advance more than one char if we're in utf8 mode. |
418 |
|
#ifdef SUPPORT_UTF8 |
419 |
|
if (options_.utf8()) { |
420 |
|
while (matchend < static_cast<int>(str->length()) && |
421 |
|
((*str)[matchend] & 0xc0) == 0x80) |
422 |
|
matchend++; |
423 |
|
} |
424 |
#endif |
#endif |
425 |
if (matchend <= static_cast<int>(str->length())) |
if (start < static_cast<int>(str->length())) |
426 |
out.append(*str, start, matchend - start); |
out.append(*str, start, matchend - start); |
427 |
start = matchend; |
start = matchend; |
428 |
|
last_match_was_empty_string = false; |
429 |
|
continue; |
430 |
|
} |
431 |
} else { |
} else { |
432 |
out.append(*str, start, matchstart - start); |
matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize); |
433 |
Rewrite(&out, rewrite, *str, vec, matches); |
if (matches <= 0) |
434 |
start = matchend; |
break; |
|
lastend = matchend; |
|
|
count++; |
|
435 |
} |
} |
436 |
|
int matchstart = vec[0], matchend = vec[1]; |
437 |
|
assert(matchstart >= start); |
438 |
|
assert(matchend >= matchstart); |
439 |
|
out.append(*str, start, matchstart - start); |
440 |
|
Rewrite(&out, rewrite, *str, vec, matches); |
441 |
|
start = matchend; |
442 |
|
lastend = matchend; |
443 |
|
count++; |
444 |
|
last_match_was_empty_string = (matchstart == matchend); |
445 |
} |
} |
446 |
|
|
447 |
if (count == 0) |
if (count == 0) |
457 |
const StringPiece& text, |
const StringPiece& text, |
458 |
string *out) const { |
string *out) const { |
459 |
int vec[kVecSize]; |
int vec[kVecSize]; |
460 |
int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize); |
int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); |
461 |
if (matches == 0) |
if (matches == 0) |
462 |
return false; |
return false; |
463 |
out->erase(); |
out->erase(); |
503 |
int RE::TryMatch(const StringPiece& text, |
int RE::TryMatch(const StringPiece& text, |
504 |
int startpos, |
int startpos, |
505 |
Anchor anchor, |
Anchor anchor, |
506 |
|
bool empty_ok, |
507 |
int *vec, |
int *vec, |
508 |
int vecsize) const { |
int vecsize) const { |
509 |
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; |
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; |
521 |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
522 |
extra.match_limit_recursion = options_.match_limit_recursion(); |
extra.match_limit_recursion = options_.match_limit_recursion(); |
523 |
} |
} |
524 |
|
|
525 |
|
int options = 0; |
526 |
|
if (anchor != UNANCHORED) |
527 |
|
options |= PCRE_ANCHORED; |
528 |
|
if (!empty_ok) |
529 |
|
options |= PCRE_NOTEMPTY; |
530 |
|
|
531 |
int rc = pcre_exec(re, // The regular expression object |
int rc = pcre_exec(re, // The regular expression object |
532 |
&extra, |
&extra, |
533 |
(text.data() == NULL) ? "" : text.data(), |
(text.data() == NULL) ? "" : text.data(), |
534 |
text.size(), |
text.size(), |
535 |
startpos, |
startpos, |
536 |
(anchor == UNANCHORED) ? 0 : PCRE_ANCHORED, |
options, |
537 |
vec, |
vec, |
538 |
vecsize); |
vecsize); |
539 |
|
|
563 |
int* vec, |
int* vec, |
564 |
int vecsize) const { |
int vecsize) const { |
565 |
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace |
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace |
566 |
int matches = TryMatch(text, 0, anchor, vec, vecsize); |
int matches = TryMatch(text, 0, anchor, true, vec, vecsize); |
567 |
assert(matches >= 0); // TryMatch never returns negatives |
assert(matches >= 0); // TryMatch never returns negatives |
568 |
if (matches == 0) |
if (matches == 0) |
569 |
return false; |
return false; |