332 |
return true; |
return true; |
333 |
} |
} |
334 |
|
|
335 |
|
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF. |
336 |
|
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF. |
337 |
|
static int NewlineMode(int pcre_options) { |
338 |
|
// TODO: if we can make it threadsafe, cache this var |
339 |
|
int newline_mode = 0; |
340 |
|
/* if (newline_mode) return newline_mode; */ // do this once it's cached |
341 |
|
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) { |
342 |
|
newline_mode = (pcre_options & |
343 |
|
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)); |
344 |
|
} else { |
345 |
|
int newline; |
346 |
|
pcre_config(PCRE_CONFIG_NEWLINE, &newline); |
347 |
|
if (newline == 10) |
348 |
|
newline_mode = PCRE_NEWLINE_LF; |
349 |
|
else if (newline == 13) |
350 |
|
newline_mode = PCRE_NEWLINE_CR; |
351 |
|
else if (newline == 3338) |
352 |
|
newline_mode = PCRE_NEWLINE_CRLF; |
353 |
|
else |
354 |
|
assert("" == "Unexpected return value from pcre_config(NEWLINE)"); |
355 |
|
} |
356 |
|
return newline_mode; |
357 |
|
} |
358 |
|
|
359 |
int RE::GlobalReplace(const StringPiece& rewrite, |
int RE::GlobalReplace(const StringPiece& rewrite, |
360 |
string *str) const { |
string *str) const { |
361 |
int count = 0; |
int count = 0; |
374 |
if (matchstart == matchend && matchstart == lastend) { |
if (matchstart == matchend && matchstart == lastend) { |
375 |
// advance one character if we matched an empty string at the same |
// advance one character if we matched an empty string at the same |
376 |
// place as the last match occurred |
// place as the last match occurred |
377 |
if (start < static_cast<int>(str->length())) |
matchend = start + 1; |
378 |
out.push_back((*str)[start]); |
// If the current char is CR and we're in CRLF mode, skip LF too. |
379 |
start++; |
// Note it's better to call pcre_fullinfo() than to examine |
380 |
|
// all_options(), since options_ could have changed between |
381 |
|
// compile-time and now, but this is simpler and safe enough. |
382 |
|
if (start+1 < static_cast<int>(str->length()) && |
383 |
|
(*str)[start] == '\r' && (*str)[start+1] == '\n' && |
384 |
|
NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) { |
385 |
|
matchend++; |
386 |
|
} |
387 |
|
// We also need to advance more than one char if we're in utf8 mode. |
388 |
|
#ifdef SUPPORT_UTF8 |
389 |
|
if (options_.utf8()) { |
390 |
|
while (matchend < static_cast<int>(str->length()) && |
391 |
|
((*str)[matchend] & 0xc0) == 0x80) |
392 |
|
matchend++; |
393 |
|
} |
394 |
|
#endif |
395 |
|
if (matchend <= static_cast<int>(str->length())) |
396 |
|
out.append(*str, start, matchend - start); |
397 |
|
start = matchend; |
398 |
} else { |
} else { |
399 |
out.append(*str, start, matchstart - start); |
out.append(*str, start, matchstart - start); |
400 |
Rewrite(&out, rewrite, *str, vec, matches); |
Rewrite(&out, rewrite, *str, vec, matches); |