6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
7 |
|
|
8 |
Written by Philip Hazel |
Written by Philip Hazel |
9 |
Copyright (c) 1997-2006 University of Cambridge |
Copyright (c) 1997-2007 University of Cambridge |
10 |
|
|
11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
2073 |
< -1 => some kind of unexpected problem |
< -1 => some kind of unexpected problem |
2074 |
*/ |
*/ |
2075 |
|
|
2076 |
PCRE_DATA_SCOPE int |
PCRE_EXP_DEFN int |
2077 |
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, |
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, |
2078 |
const char *subject, int length, int start_offset, int options, int *offsets, |
const char *subject, int length, int start_offset, int options, int *offsets, |
2079 |
int offsetcount, int *workspace, int wscount) |
int offsetcount, int *workspace, int wscount) |
2166 |
/* Handle different types of newline. The three bits give eight cases. If |
/* Handle different types of newline. The three bits give eight cases. If |
2167 |
nothing is set at run time, whatever was used at compile time applies. */ |
nothing is set at run time, whatever was used at compile time applies. */ |
2168 |
|
|
2169 |
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) & |
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) & |
2170 |
PCRE_NEWLINE_BITS) |
PCRE_NEWLINE_BITS) |
2171 |
{ |
{ |
2172 |
case 0: newline = NEWLINE; break; /* Compile-time default */ |
case 0: newline = NEWLINE; break; /* Compile-time default */ |
2175 |
case PCRE_NEWLINE_CR+ |
case PCRE_NEWLINE_CR+ |
2176 |
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break; |
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break; |
2177 |
case PCRE_NEWLINE_ANY: newline = -1; break; |
case PCRE_NEWLINE_ANY: newline = -1; break; |
2178 |
|
case PCRE_NEWLINE_ANYCRLF: newline = -2; break; |
2179 |
default: return PCRE_ERROR_BADNEWLINE; |
default: return PCRE_ERROR_BADNEWLINE; |
2180 |
} |
} |
2181 |
|
|
2182 |
if (newline < 0) |
if (newline == -2) |
2183 |
|
{ |
2184 |
|
md->nltype = NLTYPE_ANYCRLF; |
2185 |
|
} |
2186 |
|
else if (newline < 0) |
2187 |
{ |
{ |
2188 |
md->nltype = NLTYPE_ANY; |
md->nltype = NLTYPE_ANY; |
2189 |
} |
} |
2313 |
{ |
{ |
2314 |
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject)) |
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject)) |
2315 |
current_subject++; |
current_subject++; |
2316 |
|
|
2317 |
|
/* If we have just passed a CR and the newline option is ANY or |
2318 |
|
ANYCRLF, and we are now at a LF, advance the match position by one more |
2319 |
|
character. */ |
2320 |
|
|
2321 |
|
if (current_subject[-1] == '\r' && |
2322 |
|
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
2323 |
|
current_subject < end_subject && |
2324 |
|
*current_subject == '\n') |
2325 |
|
current_subject++; |
2326 |
} |
} |
2327 |
} |
} |
2328 |
|
|
2431 |
} |
} |
2432 |
if (current_subject > end_subject) break; |
if (current_subject > end_subject) break; |
2433 |
|
|
2434 |
/* If we have just passed a CR and the newline option is CRLF or ANY, and we |
/* If we have just passed a CR and the newline option is CRLF or ANY or |
2435 |
are now at a LF, advance the match position by one more character. */ |
ANYCRLF, and we are now at a LF, advance the match position by one more |
2436 |
|
character. */ |
2437 |
|
|
2438 |
if (current_subject[-1] == '\r' && |
if (current_subject[-1] == '\r' && |
2439 |
(md->nltype == NLTYPE_ANY || md->nllen == 2) && |
(md->nltype == NLTYPE_ANY || |
2440 |
|
md->nltype == NLTYPE_ANYCRLF || |
2441 |
|
md->nllen == 2) && |
2442 |
current_subject < end_subject && |
current_subject < end_subject && |
2443 |
*current_subject == '\n') |
*current_subject == '\n') |
2444 |
current_subject++; |
current_subject++; |