2651 |
if ((flags & PCRE_EXTRA_TABLES) != 0) |
if ((flags & PCRE_EXTRA_TABLES) != 0) |
2652 |
md->tables = extra_data->tables; |
md->tables = extra_data->tables; |
2653 |
} |
} |
2654 |
|
|
2655 |
/* Check that the first field in the block is the magic number. If it is not, |
/* Check that the first field in the block is the magic number. If it is not, |
2656 |
test for a regex that was compiled on a host of opposite endianness. If this is |
test for a regex that was compiled on a host of opposite endianness. If this is |
2657 |
the case, flipped values are put in internal_re and internal_study if there was |
the case, flipped values are put in internal_re and internal_study if there was |
2790 |
} |
} |
2791 |
else |
else |
2792 |
{ |
{ |
2793 |
if (startline && study != NULL && |
if (!startline && study != NULL && |
2794 |
(study->options & PCRE_STUDY_MAPPED) != 0) |
(study->flags & PCRE_STUDY_MAPPED) != 0) |
2795 |
start_bits = study->start_bits; |
start_bits = study->start_bits; |
2796 |
} |
} |
2797 |
} |
} |
2842 |
} |
} |
2843 |
|
|
2844 |
/* There are some optimizations that avoid running the match if a known |
/* There are some optimizations that avoid running the match if a known |
2845 |
starting point is not found, or if a known later character is not present. |
starting point is not found. However, there is an option that disables |
2846 |
However, there is an option that disables these, for testing and for |
these, for testing and for ensuring that all callouts do actually occur. */ |
|
ensuring that all callouts do actually occur. */ |
|
2847 |
|
|
2848 |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
2849 |
{ |
{ |
|
|
|
2850 |
/* Advance to a known first byte. */ |
/* Advance to a known first byte. */ |
2851 |
|
|
2852 |
if (first_byte >= 0) |
if (first_byte >= 0) |
2912 |
/* Restore fudged end_subject */ |
/* Restore fudged end_subject */ |
2913 |
|
|
2914 |
end_subject = save_end_subject; |
end_subject = save_end_subject; |
|
} |
|
2915 |
|
|
2916 |
/* If req_byte is set, we know that that character must appear in the subject |
/* The following two optimizations are disabled for partial matching or if |
2917 |
for the match to succeed. If the first character is set, req_byte must be |
disabling is explicitly requested (and of course, by the test above, this |
2918 |
later in the subject; otherwise the test starts at the match point. This |
code is not obeyed when restarting after a partial match). */ |
2919 |
optimization can save a huge amount of work in patterns with nested unlimited |
|
2920 |
repeats that aren't going to match. Writing separate code for cased/caseless |
if ((options & PCRE_NO_START_OPTIMIZE) == 0 && |
2921 |
versions makes it go faster, as does using an autoincrement and backing off |
(options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0) |
2922 |
on a match. |
{ |
2923 |
|
/* If the pattern was studied, a minimum subject length may be set. This |
2924 |
HOWEVER: when the subject string is very, very long, searching to its end can |
is a lower bound; no actual string of that length may actually match the |
2925 |
take a long time, and give bad performance on quite ordinary patterns. This |
pattern. Although the value is, strictly, in characters, we treat it as |
2926 |
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
bytes to avoid spending too much time in this optimization. */ |
2927 |
don't do this when the string is sufficiently long. |
|
2928 |
|
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && |
2929 |
ALSO: this processing is disabled when partial matching is requested, and can |
end_subject - current_subject < study->minlength) |
2930 |
also be explicitly deactivated. Furthermore, we have to disable when |
return PCRE_ERROR_NOMATCH; |
2931 |
restarting after a partial match, because the required character may have |
|
2932 |
already been matched. */ |
/* If req_byte is set, we know that that character must appear in the |
2933 |
|
subject for the match to succeed. If the first character is set, req_byte |
2934 |
if ((options & PCRE_NO_START_OPTIMIZE) == 0 && |
must be later in the subject; otherwise the test starts at the match |
2935 |
req_byte >= 0 && |
point. This optimization can save a huge amount of work in patterns with |
2936 |
end_subject - current_subject < REQ_BYTE_MAX && |
nested unlimited repeats that aren't going to match. Writing separate |
2937 |
(options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_RESTART)) == 0) |
code for cased/caseless versions makes it go faster, as does using an |
2938 |
{ |
autoincrement and backing off on a match. |
2939 |
register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0); |
|
2940 |
|
HOWEVER: when the subject string is very, very long, searching to its end |
2941 |
/* We don't need to repeat the search if we haven't yet reached the |
can take a long time, and give bad performance on quite ordinary |
2942 |
place we found it at last time. */ |
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte |
2943 |
|
string... so we don't do this when the string is sufficiently long. */ |
2944 |
if (p > req_byte_ptr) |
|
2945 |
{ |
if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX) |
|
if (req_byte_caseless) |
|
2946 |
{ |
{ |
2947 |
while (p < end_subject) |
register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0); |
2948 |
|
|
2949 |
|
/* We don't need to repeat the search if we haven't yet reached the |
2950 |
|
place we found it at last time. */ |
2951 |
|
|
2952 |
|
if (p > req_byte_ptr) |
2953 |
{ |
{ |
2954 |
register int pp = *p++; |
if (req_byte_caseless) |
2955 |
if (pp == req_byte || pp == req_byte2) { p--; break; } |
{ |
2956 |
} |
while (p < end_subject) |
2957 |
} |
{ |
2958 |
else |
register int pp = *p++; |
2959 |
{ |
if (pp == req_byte || pp == req_byte2) { p--; break; } |
2960 |
while (p < end_subject) |
} |
2961 |
{ |
} |
2962 |
if (*p++ == req_byte) { p--; break; } |
else |
2963 |
|
{ |
2964 |
|
while (p < end_subject) |
2965 |
|
{ |
2966 |
|
if (*p++ == req_byte) { p--; break; } |
2967 |
|
} |
2968 |
|
} |
2969 |
|
|
2970 |
|
/* If we can't find the required character, break the matching loop, |
2971 |
|
which will cause a return of PCRE_ERROR_NOMATCH. */ |
2972 |
|
|
2973 |
|
if (p >= end_subject) break; |
2974 |
|
|
2975 |
|
/* If we have found the required character, save the point where we |
2976 |
|
found it, so that we don't search again next time round the loop if |
2977 |
|
the start hasn't passed this character yet. */ |
2978 |
|
|
2979 |
|
req_byte_ptr = p; |
2980 |
} |
} |
2981 |
} |
} |
|
|
|
|
/* If we can't find the required character, break the matching loop, |
|
|
which will cause a return of PCRE_ERROR_NOMATCH. */ |
|
|
|
|
|
if (p >= end_subject) break; |
|
|
|
|
|
/* If we have found the required character, save the point where we |
|
|
found it, so that we don't search again next time round the loop if |
|
|
the start hasn't passed this character yet. */ |
|
|
|
|
|
req_byte_ptr = p; |
|
2982 |
} |
} |
2983 |
} |
} /* End of optimizations that are done when not restarting */ |
2984 |
|
|
2985 |
/* OK, now we can do the business */ |
/* OK, now we can do the business */ |
2986 |
|
|