74 |
character that is to be tested in some way. This makes it possible to |
character that is to be tested in some way. This makes it possible to |
75 |
centralize the loading of these characters. In the case of Type * etc, the |
centralize the loading of these characters. In the case of Type * etc, the |
76 |
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a |
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a |
77 |
small value. */ |
small value. ***NOTE*** If the start of this table is modified, the two tables |
78 |
|
that follow must also be modified. */ |
79 |
|
|
80 |
static uschar coptable[] = { |
static uschar coptable[] = { |
81 |
0, /* End */ |
0, /* End */ |
82 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ |
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ |
83 |
|
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ |
84 |
0, 0, /* Any, Anybyte */ |
0, 0, /* Any, Anybyte */ |
85 |
0, 0, 0, 0, /* NOTPROP, PROP, EXTUNI, ANYNL */ |
0, 0, 0, 0, /* NOTPROP, PROP, EXTUNI, ANYNL */ |
86 |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
129 |
and \w */ |
and \w */ |
130 |
|
|
131 |
static uschar toptable1[] = { |
static uschar toptable1[] = { |
132 |
0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, |
133 |
ctype_digit, ctype_digit, |
ctype_digit, ctype_digit, |
134 |
ctype_space, ctype_space, |
ctype_space, ctype_space, |
135 |
ctype_word, ctype_word, |
ctype_word, ctype_word, |
137 |
}; |
}; |
138 |
|
|
139 |
static uschar toptable2[] = { |
static uschar toptable2[] = { |
140 |
0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, |
141 |
ctype_digit, 0, |
ctype_digit, 0, |
142 |
ctype_space, 0, |
ctype_space, 0, |
143 |
ctype_word, 0, |
ctype_word, 0, |
502 |
const uschar *code; |
const uschar *code; |
503 |
int state_offset = current_state->offset; |
int state_offset = current_state->offset; |
504 |
int count, codevalue; |
int count, codevalue; |
505 |
|
#ifdef SUPPORT_UCP |
506 |
int chartype, script; |
int chartype, script; |
507 |
|
#endif |
508 |
|
|
509 |
#ifdef DEBUG |
#ifdef DEBUG |
510 |
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); |
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); |
787 |
break; |
break; |
788 |
|
|
789 |
|
|
|
#ifdef SUPPORT_UCP |
|
|
|
|
790 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
791 |
/* Check the next character by Unicode property. We will get here only |
/* Check the next character by Unicode property. We will get here only |
792 |
if the support is in the binary; otherwise a compile-time error occurs. |
if the support is in the binary; otherwise a compile-time error occurs. |
793 |
*/ |
*/ |
794 |
|
|
795 |
|
#ifdef SUPPORT_UCP |
796 |
case OP_PROP: |
case OP_PROP: |
797 |
case OP_NOTPROP: |
case OP_NOTPROP: |
798 |
if (clen > 0) |
if (clen > 0) |
973 |
argument. It keeps the code above fast for the other cases. The argument |
argument. It keeps the code above fast for the other cases. The argument |
974 |
is in the d variable. */ |
is in the d variable. */ |
975 |
|
|
976 |
|
#ifdef SUPPORT_UCP |
977 |
case OP_PROP_EXTRA + OP_TYPEPLUS: |
case OP_PROP_EXTRA + OP_TYPEPLUS: |
978 |
case OP_PROP_EXTRA + OP_TYPEMINPLUS: |
case OP_PROP_EXTRA + OP_TYPEMINPLUS: |
979 |
case OP_PROP_EXTRA + OP_TYPEPOSPLUS: |
case OP_PROP_EXTRA + OP_TYPEPOSPLUS: |
1053 |
ADD_NEW_DATA(-state_offset, count, ncount); |
ADD_NEW_DATA(-state_offset, count, ncount); |
1054 |
} |
} |
1055 |
break; |
break; |
1056 |
|
#endif |
1057 |
|
|
1058 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
1059 |
case OP_ANYNL_EXTRA + OP_TYPEPLUS: |
case OP_ANYNL_EXTRA + OP_TYPEPLUS: |
1090 |
break; |
break; |
1091 |
|
|
1092 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
1093 |
|
#ifdef SUPPORT_UCP |
1094 |
case OP_PROP_EXTRA + OP_TYPEQUERY: |
case OP_PROP_EXTRA + OP_TYPEQUERY: |
1095 |
case OP_PROP_EXTRA + OP_TYPEMINQUERY: |
case OP_PROP_EXTRA + OP_TYPEMINQUERY: |
1096 |
case OP_PROP_EXTRA + OP_TYPEPOSQUERY: |
case OP_PROP_EXTRA + OP_TYPEPOSQUERY: |
1188 |
ADD_NEW_DATA(-(state_offset + count), 0, ncount); |
ADD_NEW_DATA(-(state_offset + count), 0, ncount); |
1189 |
} |
} |
1190 |
break; |
break; |
1191 |
|
#endif |
1192 |
|
|
1193 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
1194 |
case OP_ANYNL_EXTRA + OP_TYPEQUERY: |
case OP_ANYNL_EXTRA + OP_TYPEQUERY: |
1233 |
break; |
break; |
1234 |
|
|
1235 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
1236 |
|
#ifdef SUPPORT_UCP |
1237 |
case OP_PROP_EXTRA + OP_TYPEEXACT: |
case OP_PROP_EXTRA + OP_TYPEEXACT: |
1238 |
case OP_PROP_EXTRA + OP_TYPEUPTO: |
case OP_PROP_EXTRA + OP_TYPEUPTO: |
1239 |
case OP_PROP_EXTRA + OP_TYPEMINUPTO: |
case OP_PROP_EXTRA + OP_TYPEMINUPTO: |
1321 |
{ ADD_NEW_DATA(-state_offset, count, ncount); } |
{ ADD_NEW_DATA(-state_offset, count, ncount); } |
1322 |
} |
} |
1323 |
break; |
break; |
1324 |
|
#endif |
1325 |
|
|
1326 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
1327 |
case OP_ANYNL_EXTRA + OP_TYPEEXACT: |
case OP_ANYNL_EXTRA + OP_TYPEEXACT: |
2184 |
case PCRE_NEWLINE_CR+ |
case PCRE_NEWLINE_CR+ |
2185 |
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break; |
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break; |
2186 |
case PCRE_NEWLINE_ANY: newline = -1; break; |
case PCRE_NEWLINE_ANY: newline = -1; break; |
2187 |
|
case PCRE_NEWLINE_ANYCRLF: newline = -2; break; |
2188 |
default: return PCRE_ERROR_BADNEWLINE; |
default: return PCRE_ERROR_BADNEWLINE; |
2189 |
} |
} |
2190 |
|
|
2191 |
if (newline < 0) |
if (newline == -2) |
2192 |
|
{ |
2193 |
|
md->nltype = NLTYPE_ANYCRLF; |
2194 |
|
} |
2195 |
|
else if (newline < 0) |
2196 |
{ |
{ |
2197 |
md->nltype = NLTYPE_ANY; |
md->nltype = NLTYPE_ANY; |
2198 |
} |
} |
2323 |
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject)) |
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject)) |
2324 |
current_subject++; |
current_subject++; |
2325 |
|
|
2326 |
/* If we have just passed a CR and the newline option is ANY, and we |
/* If we have just passed a CR and the newline option is ANY or |
2327 |
are now at a LF, advance the match position by one more character. */ |
ANYCRLF, and we are now at a LF, advance the match position by one more |
2328 |
|
character. */ |
2329 |
|
|
2330 |
if (current_subject[-1] == '\r' && |
if (current_subject[-1] == '\r' && |
2331 |
md->nltype == NLTYPE_ANY && |
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
2332 |
current_subject < end_subject && |
current_subject < end_subject && |
2333 |
*current_subject == '\n') |
*current_subject == '\n') |
2334 |
current_subject++; |
current_subject++; |
2440 |
} |
} |
2441 |
if (current_subject > end_subject) break; |
if (current_subject > end_subject) break; |
2442 |
|
|
2443 |
/* If we have just passed a CR and the newline option is CRLF or ANY, and we |
/* If we have just passed a CR and the newline option is CRLF or ANY or |
2444 |
are now at a LF, advance the match position by one more character. */ |
ANYCRLF, and we are now at a LF, advance the match position by one more |
2445 |
|
character. */ |
2446 |
|
|
2447 |
if (current_subject[-1] == '\r' && |
if (current_subject[-1] == '\r' && |
2448 |
(md->nltype == NLTYPE_ANY || md->nllen == 2) && |
(md->nltype == NLTYPE_ANY || |
2449 |
|
md->nltype == NLTYPE_ANYCRLF || |
2450 |
|
md->nllen == 2) && |
2451 |
current_subject < end_subject && |
current_subject < end_subject && |
2452 |
*current_subject == '\n') |
*current_subject == '\n') |
2453 |
current_subject++; |
current_subject++; |