1009 |
|
|
1010 |
|
|
1011 |
/************************************************* |
/************************************************* |
1012 |
* Find forward referenced subpattern * |
* Subroutine for finding forward reference * |
1013 |
*************************************************/ |
*************************************************/ |
1014 |
|
|
1015 |
/* This function scans along a pattern's text looking for capturing |
/* This recursive function is called only from find_parens() below. The |
1016 |
|
top-level call starts at the beginning of the pattern. All other calls must |
1017 |
|
start at a parenthesis. It scans along a pattern's text looking for capturing |
1018 |
subpatterns, and counting them. If it finds a named pattern that matches the |
subpatterns, and counting them. If it finds a named pattern that matches the |
1019 |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
1020 |
returns when it reaches a given numbered subpattern. This is used for forward |
returns when it reaches a given numbered subpattern. We know that if (?P< is |
1021 |
references to subpatterns. We know that if (?P< is encountered, the name will |
encountered, the name will be terminated by '>' because that is checked in the |
1022 |
be terminated by '>' because that is checked in the first pass. |
first pass. Recursion is used to keep track of subpatterns that reset the |
1023 |
|
capturing group numbers - the (?| feature. |
1024 |
|
|
1025 |
Arguments: |
Arguments: |
1026 |
ptr current position in the pattern |
ptrptr address of the current character pointer (updated) |
1027 |
cd compile background data |
cd compile background data |
1028 |
name name to seek, or NULL if seeking a numbered subpattern |
name name to seek, or NULL if seeking a numbered subpattern |
1029 |
lorn name length, or subpattern number if name is NULL |
lorn name length, or subpattern number if name is NULL |
1030 |
xmode TRUE if we are in /x mode |
xmode TRUE if we are in /x mode |
1031 |
|
count pointer to the current capturing subpattern number (updated) |
1032 |
|
|
1033 |
Returns: the number of the named subpattern, or -1 if not found |
Returns: the number of the named subpattern, or -1 if not found |
1034 |
*/ |
*/ |
1035 |
|
|
1036 |
static int |
static int |
1037 |
find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn, |
find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn, |
1038 |
BOOL xmode) |
BOOL xmode, int *count) |
1039 |
{ |
{ |
1040 |
const uschar *thisname; |
uschar *ptr = *ptrptr; |
1041 |
int count = cd->bracount; |
int start_count = *count; |
1042 |
|
int hwm_count = start_count; |
1043 |
|
BOOL dup_parens = FALSE; |
1044 |
|
|
1045 |
for (; *ptr != 0; ptr++) |
/* If the first character is a parenthesis, check on the type of group we are |
1046 |
|
dealing with. The very first call may not start with a parenthesis. */ |
1047 |
|
|
1048 |
|
if (ptr[0] == CHAR_LEFT_PARENTHESIS) |
1049 |
{ |
{ |
1050 |
int term; |
if (ptr[1] == CHAR_QUESTION_MARK && |
1051 |
|
ptr[2] == CHAR_VERTICAL_LINE) |
1052 |
|
{ |
1053 |
|
ptr += 3; |
1054 |
|
dup_parens = TRUE; |
1055 |
|
} |
1056 |
|
|
1057 |
|
/* Handle a normal, unnamed capturing parenthesis */ |
1058 |
|
|
1059 |
|
else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK) |
1060 |
|
{ |
1061 |
|
*count += 1; |
1062 |
|
if (name == NULL && *count == lorn) return *count; |
1063 |
|
ptr++; |
1064 |
|
} |
1065 |
|
|
1066 |
|
/* Handle a condition. If it is an assertion, just carry on so that it |
1067 |
|
is processed as normal. If not, skip to the closing parenthesis of the |
1068 |
|
condition (there can't be any nested parens). */ |
1069 |
|
|
1070 |
|
else if (ptr[2] == CHAR_LEFT_PARENTHESIS) |
1071 |
|
{ |
1072 |
|
ptr += 2; |
1073 |
|
if (ptr[1] != CHAR_QUESTION_MARK) |
1074 |
|
{ |
1075 |
|
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
1076 |
|
if (*ptr != 0) ptr++; |
1077 |
|
} |
1078 |
|
} |
1079 |
|
|
1080 |
|
/* We have either (? or (* and not a condition */ |
1081 |
|
|
1082 |
|
else |
1083 |
|
{ |
1084 |
|
ptr += 2; |
1085 |
|
if (*ptr == CHAR_P) ptr++; /* Allow optional P */ |
1086 |
|
|
1087 |
|
/* We have to disambiguate (?<! and (?<= from (?<name> for named groups */ |
1088 |
|
|
1089 |
|
if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && |
1090 |
|
ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) |
1091 |
|
{ |
1092 |
|
int term; |
1093 |
|
const uschar *thisname; |
1094 |
|
*count += 1; |
1095 |
|
if (name == NULL && *count == lorn) return *count; |
1096 |
|
term = *ptr++; |
1097 |
|
if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; |
1098 |
|
thisname = ptr; |
1099 |
|
while (*ptr != term) ptr++; |
1100 |
|
if (name != NULL && lorn == ptr - thisname && |
1101 |
|
strncmp((const char *)name, (const char *)thisname, lorn) == 0) |
1102 |
|
return *count; |
1103 |
|
} |
1104 |
|
} |
1105 |
|
} |
1106 |
|
|
1107 |
|
/* Past any initial parenthesis handling, scan for parentheses or vertical |
1108 |
|
bars. */ |
1109 |
|
|
1110 |
|
for (; *ptr != 0; ptr++) |
1111 |
|
{ |
1112 |
/* Skip over backslashed characters and also entire \Q...\E */ |
/* Skip over backslashed characters and also entire \Q...\E */ |
1113 |
|
|
1114 |
if (*ptr == CHAR_BACKSLASH) |
if (*ptr == CHAR_BACKSLASH) |
1115 |
{ |
{ |
1116 |
if (*(++ptr) == 0) return -1; |
if (*(++ptr) == 0) goto FAIL_EXIT; |
1117 |
if (*ptr == CHAR_Q) for (;;) |
if (*ptr == CHAR_Q) for (;;) |
1118 |
{ |
{ |
1119 |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
1120 |
if (*ptr == 0) return -1; |
if (*ptr == 0) goto FAIL_EXIT; |
1121 |
if (*(++ptr) == CHAR_E) break; |
if (*(++ptr) == CHAR_E) break; |
1122 |
} |
} |
1123 |
continue; |
continue; |
1162 |
if (*ptr == 0) return -1; |
if (*ptr == 0) return -1; |
1163 |
if (*ptr == CHAR_BACKSLASH) |
if (*ptr == CHAR_BACKSLASH) |
1164 |
{ |
{ |
1165 |
if (*(++ptr) == 0) return -1; |
if (*(++ptr) == 0) goto FAIL_EXIT; |
1166 |
if (*ptr == CHAR_Q) for (;;) |
if (*ptr == CHAR_Q) for (;;) |
1167 |
{ |
{ |
1168 |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
1169 |
if (*ptr == 0) return -1; |
if (*ptr == 0) goto FAIL_EXIT; |
1170 |
if (*(++ptr) == CHAR_E) break; |
if (*(++ptr) == CHAR_E) break; |
1171 |
} |
} |
1172 |
continue; |
continue; |
1180 |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
1181 |
{ |
{ |
1182 |
while (*(++ptr) != 0 && *ptr != CHAR_NL) {}; |
while (*(++ptr) != 0 && *ptr != CHAR_NL) {}; |
1183 |
if (*ptr == 0) return -1; |
if (*ptr == 0) goto FAIL_EXIT; |
1184 |
continue; |
continue; |
1185 |
} |
} |
1186 |
|
|
1187 |
/* An opening parens must now be a real metacharacter */ |
/* Check for the special metacharacters */ |
1188 |
|
|
1189 |
if (*ptr != CHAR_LEFT_PARENTHESIS) continue; |
if (*ptr == CHAR_LEFT_PARENTHESIS) |
1190 |
if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK) |
{ |
1191 |
|
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count); |
1192 |
|
if (rc > 0) return rc; |
1193 |
|
if (*ptr == 0) goto FAIL_EXIT; |
1194 |
|
} |
1195 |
|
|
1196 |
|
else if (*ptr == CHAR_RIGHT_PARENTHESIS) |
1197 |
{ |
{ |
1198 |
count++; |
if (dup_parens && *count < hwm_count) *count = hwm_count; |
1199 |
if (name == NULL && count == lorn) return count; |
*ptrptr = ptr; |
1200 |
continue; |
return -1; |
1201 |
} |
} |
1202 |
|
|
1203 |
|
else if (*ptr == CHAR_VERTICAL_LINE && dup_parens) |
1204 |
|
{ |
1205 |
|
if (*count > hwm_count) hwm_count = *count; |
1206 |
|
*count = start_count; |
1207 |
|
} |
1208 |
|
} |
1209 |
|
|
1210 |
ptr += 2; |
FAIL_EXIT: |
1211 |
if (*ptr == CHAR_P) ptr++; /* Allow optional P */ |
*ptrptr = ptr; |
1212 |
|
return -1; |
1213 |
|
} |
1214 |
|
|
|
/* We have to disambiguate (?<! and (?<= from (?<name> */ |
|
1215 |
|
|
|
if ((*ptr != CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_EXCLAMATION_MARK || |
|
|
ptr[1] == CHAR_EQUALS_SIGN) && *ptr != CHAR_APOSTROPHE) |
|
|
continue; |
|
1216 |
|
|
|
count++; |
|
1217 |
|
|
1218 |
if (name == NULL && count == lorn) return count; |
/************************************************* |
1219 |
term = *ptr++; |
* Find forward referenced subpattern * |
1220 |
if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; |
*************************************************/ |
|
thisname = ptr; |
|
|
while (*ptr != term) ptr++; |
|
|
if (name != NULL && lorn == ptr - thisname && |
|
|
strncmp((const char *)name, (const char *)thisname, lorn) == 0) |
|
|
return count; |
|
|
} |
|
1221 |
|
|
1222 |
return -1; |
/* This function scans along a pattern's text looking for capturing |
1223 |
|
subpatterns, and counting them. If it finds a named pattern that matches the |
1224 |
|
name it is given, it returns its number. Alternatively, if the name is NULL, it |
1225 |
|
returns when it reaches a given numbered subpattern. This is used for forward |
1226 |
|
references to subpatterns. We used to be able to start this scan from the |
1227 |
|
current compiling point, using the current count value from cd->bracount, and |
1228 |
|
do it all in a single loop, but the addition of the possibility of duplicate |
1229 |
|
subpattern numbers means that we have to scan from the very start, in order to |
1230 |
|
take account of such duplicates, and to use a recursive function to keep track |
1231 |
|
of the different types of group. |
1232 |
|
|
1233 |
|
Arguments: |
1234 |
|
cd compile background data |
1235 |
|
name name to seek, or NULL if seeking a numbered subpattern |
1236 |
|
lorn name length, or subpattern number if name is NULL |
1237 |
|
xmode TRUE if we are in /x mode |
1238 |
|
|
1239 |
|
Returns: the number of the found subpattern, or -1 if not found |
1240 |
|
*/ |
1241 |
|
|
1242 |
|
static int |
1243 |
|
find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode) |
1244 |
|
{ |
1245 |
|
uschar *ptr = (uschar *)cd->start_pattern; |
1246 |
|
int count = 0; |
1247 |
|
int rc; |
1248 |
|
|
1249 |
|
/* If the pattern does not start with an opening parenthesis, the first call |
1250 |
|
to find_parens_sub() will scan right to the end (if necessary). However, if it |
1251 |
|
does start with a parenthesis, find_parens_sub() will return when it hits the |
1252 |
|
matching closing parens. That is why we have to have a loop. */ |
1253 |
|
|
1254 |
|
for (;;) |
1255 |
|
{ |
1256 |
|
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count); |
1257 |
|
if (rc > 0 || *ptr++ == 0) break; |
1258 |
|
} |
1259 |
|
|
1260 |
|
return rc; |
1261 |
} |
} |
1262 |
|
|
1263 |
|
|
1264 |
|
|
1265 |
|
|
1266 |
/************************************************* |
/************************************************* |
1267 |
* Find first significant op code * |
* Find first significant op code * |
1268 |
*************************************************/ |
*************************************************/ |
4601 |
|
|
4602 |
/* Search the pattern for a forward reference */ |
/* Search the pattern for a forward reference */ |
4603 |
|
|
4604 |
else if ((i = find_parens(ptr, cd, name, namelen, |
else if ((i = find_parens(cd, name, namelen, |
4605 |
(options & PCRE_EXTENDED) != 0)) > 0) |
(options & PCRE_EXTENDED) != 0)) > 0) |
4606 |
{ |
{ |
4607 |
PUT2(code, 2+LINK_SIZE, i); |
PUT2(code, 2+LINK_SIZE, i); |
4900 |
recno = GET2(slot, 0); |
recno = GET2(slot, 0); |
4901 |
} |
} |
4902 |
else if ((recno = /* Forward back reference */ |
else if ((recno = /* Forward back reference */ |
4903 |
find_parens(ptr, cd, name, namelen, |
find_parens(cd, name, namelen, |
4904 |
(options & PCRE_EXTENDED) != 0)) <= 0) |
(options & PCRE_EXTENDED) != 0)) <= 0) |
4905 |
{ |
{ |
4906 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
5010 |
|
|
5011 |
if (called == NULL) |
if (called == NULL) |
5012 |
{ |
{ |
5013 |
if (find_parens(ptr, cd, NULL, recno, |
if (find_parens(cd, NULL, recno, |
5014 |
(options & PCRE_EXTENDED) != 0) < 0) |
(options & PCRE_EXTENDED) != 0) < 0) |
5015 |
{ |
{ |
5016 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |