115 |
#define COMPILE_WORK_SIZE (2048*LINK_SIZE) |
#define COMPILE_WORK_SIZE (2048*LINK_SIZE) |
116 |
#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE) |
#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE) |
117 |
|
|
118 |
|
/* This value determines the size of the initial vector that is used for |
119 |
|
remembering named groups during the pre-compile. It is allocated on the stack, |
120 |
|
but if it is too small, it is expanded using malloc(), in a similar way to the |
121 |
|
workspace. The value is the number of slots in the list. */ |
122 |
|
|
123 |
|
#define NAMED_GROUP_LIST_SIZE 20 |
124 |
|
|
125 |
/* The overrun tests check for a slightly smaller size so that they detect the |
/* The overrun tests check for a slightly smaller size so that they detect the |
126 |
overrun before it actually does run off the end of the data block. */ |
overrun before it actually does run off the end of the data block. */ |
127 |
|
|
1365 |
|
|
1366 |
|
|
1367 |
/************************************************* |
/************************************************* |
|
* Subroutine for finding forward reference * |
|
|
*************************************************/ |
|
|
|
|
|
/* This recursive function is called only from find_parens() below. The |
|
|
top-level call starts at the beginning of the pattern. All other calls must |
|
|
start at a parenthesis. It scans along a pattern's text looking for capturing |
|
|
subpatterns, and counting them. If it finds a named pattern that matches the |
|
|
name it is given, it returns its number. Alternatively, if the name is NULL, it |
|
|
returns when it reaches a given numbered subpattern. Recursion is used to keep |
|
|
track of subpatterns that reset the capturing group numbers - the (?| feature. |
|
|
|
|
|
This function was originally called only from the second pass, in which we know |
|
|
that if (?< or (?' or (?P< is encountered, the name will be correctly |
|
|
terminated because that is checked in the first pass. There is now one call to |
|
|
this function in the first pass, to check for a recursive back reference by |
|
|
name (so that we can make the whole group atomic). In this case, we need check |
|
|
only up to the current position in the pattern, and that is still OK because |
|
|
any previous occurrences will have been checked. To make this work, the test |
|
|
for "end of pattern" is a check against cd->end_pattern in the main loop, |
|
|
instead of looking for a binary zero. This means that the special first-pass |
|
|
call can adjust cd->end_pattern temporarily. (Checks for binary zero while |
|
|
processing items within the loop are OK, because afterwards the main loop will |
|
|
terminate.) |
|
|
|
|
|
Arguments: |
|
|
ptrptr address of the current character pointer (updated) |
|
|
cd compile background data |
|
|
name name to seek, or NULL if seeking a numbered subpattern |
|
|
lorn name length, or subpattern number if name is NULL |
|
|
xmode TRUE if we are in /x mode |
|
|
utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode |
|
|
count pointer to the current capturing subpattern number (updated) |
|
|
|
|
|
Returns: the number of the named subpattern, or -1 if not found |
|
|
*/ |
|
|
|
|
|
static int |
|
|
find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn, |
|
|
BOOL xmode, BOOL utf, int *count) |
|
|
{ |
|
|
pcre_uchar *ptr = *ptrptr; |
|
|
int start_count = *count; |
|
|
int hwm_count = start_count; |
|
|
BOOL dup_parens = FALSE; |
|
|
|
|
|
/* If the first character is a parenthesis, check on the type of group we are |
|
|
dealing with. The very first call may not start with a parenthesis. */ |
|
|
|
|
|
if (ptr[0] == CHAR_LEFT_PARENTHESIS) |
|
|
{ |
|
|
/* Handle specials such as (*SKIP) or (*UTF8) etc. */ |
|
|
|
|
|
if (ptr[1] == CHAR_ASTERISK) |
|
|
{ |
|
|
ptr += 2; |
|
|
while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
|
|
} |
|
|
|
|
|
/* Handle a normal, unnamed capturing parenthesis. */ |
|
|
|
|
|
else if (ptr[1] != CHAR_QUESTION_MARK) |
|
|
{ |
|
|
*count += 1; |
|
|
if (name == NULL && *count == lorn) return *count; |
|
|
ptr++; |
|
|
} |
|
|
|
|
|
/* All cases now have (? at the start. Remember when we are in a group |
|
|
where the parenthesis numbers are duplicated. */ |
|
|
|
|
|
else if (ptr[2] == CHAR_VERTICAL_LINE) |
|
|
{ |
|
|
ptr += 3; |
|
|
dup_parens = TRUE; |
|
|
} |
|
|
|
|
|
/* Handle comments; all characters are allowed until a ket is reached. */ |
|
|
|
|
|
else if (ptr[2] == CHAR_NUMBER_SIGN) |
|
|
{ |
|
|
for (ptr += 3; *ptr != CHAR_NULL; ptr++) |
|
|
if (*ptr == CHAR_RIGHT_PARENTHESIS) break; |
|
|
goto FAIL_EXIT; |
|
|
} |
|
|
|
|
|
/* Handle a condition. If it is an assertion, just carry on so that it |
|
|
is processed as normal. If not, skip to the closing parenthesis of the |
|
|
condition (there can't be any nested parens). */ |
|
|
|
|
|
else if (ptr[2] == CHAR_LEFT_PARENTHESIS) |
|
|
{ |
|
|
ptr += 2; |
|
|
if (ptr[1] != CHAR_QUESTION_MARK) |
|
|
{ |
|
|
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
|
|
if (*ptr != CHAR_NULL) ptr++; |
|
|
} |
|
|
} |
|
|
|
|
|
/* Start with (? but not a condition. */ |
|
|
|
|
|
else |
|
|
{ |
|
|
ptr += 2; |
|
|
if (*ptr == CHAR_P) ptr++; /* Allow optional P */ |
|
|
|
|
|
/* We have to disambiguate (?<! and (?<= from (?<name> for named groups */ |
|
|
|
|
|
if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && |
|
|
ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) |
|
|
{ |
|
|
pcre_uchar term; |
|
|
const pcre_uchar *thisname; |
|
|
*count += 1; |
|
|
if (name == NULL && *count == lorn) return *count; |
|
|
term = *ptr++; |
|
|
if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; |
|
|
thisname = ptr; |
|
|
while (*ptr != term) ptr++; |
|
|
if (name != NULL && lorn == (int)(ptr - thisname) && |
|
|
STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0) |
|
|
return *count; |
|
|
term++; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
/* Past any initial parenthesis handling, scan for parentheses or vertical |
|
|
bars. Stop if we get to cd->end_pattern. Note that this is important for the |
|
|
first-pass call when this value is temporarily adjusted to stop at the current |
|
|
position. So DO NOT change this to a test for binary zero. */ |
|
|
|
|
|
for (; ptr < cd->end_pattern; ptr++) |
|
|
{ |
|
|
/* Skip over backslashed characters and also entire \Q...\E */ |
|
|
|
|
|
if (*ptr == CHAR_BACKSLASH) |
|
|
{ |
|
|
if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT; |
|
|
if (*ptr == CHAR_Q) for (;;) |
|
|
{ |
|
|
while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {}; |
|
|
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
|
|
if (*(++ptr) == CHAR_E) break; |
|
|
} |
|
|
continue; |
|
|
} |
|
|
|
|
|
/* Skip over character classes; this logic must be similar to the way they |
|
|
are handled for real. If the first character is '^', skip it. Also, if the |
|
|
first few characters (either before or after ^) are \Q\E or \E we skip them |
|
|
too. This makes for compatibility with Perl. Note the use of STR macros to |
|
|
encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */ |
|
|
|
|
|
if (*ptr == CHAR_LEFT_SQUARE_BRACKET) |
|
|
{ |
|
|
BOOL negate_class = FALSE; |
|
|
for (;;) |
|
|
{ |
|
|
if (ptr[1] == CHAR_BACKSLASH) |
|
|
{ |
|
|
if (ptr[2] == CHAR_E) |
|
|
ptr+= 2; |
|
|
else if (STRNCMP_UC_C8(ptr + 2, |
|
|
STR_Q STR_BACKSLASH STR_E, 3) == 0) |
|
|
ptr += 4; |
|
|
else |
|
|
break; |
|
|
} |
|
|
else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT) |
|
|
{ |
|
|
negate_class = TRUE; |
|
|
ptr++; |
|
|
} |
|
|
else break; |
|
|
} |
|
|
|
|
|
/* If the next character is ']', it is a data character that must be |
|
|
skipped, except in JavaScript compatibility mode. */ |
|
|
|
|
|
if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET && |
|
|
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0) |
|
|
ptr++; |
|
|
|
|
|
while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET) |
|
|
{ |
|
|
if (*ptr == CHAR_NULL) return -1; |
|
|
if (*ptr == CHAR_BACKSLASH) |
|
|
{ |
|
|
if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT; |
|
|
if (*ptr == CHAR_Q) for (;;) |
|
|
{ |
|
|
while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {}; |
|
|
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
|
|
if (*(++ptr) == CHAR_E) break; |
|
|
} |
|
|
continue; |
|
|
} |
|
|
} |
|
|
continue; |
|
|
} |
|
|
|
|
|
/* Skip comments in /x mode */ |
|
|
|
|
|
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
|
|
{ |
|
|
ptr++; |
|
|
while (*ptr != CHAR_NULL) |
|
|
{ |
|
|
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
|
|
ptr++; |
|
|
#ifdef SUPPORT_UTF |
|
|
if (utf) FORWARDCHAR(ptr); |
|
|
#endif |
|
|
} |
|
|
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
|
|
continue; |
|
|
} |
|
|
|
|
|
/* Check for the special metacharacters */ |
|
|
|
|
|
if (*ptr == CHAR_LEFT_PARENTHESIS) |
|
|
{ |
|
|
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count); |
|
|
if (rc > 0) return rc; |
|
|
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
|
|
} |
|
|
|
|
|
else if (*ptr == CHAR_RIGHT_PARENTHESIS) |
|
|
{ |
|
|
if (dup_parens && *count < hwm_count) *count = hwm_count; |
|
|
goto FAIL_EXIT; |
|
|
} |
|
|
|
|
|
else if (*ptr == CHAR_VERTICAL_LINE && dup_parens) |
|
|
{ |
|
|
if (*count > hwm_count) hwm_count = *count; |
|
|
*count = start_count; |
|
|
} |
|
|
} |
|
|
|
|
|
FAIL_EXIT: |
|
|
*ptrptr = ptr; |
|
|
return -1; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/************************************************* |
|
|
* Find forward referenced subpattern * |
|
|
*************************************************/ |
|
|
|
|
|
/* This function scans along a pattern's text looking for capturing |
|
|
subpatterns, and counting them. If it finds a named pattern that matches the |
|
|
name it is given, it returns its number. Alternatively, if the name is NULL, it |
|
|
returns when it reaches a given numbered subpattern. This is used for forward |
|
|
references to subpatterns. We used to be able to start this scan from the |
|
|
current compiling point, using the current count value from cd->bracount, and |
|
|
do it all in a single loop, but the addition of the possibility of duplicate |
|
|
subpattern numbers means that we have to scan from the very start, in order to |
|
|
take account of such duplicates, and to use a recursive function to keep track |
|
|
of the different types of group. |
|
|
|
|
|
Arguments: |
|
|
cd compile background data |
|
|
name name to seek, or NULL if seeking a numbered subpattern |
|
|
lorn name length, or subpattern number if name is NULL |
|
|
xmode TRUE if we are in /x mode |
|
|
utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode |
|
|
|
|
|
Returns: the number of the found subpattern, or -1 if not found |
|
|
*/ |
|
|
|
|
|
static int |
|
|
find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode, |
|
|
BOOL utf) |
|
|
{ |
|
|
pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern; |
|
|
int count = 0; |
|
|
int rc; |
|
|
|
|
|
/* If the pattern does not start with an opening parenthesis, the first call |
|
|
to find_parens_sub() will scan right to the end (if necessary). However, if it |
|
|
does start with a parenthesis, find_parens_sub() will return when it hits the |
|
|
matching closing parens. That is why we have to have a loop. */ |
|
|
|
|
|
for (;;) |
|
|
{ |
|
|
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count); |
|
|
if (rc > 0 || *ptr++ == CHAR_NULL) break; |
|
|
} |
|
|
|
|
|
return rc; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/************************************************* |
|
1368 |
* Find first significant op code * |
* Find first significant op code * |
1369 |
*************************************************/ |
*************************************************/ |
1370 |
|
|
5656 |
slot += cd->name_entry_size; |
slot += cd->name_entry_size; |
5657 |
} |
} |
5658 |
|
|
5659 |
/* Found a previous named subpattern */ |
/* Found the named subpattern */ |
5660 |
|
|
5661 |
if (i < cd->names_found) |
if (i < cd->names_found) |
5662 |
{ |
{ |
5665 |
code[1+LINK_SIZE]++; |
code[1+LINK_SIZE]++; |
5666 |
} |
} |
5667 |
|
|
|
/* Search the pattern for a forward reference */ |
|
|
|
|
|
else if ((i = find_parens(cd, name, namelen, |
|
|
(options & PCRE_EXTENDED) != 0, utf)) > 0) |
|
|
{ |
|
|
PUT2(code, 2+LINK_SIZE, i); |
|
|
code[1+LINK_SIZE]++; |
|
|
} |
|
|
|
|
5668 |
/* If terminator == CHAR_NULL it means that the name followed directly |
/* If terminator == CHAR_NULL it means that the name followed directly |
5669 |
after the opening parenthesis [e.g. (?(abc)...] and in this case there |
after the opening parenthesis [e.g. (?(abc)...] and in this case there |
5670 |
are some further alternatives to try. For the cases where terminator != |
are some further alternatives to try. For the cases where terminator != |
5828 |
/* ------------------------------------------------------------ */ |
/* ------------------------------------------------------------ */ |
5829 |
DEFINE_NAME: /* Come here from (?< handling */ |
DEFINE_NAME: /* Come here from (?< handling */ |
5830 |
case CHAR_APOSTROPHE: |
case CHAR_APOSTROPHE: |
5831 |
{ |
terminator = (*ptr == CHAR_LESS_THAN_SIGN)? |
5832 |
terminator = (*ptr == CHAR_LESS_THAN_SIGN)? |
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; |
5833 |
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; |
name = ++ptr; |
|
name = ++ptr; |
|
5834 |
|
|
5835 |
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; |
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; |
5836 |
namelen = (int)(ptr - name); |
namelen = (int)(ptr - name); |
5837 |
|
|
5838 |
/* In the pre-compile phase, just do a syntax check. */ |
/* In the pre-compile phase, do a syntax check, remember the longest |
5839 |
|
name, and then remember the group in a vector, expanding it if |
5840 |
|
necessary. Duplicates for the same number are skipped; other duplicates |
5841 |
|
are checked for validity. In the actual compile, there is nothing to |
5842 |
|
do. */ |
5843 |
|
|
5844 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
5845 |
|
{ |
5846 |
|
named_group *ng; |
5847 |
|
pcre_uint32 number = cd->bracount + 1; |
5848 |
|
|
5849 |
|
if (*ptr != (pcre_uchar)terminator) |
5850 |
{ |
{ |
5851 |
if (*ptr != (pcre_uchar)terminator) |
*errorcodeptr = ERR42; |
5852 |
{ |
goto FAILED; |
5853 |
*errorcodeptr = ERR42; |
} |
5854 |
goto FAILED; |
|
5855 |
} |
if (cd->names_found >= MAX_NAME_COUNT) |
5856 |
if (cd->names_found >= MAX_NAME_COUNT) |
{ |
5857 |
|
*errorcodeptr = ERR49; |
5858 |
|
goto FAILED; |
5859 |
|
} |
5860 |
|
|
5861 |
|
if (namelen + IMM2_SIZE + 1 > cd->name_entry_size) |
5862 |
|
{ |
5863 |
|
cd->name_entry_size = namelen + IMM2_SIZE + 1; |
5864 |
|
if (namelen > MAX_NAME_SIZE) |
5865 |
{ |
{ |
5866 |
*errorcodeptr = ERR49; |
*errorcodeptr = ERR48; |
5867 |
goto FAILED; |
goto FAILED; |
5868 |
} |
} |
|
if (namelen + IMM2_SIZE + 1 > cd->name_entry_size) |
|
|
{ |
|
|
cd->name_entry_size = namelen + IMM2_SIZE + 1; |
|
|
if (namelen > MAX_NAME_SIZE) |
|
|
{ |
|
|
*errorcodeptr = ERR48; |
|
|
goto FAILED; |
|
|
} |
|
|
} |
|
5869 |
} |
} |
5870 |
|
|
5871 |
/* In the real compile, create the entry in the table, maintaining |
/* Scan the list to check for duplicates. For duplicate names, if the |
5872 |
alphabetical order. Duplicate names for different numbers are |
number is the same, break the loop, which causes the name to be |
5873 |
permitted only if PCRE_DUPNAMES is set. Duplicate names for the same |
discarded; otherwise, if DUPNAMES is not set, give an error. |
5874 |
number are always OK. (An existing number can be re-used if (?| |
If it is set, allow the name with a different number, but continue |
5875 |
appears in the pattern.) In either event, a duplicate name results in |
scanning in case this is a duplicate with the same number. For |
5876 |
a duplicate entry in the table, even if the number is the same. This |
non-duplicate names, give an error if the number is duplicated. */ |
5877 |
is because the number of names, and hence the table size, is computed |
|
5878 |
in the pre-compile, and it affects various numbers and pointers which |
ng = cd->named_groups; |
5879 |
would all have to be modified, and the compiled code moved down, if |
for (i = 0; i < cd->names_found; i++, ng++) |
5880 |
duplicates with the same number were omitted from the table. This |
{ |
5881 |
doesn't seem worth the hassle. However, *different* names for the |
if (namelen == ng->length && |
5882 |
same number are not permitted. */ |
STRNCMP_UC_UC(name, ng->name, namelen) == 0) |
5883 |
|
{ |
5884 |
else |
if (ng->number == number) break; |
5885 |
{ |
if ((options & PCRE_DUPNAMES) == 0) |
|
BOOL dupname = FALSE; |
|
|
slot = cd->name_table; |
|
|
|
|
|
for (i = 0; i < cd->names_found; i++) |
|
|
{ |
|
|
int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen)); |
|
|
if (crc == 0) |
|
|
{ |
|
|
if (slot[IMM2_SIZE+namelen] == 0) |
|
|
{ |
|
|
if (GET2(slot, 0) != cd->bracount + 1 && |
|
|
(options & PCRE_DUPNAMES) == 0) |
|
|
{ |
|
|
*errorcodeptr = ERR43; |
|
|
goto FAILED; |
|
|
} |
|
|
else dupname = TRUE; |
|
|
} |
|
|
else crc = -1; /* Current name is a substring */ |
|
|
} |
|
|
|
|
|
/* Make space in the table and break the loop for an earlier |
|
|
name. For a duplicate or later name, carry on. We do this for |
|
|
duplicates so that in the simple case (when ?(| is not used) they |
|
|
are in order of their numbers. */ |
|
|
|
|
|
if (crc < 0) |
|
5886 |
{ |
{ |
5887 |
memmove(slot + cd->name_entry_size, slot, |
*errorcodeptr = ERR43; |
5888 |
IN_UCHARS((cd->names_found - i) * cd->name_entry_size)); |
goto FAILED; |
5889 |
break; |
} |
5890 |
} |
} |
5891 |
|
else if (ng->number == number) |
5892 |
/* Continue the loop for a later or duplicate name */ |
{ |
5893 |
|
*errorcodeptr = ERR65; |
5894 |
slot += cd->name_entry_size; |
goto FAILED; |
5895 |
} |
} |
5896 |
|
} |
|
/* For non-duplicate names, check for a duplicate number before |
|
|
adding the new name. */ |
|
5897 |
|
|
5898 |
if (!dupname) |
if (i >= cd->names_found) /* Not a duplicate with same number */ |
5899 |
|
{ |
5900 |
|
/* Increase the list size if necessary */ |
5901 |
|
|
5902 |
|
if (cd->names_found >= cd->named_group_list_size) |
5903 |
{ |
{ |
5904 |
pcre_uchar *cslot = cd->name_table; |
int newsize = cd->named_group_list_size * 2; |
5905 |
for (i = 0; i < cd->names_found; i++) |
named_group *newspace = (PUBL(malloc)) |
5906 |
|
(newsize * sizeof(named_group)); |
5907 |
|
|
5908 |
|
if (newspace == NULL) |
5909 |
{ |
{ |
5910 |
if (cslot != slot) |
*errorcodeptr = ERR21; |
5911 |
{ |
goto FAILED; |
5912 |
if (GET2(cslot, 0) == cd->bracount + 1) |
} |
5913 |
{ |
|
5914 |
*errorcodeptr = ERR65; |
memcpy(newspace, cd->named_groups, |
5915 |
goto FAILED; |
cd->named_group_list_size * sizeof(named_group)); |
5916 |
} |
if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE) |
5917 |
} |
(PUBL(free))((void *)cd->named_groups); |
5918 |
else i--; |
cd->named_groups = newspace; |
5919 |
cslot += cd->name_entry_size; |
cd->named_group_list_size = newsize; |
5920 |
} |
} |
5921 |
} |
|
5922 |
|
cd->named_groups[cd->names_found].name = name; |
5923 |
PUT2(slot, 0, cd->bracount + 1); |
cd->named_groups[cd->names_found].length = namelen; |
5924 |
memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen)); |
cd->named_groups[cd->names_found].number = number; |
5925 |
slot[IMM2_SIZE + namelen] = 0; |
cd->names_found++; |
5926 |
} |
} |
5927 |
} |
} |
5928 |
|
|
5929 |
/* In both pre-compile and compile, count the number of names we've |
ptr++; /* Move past > or ' in both passes. */ |
|
encountered. */ |
|
|
|
|
|
cd->names_found++; |
|
|
ptr++; /* Move past > or ' */ |
|
5930 |
goto NUMBERED_GROUP; |
goto NUMBERED_GROUP; |
5931 |
|
|
5932 |
|
|
5956 |
|
|
5957 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
5958 |
{ |
{ |
5959 |
const pcre_uchar *temp; |
named_group *ng; |
5960 |
|
|
5961 |
if (namelen == 0) |
if (namelen == 0) |
5962 |
{ |
{ |
5963 |
*errorcodeptr = ERR62; |
*errorcodeptr = ERR62; |
5974 |
goto FAILED; |
goto FAILED; |
5975 |
} |
} |
5976 |
|
|
5977 |
/* The name table does not exist in the first pass, so we cannot |
/* The name table does not exist in the first pass; instead we must |
5978 |
do a simple search as in the code below. Instead, we have to scan the |
scan the list of names encountered so far in order to get the |
5979 |
pattern to find the number. It is important that we scan it only as |
number. The number may be negative if it is for a name that may be |
5980 |
far as we have got because the syntax of named subpatterns has not |
duplicated. If the name is not found, set the value to 0 for a |
5981 |
been checked for the rest of the pattern, and find_parens() assumes |
forward reference. */ |
5982 |
correct syntax. In any case, it's a waste of resources to scan |
|
5983 |
further. We stop the scan at the current point by temporarily |
ng = cd->named_groups; |
5984 |
adjusting the value of cd->endpattern. */ |
for (i = 0; i < cd->names_found; i++, ng++) |
5985 |
|
{ |
5986 |
temp = cd->end_pattern; |
if (namelen == ng->length && |
5987 |
cd->end_pattern = ptr; |
STRNCMP_UC_UC(name, ng->name, namelen) == 0) |
5988 |
recno = find_parens(cd, name, namelen, |
break; |
5989 |
(options & PCRE_EXTENDED) != 0, utf); |
} |
5990 |
cd->end_pattern = temp; |
recno = (i < cd->names_found)? ng->number : 0; |
|
if (recno < 0) recno = 0; /* Forward ref; set dummy number */ |
|
5991 |
} |
} |
5992 |
|
|
5993 |
/* In the real compile, seek the name in the table. We check the name |
/* In the real compile, search the name table. We check the name |
5994 |
first, and then check that we have reached the end of the name in the |
first, and then check that we have reached the end of the name in the |
5995 |
table. That way, if the name that is longer than any in the table, |
table. That way, if the name is longer than any in the table, the |
5996 |
the comparison will fail without reading beyond the table entry. */ |
comparison will fail without reading beyond the table entry. */ |
5997 |
|
|
5998 |
else |
else |
5999 |
{ |
{ |
6006 |
slot += cd->name_entry_size; |
slot += cd->name_entry_size; |
6007 |
} |
} |
6008 |
|
|
6009 |
if (i < cd->names_found) /* Back reference */ |
if (i < cd->names_found) |
6010 |
{ |
{ |
6011 |
recno = GET2(slot, 0); |
recno = GET2(slot, 0); |
6012 |
} |
} |
6013 |
else if ((recno = /* Forward back reference */ |
else |
|
find_parens(cd, name, namelen, |
|
|
(options & PCRE_EXTENDED) != 0, utf)) <= 0) |
|
6014 |
{ |
{ |
6015 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
6016 |
goto FAILED; |
goto FAILED; |
6120 |
|
|
6121 |
if (called == NULL) |
if (called == NULL) |
6122 |
{ |
{ |
6123 |
if (find_parens(cd, NULL, recno, |
if (recno > cd->final_bracount) |
|
(options & PCRE_EXTENDED) != 0, utf) < 0) |
|
6124 |
{ |
{ |
6125 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
6126 |
goto FAILED; |
goto FAILED; |
7465 |
|
|
7466 |
|
|
7467 |
/************************************************* |
/************************************************* |
7468 |
|
* Add an entry to the name/number table * |
7469 |
|
*************************************************/ |
7470 |
|
|
7471 |
|
/* This function is called between compiling passes to add an entry to the |
7472 |
|
name/number table, maintaining alphabetical order. Checking for permitted |
7473 |
|
and forbidden duplicates has already been done. |
7474 |
|
|
7475 |
|
Arguments: |
7476 |
|
cd the compile data block |
7477 |
|
name the name to add |
7478 |
|
length the length of the name |
7479 |
|
groupno the group number |
7480 |
|
|
7481 |
|
Returns: nothing |
7482 |
|
*/ |
7483 |
|
|
7484 |
|
static void |
7485 |
|
add_name(compile_data *cd, const pcre_uchar *name, int length, |
7486 |
|
unsigned int groupno) |
7487 |
|
{ |
7488 |
|
int i; |
7489 |
|
pcre_uchar *slot = cd->name_table; |
7490 |
|
|
7491 |
|
for (i = 0; i < cd->names_found; i++) |
7492 |
|
{ |
7493 |
|
int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length)); |
7494 |
|
if (crc == 0 && slot[IMM2_SIZE+length] != 0) |
7495 |
|
crc = -1; /* Current name is a substring */ |
7496 |
|
|
7497 |
|
/* Make space in the table and break the loop for an earlier name. For a |
7498 |
|
duplicate or later name, carry on. We do this for duplicates so that in the |
7499 |
|
simple case (when ?(| is not used) they are in order of their numbers. In all |
7500 |
|
cases they are in the order in which they appear in the pattern. */ |
7501 |
|
|
7502 |
|
if (crc < 0) |
7503 |
|
{ |
7504 |
|
memmove(slot + cd->name_entry_size, slot, |
7505 |
|
IN_UCHARS((cd->names_found - i) * cd->name_entry_size)); |
7506 |
|
break; |
7507 |
|
} |
7508 |
|
|
7509 |
|
/* Continue the loop for a later or duplicate name */ |
7510 |
|
|
7511 |
|
slot += cd->name_entry_size; |
7512 |
|
} |
7513 |
|
|
7514 |
|
PUT2(slot, 0, groupno); |
7515 |
|
memcpy(slot + IMM2_SIZE, name, IN_UCHARS(length)); |
7516 |
|
slot[IMM2_SIZE + length] = 0; |
7517 |
|
cd->names_found++; |
7518 |
|
} |
7519 |
|
|
7520 |
|
|
7521 |
|
|
7522 |
|
/************************************************* |
7523 |
* Compile a Regular Expression * |
* Compile a Regular Expression * |
7524 |
*************************************************/ |
*************************************************/ |
7525 |
|
|
7606 |
|
|
7607 |
pcre_uchar cworkspace[COMPILE_WORK_SIZE]; |
pcre_uchar cworkspace[COMPILE_WORK_SIZE]; |
7608 |
|
|
7609 |
|
/* This vector is used for remembering name groups during the pre-compile. In a |
7610 |
|
similar way to cworkspace, it can be expanded using malloc() if necessary. */ |
7611 |
|
|
7612 |
|
named_group named_groups[NAMED_GROUP_LIST_SIZE]; |
7613 |
|
|
7614 |
/* Set this early so that early errors get offset 0. */ |
/* Set this early so that early errors get offset 0. */ |
7615 |
|
|
7616 |
ptr = (const pcre_uchar *)pattern; |
ptr = (const pcre_uchar *)pattern; |
7877 |
cd->hwm = cworkspace; |
cd->hwm = cworkspace; |
7878 |
cd->start_workspace = cworkspace; |
cd->start_workspace = cworkspace; |
7879 |
cd->workspace_size = COMPILE_WORK_SIZE; |
cd->workspace_size = COMPILE_WORK_SIZE; |
7880 |
|
cd->named_groups = named_groups; |
7881 |
|
cd->named_group_list_size = NAMED_GROUP_LIST_SIZE; |
7882 |
cd->start_pattern = (const pcre_uchar *)pattern; |
cd->start_pattern = (const pcre_uchar *)pattern; |
7883 |
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); |
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); |
7884 |
cd->req_varyopt = 0; |
cd->req_varyopt = 0; |
7961 |
cd->assert_depth = 0; |
cd->assert_depth = 0; |
7962 |
cd->bracount = 0; |
cd->bracount = 0; |
7963 |
cd->max_lookbehind = 0; |
cd->max_lookbehind = 0; |
|
cd->names_found = 0; |
|
7964 |
cd->name_table = (pcre_uchar *)re + re->name_table_offset; |
cd->name_table = (pcre_uchar *)re + re->name_table_offset; |
7965 |
codestart = cd->name_table + re->name_entry_size * re->name_count; |
codestart = cd->name_table + re->name_entry_size * re->name_count; |
7966 |
cd->start_code = codestart; |
cd->start_code = codestart; |
7971 |
cd->check_lookbehind = FALSE; |
cd->check_lookbehind = FALSE; |
7972 |
cd->open_caps = NULL; |
cd->open_caps = NULL; |
7973 |
|
|
7974 |
|
/* If any named groups were found, create the name/number table from the list |
7975 |
|
created in the first pass. */ |
7976 |
|
|
7977 |
|
if (cd->names_found > 0) |
7978 |
|
{ |
7979 |
|
int i = cd->names_found; |
7980 |
|
named_group *ng = cd->named_groups; |
7981 |
|
cd->names_found = 0; |
7982 |
|
for (; i > 0; i--, ng++) |
7983 |
|
add_name(cd, ng->name, ng->length, ng->number); |
7984 |
|
if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE) |
7985 |
|
(PUBL(free))((void *)cd->named_groups); |
7986 |
|
} |
7987 |
|
|
7988 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
7989 |
error, errorcode will be set non-zero, so we don't need to look at the result |
error, errorcode will be set non-zero, so we don't need to look at the result |
7990 |
of the function here. */ |
of the function here. */ |