84 |
const pcre_uint32 *, unsigned int); |
const pcre_uint32 *, unsigned int); |
85 |
|
|
86 |
static BOOL |
static BOOL |
87 |
compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, |
compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int, |
88 |
int, int, int *, int *, branch_chain *, compile_data *, int *); |
pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *, |
89 |
|
compile_data *, int *); |
90 |
|
|
91 |
|
|
92 |
|
|
122 |
|
|
123 |
/* Private flags added to firstchar and reqchar. */ |
/* Private flags added to firstchar and reqchar. */ |
124 |
|
|
125 |
#define REQ_CASELESS 0x10000000l /* Indicates caselessness */ |
#define REQ_CASELESS (1 << 0) /* Indicates caselessness */ |
126 |
#define REQ_VARY 0x20000000l /* Reqchar followed non-literal item */ |
#define REQ_VARY (1 << 1) /* Reqchar followed non-literal item */ |
127 |
#define REQ_MASK (REQ_CASELESS | REQ_VARY) |
/* Negative values for the firstchar and reqchar flags */ |
128 |
|
#define REQ_UNSET (-2) |
129 |
|
#define REQ_NONE (-1) |
130 |
|
|
131 |
/* Repeated character flags. */ |
/* Repeated character flags. */ |
132 |
|
|
648 |
|
|
649 |
|
|
650 |
|
|
|
|
|
651 |
/************************************************* |
/************************************************* |
652 |
* Find an error text * |
* Find an error text * |
653 |
*************************************************/ |
*************************************************/ |
667 |
const char *s = error_texts; |
const char *s = error_texts; |
668 |
for (; n > 0; n--) |
for (; n > 0; n--) |
669 |
{ |
{ |
670 |
while (*s++ != 0) {}; |
while (*s++ != CHAR_NULL) {}; |
671 |
if (*s == 0) return "Error text not found (please report)"; |
if (*s == CHAR_NULL) return "Error text not found (please report)"; |
672 |
} |
} |
673 |
return s; |
return s; |
674 |
} |
} |
752 |
|
|
753 |
/* This function is called when a \ has been encountered. It either returns a |
/* This function is called when a \ has been encountered. It either returns a |
754 |
positive value for a simple escape such as \n, or 0 for a data character |
positive value for a simple escape such as \n, or 0 for a data character |
755 |
which will be placed in chptr. A backreference to group |
which will be placed in chptr. A backreference to group n is returned as |
756 |
n is returned as ESC_REF + n; ESC_REF is the highest ESC_xxx macro. When |
negative n. When UTF-8 is enabled, a positive value greater than 255 may |
757 |
UTF-8 is enabled, a positive value greater than 255 may be returned in chptr. |
be returned in chptr. |
758 |
On entry,ptr is pointing at the \. On exit, it is on the final character of the |
On entry,ptr is pointing at the \. On exit, it is on the final character of the |
759 |
escape sequence. |
escape sequence. |
760 |
|
|
768 |
|
|
769 |
Returns: zero => a data character |
Returns: zero => a data character |
770 |
positive => a special escape sequence |
positive => a special escape sequence |
771 |
|
negative => a back reference |
772 |
on error, errorcodeptr is set |
on error, errorcodeptr is set |
773 |
*/ |
*/ |
774 |
|
|
775 |
static int |
static int |
776 |
check_escape(const pcre_uchar **ptrptr, int *chptr, int *errorcodeptr, |
check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr, |
777 |
int bracount, int options, BOOL isclass) |
int bracount, int options, BOOL isclass) |
778 |
{ |
{ |
779 |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
780 |
BOOL utf = (options & PCRE_UTF8) != 0; |
BOOL utf = (options & PCRE_UTF8) != 0; |
781 |
const pcre_uchar *ptr = *ptrptr + 1; |
const pcre_uchar *ptr = *ptrptr + 1; |
782 |
pcre_int32 c; |
pcre_uint32 c; |
783 |
int escape = 0; |
int escape = 0; |
784 |
int i; |
int i; |
785 |
|
|
788 |
|
|
789 |
/* If backslash is at the end of the pattern, it's an error. */ |
/* If backslash is at the end of the pattern, it's an error. */ |
790 |
|
|
791 |
if (c == 0) *errorcodeptr = ERR1; |
if (c == CHAR_NULL) *errorcodeptr = ERR1; |
792 |
|
|
793 |
/* Non-alphanumerics are literals. For digits or letters, do an initial lookup |
/* Non-alphanumerics are literals. For digits or letters, do an initial lookup |
794 |
in a table. A non-zero result is something that can be returned immediately. |
in a table. A non-zero result is something that can be returned immediately. |
797 |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
798 |
/* Not alphanumeric */ |
/* Not alphanumeric */ |
799 |
else if (c < CHAR_0 || c > CHAR_z) {} |
else if (c < CHAR_0 || c > CHAR_z) {} |
800 |
else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = i; else escape = -i; } |
else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } |
801 |
|
|
802 |
#else /* EBCDIC coding */ |
#else /* EBCDIC coding */ |
803 |
/* Not alphanumeric */ |
/* Not alphanumeric */ |
804 |
else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {} |
else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {} |
805 |
else if ((i = escapes[c - 0x48]) != 0) { if (i > 0) c = i; else escape = -i; } |
else if ((i = escapes[c - 0x48]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } |
806 |
#endif |
#endif |
807 |
|
|
808 |
/* Escapes that need further processing, or are illegal. */ |
/* Escapes that need further processing, or are illegal. */ |
810 |
else |
else |
811 |
{ |
{ |
812 |
const pcre_uchar *oldptr; |
const pcre_uchar *oldptr; |
813 |
BOOL braced, negated; |
BOOL braced, negated, overflow; |
814 |
|
int s; |
815 |
|
|
816 |
switch (c) |
switch (c) |
817 |
{ |
{ |
836 |
c = 0; |
c = 0; |
837 |
for (i = 0; i < 4; ++i) |
for (i = 0; i < 4; ++i) |
838 |
{ |
{ |
839 |
register int cc = *(++ptr); |
register pcre_uint32 cc = *(++ptr); |
840 |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
841 |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
842 |
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); |
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); |
898 |
if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) |
if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) |
899 |
{ |
{ |
900 |
const pcre_uchar *p; |
const pcre_uchar *p; |
901 |
for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++) |
for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++) |
902 |
if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; |
if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; |
903 |
if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET) |
if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET) |
904 |
{ |
{ |
905 |
escape = ESC_k; |
escape = ESC_k; |
906 |
break; |
break; |
918 |
else negated = FALSE; |
else negated = FALSE; |
919 |
|
|
920 |
/* The integer range is limited by the machine's int representation. */ |
/* The integer range is limited by the machine's int representation. */ |
921 |
c = 0; |
s = 0; |
922 |
|
overflow = FALSE; |
923 |
while (IS_DIGIT(ptr[1])) |
while (IS_DIGIT(ptr[1])) |
924 |
{ |
{ |
925 |
if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */ |
if (s > INT_MAX / 10 - 1) /* Integer overflow */ |
926 |
{ |
{ |
927 |
c = -1; |
overflow = TRUE; |
928 |
break; |
break; |
929 |
} |
} |
930 |
c = c * 10 + *(++ptr) - CHAR_0; |
s = s * 10 + (int)(*(++ptr) - CHAR_0); |
931 |
} |
} |
932 |
if (((unsigned int)c) > INT_MAX) /* Integer overflow */ |
if (overflow) /* Integer overflow */ |
933 |
{ |
{ |
934 |
while (IS_DIGIT(ptr[1])) |
while (IS_DIGIT(ptr[1])) |
935 |
ptr++; |
ptr++; |
943 |
break; |
break; |
944 |
} |
} |
945 |
|
|
946 |
if (c == 0) |
if (s == 0) |
947 |
{ |
{ |
948 |
*errorcodeptr = ERR58; |
*errorcodeptr = ERR58; |
949 |
break; |
break; |
951 |
|
|
952 |
if (negated) |
if (negated) |
953 |
{ |
{ |
954 |
if (c > bracount) |
if (s > bracount) |
955 |
{ |
{ |
956 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
957 |
break; |
break; |
958 |
} |
} |
959 |
c = bracount - (c - 1); |
s = bracount - (s - 1); |
960 |
} |
} |
961 |
|
|
962 |
escape = ESC_REF + c; |
escape = -s; |
963 |
break; |
break; |
964 |
|
|
965 |
/* The handling of escape sequences consisting of a string of digits |
/* The handling of escape sequences consisting of a string of digits |
981 |
{ |
{ |
982 |
oldptr = ptr; |
oldptr = ptr; |
983 |
/* The integer range is limited by the machine's int representation. */ |
/* The integer range is limited by the machine's int representation. */ |
984 |
c -= CHAR_0; |
s = (int)(c -CHAR_0); |
985 |
|
overflow = FALSE; |
986 |
while (IS_DIGIT(ptr[1])) |
while (IS_DIGIT(ptr[1])) |
987 |
{ |
{ |
988 |
if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */ |
if (s > INT_MAX / 10 - 1) /* Integer overflow */ |
989 |
{ |
{ |
990 |
c = -1; |
overflow = TRUE; |
991 |
break; |
break; |
992 |
} |
} |
993 |
c = c * 10 + *(++ptr) - CHAR_0; |
s = s * 10 + (int)(*(++ptr) - CHAR_0); |
994 |
} |
} |
995 |
if (((unsigned int)c) > INT_MAX) /* Integer overflow */ |
if (overflow) /* Integer overflow */ |
996 |
{ |
{ |
997 |
while (IS_DIGIT(ptr[1])) |
while (IS_DIGIT(ptr[1])) |
998 |
ptr++; |
ptr++; |
999 |
*errorcodeptr = ERR61; |
*errorcodeptr = ERR61; |
1000 |
break; |
break; |
1001 |
} |
} |
1002 |
if (c < 10 || c <= bracount) |
if (s < 10 || s <= bracount) |
1003 |
{ |
{ |
1004 |
escape = ESC_REF + c; |
escape = -s; |
1005 |
break; |
break; |
1006 |
} |
} |
1007 |
ptr = oldptr; /* Put the pointer back and fall through */ |
ptr = oldptr; /* Put the pointer back and fall through */ |
1048 |
c = 0; |
c = 0; |
1049 |
for (i = 0; i < 2; ++i) |
for (i = 0; i < 2; ++i) |
1050 |
{ |
{ |
1051 |
register int cc = *(++ptr); |
register pcre_uint32 cc = *(++ptr); |
1052 |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
1053 |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
1054 |
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); |
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); |
1066 |
const pcre_uchar *pt = ptr + 2; |
const pcre_uchar *pt = ptr + 2; |
1067 |
|
|
1068 |
c = 0; |
c = 0; |
1069 |
|
overflow = FALSE; |
1070 |
while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) |
while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) |
1071 |
{ |
{ |
1072 |
register int cc = *pt++; |
register pcre_uint32 cc = *pt++; |
1073 |
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ |
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ |
1074 |
|
|
1075 |
|
#ifdef COMPILE_PCRE32 |
1076 |
|
if (c >= 0x10000000l) { overflow = TRUE; break; } |
1077 |
|
#endif |
1078 |
|
|
1079 |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
1080 |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
1081 |
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); |
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); |
1085 |
#endif |
#endif |
1086 |
|
|
1087 |
#if defined COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
1088 |
if (c > (utf ? 0x10ffff : 0xff)) { c = -1; break; } |
if (c > (utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; } |
1089 |
#elif defined COMPILE_PCRE16 |
#elif defined COMPILE_PCRE16 |
1090 |
if (c > (utf ? 0x10ffff : 0xffff)) { c = -1; break; } |
if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; } |
1091 |
#elif defined COMPILE_PCRE32 |
#elif defined COMPILE_PCRE32 |
1092 |
if (utf && c > 0x10ffff) { c = -1; break; } |
if (utf && c > 0x10ffff) { overflow = TRUE; break; } |
1093 |
#endif |
#endif |
1094 |
} |
} |
1095 |
|
|
1096 |
if (c < 0) |
if (overflow) |
1097 |
{ |
{ |
1098 |
while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++; |
while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++; |
1099 |
*errorcodeptr = ERR34; |
*errorcodeptr = ERR34; |
1115 |
c = 0; |
c = 0; |
1116 |
while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0) |
1117 |
{ |
{ |
1118 |
int cc; /* Some compilers don't like */ |
pcre_uint32 cc; /* Some compilers don't like */ |
1119 |
cc = *(++ptr); /* ++ in initializers */ |
cc = *(++ptr); /* ++ in initializers */ |
1120 |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
1121 |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ |
1134 |
|
|
1135 |
case CHAR_c: |
case CHAR_c: |
1136 |
c = *(++ptr); |
c = *(++ptr); |
1137 |
if (c == 0) |
if (c == CHAR_NULL) |
1138 |
{ |
{ |
1139 |
*errorcodeptr = ERR2; |
*errorcodeptr = ERR2; |
1140 |
break; |
break; |
1203 |
Argument: |
Argument: |
1204 |
ptrptr points to the pattern position pointer |
ptrptr points to the pattern position pointer |
1205 |
negptr points to a boolean that is set TRUE for negation else FALSE |
negptr points to a boolean that is set TRUE for negation else FALSE |
1206 |
dptr points to an int that is set to the detailed property value |
ptypeptr points to an unsigned int that is set to the type value |
1207 |
|
pdataptr points to an unsigned int that is set to the detailed property value |
1208 |
errorcodeptr points to the error code variable |
errorcodeptr points to the error code variable |
1209 |
|
|
1210 |
Returns: type value from ucp_type_table, or -1 for an invalid type |
Returns: TRUE if the type value was found, or FALSE for an invalid type |
1211 |
*/ |
*/ |
1212 |
|
|
1213 |
static int |
static BOOL |
1214 |
get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr) |
get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr, |
1215 |
|
unsigned int *pdataptr, int *errorcodeptr) |
1216 |
{ |
{ |
1217 |
int c, i, bot, top; |
pcre_uchar c; |
1218 |
|
int i, bot, top; |
1219 |
const pcre_uchar *ptr = *ptrptr; |
const pcre_uchar *ptr = *ptrptr; |
1220 |
pcre_uchar name[32]; |
pcre_uchar name[32]; |
1221 |
|
|
1222 |
c = *(++ptr); |
c = *(++ptr); |
1223 |
if (c == 0) goto ERROR_RETURN; |
if (c == CHAR_NULL) goto ERROR_RETURN; |
1224 |
|
|
1225 |
*negptr = FALSE; |
*negptr = FALSE; |
1226 |
|
|
1237 |
for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++) |
for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++) |
1238 |
{ |
{ |
1239 |
c = *(++ptr); |
c = *(++ptr); |
1240 |
if (c == 0) goto ERROR_RETURN; |
if (c == CHAR_NULL) goto ERROR_RETURN; |
1241 |
if (c == CHAR_RIGHT_CURLY_BRACKET) break; |
if (c == CHAR_RIGHT_CURLY_BRACKET) break; |
1242 |
name[i] = c; |
name[i] = c; |
1243 |
} |
} |
1262 |
|
|
1263 |
while (bot < top) |
while (bot < top) |
1264 |
{ |
{ |
1265 |
|
int r; |
1266 |
i = (bot + top) >> 1; |
i = (bot + top) >> 1; |
1267 |
c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); |
r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); |
1268 |
if (c == 0) |
if (r == 0) |
1269 |
{ |
{ |
1270 |
*dptr = PRIV(utt)[i].value; |
*ptypeptr = PRIV(utt)[i].type; |
1271 |
return PRIV(utt)[i].type; |
*pdataptr = PRIV(utt)[i].value; |
1272 |
|
return TRUE; |
1273 |
} |
} |
1274 |
if (c > 0) bot = i + 1; else top = i; |
if (r > 0) bot = i + 1; else top = i; |
1275 |
} |
} |
1276 |
|
|
1277 |
*errorcodeptr = ERR47; |
*errorcodeptr = ERR47; |
1278 |
*ptrptr = ptr; |
*ptrptr = ptr; |
1279 |
return -1; |
return FALSE; |
1280 |
|
|
1281 |
ERROR_RETURN: |
ERROR_RETURN: |
1282 |
*errorcodeptr = ERR46; |
*errorcodeptr = ERR46; |
1283 |
*ptrptr = ptr; |
*ptrptr = ptr; |
1284 |
return -1; |
return FALSE; |
1285 |
} |
} |
1286 |
#endif |
#endif |
1287 |
|
|
1316 |
/* Read the minimum value and do a paranoid check: a negative value indicates |
/* Read the minimum value and do a paranoid check: a negative value indicates |
1317 |
an integer overflow. */ |
an integer overflow. */ |
1318 |
|
|
1319 |
while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0; |
while (IS_DIGIT(*p)) min = min * 10 + (int)(*p++ - CHAR_0); |
1320 |
if (min < 0 || min > 65535) |
if (min < 0 || min > 65535) |
1321 |
{ |
{ |
1322 |
*errorcodeptr = ERR5; |
*errorcodeptr = ERR5; |
1331 |
if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) |
if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) |
1332 |
{ |
{ |
1333 |
max = 0; |
max = 0; |
1334 |
while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0; |
while(IS_DIGIT(*p)) max = max * 10 + (int)(*p++ - CHAR_0); |
1335 |
if (max < 0 || max > 65535) |
if (max < 0 || max > 65535) |
1336 |
{ |
{ |
1337 |
*errorcodeptr = ERR5; |
*errorcodeptr = ERR5; |
1432 |
|
|
1433 |
else if (ptr[2] == CHAR_NUMBER_SIGN) |
else if (ptr[2] == CHAR_NUMBER_SIGN) |
1434 |
{ |
{ |
1435 |
for (ptr += 3; *ptr != 0; ptr++) if (*ptr == CHAR_RIGHT_PARENTHESIS) break; |
for (ptr += 3; *ptr != CHAR_NULL; ptr++) |
1436 |
|
if (*ptr == CHAR_RIGHT_PARENTHESIS) break; |
1437 |
goto FAIL_EXIT; |
goto FAIL_EXIT; |
1438 |
} |
} |
1439 |
|
|
1446 |
ptr += 2; |
ptr += 2; |
1447 |
if (ptr[1] != CHAR_QUESTION_MARK) |
if (ptr[1] != CHAR_QUESTION_MARK) |
1448 |
{ |
{ |
1449 |
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
1450 |
if (*ptr != 0) ptr++; |
if (*ptr != CHAR_NULL) ptr++; |
1451 |
} |
} |
1452 |
} |
} |
1453 |
|
|
1463 |
if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && |
if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && |
1464 |
ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) |
ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) |
1465 |
{ |
{ |
1466 |
int term; |
pcre_uchar term; |
1467 |
const pcre_uchar *thisname; |
const pcre_uchar *thisname; |
1468 |
*count += 1; |
*count += 1; |
1469 |
if (name == NULL && *count == lorn) return *count; |
if (name == NULL && *count == lorn) return *count; |
1471 |
if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; |
if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; |
1472 |
thisname = ptr; |
thisname = ptr; |
1473 |
while (*ptr != term) ptr++; |
while (*ptr != term) ptr++; |
1474 |
if (name != NULL && lorn == ptr - thisname && |
if (name != NULL && lorn == (int)(ptr - thisname) && |
1475 |
STRNCMP_UC_UC(name, thisname, lorn) == 0) |
STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0) |
1476 |
return *count; |
return *count; |
1477 |
term++; |
term++; |
1478 |
} |
} |
1490 |
|
|
1491 |
if (*ptr == CHAR_BACKSLASH) |
if (*ptr == CHAR_BACKSLASH) |
1492 |
{ |
{ |
1493 |
if (*(++ptr) == 0) goto FAIL_EXIT; |
if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT; |
1494 |
if (*ptr == CHAR_Q) for (;;) |
if (*ptr == CHAR_Q) for (;;) |
1495 |
{ |
{ |
1496 |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {}; |
1497 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
1498 |
if (*(++ptr) == CHAR_E) break; |
if (*(++ptr) == CHAR_E) break; |
1499 |
} |
} |
1500 |
continue; |
continue; |
1538 |
|
|
1539 |
while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET) |
while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET) |
1540 |
{ |
{ |
1541 |
if (*ptr == 0) return -1; |
if (*ptr == CHAR_NULL) return -1; |
1542 |
if (*ptr == CHAR_BACKSLASH) |
if (*ptr == CHAR_BACKSLASH) |
1543 |
{ |
{ |
1544 |
if (*(++ptr) == 0) goto FAIL_EXIT; |
if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT; |
1545 |
if (*ptr == CHAR_Q) for (;;) |
if (*ptr == CHAR_Q) for (;;) |
1546 |
{ |
{ |
1547 |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {}; |
1548 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
1549 |
if (*(++ptr) == CHAR_E) break; |
if (*(++ptr) == CHAR_E) break; |
1550 |
} |
} |
1551 |
continue; |
continue; |
1559 |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
1560 |
{ |
{ |
1561 |
ptr++; |
ptr++; |
1562 |
while (*ptr != 0) |
while (*ptr != CHAR_NULL) |
1563 |
{ |
{ |
1564 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
1565 |
ptr++; |
ptr++; |
1567 |
if (utf) FORWARDCHAR(ptr); |
if (utf) FORWARDCHAR(ptr); |
1568 |
#endif |
#endif |
1569 |
} |
} |
1570 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
1571 |
continue; |
continue; |
1572 |
} |
} |
1573 |
|
|
1577 |
{ |
{ |
1578 |
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count); |
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count); |
1579 |
if (rc > 0) return rc; |
if (rc > 0) return rc; |
1580 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == CHAR_NULL) goto FAIL_EXIT; |
1581 |
} |
} |
1582 |
|
|
1583 |
else if (*ptr == CHAR_RIGHT_PARENTHESIS) |
else if (*ptr == CHAR_RIGHT_PARENTHESIS) |
1642 |
for (;;) |
for (;;) |
1643 |
{ |
{ |
1644 |
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count); |
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count); |
1645 |
if (rc > 0 || *ptr++ == 0) break; |
if (rc > 0 || *ptr++ == CHAR_NULL) break; |
1646 |
} |
} |
1647 |
|
|
1648 |
return rc; |
return rc; |
1750 |
{ |
{ |
1751 |
int d; |
int d; |
1752 |
pcre_uchar *ce, *cs; |
pcre_uchar *ce, *cs; |
1753 |
register int op = *cc; |
register pcre_uchar op = *cc; |
1754 |
|
|
1755 |
switch (op) |
switch (op) |
1756 |
{ |
{ |
1858 |
case OP_NOTI: |
case OP_NOTI: |
1859 |
branchlength++; |
branchlength++; |
1860 |
cc += 2; |
cc += 2; |
1861 |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
#ifdef SUPPORT_UTF |
1862 |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1863 |
#endif |
#endif |
1864 |
break; |
break; |
1870 |
case OP_EXACTI: |
case OP_EXACTI: |
1871 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
1872 |
case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
1873 |
branchlength += GET2(cc,1); |
branchlength += (int)GET2(cc,1); |
1874 |
cc += 2 + IMM2_SIZE; |
cc += 2 + IMM2_SIZE; |
1875 |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
#ifdef SUPPORT_UTF |
1876 |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1877 |
#endif |
#endif |
1878 |
break; |
break; |
1915 |
|
|
1916 |
/* Check a class for variable quantification */ |
/* Check a class for variable quantification */ |
1917 |
|
|
|
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
|
|
case OP_XCLASS: |
|
|
cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; |
|
|
/* Fall through */ |
|
|
#endif |
|
|
|
|
1918 |
case OP_CLASS: |
case OP_CLASS: |
1919 |
case OP_NCLASS: |
case OP_NCLASS: |
1920 |
|
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
1921 |
|
case OP_XCLASS: |
1922 |
|
/* The original code caused an unsigned overflow in 64 bit systems, |
1923 |
|
so now we use a conditional statement. */ |
1924 |
|
if (op == OP_XCLASS) |
1925 |
|
cc += GET(cc, 1); |
1926 |
|
else |
1927 |
|
cc += PRIV(OP_lengths)[OP_CLASS]; |
1928 |
|
#else |
1929 |
cc += PRIV(OP_lengths)[OP_CLASS]; |
cc += PRIV(OP_lengths)[OP_CLASS]; |
1930 |
|
#endif |
1931 |
|
|
1932 |
switch (*cc) |
switch (*cc) |
1933 |
{ |
{ |
1942 |
case OP_CRRANGE: |
case OP_CRRANGE: |
1943 |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
1944 |
if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; |
if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; |
1945 |
branchlength += GET2(cc,1); |
branchlength += (int)GET2(cc,1); |
1946 |
cc += 1 + 2 * IMM2_SIZE; |
cc += 1 + 2 * IMM2_SIZE; |
1947 |
break; |
break; |
1948 |
|
|
2069 |
{ |
{ |
2070 |
for (;;) |
for (;;) |
2071 |
{ |
{ |
2072 |
register int c = *code; |
register pcre_uchar c = *code; |
2073 |
|
|
2074 |
if (c == OP_END) return NULL; |
if (c == OP_END) return NULL; |
2075 |
|
|
2092 |
else if (c == OP_CBRA || c == OP_SCBRA || |
else if (c == OP_CBRA || c == OP_SCBRA || |
2093 |
c == OP_CBRAPOS || c == OP_SCBRAPOS) |
c == OP_CBRAPOS || c == OP_SCBRAPOS) |
2094 |
{ |
{ |
2095 |
int n = GET2(code, 1+LINK_SIZE); |
int n = (int)GET2(code, 1+LINK_SIZE); |
2096 |
if (n == number) return (pcre_uchar *)code; |
if (n == number) return (pcre_uchar *)code; |
2097 |
code += PRIV(OP_lengths)[c]; |
code += PRIV(OP_lengths)[c]; |
2098 |
} |
} |
2207 |
{ |
{ |
2208 |
for (;;) |
for (;;) |
2209 |
{ |
{ |
2210 |
register int c = *code; |
register pcre_uchar c = *code; |
2211 |
if (c == OP_END) return NULL; |
if (c == OP_END) return NULL; |
2212 |
if (c == OP_RECURSE) return code; |
if (c == OP_RECURSE) return code; |
2213 |
|
|
2361 |
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, |
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, |
2362 |
BOOL utf, compile_data *cd) |
BOOL utf, compile_data *cd) |
2363 |
{ |
{ |
2364 |
register int c; |
register pcre_uchar c; |
2365 |
for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); |
for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); |
2366 |
code < endcode; |
code < endcode; |
2367 |
code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) |
code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) |
2395 |
/* Test for forward reference */ |
/* Test for forward reference */ |
2396 |
|
|
2397 |
for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE) |
for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE) |
2398 |
if (GET(scode, 0) == code + 1 - cd->start_code) return TRUE; |
if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE; |
2399 |
|
|
2400 |
/* Not a forward reference, test for completed backward reference */ |
/* Not a forward reference, test for completed backward reference */ |
2401 |
|
|
2716 |
static BOOL |
static BOOL |
2717 |
check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr) |
check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr) |
2718 |
{ |
{ |
2719 |
int terminator; /* Don't combine these lines; the Solaris cc */ |
pcre_uchar terminator; /* Don't combine these lines; the Solaris cc */ |
2720 |
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ |
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ |
2721 |
for (++ptr; *ptr != 0; ptr++) |
for (++ptr; *ptr != CHAR_NULL; ptr++) |
2722 |
{ |
{ |
2723 |
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) |
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) |
2724 |
ptr++; |
ptr++; |
2765 |
while (posix_name_lengths[yield] != 0) |
while (posix_name_lengths[yield] != 0) |
2766 |
{ |
{ |
2767 |
if (len == posix_name_lengths[yield] && |
if (len == posix_name_lengths[yield] && |
2768 |
STRNCMP_UC_C8(ptr, pn, len) == 0) return yield; |
STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield; |
2769 |
pn += posix_name_lengths[yield] + 1; |
pn += posix_name_lengths[yield] + 1; |
2770 |
yield++; |
yield++; |
2771 |
} |
} |
2820 |
|
|
2821 |
for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) |
for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) |
2822 |
{ |
{ |
2823 |
offset = GET(hc, 0); |
offset = (int)GET(hc, 0); |
2824 |
if (cd->start_code + offset == ptr + 1) |
if (cd->start_code + offset == ptr + 1) |
2825 |
{ |
{ |
2826 |
PUT(hc, 0, offset + adjust); |
PUT(hc, 0, offset + adjust); |
2833 |
|
|
2834 |
if (hc >= cd->hwm) |
if (hc >= cd->hwm) |
2835 |
{ |
{ |
2836 |
offset = GET(ptr, 1); |
offset = (int)GET(ptr, 1); |
2837 |
if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); |
if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); |
2838 |
} |
} |
2839 |
|
|
2919 |
*/ |
*/ |
2920 |
|
|
2921 |
static int |
static int |
2922 |
get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr, |
get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr, |
2923 |
unsigned int *odptr) |
pcre_uint32 *odptr) |
2924 |
{ |
{ |
2925 |
unsigned int c, othercase, next; |
pcre_uint32 c, othercase, next; |
2926 |
int co; |
unsigned int co; |
2927 |
|
|
2928 |
/* Find the first character that has an other case. If it has multiple other |
/* Find the first character that has an other case. If it has multiple other |
2929 |
cases, return its case offset value. */ |
cases, return its case offset value. */ |
2934 |
{ |
{ |
2935 |
*ocptr = c++; /* Character that has the set */ |
*ocptr = c++; /* Character that has the set */ |
2936 |
*cptr = c; /* Rest of input range */ |
*cptr = c; /* Rest of input range */ |
2937 |
return co; |
return (int)co; |
2938 |
} |
} |
2939 |
if ((othercase = UCD_OTHERCASE(c)) != c) break; |
if ((othercase = UCD_OTHERCASE(c)) != c) break; |
2940 |
} |
} |
2974 |
*/ |
*/ |
2975 |
|
|
2976 |
static BOOL |
static BOOL |
2977 |
check_char_prop(int c, int ptype, int pdata, BOOL negated) |
check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata, BOOL negated) |
2978 |
{ |
{ |
2979 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2980 |
const pcre_uint32 *p; |
const pcre_uint32 *p; |
3025 |
p = PRIV(ucd_caseless_sets) + prop->caseset; |
p = PRIV(ucd_caseless_sets) + prop->caseset; |
3026 |
for (;;) |
for (;;) |
3027 |
{ |
{ |
3028 |
if ((unsigned int)c < *p) return !negated; |
if (c < *p) return !negated; |
3029 |
if ((unsigned int)c == *p++) return negated; |
if (c == *p++) return negated; |
3030 |
} |
} |
3031 |
break; /* Control never reaches here */ |
break; /* Control never reaches here */ |
3032 |
#endif |
#endif |
3060 |
check_auto_possessive(const pcre_uchar *previous, BOOL utf, |
check_auto_possessive(const pcre_uchar *previous, BOOL utf, |
3061 |
const pcre_uchar *ptr, int options, compile_data *cd) |
const pcre_uchar *ptr, int options, compile_data *cd) |
3062 |
{ |
{ |
3063 |
pcre_int32 c = NOTACHAR; // FIXMEchpe pcre_uint32 |
pcre_uint32 c = NOTACHAR; |
3064 |
pcre_int32 next; |
pcre_uint32 next; |
3065 |
int escape; |
int escape; |
3066 |
int op_code = *previous++; |
pcre_uchar op_code = *previous++; |
3067 |
|
|
3068 |
/* Skip whitespace and comments in extended mode */ |
/* Skip whitespace and comments in extended mode */ |
3069 |
|
|
3075 |
if (*ptr == CHAR_NUMBER_SIGN) |
if (*ptr == CHAR_NUMBER_SIGN) |
3076 |
{ |
{ |
3077 |
ptr++; |
ptr++; |
3078 |
while (*ptr != 0) |
while (*ptr != CHAR_NULL) |
3079 |
{ |
{ |
3080 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
3081 |
ptr++; |
ptr++; |
3118 |
if (*ptr == CHAR_NUMBER_SIGN) |
if (*ptr == CHAR_NUMBER_SIGN) |
3119 |
{ |
{ |
3120 |
ptr++; |
ptr++; |
3121 |
while (*ptr != 0) |
while (*ptr != CHAR_NULL) |
3122 |
{ |
{ |
3123 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
3124 |
ptr++; |
ptr++; |
3141 |
|
|
3142 |
if (op_code == OP_CHAR || op_code == OP_CHARI || |
if (op_code == OP_CHAR || op_code == OP_CHARI || |
3143 |
op_code == OP_NOT || op_code == OP_NOTI) |
op_code == OP_NOT || op_code == OP_NOTI) |
|
//if (escape == 0) switch(op_code) |
|
3144 |
{ |
{ |
3145 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3146 |
GETCHARTEST(c, previous); |
GETCHARTEST(c, previous); |
3158 |
case, which maps to the special PT_CLIST property. Check this first. */ |
case, which maps to the special PT_CLIST property. Check this first. */ |
3159 |
|
|
3160 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
3161 |
if (utf && (unsigned int)c != NOTACHAR && (options & PCRE_CASELESS) != 0) |
if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0) |
3162 |
{ |
{ |
3163 |
int ocs = UCD_CASESET(next); |
unsigned int ocs = UCD_CASESET(next); |
3164 |
if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT); |
if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT); |
3165 |
} |
} |
3166 |
#endif |
#endif |
3180 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3181 |
if (utf) |
if (utf) |
3182 |
{ |
{ |
3183 |
unsigned int othercase; |
pcre_uint32 othercase; |
3184 |
if (next < 128) othercase = cd->fcc[next]; else |
if (next < 128) othercase = cd->fcc[next]; else |
3185 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
3186 |
othercase = UCD_OTHERCASE((unsigned int)next); |
othercase = UCD_OTHERCASE(next); |
3187 |
#else |
#else |
3188 |
othercase = NOTACHAR; |
othercase = NOTACHAR; |
3189 |
#endif |
#endif |
3190 |
return (unsigned int)c != othercase; |
return c != othercase; |
3191 |
} |
} |
3192 |
else |
else |
3193 |
#endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
3194 |
return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Not UTF */ |
return (c != TABLE_GET(next, cd->fcc, next)); /* Not UTF */ |
3195 |
|
|
3196 |
case OP_NOT: |
case OP_NOT: |
3197 |
return c == next; |
return c == next; |
3201 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3202 |
if (utf) |
if (utf) |
3203 |
{ |
{ |
3204 |
unsigned int othercase; |
pcre_uint32 othercase; |
3205 |
if (next < 128) othercase = cd->fcc[next]; else |
if (next < 128) othercase = cd->fcc[next]; else |
3206 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
3207 |
othercase = UCD_OTHERCASE((unsigned int)next); |
othercase = UCD_OTHERCASE(next); |
3208 |
#else |
#else |
3209 |
othercase = NOTACHAR; |
othercase = NOTACHAR; |
3210 |
#endif |
#endif |
3211 |
return (unsigned int)c == othercase; |
return c == othercase; |
3212 |
} |
} |
3213 |
else |
else |
3214 |
#endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
3215 |
return (c == TABLE_GET((unsigned int)next, cd->fcc, next)); /* Not UTF */ |
return (c == TABLE_GET(next, cd->fcc, next)); /* Not UTF */ |
3216 |
|
|
3217 |
/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. |
/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. |
3218 |
When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ |
When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ |
3346 |
case ESC_p: |
case ESC_p: |
3347 |
case ESC_P: |
case ESC_P: |
3348 |
{ |
{ |
3349 |
int ptype, pdata, errorcodeptr; |
unsigned int ptype = 0, pdata = 0; |
3350 |
|
int errorcodeptr; |
3351 |
BOOL negated; |
BOOL negated; |
3352 |
|
|
3353 |
ptr--; /* Make ptr point at the p or P */ |
ptr--; /* Make ptr point at the p or P */ |
3354 |
ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr); |
if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcodeptr)) |
3355 |
if (ptype < 0) return FALSE; |
return FALSE; |
3356 |
ptr++; /* Point past the final curly ket */ |
ptr++; /* Point past the final curly ket */ |
3357 |
|
|
3358 |
/* If the property item is optional, we have to give up. (When generated |
/* If the property item is optional, we have to give up. (When generated |
3676 |
codeptr points to the pointer to the current code point |
codeptr points to the pointer to the current code point |
3677 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
3678 |
errorcodeptr points to error code variable |
errorcodeptr points to error code variable |
3679 |
firstcharptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) |
firstcharptr place to put the first required character |
3680 |
reqcharptr set to the last literal character required, else < 0 |
firstcharflagsptr place to put the first character flags, or a negative number |
3681 |
|
reqcharptr place to put the last required character |
3682 |
|
reqcharflagsptr place to put the last required character flags, or a negative number |
3683 |
bcptr points to current branch chain |
bcptr points to current branch chain |
3684 |
cond_depth conditional nesting depth |
cond_depth conditional nesting depth |
3685 |
cd contains pointers to tables etc. |
cd contains pointers to tables etc. |
3692 |
|
|
3693 |
static BOOL |
static BOOL |
3694 |
compile_branch(int *optionsptr, pcre_uchar **codeptr, |
compile_branch(int *optionsptr, pcre_uchar **codeptr, |
3695 |
const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr, |
const pcre_uchar **ptrptr, int *errorcodeptr, |
3696 |
pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth, |
pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, |
3697 |
|
pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, |
3698 |
|
branch_chain *bcptr, int cond_depth, |
3699 |
compile_data *cd, int *lengthptr) |
compile_data *cd, int *lengthptr) |
3700 |
{ |
{ |
3701 |
int repeat_type, op_type; |
int repeat_type, op_type; |
3702 |
int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ |
int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ |
3703 |
int bravalue = 0; |
int bravalue = 0; |
3704 |
int greedy_default, greedy_non_default; |
int greedy_default, greedy_non_default; |
3705 |
pcre_int32 firstchar, reqchar; |
pcre_uint32 firstchar, reqchar; |
3706 |
pcre_int32 zeroreqchar, zerofirstchar; |
pcre_int32 firstcharflags, reqcharflags; |
3707 |
|
pcre_uint32 zeroreqchar, zerofirstchar; |
3708 |
|
pcre_int32 zeroreqcharflags, zerofirstcharflags; |
3709 |
pcre_int32 req_caseopt, reqvary, tempreqvary; |
pcre_int32 req_caseopt, reqvary, tempreqvary; |
3710 |
int options = *optionsptr; /* May change dynamically */ |
int options = *optionsptr; /* May change dynamically */ |
3711 |
int after_manual_callout = 0; |
int after_manual_callout = 0; |
3712 |
int length_prevgroup = 0; |
int length_prevgroup = 0; |
3713 |
register int c; |
register pcre_uint32 c; |
3714 |
int escape; |
int escape; |
3715 |
register pcre_uchar *code = *codeptr; |
register pcre_uchar *code = *codeptr; |
3716 |
pcre_uchar *last_code = code; |
pcre_uchar *last_code = code; |
3733 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3734 |
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ |
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ |
3735 |
BOOL utf = (options & PCRE_UTF8) != 0; |
BOOL utf = (options & PCRE_UTF8) != 0; |
3736 |
|
#ifndef COMPILE_PCRE32 |
3737 |
pcre_uchar utf_chars[6]; |
pcre_uchar utf_chars[6]; |
3738 |
|
#endif |
3739 |
#else |
#else |
3740 |
BOOL utf = FALSE; |
BOOL utf = FALSE; |
3741 |
#endif |
#endif |
3770 |
zerofirstchar and zeroreqchar when such a repeat is encountered. The individual |
zerofirstchar and zeroreqchar when such a repeat is encountered. The individual |
3771 |
item types that can be repeated set these backoff variables appropriately. */ |
item types that can be repeated set these backoff variables appropriately. */ |
3772 |
|
|
3773 |
firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET; |
firstchar = reqchar = zerofirstchar = zeroreqchar = 0; |
3774 |
|
firstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET; |
3775 |
|
|
3776 |
/* The variable req_caseopt contains either the REQ_CASELESS value |
/* The variable req_caseopt contains either the REQ_CASELESS value |
3777 |
or zero, according to the current setting of the caseless flag. The |
or zero, according to the current setting of the caseless flag. The |
3797 |
int recno; |
int recno; |
3798 |
int refsign; |
int refsign; |
3799 |
int skipbytes; |
int skipbytes; |
3800 |
int subreqchar; |
pcre_uint32 subreqchar, subfirstchar; |
3801 |
int subfirstchar; |
pcre_int32 subreqcharflags, subfirstcharflags; |
3802 |
int terminator; |
int terminator; |
3803 |
int mclength; |
unsigned int mclength; |
3804 |
int tempbracount; |
unsigned int tempbracount; |
3805 |
int ec; // FIXMEchpe pcre_uint32 |
pcre_uint32 ec; |
3806 |
pcre_uchar mcbuffer[8]; |
pcre_uchar mcbuffer[8]; |
3807 |
|
|
3808 |
/* Get next character in the pattern */ |
/* Get next character in the pattern */ |
3812 |
/* If we are at the end of a nested substitution, revert to the outer level |
/* If we are at the end of a nested substitution, revert to the outer level |
3813 |
string. Nesting only happens one level deep. */ |
string. Nesting only happens one level deep. */ |
3814 |
|
|
3815 |
if (c == 0 && nestptr != NULL) |
if (c == CHAR_NULL && nestptr != NULL) |
3816 |
{ |
{ |
3817 |
ptr = nestptr; |
ptr = nestptr; |
3818 |
nestptr = NULL; |
nestptr = NULL; |
3887 |
|
|
3888 |
/* If in \Q...\E, check for the end; if not, we have a literal */ |
/* If in \Q...\E, check for the end; if not, we have a literal */ |
3889 |
|
|
3890 |
if (inescq && c != 0) |
if (inescq && c != CHAR_NULL) |
3891 |
{ |
{ |
3892 |
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) |
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) |
3893 |
{ |
{ |
3935 |
if (c == CHAR_NUMBER_SIGN) |
if (c == CHAR_NUMBER_SIGN) |
3936 |
{ |
{ |
3937 |
ptr++; |
ptr++; |
3938 |
while (*ptr != 0) |
while (*ptr != CHAR_NULL) |
3939 |
{ |
{ |
3940 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
3941 |
ptr++; |
ptr++; |
3943 |
if (utf) FORWARDCHAR(ptr); |
if (utf) FORWARDCHAR(ptr); |
3944 |
#endif |
#endif |
3945 |
} |
} |
3946 |
if (*ptr != 0) continue; |
if (*ptr != CHAR_NULL) continue; |
3947 |
|
|
3948 |
/* Else fall through to handle end of string */ |
/* Else fall through to handle end of string */ |
3949 |
c = 0; |
c = 0; |
3965 |
case CHAR_VERTICAL_LINE: /* or | or ) */ |
case CHAR_VERTICAL_LINE: /* or | or ) */ |
3966 |
case CHAR_RIGHT_PARENTHESIS: |
case CHAR_RIGHT_PARENTHESIS: |
3967 |
*firstcharptr = firstchar; |
*firstcharptr = firstchar; |
3968 |
|
*firstcharflagsptr = firstcharflags; |
3969 |
*reqcharptr = reqchar; |
*reqcharptr = reqchar; |
3970 |
|
*reqcharflagsptr = reqcharflags; |
3971 |
*codeptr = code; |
*codeptr = code; |
3972 |
*ptrptr = ptr; |
*ptrptr = ptr; |
3973 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
3991 |
previous = NULL; |
previous = NULL; |
3992 |
if ((options & PCRE_MULTILINE) != 0) |
if ((options & PCRE_MULTILINE) != 0) |
3993 |
{ |
{ |
3994 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
3995 |
*code++ = OP_CIRCM; |
*code++ = OP_CIRCM; |
3996 |
} |
} |
3997 |
else *code++ = OP_CIRC; |
else *code++ = OP_CIRC; |
4006 |
repeats. The value of reqchar doesn't change either. */ |
repeats. The value of reqchar doesn't change either. */ |
4007 |
|
|
4008 |
case CHAR_DOT: |
case CHAR_DOT: |
4009 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
4010 |
zerofirstchar = firstchar; |
zerofirstchar = firstchar; |
4011 |
|
zerofirstcharflags = firstcharflags; |
4012 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
4013 |
|
zeroreqcharflags = reqcharflags; |
4014 |
previous = code; |
previous = code; |
4015 |
*code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY; |
*code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY; |
4016 |
break; |
break; |
4084 |
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
4085 |
{ |
{ |
4086 |
*code++ = negate_class? OP_ALLANY : OP_FAIL; |
*code++ = negate_class? OP_ALLANY : OP_FAIL; |
4087 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
4088 |
zerofirstchar = firstchar; |
zerofirstchar = firstchar; |
4089 |
|
zerofirstcharflags = firstcharflags; |
4090 |
break; |
break; |
4091 |
} |
} |
4092 |
|
|
4121 |
means that an initial ] is taken as a data character. At the start of the |
means that an initial ] is taken as a data character. At the start of the |
4122 |
loop, c contains the first byte of the character. */ |
loop, c contains the first byte of the character. */ |
4123 |
|
|
4124 |
if (c != 0) do |
if (c != CHAR_NULL) do |
4125 |
{ |
{ |
4126 |
const pcre_uchar *oldptr; |
const pcre_uchar *oldptr; |
4127 |
|
|
4128 |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
#ifdef SUPPORT_UTF |
4129 |
if (utf && HAS_EXTRALEN(c)) |
if (utf && HAS_EXTRALEN(c)) |
4130 |
{ /* Braces are required because the */ |
{ /* Braces are required because the */ |
4131 |
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ |
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ |
4384 |
case ESC_P: |
case ESC_P: |
4385 |
{ |
{ |
4386 |
BOOL negated; |
BOOL negated; |
4387 |
int pdata; |
unsigned int ptype = 0, pdata = 0; |
4388 |
int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr); |
if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) |
4389 |
if (ptype < 0) goto FAILED; |
goto FAILED; |
4390 |
*class_uchardata++ = ((escape == ESC_p) != negated)? |
*class_uchardata++ = ((escape == ESC_p) != negated)? |
4391 |
XCL_PROP : XCL_NOTPROP; |
XCL_PROP : XCL_NOTPROP; |
4392 |
*class_uchardata++ = ptype; |
*class_uchardata++ = ptype; |
4440 |
|
|
4441 |
if (!inescq && ptr[1] == CHAR_MINUS) |
if (!inescq && ptr[1] == CHAR_MINUS) |
4442 |
{ |
{ |
4443 |
int d; |
pcre_uint32 d; |
4444 |
ptr += 2; |
ptr += 2; |
4445 |
while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2; |
while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2; |
4446 |
|
|
4459 |
/* Minus (hyphen) at the end of a class is treated as a literal, so put |
/* Minus (hyphen) at the end of a class is treated as a literal, so put |
4460 |
back the pointer and jump to handle the character that preceded it. */ |
back the pointer and jump to handle the character that preceded it. */ |
4461 |
|
|
4462 |
if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET)) |
if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET)) |
4463 |
{ |
{ |
4464 |
ptr = oldptr; |
ptr = oldptr; |
4465 |
goto CLASS_SINGLE_CHARACTER; |
goto CLASS_SINGLE_CHARACTER; |
4488 |
|
|
4489 |
/* \b is backspace; any other special means the '-' was literal. */ |
/* \b is backspace; any other special means the '-' was literal. */ |
4490 |
|
|
4491 |
if (descape > 0) |
if (descape != 0) |
4492 |
{ |
{ |
4493 |
if (descape == ESC_b) d = CHAR_BS; else |
if (descape == ESC_b) d = CHAR_BS; else |
4494 |
{ |
{ |
4546 |
{ |
{ |
4547 |
ptr++; |
ptr++; |
4548 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
4549 |
|
zeroreqcharflags = reqcharflags; |
4550 |
|
|
4551 |
if (negate_class) |
if (negate_class) |
4552 |
{ |
{ |
4553 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
|
// FIXMEchpe pcreuint32? |
|
4554 |
int d; |
int d; |
4555 |
#endif |
#endif |
4556 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
4557 |
zerofirstchar = firstchar; |
zerofirstchar = firstchar; |
4558 |
|
zerofirstcharflags = firstcharflags; |
4559 |
|
|
4560 |
/* For caseless UTF-8 mode when UCP support is available, check |
/* For caseless UTF-8 mode when UCP support is available, check |
4561 |
whether this character has more than one other case. If so, generate |
whether this character has more than one other case. If so, generate |
4614 |
If we are at the end of an internal nested string, revert to the outer |
If we are at the end of an internal nested string, revert to the outer |
4615 |
string. */ |
string. */ |
4616 |
|
|
4617 |
while (((c = *(++ptr)) != 0 || |
while (((c = *(++ptr)) != CHAR_NULL || |
4618 |
(nestptr != NULL && |
(nestptr != NULL && |
4619 |
(ptr = nestptr, nestptr = NULL, c = *(++ptr)) != 0)) && |
(ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) && |
4620 |
(c != CHAR_RIGHT_SQUARE_BRACKET || inescq)); |
(c != CHAR_RIGHT_SQUARE_BRACKET || inescq)); |
4621 |
|
|
4622 |
/* Check for missing terminating ']' */ |
/* Check for missing terminating ']' */ |
4623 |
|
|
4624 |
if (c == 0) |
if (c == CHAR_NULL) |
4625 |
{ |
{ |
4626 |
*errorcodeptr = ERR6; |
*errorcodeptr = ERR6; |
4627 |
goto FAILED; |
goto FAILED; |
4643 |
setting, whatever the repeat count. Any reqchar setting must remain |
setting, whatever the repeat count. Any reqchar setting must remain |
4644 |
unchanged after any kind of repeat. */ |
unchanged after any kind of repeat. */ |
4645 |
|
|
4646 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
4647 |
zerofirstchar = firstchar; |
zerofirstchar = firstchar; |
4648 |
|
zerofirstcharflags = firstcharflags; |
4649 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
4650 |
|
zeroreqcharflags = reqcharflags; |
4651 |
|
|
4652 |
/* If there are characters with values > 255, we have to compile an |
/* If there are characters with values > 255, we have to compile an |
4653 |
extended class, with its own opcode, unless there was a negated special |
extended class, with its own opcode, unless there was a negated special |
4742 |
if (repeat_min == 0) |
if (repeat_min == 0) |
4743 |
{ |
{ |
4744 |
firstchar = zerofirstchar; /* Adjust for zero repeat */ |
firstchar = zerofirstchar; /* Adjust for zero repeat */ |
4745 |
|
firstcharflags = zerofirstcharflags; |
4746 |
reqchar = zeroreqchar; /* Ditto */ |
reqchar = zeroreqchar; /* Ditto */ |
4747 |
|
reqcharflags = zeroreqcharflags; |
4748 |
} |
} |
4749 |
|
|
4750 |
/* Remember whether this is a variable length repeat */ |
/* Remember whether this is a variable length repeat */ |
4847 |
{ |
{ |
4848 |
c = code[-1]; |
c = code[-1]; |
4849 |
if (*previous <= OP_CHARI && repeat_min > 1) |
if (*previous <= OP_CHARI && repeat_min > 1) |
4850 |
reqchar = c | req_caseopt | cd->req_varyopt; |
{ |
4851 |
|
reqchar = c; |
4852 |
|
reqcharflags = req_caseopt | cd->req_varyopt; |
4853 |
|
} |
4854 |
} |
} |
4855 |
|
|
4856 |
/* If the repetition is unlimited, it pays to see if the next thing on |
/* If the repetition is unlimited, it pays to see if the next thing on |
5232 |
|
|
5233 |
else |
else |
5234 |
{ |
{ |
5235 |
if (groupsetfirstchar && reqchar < 0) reqchar = firstchar; |
if (groupsetfirstchar && reqcharflags < 0) |
5236 |
|
{ |
5237 |
|
reqchar = firstchar; |
5238 |
|
reqcharflags = firstcharflags; |
5239 |
|
} |
5240 |
|
|
5241 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
5242 |
{ |
{ |
5515 |
else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) |
else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) |
5516 |
{ |
{ |
5517 |
tempcode += PRIV(OP_lengths)[*tempcode]; |
tempcode += PRIV(OP_lengths)[*tempcode]; |
5518 |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
#ifdef SUPPORT_UTF |
5519 |
if (utf && HAS_EXTRALEN(tempcode[-1])) |
if (utf && HAS_EXTRALEN(tempcode[-1])) |
5520 |
tempcode += GET_EXTRALEN(tempcode[-1]); |
tempcode += GET_EXTRALEN(tempcode[-1]); |
5521 |
#endif |
#endif |
5611 |
if (*ptr == CHAR_COLON) |
if (*ptr == CHAR_COLON) |
5612 |
{ |
{ |
5613 |
arg = ++ptr; |
arg = ++ptr; |
5614 |
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
5615 |
arglen = (int)(ptr - arg); |
arglen = (int)(ptr - arg); |
5616 |
if ((unsigned int)arglen > MAX_MARK) |
if ((unsigned int)arglen > MAX_MARK) |
5617 |
{ |
{ |
5656 |
(cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; |
(cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; |
5657 |
|
|
5658 |
/* Do not set firstchar after *ACCEPT */ |
/* Do not set firstchar after *ACCEPT */ |
5659 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
5660 |
} |
} |
5661 |
|
|
5662 |
/* Handle other cases with/without an argument */ |
/* Handle other cases with/without an argument */ |
5725 |
{ |
{ |
5726 |
case CHAR_NUMBER_SIGN: /* Comment; skip to ket */ |
case CHAR_NUMBER_SIGN: /* Comment; skip to ket */ |
5727 |
ptr++; |
ptr++; |
5728 |
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
5729 |
if (*ptr == 0) |
if (*ptr == CHAR_NULL) |
5730 |
{ |
{ |
5731 |
*errorcodeptr = ERR18; |
*errorcodeptr = ERR18; |
5732 |
goto FAILED; |
goto FAILED; |
5802 |
} |
} |
5803 |
else |
else |
5804 |
{ |
{ |
5805 |
terminator = 0; |
terminator = CHAR_NULL; |
5806 |
if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr); |
if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr); |
5807 |
} |
} |
5808 |
|
|
5822 |
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) |
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) |
5823 |
{ |
{ |
5824 |
if (recno >= 0) |
if (recno >= 0) |
5825 |
recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1; |
recno = (IS_DIGIT(*ptr))? recno * 10 + (int)(*ptr - CHAR_0) : -1; |
5826 |
ptr++; |
ptr++; |
5827 |
} |
} |
5828 |
namelen = (int)(ptr - name); |
namelen = (int)(ptr - name); |
5829 |
|
|
5830 |
if ((terminator > 0 && *ptr++ != terminator) || |
if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) || |
5831 |
*ptr++ != CHAR_RIGHT_PARENTHESIS) |
*ptr++ != CHAR_RIGHT_PARENTHESIS) |
5832 |
{ |
{ |
5833 |
ptr--; /* Error offset */ |
ptr--; /* Error offset */ |
5892 |
code[1+LINK_SIZE]++; |
code[1+LINK_SIZE]++; |
5893 |
} |
} |
5894 |
|
|
5895 |
/* If terminator == 0 it means that the name followed directly after |
/* If terminator == CHAR_NULL it means that the name followed directly |
5896 |
the opening parenthesis [e.g. (?(abc)...] and in this case there are |
after the opening parenthesis [e.g. (?(abc)...] and in this case there |
5897 |
some further alternatives to try. For the cases where terminator != 0 |
are some further alternatives to try. For the cases where terminator != |
5898 |
[things like (?(<name>... or (?('name')... or (?(R&name)... ] we have |
0 [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have |
5899 |
now checked all the possibilities, so give an error. */ |
now checked all the possibilities, so give an error. */ |
5900 |
|
|
5901 |
else if (terminator != 0) |
else if (terminator != CHAR_NULL) |
5902 |
{ |
{ |
5903 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
5904 |
goto FAILED; |
goto FAILED; |
6067 |
|
|
6068 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
6069 |
{ |
{ |
6070 |
if (*ptr != terminator) |
if (*ptr != (pcre_uchar)terminator) |
6071 |
{ |
{ |
6072 |
*errorcodeptr = ERR42; |
*errorcodeptr = ERR42; |
6073 |
goto FAILED; |
goto FAILED; |
6209 |
*errorcodeptr = ERR62; |
*errorcodeptr = ERR62; |
6210 |
goto FAILED; |
goto FAILED; |
6211 |
} |
} |
6212 |
if (*ptr != terminator) |
if (*ptr != (pcre_uchar)terminator) |
6213 |
{ |
{ |
6214 |
*errorcodeptr = ERR42; |
*errorcodeptr = ERR42; |
6215 |
goto FAILED; |
goto FAILED; |
6315 |
while(IS_DIGIT(*ptr)) |
while(IS_DIGIT(*ptr)) |
6316 |
recno = recno * 10 + *ptr++ - CHAR_0; |
recno = recno * 10 + *ptr++ - CHAR_0; |
6317 |
|
|
6318 |
if (*ptr != terminator) |
if (*ptr != (pcre_uchar)terminator) |
6319 |
{ |
{ |
6320 |
*errorcodeptr = ERR29; |
*errorcodeptr = ERR29; |
6321 |
goto FAILED; |
goto FAILED; |
6419 |
|
|
6420 |
/* Can't determine a first byte now */ |
/* Can't determine a first byte now */ |
6421 |
|
|
6422 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
6423 |
continue; |
continue; |
6424 |
|
|
6425 |
|
|
6553 |
cond_depth + |
cond_depth + |
6554 |
((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */ |
((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */ |
6555 |
&subfirstchar, /* For possible first char */ |
&subfirstchar, /* For possible first char */ |
6556 |
|
&subfirstcharflags, |
6557 |
&subreqchar, /* For possible last char */ |
&subreqchar, /* For possible last char */ |
6558 |
|
&subreqcharflags, |
6559 |
bcptr, /* Current branch chain */ |
bcptr, /* Current branch chain */ |
6560 |
cd, /* Tables block */ |
cd, /* Tables block */ |
6561 |
(lengthptr == NULL)? NULL : /* Actual compile phase */ |
(lengthptr == NULL)? NULL : /* Actual compile phase */ |
6616 |
*errorcodeptr = ERR27; |
*errorcodeptr = ERR27; |
6617 |
goto FAILED; |
goto FAILED; |
6618 |
} |
} |
6619 |
if (condcount == 1) subfirstchar = subreqchar = REQ_NONE; |
if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE; |
6620 |
} |
} |
6621 |
} |
} |
6622 |
|
|
6665 |
back off. */ |
back off. */ |
6666 |
|
|
6667 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
6668 |
|
zeroreqcharflags = reqcharflags; |
6669 |
zerofirstchar = firstchar; |
zerofirstchar = firstchar; |
6670 |
|
zerofirstcharflags = firstcharflags; |
6671 |
groupsetfirstchar = FALSE; |
groupsetfirstchar = FALSE; |
6672 |
|
|
6673 |
if (bravalue >= OP_ONCE) |
if (bravalue >= OP_ONCE) |
6678 |
no firstchar, set "none" for the whole branch. In both cases, a zero |
no firstchar, set "none" for the whole branch. In both cases, a zero |
6679 |
repeat forces firstchar to "none". */ |
repeat forces firstchar to "none". */ |
6680 |
|
|
6681 |
if (firstchar == REQ_UNSET) |
if (firstcharflags == REQ_UNSET) |
6682 |
{ |
{ |
6683 |
if (subfirstchar >= 0) |
if (subfirstcharflags >= 0) |
6684 |
{ |
{ |
6685 |
firstchar = subfirstchar; |
firstchar = subfirstchar; |
6686 |
|
firstcharflags = subfirstcharflags; |
6687 |
groupsetfirstchar = TRUE; |
groupsetfirstchar = TRUE; |
6688 |
} |
} |
6689 |
else firstchar = REQ_NONE; |
else firstcharflags = REQ_NONE; |
6690 |
zerofirstchar = REQ_NONE; |
zerofirstcharflags = REQ_NONE; |
6691 |
} |
} |
6692 |
|
|
6693 |
/* If firstchar was previously set, convert the subpattern's firstchar |
/* If firstchar was previously set, convert the subpattern's firstchar |
6694 |
into reqchar if there wasn't one, using the vary flag that was in |
into reqchar if there wasn't one, using the vary flag that was in |
6695 |
existence beforehand. */ |
existence beforehand. */ |
6696 |
|
|
6697 |
else if (subfirstchar >= 0 && subreqchar < 0) |
else if (subfirstcharflags >= 0 && subreqcharflags < 0) |
6698 |
subreqchar = subfirstchar | tempreqvary; |
{ |
6699 |
|
subreqchar = subfirstchar; |
6700 |
|
subreqcharflags = subfirstcharflags | tempreqvary; |
6701 |
|
} |
6702 |
|
|
6703 |
/* If the subpattern set a required byte (or set a first byte that isn't |
/* If the subpattern set a required byte (or set a first byte that isn't |
6704 |
really the first byte - see above), set it. */ |
really the first byte - see above), set it. */ |
6705 |
|
|
6706 |
if (subreqchar >= 0) reqchar = subreqchar; |
if (subreqcharflags >= 0) |
6707 |
|
{ |
6708 |
|
reqchar = subreqchar; |
6709 |
|
reqcharflags = subreqcharflags; |
6710 |
|
} |
6711 |
} |
} |
6712 |
|
|
6713 |
/* For a forward assertion, we take the reqchar, if set. This can be |
/* For a forward assertion, we take the reqchar, if set. This can be |
6718 |
of a firstchar. This is overcome by a scan at the end if there's no |
of a firstchar. This is overcome by a scan at the end if there's no |
6719 |
firstchar, looking for an asserted first char. */ |
firstchar, looking for an asserted first char. */ |
6720 |
|
|
6721 |
else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar; |
else if (bravalue == OP_ASSERT && subreqcharflags >= 0) |
6722 |
|
{ |
6723 |
|
reqchar = subreqchar; |
6724 |
|
reqcharflags = subreqcharflags; |
6725 |
|
} |
6726 |
break; /* End of processing '(' */ |
break; /* End of processing '(' */ |
6727 |
|
|
6728 |
|
|
6730 |
/* Handle metasequences introduced by \. For ones like \d, the ESC_ values |
/* Handle metasequences introduced by \. For ones like \d, the ESC_ values |
6731 |
are arranged to be the negation of the corresponding OP_values in the |
are arranged to be the negation of the corresponding OP_values in the |
6732 |
default case when PCRE_UCP is not set. For the back references, the values |
default case when PCRE_UCP is not set. For the back references, the values |
6733 |
are ESC_REF plus the reference number. Only back references and those types |
are the negative of the reference number. Only back references and those types |
6734 |
that consume a character may be repeated. We can test for values between |
that consume a character may be repeated. We can test for values between |
6735 |
ESC_b and ESC_Z for the latter; this may have to change if any new ones are |
ESC_b and ESC_Z for the latter; this may have to change if any new ones are |
6736 |
ever created. */ |
ever created. */ |
6758 |
/* For metasequences that actually match a character, we disable the |
/* For metasequences that actually match a character, we disable the |
6759 |
setting of a first character if it hasn't already been set. */ |
setting of a first character if it hasn't already been set. */ |
6760 |
|
|
6761 |
if (firstchar == REQ_UNSET && escape > ESC_b && escape < ESC_Z) |
if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z) |
6762 |
firstchar = REQ_NONE; |
firstcharflags = REQ_NONE; |
6763 |
|
|
6764 |
/* Set values to reset to if this is followed by a zero repeat. */ |
/* Set values to reset to if this is followed by a zero repeat. */ |
6765 |
|
|
6766 |
zerofirstchar = firstchar; |
zerofirstchar = firstchar; |
6767 |
|
zerofirstcharflags = firstcharflags; |
6768 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
6769 |
|
zeroreqcharflags = reqcharflags; |
6770 |
|
|
6771 |
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n' |
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n' |
6772 |
is a subroutine call by number (Oniguruma syntax). In fact, the value |
is a subroutine call by number (Oniguruma syntax). In fact, the value |
6773 |
ESC_g is returned only for these cases. So we don't need to check for < |
ESC_g is returned only for these cases. So we don't need to check for < |
6774 |
or ' if the value is ESC_g. For the Perl syntax \g{n} the value is |
or ' if the value is ESC_g. For the Perl syntax \g{n} the value is |
6775 |
ESC_REF+n, and for the Perl syntax \g{name} the result is ESC_k (as |
-n, and for the Perl syntax \g{name} the result is ESC_k (as |
6776 |
that is a synonym for a named back reference). */ |
that is a synonym for a named back reference). */ |
6777 |
|
|
6778 |
if (escape == ESC_g) |
if (escape == ESC_g) |
6795 |
if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS) |
if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS) |
6796 |
{ |
{ |
6797 |
BOOL is_a_number = TRUE; |
BOOL is_a_number = TRUE; |
6798 |
for (p = ptr + 1; *p != 0 && *p != terminator; p++) |
for (p = ptr + 1; *p != CHAR_NULL && *p != (pcre_uchar)terminator; p++) |
6799 |
{ |
{ |
6800 |
if (!MAX_255(*p)) { is_a_number = FALSE; break; } |
if (!MAX_255(*p)) { is_a_number = FALSE; break; } |
6801 |
if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE; |
if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE; |
6802 |
if ((cd->ctypes[*p] & ctype_word) == 0) break; |
if ((cd->ctypes[*p] & ctype_word) == 0) break; |
6803 |
} |
} |
6804 |
if (*p != terminator) |
if (*p != (pcre_uchar)terminator) |
6805 |
{ |
{ |
6806 |
*errorcodeptr = ERR57; |
*errorcodeptr = ERR57; |
6807 |
break; |
break; |
6819 |
|
|
6820 |
p = ptr + 2; |
p = ptr + 2; |
6821 |
while (IS_DIGIT(*p)) p++; |
while (IS_DIGIT(*p)) p++; |
6822 |
if (*p != terminator) |
if (*p != (pcre_uchar)terminator) |
6823 |
{ |
{ |
6824 |
*errorcodeptr = ERR57; |
*errorcodeptr = ERR57; |
6825 |
break; |
break; |
6850 |
not set to cope with cases like (?=(\w+))\1: which would otherwise set |
not set to cope with cases like (?=(\w+))\1: which would otherwise set |
6851 |
':' later. */ |
':' later. */ |
6852 |
|
|
6853 |
if (escape >= ESC_REF) |
if (escape < 0) |
6854 |
{ |
{ |
6855 |
open_capitem *oc; |
open_capitem *oc; |
6856 |
recno = escape - ESC_REF; |
recno = -escape; |
6857 |
|
|
6858 |
HANDLE_REFERENCE: /* Come here from named backref handling */ |
HANDLE_REFERENCE: /* Come here from named backref handling */ |
6859 |
if (firstchar == REQ_UNSET) firstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; |
6860 |
previous = code; |
previous = code; |
6861 |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; |
6862 |
PUT2INC(code, 0, recno); |
PUT2INC(code, 0, recno); |
6883 |
else if (escape == ESC_P || escape == ESC_p) |
else if (escape == ESC_P || escape == ESC_p) |
6884 |
{ |
{ |
6885 |
BOOL negated; |
BOOL negated; |
6886 |
int pdata; |
unsigned int ptype = 0, pdata = 0; |
6887 |
int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr); |
if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) |
6888 |
if (ptype < 0) goto FAILED; |
goto FAILED; |
6889 |
previous = code; |
previous = code; |
6890 |
*code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP; |
*code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP; |
6891 |
*code++ = ptype; |
*code++ = ptype; |
6959 |
mclength = 1; |
mclength = 1; |
6960 |
mcbuffer[0] = c; |
mcbuffer[0] = c; |
6961 |
|
|
6962 |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
#ifdef SUPPORT_UTF |
6963 |
if (utf && HAS_EXTRALEN(c)) |
if (utf && HAS_EXTRALEN(c)) |
6964 |
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); |
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); |
6965 |
#endif |
#endif |
6983 |
*code++ = OP_PROP; |
*code++ = OP_PROP; |
6984 |
*code++ = PT_CLIST; |
*code++ = PT_CLIST; |
6985 |
*code++ = c; |
*code++ = c; |
6986 |
if (firstchar == REQ_UNSET) firstchar = zerofirstchar = REQ_NONE; |
if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE; |
6987 |
break; |
break; |
6988 |
} |
} |
6989 |
} |
} |
7004 |
Otherwise, leave the firstchar value alone, and don't change it on a zero |
Otherwise, leave the firstchar value alone, and don't change it on a zero |
7005 |
repeat. */ |
repeat. */ |
7006 |
|
|
7007 |
if (firstchar == REQ_UNSET) |
if (firstcharflags == REQ_UNSET) |
7008 |
{ |
{ |
7009 |
zerofirstchar = REQ_NONE; |
zerofirstcharflags = REQ_NONE; |
7010 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
7011 |
|
zeroreqcharflags = reqcharflags; |
7012 |
|
|
7013 |
/* If the character is more than one byte long, we can set firstchar |
/* If the character is more than one byte long, we can set firstchar |
7014 |
only if it is not to be matched caselessly. */ |
only if it is not to be matched caselessly. */ |
7016 |
if (mclength == 1 || req_caseopt == 0) |
if (mclength == 1 || req_caseopt == 0) |
7017 |
{ |
{ |
7018 |
firstchar = mcbuffer[0] | req_caseopt; |
firstchar = mcbuffer[0] | req_caseopt; |
7019 |
if (mclength != 1) reqchar = code[-1] | cd->req_varyopt; |
firstchar = mcbuffer[0]; |
7020 |
|
firstcharflags = req_caseopt; |
7021 |
|
|
7022 |
|
if (mclength != 1) |
7023 |
|
{ |
7024 |
|
reqchar = code[-1]; |
7025 |
|
reqcharflags = cd->req_varyopt; |
7026 |
|
} |
7027 |
} |
} |
7028 |
else firstchar = reqchar = REQ_NONE; |
else firstcharflags = reqcharflags = REQ_NONE; |
7029 |
} |
} |
7030 |
|
|
7031 |
/* firstchar was previously set; we can set reqchar only if the length is |
/* firstchar was previously set; we can set reqchar only if the length is |
7034 |
else |
else |
7035 |
{ |
{ |
7036 |
zerofirstchar = firstchar; |
zerofirstchar = firstchar; |
7037 |
|
zerofirstcharflags = firstcharflags; |
7038 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
7039 |
|
zeroreqcharflags = reqcharflags; |
7040 |
if (mclength == 1 || req_caseopt == 0) |
if (mclength == 1 || req_caseopt == 0) |
7041 |
reqchar = code[-1] | req_caseopt | cd->req_varyopt; |
{ |
7042 |
|
reqchar = code[-1]; |
7043 |
|
reqcharflags = req_caseopt | cd->req_varyopt; |
7044 |
|
} |
7045 |
} |
} |
7046 |
|
|
7047 |
break; /* End of literal character handling */ |
break; /* End of literal character handling */ |
7060 |
|
|
7061 |
|
|
7062 |
|
|
|
|
|
7063 |
/************************************************* |
/************************************************* |
7064 |
* Compile sequence of alternatives * |
* Compile sequence of alternatives * |
7065 |
*************************************************/ |
*************************************************/ |
7080 |
reset_bracount TRUE to reset the count for each branch |
reset_bracount TRUE to reset the count for each branch |
7081 |
skipbytes skip this many bytes at start (for brackets and OP_COND) |
skipbytes skip this many bytes at start (for brackets and OP_COND) |
7082 |
cond_depth depth of nesting for conditional subpatterns |
cond_depth depth of nesting for conditional subpatterns |
7083 |
firstcharptr place to put the first required character, or a negative number |
firstcharptr place to put the first required character |
7084 |
reqcharptr place to put the last required character, or a negative number |
firstcharflagsptr place to put the first character flags, or a negative number |
7085 |
|
reqcharptr place to put the last required character |
7086 |
|
reqcharflagsptr place to put the last required character flags, or a negative number |
7087 |
bcptr pointer to the chain of currently open branches |
bcptr pointer to the chain of currently open branches |
7088 |
cd points to the data block with tables pointers etc. |
cd points to the data block with tables pointers etc. |
7089 |
lengthptr NULL during the real compile phase |
lengthptr NULL during the real compile phase |
7095 |
static BOOL |
static BOOL |
7096 |
compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr, |
compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr, |
7097 |
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, |
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, |
7098 |
int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr, |
int cond_depth, |
7099 |
|
pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, |
7100 |
|
pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, |
7101 |
branch_chain *bcptr, compile_data *cd, int *lengthptr) |
branch_chain *bcptr, compile_data *cd, int *lengthptr) |
7102 |
{ |
{ |
7103 |
const pcre_uchar *ptr = *ptrptr; |
const pcre_uchar *ptr = *ptrptr; |
7107 |
pcre_uchar *reverse_count = NULL; |
pcre_uchar *reverse_count = NULL; |
7108 |
open_capitem capitem; |
open_capitem capitem; |
7109 |
int capnumber = 0; |
int capnumber = 0; |
7110 |
pcre_int32 firstchar, reqchar; |
pcre_uint32 firstchar, reqchar; |
7111 |
pcre_int32 branchfirstchar, branchreqchar; |
pcre_int32 firstcharflags, reqcharflags; |
7112 |
|
pcre_uint32 branchfirstchar, branchreqchar; |
7113 |
|
pcre_int32 branchfirstcharflags, branchreqcharflags; |
7114 |
int length; |
int length; |
7115 |
int orig_bracount; |
unsigned int orig_bracount; |
7116 |
int max_bracount; |
unsigned int max_bracount; |
7117 |
branch_chain bc; |
branch_chain bc; |
7118 |
|
|
7119 |
bc.outer = bcptr; |
bc.outer = bcptr; |
7120 |
bc.current_branch = code; |
bc.current_branch = code; |
7121 |
|
|
7122 |
firstchar = reqchar = REQ_UNSET; |
firstchar = reqchar = 0; |
7123 |
|
firstcharflags = reqcharflags = REQ_UNSET; |
7124 |
|
|
7125 |
/* Accumulate the length for use in the pre-compile phase. Start with the |
/* Accumulate the length for use in the pre-compile phase. Start with the |
7126 |
length of the BRA and KET and any extra bytes that are required at the |
length of the BRA and KET and any extra bytes that are required at the |
7180 |
into the length. */ |
into the length. */ |
7181 |
|
|
7182 |
if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar, |
if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar, |
7183 |
&branchreqchar, &bc, cond_depth, cd, |
&branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc, |
7184 |
(lengthptr == NULL)? NULL : &length)) |
cond_depth, cd, (lengthptr == NULL)? NULL : &length)) |
7185 |
{ |
{ |
7186 |
*ptrptr = ptr; |
*ptrptr = ptr; |
7187 |
return FALSE; |
return FALSE; |
7202 |
if (*last_branch != OP_ALT) |
if (*last_branch != OP_ALT) |
7203 |
{ |
{ |
7204 |
firstchar = branchfirstchar; |
firstchar = branchfirstchar; |
7205 |
|
firstcharflags = branchfirstcharflags; |
7206 |
reqchar = branchreqchar; |
reqchar = branchreqchar; |
7207 |
|
reqcharflags = branchreqcharflags; |
7208 |
} |
} |
7209 |
|
|
7210 |
/* If this is not the first branch, the first char and reqchar have to |
/* If this is not the first branch, the first char and reqchar have to |
7218 |
we have to abandon the firstchar for the regex, but if there was |
we have to abandon the firstchar for the regex, but if there was |
7219 |
previously no reqchar, it takes on the value of the old firstchar. */ |
previously no reqchar, it takes on the value of the old firstchar. */ |
7220 |
|
|
7221 |
if (firstchar >= 0 && firstchar != branchfirstchar) |
if (firstcharflags >= 0 && |
7222 |
|
(firstcharflags != branchfirstcharflags || firstchar != branchfirstchar)) |
7223 |
{ |
{ |
7224 |
if (reqchar < 0) reqchar = firstchar; |
if (reqcharflags < 0) |
7225 |
firstchar = REQ_NONE; |
{ |
7226 |
|
reqchar = firstchar; |
7227 |
|
reqcharflags = firstcharflags; |
7228 |
|
} |
7229 |
|
firstcharflags = REQ_NONE; |
7230 |
} |
} |
7231 |
|
|
7232 |
/* If we (now or from before) have no firstchar, a firstchar from the |
/* If we (now or from before) have no firstchar, a firstchar from the |
7233 |
branch becomes a reqchar if there isn't a branch reqchar. */ |
branch becomes a reqchar if there isn't a branch reqchar. */ |
7234 |
|
|
7235 |
if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0) |
if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0) |
7236 |
branchreqchar = branchfirstchar; |
{ |
7237 |
|
branchreqchar = branchfirstchar; |
7238 |
|
branchreqcharflags = branchfirstcharflags; |
7239 |
|
} |
7240 |
|
|
7241 |
/* Now ensure that the reqchars match */ |
/* Now ensure that the reqchars match */ |
7242 |
|
|
7243 |
if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY)) |
if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) || |
7244 |
reqchar = REQ_NONE; |
reqchar != branchreqchar) |
7245 |
else reqchar |= branchreqchar; /* To "or" REQ_VARY */ |
reqcharflags = REQ_NONE; |
7246 |
|
else |
7247 |
|
{ |
7248 |
|
reqchar = branchreqchar; |
7249 |
|
reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */ |
7250 |
|
} |
7251 |
} |
} |
7252 |
|
|
7253 |
/* If lookbehind, check that this branch matches a fixed-length string, and |
/* If lookbehind, check that this branch matches a fixed-length string, and |
7343 |
*codeptr = code; |
*codeptr = code; |
7344 |
*ptrptr = ptr; |
*ptrptr = ptr; |
7345 |
*firstcharptr = firstchar; |
*firstcharptr = firstchar; |
7346 |
|
*firstcharflagsptr = firstcharflags; |
7347 |
*reqcharptr = reqchar; |
*reqcharptr = reqchar; |
7348 |
|
*reqcharflagsptr = reqcharflags; |
7349 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
7350 |
{ |
{ |
7351 |
if (OFLOW_MAX - *lengthptr < length) |
if (OFLOW_MAX - *lengthptr < length) |
7632 |
|
|
7633 |
Arguments: |
Arguments: |
7634 |
code points to start of expression (the bracket) |
code points to start of expression (the bracket) |
7635 |
|
flags points to the first char flags, or to REQ_NONE |
7636 |
inassert TRUE if in an assertion |
inassert TRUE if in an assertion |
7637 |
|
|
7638 |
Returns: -1 or the fixed first char |
Returns: the fixed first char, or 0 with REQ_NONE in flags |
7639 |
*/ |
*/ |
7640 |
|
|
7641 |
static int |
static pcre_uint32 |
7642 |
find_firstassertedchar(const pcre_uchar *code, BOOL inassert) |
find_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags, |
7643 |
|
BOOL inassert) |
7644 |
{ |
{ |
7645 |
register int c = -1; |
register pcre_uint32 c = 0; |
7646 |
|
int cflags = REQ_NONE; |
7647 |
|
|
7648 |
|
*flags = REQ_NONE; |
7649 |
do { |
do { |
7650 |
int d; |
pcre_uint32 d; |
7651 |
|
int dflags; |
7652 |
int xl = (*code == OP_CBRA || *code == OP_SCBRA || |
int xl = (*code == OP_CBRA || *code == OP_SCBRA || |
7653 |
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; |
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; |
7654 |
const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl, |
const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl, |
7655 |
TRUE); |
TRUE); |
7656 |
register int op = *scode; |
register pcre_uchar op = *scode; |
7657 |
|
|
7658 |
switch(op) |
switch(op) |
7659 |
{ |
{ |
7660 |
default: |
default: |
7661 |
return -1; |
return 0; |
7662 |
|
|
7663 |
case OP_BRA: |
case OP_BRA: |
7664 |
case OP_BRAPOS: |
case OP_BRAPOS: |
7670 |
case OP_ONCE: |
case OP_ONCE: |
7671 |
case OP_ONCE_NC: |
case OP_ONCE_NC: |
7672 |
case OP_COND: |
case OP_COND: |
7673 |
if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0) |
d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT); |
7674 |
return -1; |
if (dflags < 0) |
7675 |
if (c < 0) c = d; else if (c != d) return -1; |
return 0; |
7676 |
|
if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0; |
7677 |
break; |
break; |
7678 |
|
|
7679 |
case OP_EXACT: |
case OP_EXACT: |
7684 |
case OP_PLUS: |
case OP_PLUS: |
7685 |
case OP_MINPLUS: |
case OP_MINPLUS: |
7686 |
case OP_POSPLUS: |
case OP_POSPLUS: |
7687 |
if (!inassert) return -1; |
if (!inassert) return 0; |
7688 |
if (c < 0) c = scode[1]; |
if (cflags < 0) { c = scode[1]; cflags = 0; } |
7689 |
else if (c != scode[1]) return -1; |
else if (c != scode[1]) return 0; |
7690 |
break; |
break; |
7691 |
|
|
7692 |
case OP_EXACTI: |
case OP_EXACTI: |
7697 |
case OP_PLUSI: |
case OP_PLUSI: |
7698 |
case OP_MINPLUSI: |
case OP_MINPLUSI: |
7699 |
case OP_POSPLUSI: |
case OP_POSPLUSI: |
7700 |
if (!inassert) return -1; |
if (!inassert) return 0; |
7701 |
if (c < 0) c = scode[1] | REQ_CASELESS; |
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } |
7702 |
else if (c != scode[1]) return -1; |
else if (c != scode[1]) return 0; |
7703 |
break; |
break; |
7704 |
} |
} |
7705 |
|
|
7706 |
code += GET(code, 1); |
code += GET(code, 1); |
7707 |
} |
} |
7708 |
while (*code == OP_ALT); |
while (*code == OP_ALT); |
7709 |
|
|
7710 |
|
*flags = cflags; |
7711 |
return c; |
return c; |
7712 |
} |
} |
7713 |
|
|
7775 |
{ |
{ |
7776 |
REAL_PCRE *re; |
REAL_PCRE *re; |
7777 |
int length = 1; /* For final END opcode */ |
int length = 1; /* For final END opcode */ |
7778 |
pcre_int32 firstchar, reqchar; |
pcre_uint32 firstchar, reqchar; |
7779 |
|
pcre_int32 firstcharflags, reqcharflags; |
7780 |
int newline; |
int newline; |
7781 |
int errorcode = 0; |
int errorcode = 0; |
7782 |
int skipatstart = 0; |
int skipatstart = 0; |
7848 |
{ |
{ |
7849 |
int newnl = 0; |
int newnl = 0; |
7850 |
int newbsr = 0; |
int newbsr = 0; |
7851 |
|
|
7852 |
|
/* For completeness and backward compatibility, (*UTFn) is supported in the |
7853 |
|
relevant libraries, but (*UTF) is generic and always supported. Note that |
7854 |
|
PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */ |
7855 |
|
|
7856 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
7857 |
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0) |
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0) |
7858 |
{ skipatstart += 7; options |= PCRE_UTF8; continue; } |
{ skipatstart += 7; options |= PCRE_UTF8; continue; } |
7859 |
#endif |
#endif |
7860 |
#ifdef COMPILE_PCRE16 |
#ifdef COMPILE_PCRE16 |
7861 |
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0) |
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF16_RIGHTPAR, 6) == 0) |
7862 |
{ skipatstart += 8; options |= PCRE_UTF16; continue; } |
{ skipatstart += 8; options |= PCRE_UTF16; continue; } |
7863 |
#endif |
#endif |
7864 |
#ifdef COMPILE_PCRE32 |
#ifdef COMPILE_PCRE32 |
7865 |
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0) |
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF32_RIGHTPAR, 6) == 0) |
7866 |
{ skipatstart += 8; options |= PCRE_UTF32; continue; } |
{ skipatstart += 8; options |= PCRE_UTF32; continue; } |
7867 |
#endif |
#endif |
7868 |
|
|
7869 |
|
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 4) == 0) |
7870 |
|
{ skipatstart += 6; options |= PCRE_UTF8; continue; } |
7871 |
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0) |
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0) |
7872 |
{ skipatstart += 6; options |= PCRE_UCP; continue; } |
{ skipatstart += 6; options |= PCRE_UCP; continue; } |
7873 |
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) |
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) |
8033 |
code = cworkspace; |
code = cworkspace; |
8034 |
*code = OP_BRA; |
*code = OP_BRA; |
8035 |
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, |
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, |
8036 |
FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length); |
FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, |
8037 |
|
cd, &length); |
8038 |
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; |
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; |
8039 |
|
|
8040 |
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, |
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, |
8112 |
code = (pcre_uchar *)codestart; |
code = (pcre_uchar *)codestart; |
8113 |
*code = OP_BRA; |
*code = OP_BRA; |
8114 |
(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0, |
(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0, |
8115 |
&firstchar, &reqchar, NULL, cd, NULL); |
&firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL); |
8116 |
re->top_bracket = cd->bracount; |
re->top_bracket = cd->bracount; |
8117 |
re->top_backref = cd->top_backref; |
re->top_backref = cd->top_backref; |
8118 |
re->max_lookbehind = cd->max_lookbehind; |
re->max_lookbehind = cd->max_lookbehind; |
8119 |
re->flags = cd->external_flags | PCRE_MODE; |
re->flags = cd->external_flags | PCRE_MODE; |
8120 |
|
|
8121 |
if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */ |
if (cd->had_accept) |
8122 |
|
{ |
8123 |
|
reqchar = 0; /* Must disable after (*ACCEPT) */ |
8124 |
|
reqcharflags = REQ_NONE; |
8125 |
|
} |
8126 |
|
|
8127 |
/* If not reached end of pattern on success, there's an excess bracket. */ |
/* If not reached end of pattern on success, there's an excess bracket. */ |
8128 |
|
|
8129 |
if (errorcode == 0 && *ptr != 0) errorcode = ERR22; |
if (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22; |
8130 |
|
|
8131 |
/* Fill in the terminating state and check for disastrous overflow, but |
/* Fill in the terminating state and check for disastrous overflow, but |
8132 |
if debugging, leave the test till after things are printed out. */ |
if debugging, leave the test till after things are printed out. */ |
8137 |
if (code - codestart > length) errorcode = ERR23; |
if (code - codestart > length) errorcode = ERR23; |
8138 |
#endif |
#endif |
8139 |
|
|
8140 |
|
#ifdef SUPPORT_VALGRIND |
8141 |
|
/* If the estimated length exceeds the really used length, mark the extra |
8142 |
|
allocated memory as unaddressable, so that any out-of-bound reads can be |
8143 |
|
detected. */ |
8144 |
|
VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar)); |
8145 |
|
#endif |
8146 |
|
|
8147 |
/* Fill in any forward references that are required. There may be repeated |
/* Fill in any forward references that are required. There may be repeated |
8148 |
references; optimize for them, as searching a large regex takes time. */ |
references; optimize for them, as searching a large regex takes time. */ |
8149 |
|
|
8179 |
|
|
8180 |
/* If there were any lookbehind assertions that contained OP_RECURSE |
/* If there were any lookbehind assertions that contained OP_RECURSE |
8181 |
(recursions or subroutine calls), a flag is set for them to be checked here, |
(recursions or subroutine calls), a flag is set for them to be checked here, |
8182 |
because they may contain forward references. Actual recursions can't be fixed |
because they may contain forward references. Actual recursions cannot be fixed |
8183 |
length, but subroutine calls can. It is done like this so that those without |
length, but subroutine calls can. It is done like this so that those without |
8184 |
OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The |
OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The |
8185 |
exceptional ones forgo this. We scan the pattern to check that they are fixed |
exceptional ones forgo this. We scan the pattern to check that they are fixed |
8250 |
if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED; |
if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED; |
8251 |
else |
else |
8252 |
{ |
{ |
8253 |
if (firstchar < 0) |
if (firstcharflags < 0) |
8254 |
firstchar = find_firstassertedchar(codestart, FALSE); |
firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE); |
8255 |
if (firstchar >= 0) /* Remove caseless flag for non-caseable chars */ |
if (firstcharflags >= 0) /* Remove caseless flag for non-caseable chars */ |
8256 |
{ |
{ |
8257 |
#if defined COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
8258 |
re->first_char = firstchar & 0xff; |
re->first_char = firstchar & 0xff; |
8259 |
#elif defined COMPILE_PCRE16 |
#elif defined COMPILE_PCRE16 |
8260 |
re->first_char = firstchar & 0xffff; |
re->first_char = firstchar & 0xffff; |
8261 |
#elif defined COMPILE_PCRE32 |
#elif defined COMPILE_PCRE32 |
8262 |
re->first_char = firstchar & ~REQ_MASK; |
re->first_char = firstchar; |
8263 |
#endif |
#endif |
8264 |
if ((firstchar & REQ_CASELESS) != 0) |
if ((firstcharflags & REQ_CASELESS) != 0) |
8265 |
{ |
{ |
8266 |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
8267 |
/* We ignore non-ASCII first chars in 8 bit mode. */ |
/* We ignore non-ASCII first chars in 8 bit mode. */ |
8293 |
variable length item in the regex. Remove the caseless flag for non-caseable |
variable length item in the regex. Remove the caseless flag for non-caseable |
8294 |
bytes. */ |
bytes. */ |
8295 |
|
|
8296 |
if (reqchar >= 0 && |
if (reqcharflags >= 0 && |
8297 |
((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0)) |
((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0)) |
8298 |
{ |
{ |
8299 |
#if defined COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
8300 |
re->req_char = reqchar & 0xff; |
re->req_char = reqchar & 0xff; |
8301 |
#elif defined COMPILE_PCRE16 |
#elif defined COMPILE_PCRE16 |
8302 |
re->req_char = reqchar & 0xffff; |
re->req_char = reqchar & 0xffff; |
8303 |
#elif defined COMPILE_PCRE32 |
#elif defined COMPILE_PCRE32 |
8304 |
re->req_char = reqchar & ~REQ_MASK; |
re->req_char = reqchar; |
8305 |
#endif |
#endif |
8306 |
if ((reqchar & REQ_CASELESS) != 0) |
if ((reqcharflags & REQ_CASELESS) != 0) |
8307 |
{ |
{ |
8308 |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
8309 |
/* We ignore non-ASCII first chars in 8 bit mode. */ |
/* We ignore non-ASCII first chars in 8 bit mode. */ |