7 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
8 |
|
|
9 |
Written by Philip Hazel |
Written by Philip Hazel |
10 |
Copyright (c) 1997-2012 University of Cambridge |
Copyright (c) 1997-2013 University of Cambridge |
11 |
|
|
12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
194 |
typedef unsigned char pcre_uint8; |
typedef unsigned char pcre_uint8; |
195 |
|
|
196 |
#if USHRT_MAX == 65535 |
#if USHRT_MAX == 65535 |
197 |
typedef unsigned short pcre_uint16; |
typedef unsigned short pcre_uint16; |
198 |
typedef short pcre_int16; |
typedef short pcre_int16; |
199 |
|
#define PCRE_UINT16_MAX USHRT_MAX |
200 |
|
#define PCRE_INT16_MAX SHRT_MAX |
201 |
#elif UINT_MAX == 65535 |
#elif UINT_MAX == 65535 |
202 |
typedef unsigned int pcre_uint16; |
typedef unsigned int pcre_uint16; |
203 |
typedef int pcre_int16; |
typedef int pcre_int16; |
204 |
|
#define PCRE_UINT16_MAX UINT_MAX |
205 |
|
#define PCRE_INT16_MAX INT_MAX |
206 |
#else |
#else |
207 |
# error Cannot determine a type for 16-bit unsigned integers |
#error Cannot determine a type for 16-bit integers |
208 |
#endif |
#endif |
209 |
|
|
210 |
#if UINT_MAX == 4294967295 |
#if UINT_MAX == 4294967295U |
211 |
typedef unsigned int pcre_uint32; |
typedef unsigned int pcre_uint32; |
212 |
typedef int pcre_int32; |
typedef int pcre_int32; |
213 |
#elif ULONG_MAX == 4294967295 |
#define PCRE_UINT32_MAX UINT_MAX |
214 |
typedef unsigned long int pcre_uint32; |
#define PCRE_INT32_MAX INT_MAX |
215 |
typedef long int pcre_int32; |
#elif ULONG_MAX == 4294967295UL |
216 |
|
typedef unsigned long int pcre_uint32; |
217 |
|
typedef long int pcre_int32; |
218 |
|
#define PCRE_UINT32_MAX ULONG_MAX |
219 |
|
#define PCRE_INT32_MAX LONG_MAX |
220 |
#else |
#else |
221 |
# error Cannot determine a type for 32-bit unsigned integers |
#error Cannot determine a type for 32-bit integers |
222 |
#endif |
#endif |
223 |
|
|
224 |
/* When checking for integer overflow in pcre_compile(), we need to handle |
/* When checking for integer overflow in pcre_compile(), we need to handle |
1129 |
|
|
1130 |
|
|
1131 |
/* Private flags containing information about the compiled regex. They used to |
/* Private flags containing information about the compiled regex. They used to |
1132 |
live at the top end of the options word, but that got almost full, so now they |
live at the top end of the options word, but that got almost full, so they were |
1133 |
are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as |
moved to a 16-bit flags word - which got almost full, so now they are in a |
1134 |
the restrictions on partial matching have been lifted. It remains for backwards |
32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the |
1135 |
|
restrictions on partial matching have been lifted. It remains for backwards |
1136 |
compatibility. */ |
compatibility. */ |
1137 |
|
|
1138 |
#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */ |
#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */ |
1139 |
#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */ |
#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */ |
1140 |
#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */ |
#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */ |
1141 |
#define PCRE_FIRSTSET 0x0010 /* first_char is set */ |
#define PCRE_FIRSTSET 0x00000010 /* first_char is set */ |
1142 |
#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */ |
#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */ |
1143 |
#define PCRE_REQCHSET 0x0040 /* req_byte is set */ |
#define PCRE_REQCHSET 0x00000040 /* req_byte is set */ |
1144 |
#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */ |
#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */ |
1145 |
#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */ |
#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */ |
1146 |
#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */ |
#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */ |
1147 |
#define PCRE_JCHANGED 0x0400 /* j option used in regex */ |
#define PCRE_JCHANGED 0x00000400 /* j option used in regex */ |
1148 |
#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */ |
#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ |
1149 |
#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */ |
#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */ |
1150 |
|
#define PCRE_MLSET 0x00002000 /* match limit set by regex */ |
1151 |
|
#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */ |
1152 |
|
#define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */ |
1153 |
|
|
1154 |
#if defined COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
1155 |
#define PCRE_MODE PCRE_MODE8 |
#define PCRE_MODE PCRE_MODE8 |
1174 |
#define PUBLIC_COMPILE_OPTIONS \ |
#define PUBLIC_COMPILE_OPTIONS \ |
1175 |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
1176 |
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ |
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ |
1177 |
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ |
PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESSIFY| \ |
1178 |
|
PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ |
1179 |
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ |
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ |
1180 |
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE) |
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF) |
1181 |
|
|
1182 |
#define PUBLIC_EXEC_OPTIONS \ |
#define PUBLIC_EXEC_OPTIONS \ |
1183 |
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ |
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ |
1547 |
#define STRING_UTF_RIGHTPAR "UTF)" |
#define STRING_UTF_RIGHTPAR "UTF)" |
1548 |
#define STRING_UCP_RIGHTPAR "UCP)" |
#define STRING_UCP_RIGHTPAR "UCP)" |
1549 |
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" |
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" |
1550 |
|
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" |
1551 |
|
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" |
1552 |
|
|
1553 |
#else /* SUPPORT_UTF */ |
#else /* SUPPORT_UTF */ |
1554 |
|
|
1810 |
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS |
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS |
1811 |
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS |
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS |
1812 |
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS |
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS |
1813 |
|
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN |
1814 |
|
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN |
1815 |
|
|
1816 |
#endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
1817 |
|
|
1852 |
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ |
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ |
1853 |
#define PT_WORD 8 /* Word - L plus N plus underscore */ |
#define PT_WORD 8 /* Word - L plus N plus underscore */ |
1854 |
#define PT_CLIST 9 /* Pseudo-property: match character list */ |
#define PT_CLIST 9 /* Pseudo-property: match character list */ |
1855 |
|
#define PT_UCNC 10 /* Universal Character nameable character */ |
1856 |
|
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ |
1857 |
|
|
1858 |
|
/* The following special properties are used only in XCLASS items, when POSIX |
1859 |
|
classes are specified and PCRE_UCP is set - in other words, for Unicode |
1860 |
|
handling of these classes. They are not available via the \p or \P escapes like |
1861 |
|
those in the above list, and so they do not take part in the autopossessifying |
1862 |
|
table. */ |
1863 |
|
|
1864 |
|
#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */ |
1865 |
|
#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */ |
1866 |
|
#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */ |
1867 |
|
|
1868 |
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
1869 |
contain characters with values greater than 255. */ |
contain characters with values greater than 255. */ |
1878 |
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ |
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ |
1879 |
|
|
1880 |
/* These are escaped items that aren't just an encoding of a particular data |
/* These are escaped items that aren't just an encoding of a particular data |
1881 |
value such as \n. They must have non-zero values, as check_escape() returns |
value such as \n. They must have non-zero values, as check_escape() returns 0 |
1882 |
0 for a data character. Also, they must appear in the same order as in the opcode |
for a data character. Also, they must appear in the same order as in the |
1883 |
definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it |
opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it |
1884 |
corresponds to "." in DOTALL mode rather than an escape sequence. It is also |
corresponds to "." in DOTALL mode rather than an escape sequence. It is also |
1885 |
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In |
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In |
1886 |
non-DOTALL mode, "." behaves like \N. |
non-DOTALL mode, "." behaves like \N. |
1903 |
ESC_E, ESC_Q, ESC_g, ESC_k, |
ESC_E, ESC_Q, ESC_g, ESC_k, |
1904 |
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu }; |
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu }; |
1905 |
|
|
|
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to |
|
|
OP_EOD must correspond in order to the list of escapes immediately above. |
|
1906 |
|
|
1907 |
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions |
/********************** Opcode definitions ******************/ |
1908 |
that follow must also be updated to match. There are also tables called |
|
1909 |
"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ |
/****** NOTE NOTE NOTE ****** |
1910 |
|
|
1911 |
|
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in |
1912 |
|
order to the list of escapes immediately above. Furthermore, values up to |
1913 |
|
OP_DOLLM must not be changed without adjusting the table called autoposstab in |
1914 |
|
pcre_compile.c |
1915 |
|
|
1916 |
|
Whenever this list is updated, the two macro definitions that follow must be |
1917 |
|
updated to match. The possessification table called "opcode_possessify" in |
1918 |
|
pcre_compile.c must also be updated, and also the tables called "coptable" |
1919 |
|
and "poptable" in pcre_dfa_exec.c. |
1920 |
|
|
1921 |
|
****** NOTE NOTE NOTE ******/ |
1922 |
|
|
1923 |
|
|
1924 |
|
/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive, |
1925 |
|
are used in a table for deciding whether a repeated character type can be |
1926 |
|
auto-possessified. */ |
1927 |
|
|
1928 |
|
#define FIRST_AUTOTAB_OP OP_NOT_DIGIT |
1929 |
|
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI |
1930 |
|
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM |
1931 |
|
|
1932 |
enum { |
enum { |
1933 |
OP_END, /* 0 End of pattern */ |
OP_END, /* 0 End of pattern */ |
1959 |
OP_EXTUNI, /* 22 \X (extended Unicode sequence) */ |
OP_EXTUNI, /* 22 \X (extended Unicode sequence) */ |
1960 |
OP_EODN, /* 23 End of data or \n at end of data (\Z) */ |
OP_EODN, /* 23 End of data or \n at end of data (\Z) */ |
1961 |
OP_EOD, /* 24 End of data (\z) */ |
OP_EOD, /* 24 End of data (\z) */ |
1962 |
|
|
1963 |
|
/* Line end assertions */ |
1964 |
|
|
1965 |
OP_CIRC, /* 25 Start of line - not multiline */ |
OP_DOLL, /* 25 End of line - not multiline */ |
1966 |
OP_CIRCM, /* 26 Start of line - multiline */ |
OP_DOLLM, /* 26 End of line - multiline */ |
1967 |
OP_DOLL, /* 27 End of line - not multiline */ |
OP_CIRC, /* 27 Start of line - not multiline */ |
1968 |
OP_DOLLM, /* 28 End of line - multiline */ |
OP_CIRCM, /* 28 Start of line - multiline */ |
1969 |
|
|
1970 |
|
/* Single characters; caseful must precede the caseless ones */ |
1971 |
|
|
1972 |
OP_CHAR, /* 29 Match one character, casefully */ |
OP_CHAR, /* 29 Match one character, casefully */ |
1973 |
OP_CHARI, /* 30 Match one character, caselessly */ |
OP_CHARI, /* 30 Match one character, caselessly */ |
1974 |
OP_NOT, /* 31 Match one character, not the given one, casefully */ |
OP_NOT, /* 31 Match one character, not the given one, casefully */ |
1977 |
/* The following sets of 13 opcodes must always be kept in step because |
/* The following sets of 13 opcodes must always be kept in step because |
1978 |
the offset from the first one is used to generate the others. */ |
the offset from the first one is used to generate the others. */ |
1979 |
|
|
1980 |
/**** Single characters, caseful, must precede the caseless ones ****/ |
/* Repeated characters; caseful must precede the caseless ones */ |
1981 |
|
|
1982 |
OP_STAR, /* 33 The maximizing and minimizing versions of */ |
OP_STAR, /* 33 The maximizing and minimizing versions of */ |
1983 |
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ |
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ |
1995 |
OP_POSQUERY, /* 44 Possessified query, caseful */ |
OP_POSQUERY, /* 44 Possessified query, caseful */ |
1996 |
OP_POSUPTO, /* 45 Possessified upto, caseful */ |
OP_POSUPTO, /* 45 Possessified upto, caseful */ |
1997 |
|
|
1998 |
/**** Single characters, caseless, must follow the caseful ones */ |
/* Repeated characters; caseless must follow the caseful ones */ |
1999 |
|
|
2000 |
OP_STARI, /* 46 */ |
OP_STARI, /* 46 */ |
2001 |
OP_MINSTARI, /* 47 */ |
OP_MINSTARI, /* 47 */ |
2013 |
OP_POSQUERYI, /* 57 Possessified query, caseless */ |
OP_POSQUERYI, /* 57 Possessified query, caseless */ |
2014 |
OP_POSUPTOI, /* 58 Possessified upto, caseless */ |
OP_POSUPTOI, /* 58 Possessified upto, caseless */ |
2015 |
|
|
2016 |
/**** The negated ones must follow the non-negated ones, and match them ****/ |
/* The negated ones must follow the non-negated ones, and match them */ |
2017 |
/**** Negated single character, caseful; must precede the caseless ones ****/ |
/* Negated repeated character, caseful; must precede the caseless ones */ |
2018 |
|
|
2019 |
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ |
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ |
2020 |
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ |
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ |
2032 |
OP_NOTPOSQUERY, /* 70 */ |
OP_NOTPOSQUERY, /* 70 */ |
2033 |
OP_NOTPOSUPTO, /* 71 */ |
OP_NOTPOSUPTO, /* 71 */ |
2034 |
|
|
2035 |
/**** Negated single character, caseless; must follow the caseful ones ****/ |
/* Negated repeated character, caseless; must follow the caseful ones */ |
2036 |
|
|
2037 |
OP_NOTSTARI, /* 72 */ |
OP_NOTSTARI, /* 72 */ |
2038 |
OP_NOTMINSTARI, /* 73 */ |
OP_NOTMINSTARI, /* 73 */ |
2050 |
OP_NOTPOSQUERYI, /* 83 */ |
OP_NOTPOSQUERYI, /* 83 */ |
2051 |
OP_NOTPOSUPTOI, /* 84 */ |
OP_NOTPOSUPTOI, /* 84 */ |
2052 |
|
|
2053 |
/**** Character types ****/ |
/* Character types */ |
2054 |
|
|
2055 |
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */ |
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */ |
2056 |
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */ |
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */ |
2081 |
OP_CRRANGE, /* 104 These are different to the three sets above. */ |
OP_CRRANGE, /* 104 These are different to the three sets above. */ |
2082 |
OP_CRMINRANGE, /* 105 */ |
OP_CRMINRANGE, /* 105 */ |
2083 |
|
|
2084 |
|
OP_CRPOSSTAR, /* 106 Possessified versions */ |
2085 |
|
OP_CRPOSPLUS, /* 107 */ |
2086 |
|
OP_CRPOSQUERY, /* 108 */ |
2087 |
|
OP_CRPOSRANGE, /* 109 */ |
2088 |
|
|
2089 |
/* End of quantifier opcodes */ |
/* End of quantifier opcodes */ |
2090 |
|
|
2091 |
OP_CLASS, /* 106 Match a character class, chars < 256 only */ |
OP_CLASS, /* 110 Match a character class, chars < 256 only */ |
2092 |
OP_NCLASS, /* 107 Same, but the bitmap was created from a negative |
OP_NCLASS, /* 111 Same, but the bitmap was created from a negative |
2093 |
class - the difference is relevant only when a |
class - the difference is relevant only when a |
2094 |
character > 255 is encountered. */ |
character > 255 is encountered. */ |
2095 |
OP_XCLASS, /* 108 Extended class for handling > 255 chars within the |
OP_XCLASS, /* 112 Extended class for handling > 255 chars within the |
2096 |
class. This does both positive and negative. */ |
class. This does both positive and negative. */ |
2097 |
OP_REF, /* 109 Match a back reference, casefully */ |
OP_REF, /* 113 Match a back reference, casefully */ |
2098 |
OP_REFI, /* 110 Match a back reference, caselessly */ |
OP_REFI, /* 114 Match a back reference, caselessly */ |
2099 |
OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */ |
OP_DNREF, /* 115 Match a duplicate name backref, casefully */ |
2100 |
OP_CALLOUT, /* 112 Call out to external function if provided */ |
OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */ |
2101 |
|
OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */ |
2102 |
OP_ALT, /* 113 Start of alternation */ |
OP_CALLOUT, /* 118 Call out to external function if provided */ |
2103 |
OP_KET, /* 114 End of group that doesn't have an unbounded repeat */ |
|
2104 |
OP_KETRMAX, /* 115 These two must remain together and in this */ |
OP_ALT, /* 119 Start of alternation */ |
2105 |
OP_KETRMIN, /* 116 order. They are for groups that repeat for ever. */ |
OP_KET, /* 120 End of group that doesn't have an unbounded repeat */ |
2106 |
OP_KETRPOS, /* 117 Possessive unlimited repeat. */ |
OP_KETRMAX, /* 121 These two must remain together and in this */ |
2107 |
|
OP_KETRMIN, /* 122 order. They are for groups that repeat for ever. */ |
2108 |
|
OP_KETRPOS, /* 123 Possessive unlimited repeat. */ |
2109 |
|
|
2110 |
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four |
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four |
2111 |
asserts must remain in order. */ |
asserts must remain in order. */ |
2112 |
|
|
2113 |
OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */ |
OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */ |
2114 |
OP_ASSERT, /* 119 Positive lookahead */ |
OP_ASSERT, /* 125 Positive lookahead */ |
2115 |
OP_ASSERT_NOT, /* 120 Negative lookahead */ |
OP_ASSERT_NOT, /* 126 Negative lookahead */ |
2116 |
OP_ASSERTBACK, /* 121 Positive lookbehind */ |
OP_ASSERTBACK, /* 127 Positive lookbehind */ |
2117 |
OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */ |
OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */ |
2118 |
|
|
2119 |
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately |
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately |
2120 |
after the assertions, with ONCE first, as there's a test for >= ONCE for a |
after the assertions, with ONCE first, as there's a test for >= ONCE for a |
2121 |
subpattern that isn't an assertion. The POS versions must immediately follow |
subpattern that isn't an assertion. The POS versions must immediately follow |
2122 |
the non-POS versions in each case. */ |
the non-POS versions in each case. */ |
2123 |
|
|
2124 |
OP_ONCE, /* 123 Atomic group, contains captures */ |
OP_ONCE, /* 129 Atomic group, contains captures */ |
2125 |
OP_ONCE_NC, /* 124 Atomic group containing no captures */ |
OP_ONCE_NC, /* 130 Atomic group containing no captures */ |
2126 |
OP_BRA, /* 125 Start of non-capturing bracket */ |
OP_BRA, /* 131 Start of non-capturing bracket */ |
2127 |
OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */ |
OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */ |
2128 |
OP_CBRA, /* 127 Start of capturing bracket */ |
OP_CBRA, /* 133 Start of capturing bracket */ |
2129 |
OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */ |
OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */ |
2130 |
OP_COND, /* 129 Conditional group */ |
OP_COND, /* 135 Conditional group */ |
2131 |
|
|
2132 |
/* These five must follow the previous five, in the same order. There's a |
/* These five must follow the previous five, in the same order. There's a |
2133 |
check for >= SBRA to distinguish the two sets. */ |
check for >= SBRA to distinguish the two sets. */ |
2134 |
|
|
2135 |
OP_SBRA, /* 130 Start of non-capturing bracket, check empty */ |
OP_SBRA, /* 136 Start of non-capturing bracket, check empty */ |
2136 |
OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */ |
OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */ |
2137 |
OP_SCBRA, /* 132 Start of capturing bracket, check empty */ |
OP_SCBRA, /* 138 Start of capturing bracket, check empty */ |
2138 |
OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */ |
OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */ |
2139 |
OP_SCOND, /* 134 Conditional group, check empty */ |
OP_SCOND, /* 140 Conditional group, check empty */ |
2140 |
|
|
2141 |
/* The next two pairs must (respectively) be kept together. */ |
/* The next two pairs must (respectively) be kept together. */ |
2142 |
|
|
2143 |
OP_CREF, /* 135 Used to hold a capture number as condition */ |
OP_CREF, /* 141 Used to hold a capture number as condition */ |
2144 |
OP_NCREF, /* 136 Same, but generated by a name reference*/ |
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */ |
2145 |
OP_RREF, /* 137 Used to hold a recursion number as condition */ |
OP_RREF, /* 143 Used to hold a recursion number as condition */ |
2146 |
OP_NRREF, /* 138 Same, but generated by a name reference*/ |
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */ |
2147 |
OP_DEF, /* 139 The DEFINE condition */ |
OP_DEF, /* 145 The DEFINE condition */ |
2148 |
|
|
2149 |
OP_BRAZERO, /* 140 These two must remain together and in this */ |
OP_BRAZERO, /* 146 These two must remain together and in this */ |
2150 |
OP_BRAMINZERO, /* 141 order. */ |
OP_BRAMINZERO, /* 147 order. */ |
2151 |
OP_BRAPOSZERO, /* 142 */ |
OP_BRAPOSZERO, /* 148 */ |
2152 |
|
|
2153 |
/* These are backtracking control verbs */ |
/* These are backtracking control verbs */ |
2154 |
|
|
2155 |
OP_MARK, /* 143 always has an argument */ |
OP_MARK, /* 149 always has an argument */ |
2156 |
OP_PRUNE, /* 144 */ |
OP_PRUNE, /* 150 */ |
2157 |
OP_PRUNE_ARG, /* 145 same, but with argument */ |
OP_PRUNE_ARG, /* 151 same, but with argument */ |
2158 |
OP_SKIP, /* 146 */ |
OP_SKIP, /* 152 */ |
2159 |
OP_SKIP_ARG, /* 147 same, but with argument */ |
OP_SKIP_ARG, /* 153 same, but with argument */ |
2160 |
OP_THEN, /* 148 */ |
OP_THEN, /* 154 */ |
2161 |
OP_THEN_ARG, /* 149 same, but with argument */ |
OP_THEN_ARG, /* 155 same, but with argument */ |
2162 |
OP_COMMIT, /* 150 */ |
OP_COMMIT, /* 156 */ |
2163 |
|
|
2164 |
/* These are forced failure and success verbs */ |
/* These are forced failure and success verbs */ |
2165 |
|
|
2166 |
OP_FAIL, /* 151 */ |
OP_FAIL, /* 157 */ |
2167 |
OP_ACCEPT, /* 152 */ |
OP_ACCEPT, /* 158 */ |
2168 |
OP_ASSERT_ACCEPT, /* 153 Used inside assertions */ |
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */ |
2169 |
OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */ |
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */ |
2170 |
|
|
2171 |
/* This is used to skip a subpattern with a {0} quantifier */ |
/* This is used to skip a subpattern with a {0} quantifier */ |
2172 |
|
|
2173 |
OP_SKIPZERO, /* 155 */ |
OP_SKIPZERO, /* 161 */ |
2174 |
|
|
2175 |
/* This is not an opcode, but is used to check that tables indexed by opcode |
/* This is not an opcode, but is used to check that tables indexed by opcode |
2176 |
are the correct length, in order to catch updating errors - there have been |
are the correct length, in order to catch updating errors - there have been |
2181 |
|
|
2182 |
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro |
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro |
2183 |
definitions that follow must also be updated to match. There are also tables |
definitions that follow must also be updated to match. There are also tables |
2184 |
called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ |
called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in |
2185 |
|
pcre_dfa_exec.c that must be updated. */ |
2186 |
|
|
2187 |
|
|
2188 |
/* This macro defines textual names for all the opcodes. These are used only |
/* This macro defines textual names for all the opcodes. These are used only |
2195 |
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ |
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ |
2196 |
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ |
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ |
2197 |
"extuni", "\\Z", "\\z", \ |
"extuni", "\\Z", "\\z", \ |
2198 |
"^", "^", "$", "$", "char", "chari", "not", "noti", \ |
"$", "$", "^", "^", "char", "chari", "not", "noti", \ |
2199 |
"*", "*?", "+", "+?", "?", "??", \ |
"*", "*?", "+", "+?", "?", "??", \ |
2200 |
"{", "{", "{", \ |
"{", "{", "{", \ |
2201 |
"*+","++", "?+", "{", \ |
"*+","++", "?+", "{", \ |
2211 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ |
2212 |
"*+","++", "?+", "{", \ |
"*+","++", "?+", "{", \ |
2213 |
"*", "*?", "+", "+?", "?", "??", "{", "{", \ |
"*", "*?", "+", "+?", "?", "??", "{", "{", \ |
2214 |
"class", "nclass", "xclass", "Ref", "Refi", \ |
"*+","++", "?+", "{", \ |
2215 |
|
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ |
2216 |
"Recurse", "Callout", \ |
"Recurse", "Callout", \ |
2217 |
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ |
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ |
2218 |
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \ |
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \ |
2221 |
"Cond", \ |
"Cond", \ |
2222 |
"SBra", "SBraPos", "SCBra", "SCBraPos", \ |
"SBra", "SBraPos", "SCBra", "SCBraPos", \ |
2223 |
"SCond", \ |
"SCond", \ |
2224 |
"Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \ |
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \ |
2225 |
"Brazero", "Braminzero", "Braposzero", \ |
"Brazero", "Braminzero", "Braposzero", \ |
2226 |
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ |
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ |
2227 |
"*THEN", "*THEN", "*COMMIT", "*FAIL", \ |
"*THEN", "*THEN", "*COMMIT", "*FAIL", \ |
2246 |
3, 3, /* \P, \p */ \ |
3, 3, /* \P, \p */ \ |
2247 |
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ |
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ |
2248 |
1, /* \X */ \ |
1, /* \X */ \ |
2249 |
1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \ |
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M, ^, ^M */ \ |
2250 |
2, /* Char - the minimum length */ \ |
2, /* Char - the minimum length */ \ |
2251 |
2, /* Chari - the minimum length */ \ |
2, /* Chari - the minimum length */ \ |
2252 |
2, /* not */ \ |
2, /* not */ \ |
2277 |
/* Character class & ref repeats */ \ |
/* Character class & ref repeats */ \ |
2278 |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ |
2279 |
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ |
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ |
2280 |
|
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \ |
2281 |
1+(32/sizeof(pcre_uchar)), /* CLASS */ \ |
1+(32/sizeof(pcre_uchar)), /* CLASS */ \ |
2282 |
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \ |
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \ |
2283 |
0, /* XCLASS - variable length */ \ |
0, /* XCLASS - variable length */ \ |
2284 |
1+IMM2_SIZE, /* REF */ \ |
1+IMM2_SIZE, /* REF */ \ |
2285 |
1+IMM2_SIZE, /* REFI */ \ |
1+IMM2_SIZE, /* REFI */ \ |
2286 |
|
1+2*IMM2_SIZE, /* DNREF */ \ |
2287 |
|
1+2*IMM2_SIZE, /* DNREFI */ \ |
2288 |
1+LINK_SIZE, /* RECURSE */ \ |
1+LINK_SIZE, /* RECURSE */ \ |
2289 |
2+2*LINK_SIZE, /* CALLOUT */ \ |
2+2*LINK_SIZE, /* CALLOUT */ \ |
2290 |
1+LINK_SIZE, /* Alt */ \ |
1+LINK_SIZE, /* Alt */ \ |
2309 |
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ |
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ |
2310 |
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ |
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ |
2311 |
1+LINK_SIZE, /* SCOND */ \ |
1+LINK_SIZE, /* SCOND */ \ |
2312 |
1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \ |
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \ |
2313 |
1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \ |
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \ |
2314 |
1, /* DEF */ \ |
1, /* DEF */ \ |
2315 |
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ |
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ |
2316 |
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ |
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ |
2319 |
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ |
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ |
2320 |
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ |
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ |
2321 |
|
|
2322 |
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion" |
/* A magic value for OP_RREF to indicate the "any recursion" condition. */ |
|
condition. */ |
|
2323 |
|
|
2324 |
#define RREF_ANY 0xffff |
#define RREF_ANY 0xffff |
2325 |
|
|
2334 |
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, |
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, |
2335 |
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, |
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, |
2336 |
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, |
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, |
2337 |
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT }; |
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, |
2338 |
|
ERR80, ERR81, ERRCOUNT }; |
2339 |
|
|
2340 |
/* JIT compiling modes. The function list is indexed by them. */ |
/* JIT compiling modes. The function list is indexed by them. */ |
2341 |
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, |
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, |
2345 |
code vector run on as long as necessary after the end. We store an explicit |
code vector run on as long as necessary after the end. We store an explicit |
2346 |
offset to the name table so that if a regex is compiled on one host, saved, and |
offset to the name table so that if a regex is compiled on one host, saved, and |
2347 |
then run on another where the size of pointers is different, all might still |
then run on another where the size of pointers is different, all might still |
2348 |
be well. For the case of compiled-on-4 and run-on-8, we include an extra |
be well. |
|
pointer that is always NULL. For future-proofing, a few dummy fields were |
|
|
originally included - even though you can never get this planning right - but |
|
|
there is only one left now. |
|
|
|
|
|
NOTE NOTE NOTE: |
|
|
Because people can now save and re-use compiled patterns, any additions to this |
|
|
structure should be made at the end, and something earlier (e.g. a new |
|
|
flag in the options or one of the dummy fields) should indicate that the new |
|
|
fields are present. Currently PCRE always sets the dummy fields to zero. |
|
|
NOTE NOTE NOTE |
|
|
*/ |
|
2349 |
|
|
2350 |
#if defined COMPILE_PCRE8 |
The size of the structure must be a multiple of 8 bytes. For the case of |
2351 |
#define REAL_PCRE real_pcre |
compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so |
2352 |
#elif defined COMPILE_PCRE16 |
that there are an even number of pointers which therefore are a multiple of 8 |
2353 |
#define REAL_PCRE real_pcre16 |
bytes. |
2354 |
#elif defined COMPILE_PCRE32 |
|
2355 |
#define REAL_PCRE real_pcre32 |
It is necessary to fork the struct for the 32 bit library, since it needs to |
2356 |
#endif |
use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the |
2357 |
|
typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit |
2358 |
/* It is necessary to fork the struct for 32 bit, since it needs to use |
variants. |
2359 |
* pcre_uchar for first_char and req_char. Can't put an ifdef inside the |
|
2360 |
* typedef since pcretest needs access to the struct of the 8-, 16- |
*** WARNING *** |
2361 |
* and 32-bit variants. */ |
When new fields are added to these structures, remember to adjust the code in |
2362 |
|
pcre_byte_order.c that is concerned with swapping the byte order of the fields |
2363 |
|
when a compiled regex is reloaded on a host with different endianness. |
2364 |
|
*** WARNING *** |
2365 |
|
There is also similar byte-flipping code in pcretest.c, which is used for |
2366 |
|
testing the byte-flipping features. It must also be kept in step. |
2367 |
|
*** WARNING *** |
2368 |
|
*/ |
2369 |
|
|
2370 |
typedef struct real_pcre8_or_16 { |
typedef struct real_pcre8_or_16 { |
2371 |
pcre_uint32 magic_number; |
pcre_uint32 magic_number; |
2372 |
pcre_uint32 size; /* Total that was malloced */ |
pcre_uint32 size; /* Total that was malloced */ |
2373 |
pcre_uint32 options; /* Public options */ |
pcre_uint32 options; /* Public options */ |
2374 |
pcre_uint16 flags; /* Private flags */ |
pcre_uint32 flags; /* Private flags */ |
2375 |
|
pcre_uint32 limit_match; /* Limit set from regex */ |
2376 |
|
pcre_uint32 limit_recursion; /* Limit set from regex */ |
2377 |
|
pcre_uint16 first_char; /* Starting character */ |
2378 |
|
pcre_uint16 req_char; /* This character must be seen */ |
2379 |
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ |
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ |
2380 |
pcre_uint16 top_bracket; /* Highest numbered group */ |
pcre_uint16 top_bracket; /* Highest numbered group */ |
2381 |
pcre_uint16 top_backref; /* Highest numbered back reference */ |
pcre_uint16 top_backref; /* Highest numbered back reference */ |
|
pcre_uint16 first_char; /* Starting character */ |
|
|
pcre_uint16 req_char; /* This character must be seen */ |
|
2382 |
pcre_uint16 name_table_offset; /* Offset to name table that follows */ |
pcre_uint16 name_table_offset; /* Offset to name table that follows */ |
2383 |
pcre_uint16 name_entry_size; /* Size of any name items */ |
pcre_uint16 name_entry_size; /* Size of any name items */ |
2384 |
pcre_uint16 name_count; /* Number of name items */ |
pcre_uint16 name_count; /* Number of name items */ |
2385 |
pcre_uint16 ref_count; /* Reference count */ |
pcre_uint16 ref_count; /* Reference count */ |
2386 |
|
pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */ |
2387 |
|
pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */ |
2388 |
|
pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */ |
2389 |
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ |
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ |
2390 |
const pcre_uint8 *nullpad; /* NULL padding */ |
void *nullpad; /* NULL padding */ |
2391 |
} real_pcre8_or_16; |
} real_pcre8_or_16; |
2392 |
|
|
2393 |
typedef struct real_pcre8_or_16 real_pcre; |
typedef struct real_pcre8_or_16 real_pcre; |
2397 |
pcre_uint32 magic_number; |
pcre_uint32 magic_number; |
2398 |
pcre_uint32 size; /* Total that was malloced */ |
pcre_uint32 size; /* Total that was malloced */ |
2399 |
pcre_uint32 options; /* Public options */ |
pcre_uint32 options; /* Public options */ |
2400 |
pcre_uint16 flags; /* Private flags */ |
pcre_uint32 flags; /* Private flags */ |
2401 |
|
pcre_uint32 limit_match; /* Limit set from regex */ |
2402 |
|
pcre_uint32 limit_recursion; /* Limit set from regex */ |
2403 |
|
pcre_uint32 first_char; /* Starting character */ |
2404 |
|
pcre_uint32 req_char; /* This character must be seen */ |
2405 |
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ |
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ |
2406 |
pcre_uint16 top_bracket; /* Highest numbered group */ |
pcre_uint16 top_bracket; /* Highest numbered group */ |
2407 |
pcre_uint16 top_backref; /* Highest numbered back reference */ |
pcre_uint16 top_backref; /* Highest numbered back reference */ |
|
pcre_uint32 first_char; /* Starting character */ |
|
|
pcre_uint32 req_char; /* This character must be seen */ |
|
2408 |
pcre_uint16 name_table_offset; /* Offset to name table that follows */ |
pcre_uint16 name_table_offset; /* Offset to name table that follows */ |
2409 |
pcre_uint16 name_entry_size; /* Size of any name items */ |
pcre_uint16 name_entry_size; /* Size of any name items */ |
2410 |
pcre_uint16 name_count; /* Number of name items */ |
pcre_uint16 name_count; /* Number of name items */ |
2411 |
pcre_uint16 ref_count; /* Reference count */ |
pcre_uint16 ref_count; /* Reference count */ |
2412 |
pcre_uint16 dummy1; /* for later expansion */ |
pcre_uint16 dummy; /* To ensure size is a multiple of 8 */ |
|
pcre_uint16 dummy2; /* for later expansion */ |
|
2413 |
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ |
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ |
2414 |
void *nullpad; /* for later expansion */ |
void *nullpad; /* NULL padding */ |
2415 |
} real_pcre32; |
} real_pcre32; |
2416 |
|
|
2417 |
|
#if defined COMPILE_PCRE8 |
2418 |
|
#define REAL_PCRE real_pcre |
2419 |
|
#elif defined COMPILE_PCRE16 |
2420 |
|
#define REAL_PCRE real_pcre16 |
2421 |
|
#elif defined COMPILE_PCRE32 |
2422 |
|
#define REAL_PCRE real_pcre32 |
2423 |
|
#endif |
2424 |
|
|
2425 |
/* Assert that the size of REAL_PCRE is divisible by 8 */ |
/* Assert that the size of REAL_PCRE is divisible by 8 */ |
2426 |
typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1]; |
typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1]; |
2427 |
|
|
2455 |
pcre_uint16 flag; /* Set TRUE if recursive back ref */ |
pcre_uint16 flag; /* Set TRUE if recursive back ref */ |
2456 |
} open_capitem; |
} open_capitem; |
2457 |
|
|
2458 |
|
/* Structure for building a list of named groups during the first pass of |
2459 |
|
compiling. */ |
2460 |
|
|
2461 |
|
typedef struct named_group { |
2462 |
|
const pcre_uchar *name; /* Points to the name in the pattern */ |
2463 |
|
int length; /* Length of the name */ |
2464 |
|
pcre_uint32 number; /* Group number */ |
2465 |
|
} named_group; |
2466 |
|
|
2467 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
2468 |
doing the compiling, so that they are thread-safe. */ |
doing the compiling, so that they are thread-safe. */ |
2469 |
|
|
2476 |
const pcre_uchar *start_code; /* The start of the compiled code */ |
const pcre_uchar *start_code; /* The start of the compiled code */ |
2477 |
const pcre_uchar *start_pattern; /* The start of the pattern */ |
const pcre_uchar *start_pattern; /* The start of the pattern */ |
2478 |
const pcre_uchar *end_pattern; /* The end of the pattern */ |
const pcre_uchar *end_pattern; /* The end of the pattern */ |
|
open_capitem *open_caps; /* Chain of open capture items */ |
|
2479 |
pcre_uchar *hwm; /* High watermark of workspace */ |
pcre_uchar *hwm; /* High watermark of workspace */ |
2480 |
|
open_capitem *open_caps; /* Chain of open capture items */ |
2481 |
|
named_group *named_groups; /* Points to vector in pre-compile */ |
2482 |
pcre_uchar *name_table; /* The name/number table */ |
pcre_uchar *name_table; /* The name/number table */ |
2483 |
int names_found; /* Number of entries so far */ |
int names_found; /* Number of entries so far */ |
2484 |
int name_entry_size; /* Size of each entry */ |
int name_entry_size; /* Size of each entry */ |
2485 |
|
int named_group_list_size; /* Number of entries in the list */ |
2486 |
int workspace_size; /* Size of workspace */ |
int workspace_size; /* Size of workspace */ |
2487 |
unsigned int bracount; /* Count of capturing parens as we compile */ |
unsigned int bracount; /* Count of capturing parens as we compile */ |
2488 |
int final_bracount; /* Saved value after first pass */ |
int final_bracount; /* Saved value after first pass */ |
2489 |
int max_lookbehind; /* Maximum lookbehind (characters) */ |
int max_lookbehind; /* Maximum lookbehind (characters) */ |
2490 |
int top_backref; /* Maximum back reference */ |
int top_backref; /* Maximum back reference */ |
2491 |
unsigned int backref_map; /* Bitmap of low back refs */ |
unsigned int backref_map; /* Bitmap of low back refs */ |
2492 |
|
unsigned int namedrefcount; /* Number of backreferences by name */ |
2493 |
int assert_depth; /* Depth of nested assertions */ |
int assert_depth; /* Depth of nested assertions */ |
2494 |
int external_options; /* External (initial) options */ |
pcre_uint32 external_options; /* External (initial) options */ |
2495 |
int external_flags; /* External flag bits to be set */ |
pcre_uint32 external_flags; /* External flag bits to be set */ |
2496 |
int req_varyopt; /* "After variable item" flag for reqbyte */ |
int req_varyopt; /* "After variable item" flag for reqbyte */ |
2497 |
BOOL had_accept; /* (*ACCEPT) encountered */ |
BOOL had_accept; /* (*ACCEPT) encountered */ |
2498 |
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ |
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ |
2499 |
BOOL check_lookbehind; /* Lookbehinds need later checking */ |
BOOL check_lookbehind; /* Lookbehinds need later checking */ |
2500 |
|
BOOL dupnames; /* Duplicate names exist */ |
2501 |
int nltype; /* Newline type */ |
int nltype; /* Newline type */ |
2502 |
int nllen; /* Newline string length */ |
int nllen; /* Newline string length */ |
2503 |
pcre_uchar nl[4]; /* Newline string when fixed length */ |
pcre_uchar nl[4]; /* Newline string when fixed length */ |
2519 |
unsigned int group_num; /* Number of group that was called */ |
unsigned int group_num; /* Number of group that was called */ |
2520 |
int *offset_save; /* Pointer to start of saved offsets */ |
int *offset_save; /* Pointer to start of saved offsets */ |
2521 |
int saved_max; /* Number of saved offsets */ |
int saved_max; /* Number of saved offsets */ |
2522 |
int saved_capture_last; /* Last capture number */ |
int saved_capture_last; /* Last capture number */ |
2523 |
PCRE_PUCHAR subject_position; /* Position at start of recursion */ |
PCRE_PUCHAR subject_position; /* Position at start of recursion */ |
2524 |
} recursion_info; |
} recursion_info; |
2525 |
|
|
2556 |
int nllen; /* Newline string length */ |
int nllen; /* Newline string length */ |
2557 |
int name_count; /* Number of names in name table */ |
int name_count; /* Number of names in name table */ |
2558 |
int name_entry_size; /* Size of entry in names table */ |
int name_entry_size; /* Size of entry in names table */ |
2559 |
|
unsigned int skip_arg_count; /* For counting SKIP_ARGs */ |
2560 |
|
unsigned int ignore_skip_arg; /* For re-run when SKIP arg name not found */ |
2561 |
pcre_uchar *name_table; /* Table of names */ |
pcre_uchar *name_table; /* Table of names */ |
2562 |
pcre_uchar nl[4]; /* Newline string when fixed */ |
pcre_uchar nl[4]; /* Newline string when fixed */ |
2563 |
const pcre_uint8 *lcc; /* Points to lower casing table */ |
const pcre_uint8 *lcc; /* Points to lower casing table */ |
2574 |
BOOL hitend; /* Hit the end of the subject at some point */ |
BOOL hitend; /* Hit the end of the subject at some point */ |
2575 |
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ |
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ |
2576 |
BOOL hasthen; /* Pattern contains (*THEN) */ |
BOOL hasthen; /* Pattern contains (*THEN) */ |
|
BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ |
|
2577 |
const pcre_uchar *start_code; /* For use when recursing */ |
const pcre_uchar *start_code; /* For use when recursing */ |
2578 |
PCRE_PUCHAR start_subject; /* Start of the subject string */ |
PCRE_PUCHAR start_subject; /* Start of the subject string */ |
2579 |
PCRE_PUCHAR end_subject; /* End of the subject string */ |
PCRE_PUCHAR end_subject; /* End of the subject string */ |