140 |
#endif |
#endif |
141 |
|
|
142 |
|
|
143 |
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is |
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is |
144 |
searched linearly. Put all the names into a single string, in order to reduce |
searched linearly. Put all the names into a single string, in order to reduce |
145 |
the number of relocations when a shared library is dynamically linked. */ |
the number of relocations when a shared library is dynamically linked. */ |
146 |
|
|
147 |
typedef struct verbitem { |
typedef struct verbitem { |
150 |
} verbitem; |
} verbitem; |
151 |
|
|
152 |
/* Packed names of the special verbs. Every name except the last carries an
explicit NUL; the compiler supplies the NUL that ends "THEN". The lookup code
walks this string in step with verbs[], advancing by each entry's length plus
one, so the order here has to stay in step with that table. */

static const char verbnames[] =
  "ACCEPT\0" "COMMIT\0"
  "F\0"      "FAIL\0"
  "PRUNE\0"  "SKIP\0"
  "THEN";
160 |
|
|
161 |
static verbitem verbs[] = { |
static verbitem verbs[] = { |
162 |
{ 6, OP_ACCEPT }, |
{ 6, OP_ACCEPT }, |
171 |
/* Number of entries in verbs[], computed from the array's own element size so
the count follows the table when entries are added or removed. */

static int verbcount = sizeof(verbs) / sizeof(verbs[0]);
172 |
|
|
173 |
|
|
174 |
/* Tables of names of POSIX character classes and their lengths. The names are |
/* Tables of names of POSIX character classes and their lengths. The names are |
175 |
now all in a single string, to reduce the number of relocations when a shared |
now all in a single string, to reduce the number of relocations when a shared |
176 |
library is dynamically loaded. The list of lengths is terminated by a zero |
library is dynamically loaded. The list of lengths is terminated by a zero |
177 |
length entry. The first three must be alpha, lower, upper, as this is assumed |
length entry. The first three must be alpha, lower, upper, as this is assumed |
178 |
for handling case independence. */ |
for handling case independence. */ |
179 |
|
|
180 |
/* Packed POSIX class names, one per line. Each name but the last carries an
explicit NUL; the compiler supplies the NUL after "xdigit". The lookup loop
steps through this string using posix_name_lengths[], so the two tables must
list the classes in the same order. */

static const char posix_names[] =
  "alpha\0"     /* first three must stay alpha, lower, upper */
  "lower\0"
  "upper\0"
  "alnum\0"
  "ascii\0"
  "blank\0"
  "cntrl\0"
  "digit\0"
  "graph\0"
  "print\0"
  "punct\0"
  "space\0"
  "word\0"
  "xdigit";
184 |
|
|
185 |
static const uschar posix_name_lengths[] = { |
static const uschar posix_name_lengths[] = { |
219 |
/* The texts of compile-time error messages. These are "char *" because they |
/* The texts of compile-time error messages. These are "char *" because they |
220 |
are passed to the outside world. Do not ever re-use any error number, because |
are passed to the outside world. Do not ever re-use any error number, because |
221 |
they are documented. Always add a new error instead. Messages marked DEAD below |
they are documented. Always add a new error instead. Messages marked DEAD below |
222 |
are no longer used. This used to be a table of strings, but in order to reduce |
are no longer used. This used to be a table of strings, but in order to reduce |
223 |
the number of relocations needed when a shared library is loaded dynamically, |
the number of relocations needed when a shared library is loaded dynamically, |
224 |
it is now one long string. We cannot use a table of offsets, because the |
it is now one long string. We cannot use a table of offsets, because the |
225 |
lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we |
lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we |
226 |
simply count through to the one we want - this isn't a performance issue |
simply count through to the one we want - this isn't a performance issue |
227 |
because these strings are used only when there is a compilation error. */ |
because these strings are used only when there is a compilation error. */ |
228 |
|
|
229 |
static const char error_texts[] = |
static const char error_texts[] = |
439 |
* Find an error text * |
* Find an error text * |
440 |
*************************************************/ |
*************************************************/ |
441 |
|
|
442 |
/* The error texts are now all in one long string, to save on relocations. As |
/* The error texts are now all in one long string, to save on relocations. As |
443 |
some of the text is of unknown length, we can't use a table of offsets. |
some of the text is of unknown length, we can't use a table of offsets. |
444 |
Instead, just count through the strings. This is not a performance issue |
Instead, just count through the strings. This is not a performance issue |
445 |
because it happens only when there has been a compilation error. |
because it happens only when there has been a compilation error. |
446 |
|
|
447 |
Argument: the error number |
Argument: the error number |
452 |
find_error_text(int n) |
find_error_text(int n) |
453 |
{ |
{ |
454 |
const char *s = error_texts; |
const char *s = error_texts; |
455 |
for (; n > 0; n--) while (*s++ != 0); |
for (; n > 0; n--) while (*s++ != 0); |
456 |
return s; |
return s; |
457 |
} |
} |
458 |
|
|
1777 |
{ |
{ |
1778 |
if (len == posix_name_lengths[yield] && |
if (len == posix_name_lengths[yield] && |
1779 |
strncmp((const char *)ptr, pn, len) == 0) return yield; |
strncmp((const char *)ptr, pn, len) == 0) return yield; |
1780 |
pn += posix_name_lengths[yield] + 1; |
pn += posix_name_lengths[yield] + 1; |
1781 |
yield++; |
yield++; |
1782 |
} |
} |
1783 |
return -1; |
return -1; |
2383 |
for (;; ptr++) |
for (;; ptr++) |
2384 |
{ |
{ |
2385 |
BOOL negate_class; |
BOOL negate_class; |
2386 |
|
BOOL should_flip_negation; |
2387 |
BOOL possessive_quantifier; |
BOOL possessive_quantifier; |
2388 |
BOOL is_quantifier; |
BOOL is_quantifier; |
2389 |
BOOL is_recurse; |
BOOL is_recurse; |
2632 |
else break; |
else break; |
2633 |
} |
} |
2634 |
|
|
2635 |
|
/* If a class contains a negative special such as \S, we need to flip the |
2636 |
|
negation flag at the end, so that support for characters > 255 works |
2637 |
|
correctly (they are all included in the class). */ |
2638 |
|
|
2639 |
|
should_flip_negation = FALSE; |
2640 |
|
|
2641 |
/* Keep a count of chars with values < 256 so that we can optimize the case |
/* Keep a count of chars with values < 256 so that we can optimize the case |
2642 |
of just a single character (as long as it's < 256). However, for higher |
of just a single character (as long as it's < 256). However, for higher |
2643 |
valued UTF-8 characters, we don't yet do any optimization. */ |
valued UTF-8 characters, we don't yet do any optimization. */ |
2710 |
if (*ptr == '^') |
if (*ptr == '^') |
2711 |
{ |
{ |
2712 |
local_negate = TRUE; |
local_negate = TRUE; |
2713 |
|
should_flip_negation = TRUE; /* Note negative special */ |
2714 |
ptr++; |
ptr++; |
2715 |
} |
} |
2716 |
|
|
2813 |
continue; |
continue; |
2814 |
|
|
2815 |
case ESC_D: |
case ESC_D: |
2816 |
|
should_flip_negation = TRUE; |
2817 |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; |
2818 |
continue; |
continue; |
2819 |
|
|
2822 |
continue; |
continue; |
2823 |
|
|
2824 |
case ESC_W: |
case ESC_W: |
2825 |
|
should_flip_negation = TRUE; |
2826 |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; |
2827 |
continue; |
continue; |
2828 |
|
|
2832 |
continue; |
continue; |
2833 |
|
|
2834 |
case ESC_S: |
case ESC_S: |
2835 |
|
should_flip_negation = TRUE; |
2836 |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; |
2837 |
classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ |
classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ |
2838 |
continue; |
continue; |
3338 |
zeroreqbyte = reqbyte; |
zeroreqbyte = reqbyte; |
3339 |
|
|
3340 |
/* If there are characters with values > 255, we have to compile an |
/* If there are characters with values > 255, we have to compile an |
3341 |
extended class, with its own opcode. If there are no characters < 256, |
extended class, with its own opcode, unless there was a negated special |
3342 |
we can omit the bitmap in the actual compiled code. */ |
such as \S in the class, because in that case all characters > 255 are in |
3343 |
|
the class, so any that were explicitly given as well can be ignored. If |
3344 |
|
(when there are explicit characters > 255 that must be listed) there are no |
3345 |
|
characters < 256, we can omit the bitmap in the actual compiled code. */ |
3346 |
|
|
3347 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
3348 |
if (class_utf8) |
if (class_utf8 && !should_flip_negation) |
3349 |
{ |
{ |
3350 |
*class_utf8data++ = XCL_END; /* Marks the end of extra data */ |
*class_utf8data++ = XCL_END; /* Marks the end of extra data */ |
3351 |
*code++ = OP_XCLASS; |
*code++ = OP_XCLASS; |
3371 |
} |
} |
3372 |
#endif |
#endif |
3373 |
|
|
3374 |
/* If there are no characters > 255, negate the 32-byte map if necessary, |
/* If there are no characters > 255, set the opcode to OP_CLASS or |
3375 |
and copy it into the code vector. If this is the first thing in the branch, |
OP_NCLASS, depending on whether the whole class was negated and whether |
3376 |
there can be no first char setting, whatever the repeat count. Any reqbyte |
there were negative specials such as \S in the class. Then copy the 32-byte |
3377 |
setting must remain unchanged after any kind of repeat. */ |
map into the code vector, negating it if necessary. */ |
3378 |
|
|
3379 |
|
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; |
3380 |
if (negate_class) |
if (negate_class) |
3381 |
{ |
{ |
|
*code++ = OP_NCLASS; |
|
3382 |
if (lengthptr == NULL) /* Save time in the pre-compile phase */ |
if (lengthptr == NULL) /* Save time in the pre-compile phase */ |
3383 |
for (c = 0; c < 32; c++) code[c] = ~classbits[c]; |
for (c = 0; c < 32; c++) code[c] = ~classbits[c]; |
3384 |
} |
} |
3385 |
else |
else |
3386 |
{ |
{ |
|
*code++ = OP_CLASS; |
|
3387 |
memcpy(code, classbits, 32); |
memcpy(code, classbits, 32); |
3388 |
} |
} |
3389 |
code += 32; |
code += 32; |
4077 |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
4078 |
{ |
{ |
4079 |
int i, namelen; |
int i, namelen; |
4080 |
const char *vn = verbnames; |
const char *vn = verbnames; |
4081 |
const uschar *name = ++ptr; |
const uschar *name = ++ptr; |
4082 |
previous = NULL; |
previous = NULL; |
4083 |
while ((cd->ctypes[*++ptr] & ctype_letter) != 0); |
while ((cd->ctypes[*++ptr] & ctype_letter) != 0); |
4101 |
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE; |
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE; |
4102 |
break; |
break; |
4103 |
} |
} |
4104 |
vn += verbs[i].len + 1; |
vn += verbs[i].len + 1; |
4105 |
} |
} |
4106 |
if (i < verbcount) continue; |
if (i < verbcount) continue; |
4107 |
*errorcodeptr = ERR60; |
*errorcodeptr = ERR60; |