9 |
|
|
10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
11 |
|
|
12 |
Copyright (c) 1998 University of Cambridge |
Copyright (c) 1997-1999 University of Cambridge |
13 |
|
|
14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
25 |
|
|
26 |
3. Altered versions must be plainly marked as such, and must not be |
3. Altered versions must be plainly marked as such, and must not be |
27 |
misrepresented as being the original software. |
misrepresented as being the original software. |
28 |
|
|
29 |
|
4. If PCRE is embedded in any software that is released under the GNU |
30 |
|
General Purpose Licence (GPL), then the terms of that licence shall |
31 |
|
supersede any condition above with which it is incompatible. |
32 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
33 |
*/ |
*/ |
34 |
|
|
111 |
|
|
112 |
static BOOL |
static BOOL |
113 |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
114 |
BOOL, int); |
BOOL, int, compile_data *); |
|
|
|
|
/* Structure for passing "static" information around between the functions |
|
|
doing the matching, so that they are thread-safe. */ |
|
|
|
|
|
typedef struct match_data { |
|
|
int errorcode; /* As it says */ |
|
|
int *offset_vector; /* Offset vector */ |
|
|
int offset_end; /* One past the end */ |
|
|
int offset_max; /* The maximum usable for return data */ |
|
|
BOOL offset_overflow; /* Set if too many extractions */ |
|
|
BOOL notbol; /* NOTBOL flag */ |
|
|
BOOL noteol; /* NOTEOL flag */ |
|
|
BOOL endonly; /* Dollar not before final \n */ |
|
|
const uschar *start_subject; /* Start of the subject string */ |
|
|
const uschar *end_subject; /* End of the subject string */ |
|
|
const uschar *end_match_ptr; /* Subject position at end match */ |
|
|
int end_offset_top; /* Highwater mark at end of match */ |
|
|
} match_data; |
|
115 |
|
|
116 |
|
|
117 |
|
|
131 |
|
|
132 |
|
|
133 |
/************************************************* |
/************************************************* |
134 |
|
* Default character tables * |
135 |
|
*************************************************/ |
136 |
|
|
137 |
|
/* A default set of character tables is included in the PCRE binary. Its source |
138 |
|
is built by the maketables auxiliary program, which uses the default C ctypes |
139 |
|
functions, and put in the file chartables.c. These tables are used by PCRE |
140 |
|
whenever the caller of pcre_compile() does not provide an alternate set of |
141 |
|
tables. */ |
142 |
|
|
143 |
|
#include "chartables.c" |
144 |
|
|
145 |
|
|
146 |
|
|
147 |
|
/************************************************* |
148 |
* Return version string * |
* Return version string * |
149 |
*************************************************/ |
*************************************************/ |
150 |
|
|
237 |
bracount number of previous extracting brackets |
bracount number of previous extracting brackets |
238 |
options the options bits |
options the options bits |
239 |
isclass TRUE if inside a character class |
isclass TRUE if inside a character class |
240 |
|
cd pointer to char tables block |
241 |
|
|
242 |
Returns: zero or positive => a data character |
Returns: zero or positive => a data character |
243 |
negative => a special escape sequence |
negative => a special escape sequence |
246 |
|
|
247 |
static int |
static int |
248 |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
249 |
int options, BOOL isclass) |
int options, BOOL isclass, compile_data *cd) |
250 |
{ |
{ |
251 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
252 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
289 |
{ |
{ |
290 |
oldptr = ptr; |
oldptr = ptr; |
291 |
c -= '0'; |
c -= '0'; |
292 |
while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0) |
while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) |
293 |
c = c * 10 + *(++ptr) - '0'; |
c = c * 10 + *(++ptr) - '0'; |
294 |
if (c < 10 || c <= bracount) |
if (c < 10 || c <= bracount) |
295 |
{ |
{ |
315 |
|
|
316 |
case '0': |
case '0': |
317 |
c -= '0'; |
c -= '0'; |
318 |
while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 && |
while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && |
319 |
ptr[1] != '8' && ptr[1] != '9') |
ptr[1] != '8' && ptr[1] != '9') |
320 |
c = c * 8 + *(++ptr) - '0'; |
c = c * 8 + *(++ptr) - '0'; |
321 |
break; |
break; |
324 |
|
|
325 |
case 'x': |
case 'x': |
326 |
c = 0; |
c = 0; |
327 |
while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) |
328 |
{ |
{ |
329 |
ptr++; |
ptr++; |
330 |
c = c * 16 + pcre_lcc[*ptr] - |
c = c * 16 + cd->lcc[*ptr] - |
331 |
(((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
(((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
332 |
} |
} |
333 |
break; |
break; |
334 |
|
|
342 |
|
|
343 |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
344 |
|
|
345 |
if (c >= 'a' && c <= 'z') c = pcre_fcc[c]; |
if (c >= 'a' && c <= 'z') c = cd->fcc[c]; |
346 |
c ^= 0x40; |
c ^= 0x40; |
347 |
break; |
break; |
348 |
|
|
349 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
350 |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
351 |
for Perl compatibility, it is a literal. */ |
for Perl compatibility, it is a literal. This code looks a bit odd, but |
352 |
|
there used to be some cases other than the default, and there may be again |
353 |
|
in future, so I haven't "optimized" it. */ |
354 |
|
|
355 |
default: |
default: |
356 |
if ((options & PCRE_EXTRA) != 0) switch(c) |
if ((options & PCRE_EXTRA) != 0) switch(c) |
380 |
|
|
381 |
Arguments: |
Arguments: |
382 |
p pointer to the first char after '{' |
p pointer to the first char after '{' |
383 |
|
cd pointer to char tables block |
384 |
|
|
385 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
386 |
*/ |
*/ |
387 |
|
|
388 |
static BOOL |
static BOOL |
389 |
is_counted_repeat(const uschar *p) |
is_counted_repeat(const uschar *p, compile_data *cd) |
390 |
{ |
{ |
391 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
392 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
393 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
394 |
|
|
395 |
if (*p++ != ',') return FALSE; |
if (*p++ != ',') return FALSE; |
396 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
397 |
|
|
398 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
399 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
400 |
return (*p == '}'); |
return (*p == '}'); |
401 |
} |
} |
402 |
|
|
416 |
maxp pointer to int for max |
maxp pointer to int for max |
417 |
returned as -1 if no max |
returned as -1 if no max |
418 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
419 |
|
cd pointer to character tables block |
420 |
|
|
421 |
Returns: pointer to '}' on success; |
Returns: pointer to '}' on success; |
422 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
423 |
*/ |
*/ |
424 |
|
|
425 |
static const uschar * |
static const uschar * |
426 |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, |
427 |
|
const char **errorptr, compile_data *cd) |
428 |
{ |
{ |
429 |
int min = 0; |
int min = 0; |
430 |
int max = -1; |
int max = -1; |
431 |
|
|
432 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
433 |
|
|
434 |
if (*p == '}') max = min; else |
if (*p == '}') max = min; else |
435 |
{ |
{ |
436 |
if (*(++p) != '}') |
if (*(++p) != '}') |
437 |
{ |
{ |
438 |
max = 0; |
max = 0; |
439 |
while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
440 |
if (max < min) |
if (max < min) |
441 |
{ |
{ |
442 |
*errorptr = ERR4; |
*errorptr = ERR4; |
621 |
/* Scan the pattern, compiling it into the code vector. |
/* Scan the pattern, compiling it into the code vector. |
622 |
|
|
623 |
Arguments: |
Arguments: |
624 |
options the option bits |
options the option bits |
625 |
brackets points to number of brackets used |
brackets points to number of brackets used |
626 |
code points to the pointer to the current code point |
code points to the pointer to the current code point |
627 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
628 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
629 |
optchanged set to the value of the last OP_OPT item compiled |
optchanged set to the value of the last OP_OPT item compiled |
630 |
|
cd contains pointers to tables |
631 |
|
|
632 |
Returns: TRUE on success |
Returns: TRUE on success |
633 |
FALSE, with *errorptr set on error |
FALSE, with *errorptr set on error |
634 |
*/ |
*/ |
635 |
|
|
636 |
static BOOL |
static BOOL |
637 |
compile_branch(int options, int *brackets, uschar **codeptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
638 |
const uschar **ptrptr, const char **errorptr, int *optchanged) |
const uschar **ptrptr, const char **errorptr, int *optchanged, |
639 |
|
compile_data *cd) |
640 |
{ |
{ |
641 |
int repeat_type, op_type; |
int repeat_type, op_type; |
642 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
668 |
c = *ptr; |
c = *ptr; |
669 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
670 |
{ |
{ |
671 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
672 |
if (c == '#') |
if (c == '#') |
673 |
{ |
{ |
674 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
756 |
|
|
757 |
if (c == '\\') |
if (c == '\\') |
758 |
{ |
{ |
759 |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
760 |
if (-c == ESC_b) c = '\b'; |
if (-c == ESC_b) c = '\b'; |
761 |
else if (c < 0) |
else if (c < 0) |
762 |
{ |
{ |
763 |
|
register const uschar *cbits = cd->cbits; |
764 |
class_charcount = 10; |
class_charcount = 10; |
765 |
switch (-c) |
switch (-c) |
766 |
{ |
{ |
767 |
case ESC_d: |
case ESC_d: |
768 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit]; |
769 |
continue; |
continue; |
770 |
|
|
771 |
case ESC_D: |
case ESC_D: |
772 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit]; |
773 |
continue; |
continue; |
774 |
|
|
775 |
case ESC_w: |
case ESC_w: |
776 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
777 |
class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]); |
778 |
continue; |
continue; |
779 |
|
|
780 |
case ESC_W: |
case ESC_W: |
781 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
782 |
class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]); |
783 |
continue; |
continue; |
784 |
|
|
785 |
case ESC_s: |
case ESC_s: |
786 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space]; |
787 |
continue; |
continue; |
788 |
|
|
789 |
case ESC_S: |
case ESC_S: |
790 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space]; |
791 |
continue; |
continue; |
792 |
|
|
793 |
default: |
default: |
819 |
|
|
820 |
if (d == '\\') |
if (d == '\\') |
821 |
{ |
{ |
822 |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
823 |
if (d < 0) |
if (d < 0) |
824 |
{ |
{ |
825 |
if (d == -ESC_b) d = '\b'; else |
if (d == -ESC_b) d = '\b'; else |
841 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
842 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
843 |
{ |
{ |
844 |
int uc = pcre_fcc[c]; /* flip case */ |
int uc = cd->fcc[c]; /* flip case */ |
845 |
class[uc/8] |= (1 << (uc&7)); |
class[uc/8] |= (1 << (uc&7)); |
846 |
} |
} |
847 |
class_charcount++; /* in case a one-char range */ |
class_charcount++; /* in case a one-char range */ |
856 |
class [c/8] |= (1 << (c&7)); |
class [c/8] |= (1 << (c&7)); |
857 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
858 |
{ |
{ |
859 |
c = pcre_fcc[c]; /* flip case */ |
c = cd->fcc[c]; /* flip case */ |
860 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
861 |
} |
} |
862 |
class_charcount++; |
class_charcount++; |
903 |
/* Various kinds of repeat */ |
/* Various kinds of repeat */ |
904 |
|
|
905 |
case '{': |
case '{': |
906 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; |
907 |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr); |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); |
908 |
if (*errorptr != NULL) goto FAILED; |
if (*errorptr != NULL) goto FAILED; |
909 |
goto REPEAT; |
goto REPEAT; |
910 |
|
|
1200 |
|
|
1201 |
case '(': |
case '(': |
1202 |
bravalue = OP_COND; /* Conditional group */ |
bravalue = OP_COND; /* Conditional group */ |
1203 |
if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0) |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
1204 |
{ |
{ |
1205 |
condref = *ptr - '0'; |
condref = *ptr - '0'; |
1206 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
1333 |
errorptr, /* Where to put an error message */ |
errorptr, /* Where to put an error message */ |
1334 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
1335 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
1336 |
condref)) /* Condition reference number */ |
condref, /* Condition reference number */ |
1337 |
|
cd)) /* Tables block */ |
1338 |
goto FAILED; |
goto FAILED; |
1339 |
|
|
1340 |
/* At the end of compiling, code is still pointing to the start of the |
/* At the end of compiling, code is still pointing to the start of the |
1382 |
|
|
1383 |
case '\\': |
case '\\': |
1384 |
tempptr = ptr; |
tempptr = ptr; |
1385 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1386 |
|
|
1387 |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
1388 |
are arranged to be the negation of the corresponding OP_values. For the |
are arranged to be the negation of the corresponding OP_values. For the |
1427 |
{ |
{ |
1428 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
1429 |
{ |
{ |
1430 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
1431 |
if (c == '#') |
if (c == '#') |
1432 |
{ |
{ |
1433 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
1443 |
if (c == '\\') |
if (c == '\\') |
1444 |
{ |
{ |
1445 |
tempptr = ptr; |
tempptr = ptr; |
1446 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1447 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
1448 |
} |
} |
1449 |
|
|
1455 |
|
|
1456 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
1457 |
|
|
1458 |
while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); |
1459 |
|
|
1460 |
/* Compute the length and set it in the data vector, and advance to |
/* Compute the length and set it in the data vector, and advance to |
1461 |
the next state. */ |
the next state. */ |
1500 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
1501 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
1502 |
condref > 0 for OPT_CREF setting at start of conditional group |
condref > 0 for OPT_CREF setting at start of conditional group |
1503 |
|
cd points to the data block with tables pointers |
1504 |
|
|
1505 |
Returns: TRUE on success |
Returns: TRUE on success |
1506 |
*/ |
*/ |
1507 |
|
|
1508 |
static BOOL |
static BOOL |
1509 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
1510 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref) |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
1511 |
|
compile_data *cd) |
1512 |
{ |
{ |
1513 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
1514 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
1555 |
|
|
1556 |
/* Now compile the branch */ |
/* Now compile the branch */ |
1557 |
|
|
1558 |
if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged)) |
if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd)) |
1559 |
{ |
{ |
1560 |
*ptrptr = ptr; |
*ptrptr = ptr; |
1561 |
return FALSE; |
return FALSE; |
1825 |
options various option bits |
options various option bits |
1826 |
errorptr pointer to pointer to error text |
errorptr pointer to pointer to error text |
1827 |
erroroffset ptr offset in pattern where error was detected |
erroroffset ptr offset in pattern where error was detected |
1828 |
|
tables pointer to character tables or NULL |
1829 |
|
|
1830 |
Returns: pointer to compiled data block, or NULL on error, |
Returns: pointer to compiled data block, or NULL on error, |
1831 |
with errorptr and erroroffset set |
with errorptr and erroroffset set |
1833 |
|
|
1834 |
pcre * |
pcre * |
1835 |
pcre_compile(const char *pattern, int options, const char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
1836 |
int *erroroffset) |
int *erroroffset, const unsigned char *tables) |
1837 |
{ |
{ |
1838 |
real_pcre *re; |
real_pcre *re; |
1839 |
int length = 3; /* For initial BRA plus length */ |
int length = 3; /* For initial BRA plus length */ |
1846 |
unsigned int brastackptr = 0; |
unsigned int brastackptr = 0; |
1847 |
uschar *code; |
uschar *code; |
1848 |
const uschar *ptr; |
const uschar *ptr; |
1849 |
|
compile_data compile_block; |
1850 |
int brastack[BRASTACK_SIZE]; |
int brastack[BRASTACK_SIZE]; |
1851 |
uschar bralenstack[BRASTACK_SIZE]; |
uschar bralenstack[BRASTACK_SIZE]; |
1852 |
|
|
1875 |
return NULL; |
return NULL; |
1876 |
} |
} |
1877 |
|
|
1878 |
|
/* Set up pointers to the individual character tables */ |
1879 |
|
|
1880 |
|
if (tables == NULL) tables = pcre_default_tables; |
1881 |
|
compile_block.lcc = tables + lcc_offset; |
1882 |
|
compile_block.fcc = tables + fcc_offset; |
1883 |
|
compile_block.cbits = tables + cbits_offset; |
1884 |
|
compile_block.ctypes = tables + ctypes_offset; |
1885 |
|
|
1886 |
|
/* Reflect pattern for debugging output */ |
1887 |
|
|
1888 |
DPRINTF(("------------------------------------------------------------------\n")); |
DPRINTF(("------------------------------------------------------------------\n")); |
1889 |
DPRINTF(("%s\n", pattern)); |
DPRINTF(("%s\n", pattern)); |
1890 |
|
|
1903 |
|
|
1904 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
1905 |
{ |
{ |
1906 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
1907 |
if (c == '#') |
if (c == '#') |
1908 |
{ |
{ |
1909 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
1921 |
case '\\': |
case '\\': |
1922 |
{ |
{ |
1923 |
const uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
1924 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); |
1925 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1926 |
if (c >= 0) |
if (c >= 0) |
1927 |
{ |
{ |
1941 |
int refnum = -c - ESC_REF; |
int refnum = -c - ESC_REF; |
1942 |
if (refnum > top_backref) top_backref = refnum; |
if (refnum > top_backref) top_backref = refnum; |
1943 |
length++; /* For single back reference */ |
length++; /* For single back reference */ |
1944 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
1945 |
{ |
{ |
1946 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
1947 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1948 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
1949 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
1967 |
or back reference. */ |
or back reference. */ |
1968 |
|
|
1969 |
case '{': |
case '{': |
1970 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; |
1971 |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); |
1972 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1973 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
1974 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2003 |
{ |
{ |
2004 |
if (*ptr == '\\') |
if (*ptr == '\\') |
2005 |
{ |
{ |
2006 |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, |
2007 |
|
&compile_block); |
2008 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2009 |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
2010 |
} |
} |
2021 |
|
|
2022 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
2023 |
|
|
2024 |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2025 |
{ |
{ |
2026 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
2027 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2028 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2029 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2089 |
group. */ |
group. */ |
2090 |
|
|
2091 |
case '(': |
case '(': |
2092 |
if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0) |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
2093 |
{ |
{ |
2094 |
ptr += 4; |
ptr += 4; |
2095 |
length += 2; |
length += 2; |
2096 |
while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
2097 |
if (*ptr != ')') |
if (*ptr != ')') |
2098 |
{ |
{ |
2099 |
*errorptr = ERR26; |
*errorptr = ERR26; |
2262 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
2263 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
2264 |
|
|
2265 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2266 |
{ |
{ |
2267 |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr, |
2268 |
|
&compile_block); |
2269 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2270 |
} |
} |
2271 |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
2296 |
{ |
{ |
2297 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
2298 |
{ |
{ |
2299 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
2300 |
if (c == '#') |
if (c == '#') |
2301 |
{ |
{ |
2302 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
2310 |
if (c == '\\') |
if (c == '\\') |
2311 |
{ |
{ |
2312 |
const uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
2313 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, |
2314 |
|
&compile_block); |
2315 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2316 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
2317 |
} |
} |
2323 |
|
|
2324 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
2325 |
|
|
2326 |
while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (runlength < 255 && |
2327 |
|
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); |
2328 |
|
|
2329 |
ptr--; |
ptr--; |
2330 |
length += runlength; |
length += runlength; |
2359 |
|
|
2360 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
2361 |
re->options = options; |
re->options = options; |
2362 |
|
re->tables = tables; |
2363 |
|
|
2364 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
2365 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
2369 |
code = re->code; |
code = re->code; |
2370 |
*code = OP_BRA; |
*code = OP_BRA; |
2371 |
bracount = 0; |
bracount = 0; |
2372 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1); |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
2373 |
|
&compile_block); |
2374 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
2375 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
2376 |
|
|
2667 |
|
|
2668 |
|
|
2669 |
/************************************************* |
/************************************************* |
|
* Match a character type * |
|
|
*************************************************/ |
|
|
|
|
|
/* Not used in all the places it might be as it's sometimes faster |
|
|
to put the code inline. |
|
|
|
|
|
Arguments: |
|
|
type the character type |
|
|
c the character |
|
|
dotall the dotall flag |
|
|
|
|
|
Returns: TRUE if character is of the type |
|
|
*/ |
|
|
|
|
|
static BOOL |
|
|
match_type(int type, int c, BOOL dotall) |
|
|
{ |
|
|
|
|
|
#ifdef DEBUG |
|
|
if (isprint(c)) printf("matching subject %c against ", c); |
|
|
else printf("matching subject \\x%02x against ", c); |
|
|
printf("%s\n", OP_names[type]); |
|
|
#endif |
|
|
|
|
|
switch(type) |
|
|
{ |
|
|
case OP_ANY: return dotall || c != '\n'; |
|
|
case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0; |
|
|
case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0; |
|
|
case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0; |
|
|
case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0; |
|
|
case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0; |
|
|
case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0; |
|
|
} |
|
|
return FALSE; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/************************************************* |
|
2670 |
* Match a back-reference * |
* Match a back-reference * |
2671 |
*************************************************/ |
*************************************************/ |
2672 |
|
|
2709 |
/* Separate the caseless case for speed */ |
/* Separate the caseless case for speed */ |
2710 |
|
|
2711 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
2712 |
{ while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; } |
{ |
2713 |
|
while (length-- > 0) |
2714 |
|
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; |
2715 |
|
} |
2716 |
else |
else |
2717 |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
2718 |
|
|
3165 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
3166 |
{ |
{ |
3167 |
BOOL prev_is_word = (eptr != md->start_subject) && |
BOOL prev_is_word = (eptr != md->start_subject) && |
3168 |
((pcre_ctypes[eptr[-1]] & ctype_word) != 0); |
((md->ctypes[eptr[-1]] & ctype_word) != 0); |
3169 |
BOOL cur_is_word = (eptr < md->end_subject) && |
BOOL cur_is_word = (eptr < md->end_subject) && |
3170 |
((pcre_ctypes[*eptr] & ctype_word) != 0); |
((md->ctypes[*eptr] & ctype_word) != 0); |
3171 |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
3172 |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
3173 |
return FALSE; |
return FALSE; |
3184 |
break; |
break; |
3185 |
|
|
3186 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3187 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0) |
if (eptr >= md->end_subject || |
3188 |
|
(md->ctypes[*eptr++] & ctype_digit) != 0) |
3189 |
return FALSE; |
return FALSE; |
3190 |
ecode++; |
ecode++; |
3191 |
break; |
break; |
3192 |
|
|
3193 |
case OP_DIGIT: |
case OP_DIGIT: |
3194 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0) |
if (eptr >= md->end_subject || |
3195 |
|
(md->ctypes[*eptr++] & ctype_digit) == 0) |
3196 |
return FALSE; |
return FALSE; |
3197 |
ecode++; |
ecode++; |
3198 |
break; |
break; |
3199 |
|
|
3200 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3201 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0) |
if (eptr >= md->end_subject || |
3202 |
|
(md->ctypes[*eptr++] & ctype_space) != 0) |
3203 |
return FALSE; |
return FALSE; |
3204 |
ecode++; |
ecode++; |
3205 |
break; |
break; |
3206 |
|
|
3207 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3208 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0) |
if (eptr >= md->end_subject || |
3209 |
|
(md->ctypes[*eptr++] & ctype_space) == 0) |
3210 |
return FALSE; |
return FALSE; |
3211 |
ecode++; |
ecode++; |
3212 |
break; |
break; |
3213 |
|
|
3214 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3215 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0) |
if (eptr >= md->end_subject || |
3216 |
|
(md->ctypes[*eptr++] & ctype_word) != 0) |
3217 |
return FALSE; |
return FALSE; |
3218 |
ecode++; |
ecode++; |
3219 |
break; |
break; |
3220 |
|
|
3221 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3222 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0) |
if (eptr >= md->end_subject || |
3223 |
|
(md->ctypes[*eptr++] & ctype_word) == 0) |
3224 |
return FALSE; |
return FALSE; |
3225 |
ecode++; |
ecode++; |
3226 |
break; |
break; |
3452 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
3453 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3454 |
{ |
{ |
3455 |
while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE; |
while (length-- > 0) |
3456 |
|
if (md->lcc[*ecode++] != md->lcc[*eptr++]) |
3457 |
|
return FALSE; |
3458 |
} |
} |
3459 |
else |
else |
3460 |
{ |
{ |
3511 |
|
|
3512 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3513 |
{ |
{ |
3514 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3515 |
for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3516 |
|
if (c != md->lcc[*eptr++]) return FALSE; |
3517 |
if (min == max) continue; |
if (min == max) continue; |
3518 |
if (minimize) |
if (minimize) |
3519 |
{ |
{ |
3521 |
{ |
{ |
3522 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3523 |
return TRUE; |
return TRUE; |
3524 |
if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3525 |
|
c != md->lcc[*eptr++]) |
3526 |
return FALSE; |
return FALSE; |
3527 |
} |
} |
3528 |
/* Control never gets here */ |
/* Control never gets here */ |
3532 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3533 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3534 |
{ |
{ |
3535 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; |
3536 |
eptr++; |
eptr++; |
3537 |
} |
} |
3538 |
while (eptr >= pp) |
while (eptr >= pp) |
3582 |
ecode++; |
ecode++; |
3583 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3584 |
{ |
{ |
3585 |
if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE; |
if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE; |
3586 |
} |
} |
3587 |
else |
else |
3588 |
{ |
{ |
3642 |
|
|
3643 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3644 |
{ |
{ |
3645 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3646 |
for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3647 |
|
if (c == md->lcc[*eptr++]) return FALSE; |
3648 |
if (min == max) continue; |
if (min == max) continue; |
3649 |
if (minimize) |
if (minimize) |
3650 |
{ |
{ |
3652 |
{ |
{ |
3653 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3654 |
return TRUE; |
return TRUE; |
3655 |
if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3656 |
|
c == md->lcc[*eptr++]) |
3657 |
return FALSE; |
return FALSE; |
3658 |
} |
} |
3659 |
/* Control never gets here */ |
/* Control never gets here */ |
3663 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3664 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3665 |
{ |
{ |
3666 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; |
3667 |
eptr++; |
eptr++; |
3668 |
} |
} |
3669 |
while (eptr >= pp) |
while (eptr >= pp) |
3757 |
|
|
3758 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3759 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3760 |
if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
3761 |
break; |
break; |
3762 |
|
|
3763 |
case OP_DIGIT: |
case OP_DIGIT: |
3764 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3765 |
if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
3766 |
break; |
break; |
3767 |
|
|
3768 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3769 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3770 |
if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
3771 |
break; |
break; |
3772 |
|
|
3773 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3774 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3775 |
if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
3776 |
break; |
break; |
3777 |
|
|
3778 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3779 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0) |
for (i = 1; i <= min; i++) |
3780 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) != 0) |
3781 |
|
return FALSE; |
3782 |
break; |
break; |
3783 |
|
|
3784 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3785 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0) |
for (i = 1; i <= min; i++) |
3786 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) == 0) |
3787 |
|
return FALSE; |
3788 |
break; |
break; |
3789 |
} |
} |
3790 |
|
|
3793 |
if (min == max) continue; |
if (min == max) continue; |
3794 |
|
|
3795 |
/* If minimizing, we have to test the rest of the pattern before each |
/* If minimizing, we have to test the rest of the pattern before each |
3796 |
subsequent match, so inlining isn't much help; just use the function. */ |
subsequent match. */ |
3797 |
|
|
3798 |
if (minimize) |
if (minimize) |
3799 |
{ |
{ |
3800 |
for (i = min;; i++) |
for (i = min;; i++) |
3801 |
{ |
{ |
3802 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
3803 |
if (i >= max || eptr >= md->end_subject || |
if (i >= max || eptr >= md->end_subject) return FALSE; |
3804 |
!match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0)) |
|
3805 |
return FALSE; |
c = *eptr++; |
3806 |
|
switch(ctype) |
3807 |
|
{ |
3808 |
|
case OP_ANY: |
3809 |
|
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
3810 |
|
break; |
3811 |
|
|
3812 |
|
case OP_NOT_DIGIT: |
3813 |
|
if ((md->ctypes[c] & ctype_digit) != 0) return FALSE; |
3814 |
|
break; |
3815 |
|
|
3816 |
|
case OP_DIGIT: |
3817 |
|
if ((md->ctypes[c] & ctype_digit) == 0) return FALSE; |
3818 |
|
break; |
3819 |
|
|
3820 |
|
case OP_NOT_WHITESPACE: |
3821 |
|
if ((md->ctypes[c] & ctype_space) != 0) return FALSE; |
3822 |
|
break; |
3823 |
|
|
3824 |
|
case OP_WHITESPACE: |
3825 |
|
if ((md->ctypes[c] & ctype_space) == 0) return FALSE; |
3826 |
|
break; |
3827 |
|
|
3828 |
|
case OP_NOT_WORDCHAR: |
3829 |
|
if ((md->ctypes[c] & ctype_word) != 0) return FALSE; |
3830 |
|
break; |
3831 |
|
|
3832 |
|
case OP_WORDCHAR: |
3833 |
|
if ((md->ctypes[c] & ctype_word) == 0) return FALSE; |
3834 |
|
break; |
3835 |
|
} |
3836 |
} |
} |
3837 |
/* Control never gets here */ |
/* Control never gets here */ |
3838 |
} |
} |
3865 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3866 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3867 |
{ |
{ |
3868 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) |
3869 |
break; |
break; |
3870 |
eptr++; |
eptr++; |
3871 |
} |
} |
3874 |
case OP_DIGIT: |
case OP_DIGIT: |
3875 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3876 |
{ |
{ |
3877 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) |
3878 |
break; |
break; |
3879 |
eptr++; |
eptr++; |
3880 |
} |
} |
3883 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3884 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3885 |
{ |
{ |
3886 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) |
3887 |
break; |
break; |
3888 |
eptr++; |
eptr++; |
3889 |
} |
} |
3892 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3893 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3894 |
{ |
{ |
3895 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) |
3896 |
break; |
break; |
3897 |
eptr++; |
eptr++; |
3898 |
} |
} |
3901 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3902 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3903 |
{ |
{ |
3904 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) |
3905 |
break; |
break; |
3906 |
eptr++; |
eptr++; |
3907 |
} |
} |
3910 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3911 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3912 |
{ |
{ |
3913 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) |
3914 |
break; |
break; |
3915 |
eptr++; |
eptr++; |
3916 |
} |
} |
4000 |
|
|
4001 |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
4002 |
|
|
4003 |
|
match_block.lcc = re->tables + lcc_offset; |
4004 |
|
match_block.ctypes = re->tables + ctypes_offset; |
4005 |
|
|
4006 |
/* The ims options can vary during the matching as a result of the presence |
/* The ims options can vary during the matching as a result of the presence |
4007 |
of (?ims) items in the pattern. They are kept in a local variable so that |
of (?ims) items in the pattern. They are kept in a local variable so that |
4008 |
restoring at the exit of a group is easy. */ |
restoring at the exit of a group is easy. */ |
4037 |
resetcount = 2 + re->top_bracket * 2; |
resetcount = 2 + re->top_bracket * 2; |
4038 |
if (resetcount > offsetcount) resetcount = ocount; |
if (resetcount > offsetcount) resetcount = ocount; |
4039 |
|
|
4040 |
|
/* Reset the working variable associated with each extraction. These should |
4041 |
|
never be used unless previously set, but they get saved and restored, and so we |
4042 |
|
initialize them to avoid reading uninitialized locations. */ |
4043 |
|
|
4044 |
|
if (match_block.offset_vector != NULL) |
4045 |
|
{ |
4046 |
|
register int *iptr = match_block.offset_vector + ocount; |
4047 |
|
register int *iend = iptr - resetcount/2 + 1; |
4048 |
|
while (--iptr >= iend) *iptr = -1; |
4049 |
|
} |
4050 |
|
|
4051 |
/* Set up the first character to match, if available. The first_char value is |
/* Set up the first character to match, if available. The first_char value is |
4052 |
never set for an anchored regular expression, but the anchoring may be forced |
never set for an anchored regular expression, but the anchoring may be forced |
4053 |
at run time, so we have to test for anchoring. The first char may be unset for |
at run time, so we have to test for anchoring. The first char may be unset for |
4059 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->options & PCRE_FIRSTSET) != 0) |
4060 |
{ |
{ |
4061 |
first_char = re->first_char; |
first_char = re->first_char; |
4062 |
if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char]; |
if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char]; |
4063 |
} |
} |
4064 |
else |
else |
4065 |
if (!startline && extra != NULL && |
if (!startline && extra != NULL && |
4084 |
if (first_char >= 0) |
if (first_char >= 0) |
4085 |
{ |
{ |
4086 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
4087 |
while (start_match < end_subject && pcre_lcc[*start_match] != first_char) |
while (start_match < end_subject && |
4088 |
|
match_block.lcc[*start_match] != first_char) |
4089 |
start_match++; |
start_match++; |
4090 |
else |
else |
4091 |
while (start_match < end_subject && *start_match != first_char) |
while (start_match < end_subject && *start_match != first_char) |
4158 |
DPRINTF((">>>> returning %d\n", rc)); |
DPRINTF((">>>> returning %d\n", rc)); |
4159 |
return rc; |
return rc; |
4160 |
} |
} |
4161 |
|
|
4162 |
|
/* This "while" is the end of the "do" above */ |
4163 |
|
|
4164 |
while (!anchored && |
while (!anchored && |
4165 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
4166 |
start_match++ < end_subject); |
start_match++ < end_subject); |