9 |
|
|
10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
11 |
|
|
12 |
Copyright (c) 1998 University of Cambridge |
Copyright (c) 1997-1999 University of Cambridge |
13 |
|
|
14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
107 |
|
|
108 |
static BOOL |
static BOOL |
109 |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
110 |
BOOL, int); |
BOOL, int, compile_data *); |
|
|
|
|
/* Structure for passing "static" information around between the functions |
|
|
doing the matching, so that they are thread-safe. */ |
|
|
|
|
|
typedef struct match_data { |
|
|
int errorcode; /* As it says */ |
|
|
int *offset_vector; /* Offset vector */ |
|
|
int offset_end; /* One past the end */ |
|
|
int offset_max; /* The maximum usable for return data */ |
|
|
BOOL offset_overflow; /* Set if too many extractions */ |
|
|
BOOL notbol; /* NOTBOL flag */ |
|
|
BOOL noteol; /* NOTEOL flag */ |
|
|
BOOL endonly; /* Dollar not before final \n */ |
|
|
const uschar *start_subject; /* Start of the subject string */ |
|
|
const uschar *end_subject; /* End of the subject string */ |
|
|
const uschar *end_match_ptr; /* Subject position at end match */ |
|
|
int end_offset_top; /* Highwater mark at end of match */ |
|
|
} match_data; |
|
111 |
|
|
112 |
|
|
113 |
|
|
127 |
|
|
128 |
|
|
129 |
/************************************************* |
/************************************************* |
130 |
|
* Default character tables * |
131 |
|
*************************************************/ |
132 |
|
|
133 |
|
/* A default set of character tables is included in the PCRE binary. Its source |
134 |
|
is built by the maketables auxiliary program, which uses the default C ctypes |
135 |
|
functions, and put in the file chartables.c. These tables are used by PCRE |
136 |
|
whenever the caller of pcre_compile() does not provide an alternate set of |
137 |
|
tables. */ |
138 |
|
|
139 |
|
#include "chartables.c" |
140 |
|
|
141 |
|
|
142 |
|
|
143 |
|
/************************************************* |
144 |
* Return version string * |
* Return version string * |
145 |
*************************************************/ |
*************************************************/ |
146 |
|
|
233 |
bracount number of previous extracting brackets |
bracount number of previous extracting brackets |
234 |
options the options bits |
options the options bits |
235 |
isclass TRUE if inside a character class |
isclass TRUE if inside a character class |
236 |
|
cd pointer to char tables block |
237 |
|
|
238 |
Returns: zero or positive => a data character |
Returns: zero or positive => a data character |
239 |
negative => a special escape sequence |
negative => a special escape sequence |
242 |
|
|
243 |
static int |
static int |
244 |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
245 |
int options, BOOL isclass) |
int options, BOOL isclass, compile_data *cd) |
246 |
{ |
{ |
247 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
248 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
285 |
{ |
{ |
286 |
oldptr = ptr; |
oldptr = ptr; |
287 |
c -= '0'; |
c -= '0'; |
288 |
while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0) |
while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) |
289 |
c = c * 10 + *(++ptr) - '0'; |
c = c * 10 + *(++ptr) - '0'; |
290 |
if (c < 10 || c <= bracount) |
if (c < 10 || c <= bracount) |
291 |
{ |
{ |
311 |
|
|
312 |
case '0': |
case '0': |
313 |
c -= '0'; |
c -= '0'; |
314 |
while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 && |
while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && |
315 |
ptr[1] != '8' && ptr[1] != '9') |
ptr[1] != '8' && ptr[1] != '9') |
316 |
c = c * 8 + *(++ptr) - '0'; |
c = c * 8 + *(++ptr) - '0'; |
317 |
break; |
break; |
320 |
|
|
321 |
case 'x': |
case 'x': |
322 |
c = 0; |
c = 0; |
323 |
while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) |
324 |
{ |
{ |
325 |
ptr++; |
ptr++; |
326 |
c = c * 16 + pcre_lcc[*ptr] - |
c = c * 16 + cd->lcc[*ptr] - |
327 |
(((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
(((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
328 |
} |
} |
329 |
break; |
break; |
330 |
|
|
338 |
|
|
339 |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
340 |
|
|
341 |
if (c >= 'a' && c <= 'z') c = pcre_fcc[c]; |
if (c >= 'a' && c <= 'z') c = cd->fcc[c]; |
342 |
c ^= 0x40; |
c ^= 0x40; |
343 |
break; |
break; |
344 |
|
|
345 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
346 |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
347 |
for Perl compatibility, it is a literal. */ |
for Perl compatibility, it is a literal. This code looks a bit odd, but |
348 |
|
there used to be some cases other than the default, and there may be again |
349 |
|
in future, so I haven't "optimized" it. */ |
350 |
|
|
351 |
default: |
default: |
352 |
if ((options & PCRE_EXTRA) != 0) switch(c) |
if ((options & PCRE_EXTRA) != 0) switch(c) |
376 |
|
|
377 |
Arguments: |
Arguments: |
378 |
p pointer to the first char after '{' |
p pointer to the first char after '{' |
379 |
|
cd pointer to char tables block |
380 |
|
|
381 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
382 |
*/ |
*/ |
383 |
|
|
384 |
static BOOL |
static BOOL |
385 |
is_counted_repeat(const uschar *p) |
is_counted_repeat(const uschar *p, compile_data *cd) |
386 |
{ |
{ |
387 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
388 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
389 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
390 |
|
|
391 |
if (*p++ != ',') return FALSE; |
if (*p++ != ',') return FALSE; |
392 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
393 |
|
|
394 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
395 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
396 |
return (*p == '}'); |
return (*p == '}'); |
397 |
} |
} |
398 |
|
|
412 |
maxp pointer to int for max |
maxp pointer to int for max |
413 |
returned as -1 if no max |
returned as -1 if no max |
414 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
415 |
|
cd pointer to character tables block |
416 |
|
|
417 |
Returns: pointer to '}' on success; |
Returns: pointer to '}' on success; |
418 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
419 |
*/ |
*/ |
420 |
|
|
421 |
static const uschar * |
static const uschar * |
422 |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, |
423 |
|
const char **errorptr, compile_data *cd) |
424 |
{ |
{ |
425 |
int min = 0; |
int min = 0; |
426 |
int max = -1; |
int max = -1; |
427 |
|
|
428 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
429 |
|
|
430 |
if (*p == '}') max = min; else |
if (*p == '}') max = min; else |
431 |
{ |
{ |
432 |
if (*(++p) != '}') |
if (*(++p) != '}') |
433 |
{ |
{ |
434 |
max = 0; |
max = 0; |
435 |
while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
436 |
if (max < min) |
if (max < min) |
437 |
{ |
{ |
438 |
*errorptr = ERR4; |
*errorptr = ERR4; |
617 |
/* Scan the pattern, compiling it into the code vector. |
/* Scan the pattern, compiling it into the code vector. |
618 |
|
|
619 |
Arguments: |
Arguments: |
620 |
options the option bits |
options the option bits |
621 |
brackets points to number of brackets used |
brackets points to number of brackets used |
622 |
code points to the pointer to the current code point |
code points to the pointer to the current code point |
623 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
624 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
625 |
optchanged set to the value of the last OP_OPT item compiled |
optchanged set to the value of the last OP_OPT item compiled |
626 |
|
cd contains pointers to tables |
627 |
|
|
628 |
Returns: TRUE on success |
Returns: TRUE on success |
629 |
FALSE, with *errorptr set on error |
FALSE, with *errorptr set on error |
630 |
*/ |
*/ |
631 |
|
|
632 |
static BOOL |
static BOOL |
633 |
compile_branch(int options, int *brackets, uschar **codeptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
634 |
const uschar **ptrptr, const char **errorptr, int *optchanged) |
const uschar **ptrptr, const char **errorptr, int *optchanged, |
635 |
|
compile_data *cd) |
636 |
{ |
{ |
637 |
int repeat_type, op_type; |
int repeat_type, op_type; |
638 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
664 |
c = *ptr; |
c = *ptr; |
665 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
666 |
{ |
{ |
667 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
668 |
if (c == '#') |
if (c == '#') |
669 |
{ |
{ |
670 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
752 |
|
|
753 |
if (c == '\\') |
if (c == '\\') |
754 |
{ |
{ |
755 |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
756 |
if (-c == ESC_b) c = '\b'; |
if (-c == ESC_b) c = '\b'; |
757 |
else if (c < 0) |
else if (c < 0) |
758 |
{ |
{ |
759 |
|
register const uschar *cbits = cd->cbits; |
760 |
class_charcount = 10; |
class_charcount = 10; |
761 |
switch (-c) |
switch (-c) |
762 |
{ |
{ |
763 |
case ESC_d: |
case ESC_d: |
764 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit]; |
765 |
continue; |
continue; |
766 |
|
|
767 |
case ESC_D: |
case ESC_D: |
768 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit]; |
769 |
continue; |
continue; |
770 |
|
|
771 |
case ESC_w: |
case ESC_w: |
772 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
773 |
class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]); |
774 |
continue; |
continue; |
775 |
|
|
776 |
case ESC_W: |
case ESC_W: |
777 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
778 |
class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]); |
779 |
continue; |
continue; |
780 |
|
|
781 |
case ESC_s: |
case ESC_s: |
782 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space]; |
783 |
continue; |
continue; |
784 |
|
|
785 |
case ESC_S: |
case ESC_S: |
786 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space]; |
787 |
continue; |
continue; |
788 |
|
|
789 |
default: |
default: |
815 |
|
|
816 |
if (d == '\\') |
if (d == '\\') |
817 |
{ |
{ |
818 |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
819 |
if (d < 0) |
if (d < 0) |
820 |
{ |
{ |
821 |
if (d == -ESC_b) d = '\b'; else |
if (d == -ESC_b) d = '\b'; else |
837 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
838 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
839 |
{ |
{ |
840 |
int uc = pcre_fcc[c]; /* flip case */ |
int uc = cd->fcc[c]; /* flip case */ |
841 |
class[uc/8] |= (1 << (uc&7)); |
class[uc/8] |= (1 << (uc&7)); |
842 |
} |
} |
843 |
class_charcount++; /* in case a one-char range */ |
class_charcount++; /* in case a one-char range */ |
852 |
class [c/8] |= (1 << (c&7)); |
class [c/8] |= (1 << (c&7)); |
853 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
854 |
{ |
{ |
855 |
c = pcre_fcc[c]; /* flip case */ |
c = cd->fcc[c]; /* flip case */ |
856 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
857 |
} |
} |
858 |
class_charcount++; |
class_charcount++; |
899 |
/* Various kinds of repeat */ |
/* Various kinds of repeat */ |
900 |
|
|
901 |
case '{': |
case '{': |
902 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; |
903 |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr); |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); |
904 |
if (*errorptr != NULL) goto FAILED; |
if (*errorptr != NULL) goto FAILED; |
905 |
goto REPEAT; |
goto REPEAT; |
906 |
|
|
1196 |
|
|
1197 |
case '(': |
case '(': |
1198 |
bravalue = OP_COND; /* Conditional group */ |
bravalue = OP_COND; /* Conditional group */ |
1199 |
if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0) |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
1200 |
{ |
{ |
1201 |
condref = *ptr - '0'; |
condref = *ptr - '0'; |
1202 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
1329 |
errorptr, /* Where to put an error message */ |
errorptr, /* Where to put an error message */ |
1330 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
1331 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
1332 |
condref)) /* Condition reference number */ |
condref, /* Condition reference number */ |
1333 |
|
cd)) /* Tables block */ |
1334 |
goto FAILED; |
goto FAILED; |
1335 |
|
|
1336 |
/* At the end of compiling, code is still pointing to the start of the |
/* At the end of compiling, code is still pointing to the start of the |
1378 |
|
|
1379 |
case '\\': |
case '\\': |
1380 |
tempptr = ptr; |
tempptr = ptr; |
1381 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1382 |
|
|
1383 |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
1384 |
are arranged to be the negation of the corresponding OP_values. For the |
are arranged to be the negation of the corresponding OP_values. For the |
1423 |
{ |
{ |
1424 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
1425 |
{ |
{ |
1426 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
1427 |
if (c == '#') |
if (c == '#') |
1428 |
{ |
{ |
1429 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
1439 |
if (c == '\\') |
if (c == '\\') |
1440 |
{ |
{ |
1441 |
tempptr = ptr; |
tempptr = ptr; |
1442 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1443 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
1444 |
} |
} |
1445 |
|
|
1451 |
|
|
1452 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
1453 |
|
|
1454 |
while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); |
1455 |
|
|
1456 |
/* Compute the length and set it in the data vector, and advance to |
/* Compute the length and set it in the data vector, and advance to |
1457 |
the next state. */ |
the next state. */ |
1496 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
1497 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
1498 |
condref > 0 for OP_CREF setting at start of conditional group |
condref > 0 for OP_CREF setting at start of conditional group |
1499 |
|
cd points to the data block with tables pointers |
1500 |
|
|
1501 |
Returns: TRUE on success |
Returns: TRUE on success |
1502 |
*/ |
*/ |
1503 |
|
|
1504 |
static BOOL |
static BOOL |
1505 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
1506 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref) |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
1507 |
|
compile_data *cd) |
1508 |
{ |
{ |
1509 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
1510 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
1551 |
|
|
1552 |
/* Now compile the branch */ |
/* Now compile the branch */ |
1553 |
|
|
1554 |
if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged)) |
if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd)) |
1555 |
{ |
{ |
1556 |
*ptrptr = ptr; |
*ptrptr = ptr; |
1557 |
return FALSE; |
return FALSE; |
1821 |
options various option bits |
options various option bits |
1822 |
errorptr pointer to pointer to error text |
errorptr pointer to pointer to error text |
1823 |
erroroffset ptr offset in pattern where error was detected |
erroroffset ptr offset in pattern where error was detected |
1824 |
|
tables pointer to character tables or NULL |
1825 |
|
|
1826 |
Returns: pointer to compiled data block, or NULL on error, |
Returns: pointer to compiled data block, or NULL on error, |
1827 |
with errorptr and erroroffset set |
with errorptr and erroroffset set |
1829 |
|
|
1830 |
pcre * |
pcre * |
1831 |
pcre_compile(const char *pattern, int options, const char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
1832 |
int *erroroffset) |
int *erroroffset, const unsigned char *tables) |
1833 |
{ |
{ |
1834 |
real_pcre *re; |
real_pcre *re; |
1835 |
int length = 3; /* For initial BRA plus length */ |
int length = 3; /* For initial BRA plus length */ |
1842 |
unsigned int brastackptr = 0; |
unsigned int brastackptr = 0; |
1843 |
uschar *code; |
uschar *code; |
1844 |
const uschar *ptr; |
const uschar *ptr; |
1845 |
|
compile_data compile_block; |
1846 |
int brastack[BRASTACK_SIZE]; |
int brastack[BRASTACK_SIZE]; |
1847 |
uschar bralenstack[BRASTACK_SIZE]; |
uschar bralenstack[BRASTACK_SIZE]; |
1848 |
|
|
1871 |
return NULL; |
return NULL; |
1872 |
} |
} |
1873 |
|
|
1874 |
|
/* Set up pointers to the individual character tables */ |
1875 |
|
|
1876 |
|
if (tables == NULL) tables = pcre_default_tables; |
1877 |
|
compile_block.lcc = tables + lcc_offset; |
1878 |
|
compile_block.fcc = tables + fcc_offset; |
1879 |
|
compile_block.cbits = tables + cbits_offset; |
1880 |
|
compile_block.ctypes = tables + ctypes_offset; |
1881 |
|
|
1882 |
|
/* Reflect pattern for debugging output */ |
1883 |
|
|
1884 |
DPRINTF(("------------------------------------------------------------------\n")); |
DPRINTF(("------------------------------------------------------------------\n")); |
1885 |
DPRINTF(("%s\n", pattern)); |
DPRINTF(("%s\n", pattern)); |
1886 |
|
|
1899 |
|
|
1900 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
1901 |
{ |
{ |
1902 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
1903 |
if (c == '#') |
if (c == '#') |
1904 |
{ |
{ |
1905 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
1917 |
case '\\': |
case '\\': |
1918 |
{ |
{ |
1919 |
const uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
1920 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); |
1921 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1922 |
if (c >= 0) |
if (c >= 0) |
1923 |
{ |
{ |
1937 |
int refnum = -c - ESC_REF; |
int refnum = -c - ESC_REF; |
1938 |
if (refnum > top_backref) top_backref = refnum; |
if (refnum > top_backref) top_backref = refnum; |
1939 |
length++; /* For single back reference */ |
length++; /* For single back reference */ |
1940 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
1941 |
{ |
{ |
1942 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
1943 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1944 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
1945 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
1963 |
or back reference. */ |
or back reference. */ |
1964 |
|
|
1965 |
case '{': |
case '{': |
1966 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; |
1967 |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); |
1968 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1969 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
1970 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
1999 |
{ |
{ |
2000 |
if (*ptr == '\\') |
if (*ptr == '\\') |
2001 |
{ |
{ |
2002 |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, |
2003 |
|
&compile_block); |
2004 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2005 |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
2006 |
} |
} |
2017 |
|
|
2018 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
2019 |
|
|
2020 |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2021 |
{ |
{ |
2022 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
2023 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2024 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2025 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2085 |
group. */ |
group. */ |
2086 |
|
|
2087 |
case '(': |
case '(': |
2088 |
if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0) |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
2089 |
{ |
{ |
2090 |
ptr += 4; |
ptr += 4; |
2091 |
length += 2; |
length += 2; |
2092 |
while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
2093 |
if (*ptr != ')') |
if (*ptr != ')') |
2094 |
{ |
{ |
2095 |
*errorptr = ERR26; |
*errorptr = ERR26; |
2258 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
2259 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
2260 |
|
|
2261 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2262 |
{ |
{ |
2263 |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr, |
2264 |
|
&compile_block); |
2265 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2266 |
} |
} |
2267 |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
2292 |
{ |
{ |
2293 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
2294 |
{ |
{ |
2295 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
2296 |
if (c == '#') |
if (c == '#') |
2297 |
{ |
{ |
2298 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
2306 |
if (c == '\\') |
if (c == '\\') |
2307 |
{ |
{ |
2308 |
const uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
2309 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, |
2310 |
|
&compile_block); |
2311 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2312 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
2313 |
} |
} |
2319 |
|
|
2320 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
2321 |
|
|
2322 |
while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (runlength < 255 && |
2323 |
|
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); |
2324 |
|
|
2325 |
ptr--; |
ptr--; |
2326 |
length += runlength; |
length += runlength; |
2355 |
|
|
2356 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
2357 |
re->options = options; |
re->options = options; |
2358 |
|
re->tables = tables; |
2359 |
|
|
2360 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
2361 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
2365 |
code = re->code; |
code = re->code; |
2366 |
*code = OP_BRA; |
*code = OP_BRA; |
2367 |
bracount = 0; |
bracount = 0; |
2368 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1); |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
2369 |
|
&compile_block); |
2370 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
2371 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
2372 |
|
|
2663 |
|
|
2664 |
|
|
2665 |
/************************************************* |
/************************************************* |
|
* Match a character type * |
|
|
*************************************************/ |
|
|
|
|
|
/* Not used in all the places it might be as it's sometimes faster |
|
|
to put the code inline. |
|
|
|
|
|
Arguments: |
|
|
type the character type |
|
|
c the character |
|
|
dotall the dotall flag |
|
|
|
|
|
Returns: TRUE if character is of the type |
|
|
*/ |
|
|
|
|
|
static BOOL |
|
|
match_type(int type, int c, BOOL dotall) |
|
|
{ |
|
|
|
|
|
#ifdef DEBUG |
|
|
if (isprint(c)) printf("matching subject %c against ", c); |
|
|
else printf("matching subject \\x%02x against ", c); |
|
|
printf("%s\n", OP_names[type]); |
|
|
#endif |
|
|
|
|
|
switch(type) |
|
|
{ |
|
|
case OP_ANY: return dotall || c != '\n'; |
|
|
case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0; |
|
|
case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0; |
|
|
case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0; |
|
|
case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0; |
|
|
case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0; |
|
|
case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0; |
|
|
} |
|
|
return FALSE; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/************************************************* |
|
2666 |
* Match a back-reference * |
* Match a back-reference * |
2667 |
*************************************************/ |
*************************************************/ |
2668 |
|
|
2705 |
/* Separate the caseless case for speed */ |
/* Separate the caseless case for speed */ |
2706 |
|
|
2707 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
2708 |
{ while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; } |
{ |
2709 |
|
while (length-- > 0) |
2710 |
|
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; |
2711 |
|
} |
2712 |
else |
else |
2713 |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
2714 |
|
|
3161 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
3162 |
{ |
{ |
3163 |
BOOL prev_is_word = (eptr != md->start_subject) && |
BOOL prev_is_word = (eptr != md->start_subject) && |
3164 |
((pcre_ctypes[eptr[-1]] & ctype_word) != 0); |
((md->ctypes[eptr[-1]] & ctype_word) != 0); |
3165 |
BOOL cur_is_word = (eptr < md->end_subject) && |
BOOL cur_is_word = (eptr < md->end_subject) && |
3166 |
((pcre_ctypes[*eptr] & ctype_word) != 0); |
((md->ctypes[*eptr] & ctype_word) != 0); |
3167 |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
3168 |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
3169 |
return FALSE; |
return FALSE; |
3180 |
break; |
break; |
3181 |
|
|
3182 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3183 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0) |
if (eptr >= md->end_subject || |
3184 |
|
(md->ctypes[*eptr++] & ctype_digit) != 0) |
3185 |
return FALSE; |
return FALSE; |
3186 |
ecode++; |
ecode++; |
3187 |
break; |
break; |
3188 |
|
|
3189 |
case OP_DIGIT: |
case OP_DIGIT: |
3190 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0) |
if (eptr >= md->end_subject || |
3191 |
|
(md->ctypes[*eptr++] & ctype_digit) == 0) |
3192 |
return FALSE; |
return FALSE; |
3193 |
ecode++; |
ecode++; |
3194 |
break; |
break; |
3195 |
|
|
3196 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3197 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0) |
if (eptr >= md->end_subject || |
3198 |
|
(md->ctypes[*eptr++] & ctype_space) != 0) |
3199 |
return FALSE; |
return FALSE; |
3200 |
ecode++; |
ecode++; |
3201 |
break; |
break; |
3202 |
|
|
3203 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3204 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0) |
if (eptr >= md->end_subject || |
3205 |
|
(md->ctypes[*eptr++] & ctype_space) == 0) |
3206 |
return FALSE; |
return FALSE; |
3207 |
ecode++; |
ecode++; |
3208 |
break; |
break; |
3209 |
|
|
3210 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3211 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0) |
if (eptr >= md->end_subject || |
3212 |
|
(md->ctypes[*eptr++] & ctype_word) != 0) |
3213 |
return FALSE; |
return FALSE; |
3214 |
ecode++; |
ecode++; |
3215 |
break; |
break; |
3216 |
|
|
3217 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3218 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0) |
if (eptr >= md->end_subject || |
3219 |
|
(md->ctypes[*eptr++] & ctype_word) == 0) |
3220 |
return FALSE; |
return FALSE; |
3221 |
ecode++; |
ecode++; |
3222 |
break; |
break; |
3448 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
3449 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3450 |
{ |
{ |
3451 |
while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE; |
while (length-- > 0) |
3452 |
|
if (md->lcc[*ecode++] != md->lcc[*eptr++]) |
3453 |
|
return FALSE; |
3454 |
} |
} |
3455 |
else |
else |
3456 |
{ |
{ |
3507 |
|
|
3508 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3509 |
{ |
{ |
3510 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3511 |
for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3512 |
|
if (c != md->lcc[*eptr++]) return FALSE; |
3513 |
if (min == max) continue; |
if (min == max) continue; |
3514 |
if (minimize) |
if (minimize) |
3515 |
{ |
{ |
3517 |
{ |
{ |
3518 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3519 |
return TRUE; |
return TRUE; |
3520 |
if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3521 |
|
c != md->lcc[*eptr++]) |
3522 |
return FALSE; |
return FALSE; |
3523 |
} |
} |
3524 |
/* Control never gets here */ |
/* Control never gets here */ |
3528 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3529 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3530 |
{ |
{ |
3531 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; |
3532 |
eptr++; |
eptr++; |
3533 |
} |
} |
3534 |
while (eptr >= pp) |
while (eptr >= pp) |
3578 |
ecode++; |
ecode++; |
3579 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3580 |
{ |
{ |
3581 |
if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE; |
if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE; |
3582 |
} |
} |
3583 |
else |
else |
3584 |
{ |
{ |
3638 |
|
|
3639 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3640 |
{ |
{ |
3641 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3642 |
for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3643 |
|
if (c == md->lcc[*eptr++]) return FALSE; |
3644 |
if (min == max) continue; |
if (min == max) continue; |
3645 |
if (minimize) |
if (minimize) |
3646 |
{ |
{ |
3648 |
{ |
{ |
3649 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3650 |
return TRUE; |
return TRUE; |
3651 |
if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3652 |
|
c == md->lcc[*eptr++]) |
3653 |
return FALSE; |
return FALSE; |
3654 |
} |
} |
3655 |
/* Control never gets here */ |
/* Control never gets here */ |
3659 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3660 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3661 |
{ |
{ |
3662 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; |
3663 |
eptr++; |
eptr++; |
3664 |
} |
} |
3665 |
while (eptr >= pp) |
while (eptr >= pp) |
3753 |
|
|
3754 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3755 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3756 |
if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
3757 |
break; |
break; |
3758 |
|
|
3759 |
case OP_DIGIT: |
case OP_DIGIT: |
3760 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3761 |
if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
3762 |
break; |
break; |
3763 |
|
|
3764 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3765 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3766 |
if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
3767 |
break; |
break; |
3768 |
|
|
3769 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3770 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3771 |
if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
3772 |
break; |
break; |
3773 |
|
|
3774 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3775 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0) |
for (i = 1; i <= min; i++) |
3776 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) != 0) |
3777 |
|
return FALSE; |
3778 |
break; |
break; |
3779 |
|
|
3780 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3781 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0) |
for (i = 1; i <= min; i++) |
3782 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) == 0) |
3783 |
|
return FALSE; |
3784 |
break; |
break; |
3785 |
} |
} |
3786 |
|
|
3789 |
if (min == max) continue; |
if (min == max) continue; |
3790 |
|
|
3791 |
/* If minimizing, we have to test the rest of the pattern before each |
/* If minimizing, we have to test the rest of the pattern before each |
3792 |
subsequent match, so inlining isn't much help; just use the function. */ |
subsequent match. */ |
3793 |
|
|
3794 |
if (minimize) |
if (minimize) |
3795 |
{ |
{ |
3796 |
for (i = min;; i++) |
for (i = min;; i++) |
3797 |
{ |
{ |
3798 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
3799 |
if (i >= max || eptr >= md->end_subject || |
if (i >= max || eptr >= md->end_subject) return FALSE; |
3800 |
!match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0)) |
|
3801 |
return FALSE; |
c = *eptr++; |
3802 |
|
switch(ctype) |
3803 |
|
{ |
3804 |
|
case OP_ANY: |
3805 |
|
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
3806 |
|
break; |
3807 |
|
|
3808 |
|
case OP_NOT_DIGIT: |
3809 |
|
if ((md->ctypes[c] & ctype_digit) != 0) return FALSE; |
3810 |
|
break; |
3811 |
|
|
3812 |
|
case OP_DIGIT: |
3813 |
|
if ((md->ctypes[c] & ctype_digit) == 0) return FALSE; |
3814 |
|
break; |
3815 |
|
|
3816 |
|
case OP_NOT_WHITESPACE: |
3817 |
|
if ((md->ctypes[c] & ctype_space) != 0) return FALSE; |
3818 |
|
break; |
3819 |
|
|
3820 |
|
case OP_WHITESPACE: |
3821 |
|
if ((md->ctypes[c] & ctype_space) == 0) return FALSE; |
3822 |
|
break; |
3823 |
|
|
3824 |
|
case OP_NOT_WORDCHAR: |
3825 |
|
if ((md->ctypes[c] & ctype_word) != 0) return FALSE; |
3826 |
|
break; |
3827 |
|
|
3828 |
|
case OP_WORDCHAR: |
3829 |
|
if ((md->ctypes[c] & ctype_word) == 0) return FALSE; |
3830 |
|
break; |
3831 |
|
} |
3832 |
} |
} |
3833 |
/* Control never gets here */ |
/* Control never gets here */ |
3834 |
} |
} |
3861 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3862 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3863 |
{ |
{ |
3864 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) |
3865 |
break; |
break; |
3866 |
eptr++; |
eptr++; |
3867 |
} |
} |
3870 |
case OP_DIGIT: |
case OP_DIGIT: |
3871 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3872 |
{ |
{ |
3873 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) |
3874 |
break; |
break; |
3875 |
eptr++; |
eptr++; |
3876 |
} |
} |
3879 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3880 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3881 |
{ |
{ |
3882 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) |
3883 |
break; |
break; |
3884 |
eptr++; |
eptr++; |
3885 |
} |
} |
3888 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3889 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3890 |
{ |
{ |
3891 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) |
3892 |
break; |
break; |
3893 |
eptr++; |
eptr++; |
3894 |
} |
} |
3897 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3898 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3899 |
{ |
{ |
3900 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) |
3901 |
break; |
break; |
3902 |
eptr++; |
eptr++; |
3903 |
} |
} |
3906 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3907 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3908 |
{ |
{ |
3909 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) |
3910 |
break; |
break; |
3911 |
eptr++; |
eptr++; |
3912 |
} |
} |
3996 |
|
|
3997 |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
3998 |
|
|
3999 |
|
match_block.lcc = re->tables + lcc_offset; |
4000 |
|
match_block.ctypes = re->tables + ctypes_offset; |
4001 |
|
|
4002 |
/* The ims options can vary during the matching as a result of the presence |
/* The ims options can vary during the matching as a result of the presence |
4003 |
of (?ims) items in the pattern. They are kept in a local variable so that |
of (?ims) items in the pattern. They are kept in a local variable so that |
4004 |
restoring at the exit of a group is easy. */ |
restoring at the exit of a group is easy. */ |
4033 |
resetcount = 2 + re->top_bracket * 2; |
resetcount = 2 + re->top_bracket * 2; |
4034 |
if (resetcount > offsetcount) resetcount = ocount; |
if (resetcount > offsetcount) resetcount = ocount; |
4035 |
|
|
4036 |
|
/* Reset the working variable associated with each extraction. These should |
4037 |
|
never be used unless previously set, but they get saved and restored, and so we |
4038 |
|
initialize them to avoid reading uninitialized locations. */ |
4039 |
|
|
4040 |
|
if (match_block.offset_vector != NULL) |
4041 |
|
{ |
4042 |
|
register int *iptr = match_block.offset_vector + ocount; |
4043 |
|
register int *iend = iptr - resetcount/2 + 1; |
4044 |
|
while (--iptr >= iend) *iptr = -1; |
4045 |
|
} |
4046 |
|
|
4047 |
/* Set up the first character to match, if available. The first_char value is |
/* Set up the first character to match, if available. The first_char value is |
4048 |
never set for an anchored regular expression, but the anchoring may be forced |
never set for an anchored regular expression, but the anchoring may be forced |
4049 |
at run time, so we have to test for anchoring. The first char may be unset for |
at run time, so we have to test for anchoring. The first char may be unset for |
4055 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->options & PCRE_FIRSTSET) != 0) |
4056 |
{ |
{ |
4057 |
first_char = re->first_char; |
first_char = re->first_char; |
4058 |
if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char]; |
if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char]; |
4059 |
} |
} |
4060 |
else |
else |
4061 |
if (!startline && extra != NULL && |
if (!startline && extra != NULL && |
4080 |
if (first_char >= 0) |
if (first_char >= 0) |
4081 |
{ |
{ |
4082 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
4083 |
while (start_match < end_subject && pcre_lcc[*start_match] != first_char) |
while (start_match < end_subject && |
4084 |
|
match_block.lcc[*start_match] != first_char) |
4085 |
start_match++; |
start_match++; |
4086 |
else |
else |
4087 |
while (start_match < end_subject && *start_match != first_char) |
while (start_match < end_subject && *start_match != first_char) |
4154 |
DPRINTF((">>>> returning %d\n", rc)); |
DPRINTF((">>>> returning %d\n", rc)); |
4155 |
return rc; |
return rc; |
4156 |
} |
} |
4157 |
|
|
4158 |
|
/* This "while" is the end of the "do" above */ |
4159 |
|
|
4160 |
while (!anchored && |
while (!anchored && |
4161 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
4162 |
start_match++ < end_subject); |
start_match++ < end_subject); |