4446 |
/* Get next character in the pattern */ |
/* Get next character in the pattern */ |
4447 |
|
|
4448 |
c = *ptr; |
c = *ptr; |
4449 |
|
|
4450 |
/* If we are at the end of a nested substitution, revert to the outer level |
/* If we are at the end of a nested substitution, revert to the outer level |
4451 |
string. Nesting only happens one level deep. */ |
string. Nesting only happens one level deep. */ |
4452 |
|
|
4548 |
} |
} |
4549 |
goto NORMAL_CHAR; |
goto NORMAL_CHAR; |
4550 |
} |
} |
4551 |
|
/* Control does not reach here. */ |
4552 |
|
} |
4553 |
|
|
4554 |
|
/* In extended mode, skip white space and comments. We need a loop in order |
4555 |
|
to check for more white space and more comments after a comment. */ |
4556 |
|
|
4557 |
|
if ((options & PCRE_EXTENDED) != 0) |
4558 |
|
{ |
4559 |
|
for (;;) |
4560 |
|
{ |
4561 |
|
while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr); |
4562 |
|
if (c != CHAR_NUMBER_SIGN) break; |
4563 |
|
ptr++; |
4564 |
|
while (*ptr != CHAR_NULL) |
4565 |
|
{ |
4566 |
|
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ |
4567 |
|
{ /* IS_NEWLINE sets cd->nllen. */ |
4568 |
|
ptr += cd->nllen; |
4569 |
|
break; |
4570 |
|
} |
4571 |
|
ptr++; |
4572 |
|
#ifdef SUPPORT_UTF |
4573 |
|
if (utf) FORWARDCHAR(ptr); |
4574 |
|
#endif |
4575 |
|
} |
4576 |
|
c = *ptr; /* Either NULL or the char after a newline */ |
4577 |
|
} |
4578 |
} |
} |
4579 |
|
|
4580 |
|
/* See if the next thing is a quantifier. */ |
4581 |
|
|
4582 |
is_quantifier = |
is_quantifier = |
4583 |
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || |
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || |
4584 |
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); |
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); |
4594 |
previous_callout = NULL; |
previous_callout = NULL; |
4595 |
} |
} |
4596 |
|
|
4597 |
/* In extended mode, skip white space and comments. */ |
/* Create auto callout, except for quantifiers, or while processing property |
4598 |
|
strings that are substituted for \w etc in UCP mode. */ |
|
if ((options & PCRE_EXTENDED) != 0) |
|
|
{ |
|
|
if (MAX_255(*ptr) && (cd->ctypes[c] & ctype_space) != 0) continue; |
|
|
if (c == CHAR_NUMBER_SIGN) |
|
|
{ |
|
|
ptr++; |
|
|
while (*ptr != CHAR_NULL) |
|
|
{ |
|
|
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
|
|
ptr++; |
|
|
#ifdef SUPPORT_UTF |
|
|
if (utf) FORWARDCHAR(ptr); |
|
|
#endif |
|
|
} |
|
|
if (*ptr != CHAR_NULL) continue; |
|
|
|
|
|
/* Else fall through to handle end of string */ |
|
|
c = 0; |
|
|
} |
|
|
} |
|
|
|
|
|
/* No auto callout for quantifiers, or while processing property strings that |
|
|
are substituted for \w etc in UCP mode. */ |
|
4599 |
|
|
4600 |
if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL) |
if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL) |
4601 |
{ |
{ |
4602 |
previous_callout = code; |
previous_callout = code; |
4603 |
code = auto_callout(code, ptr, cd); |
code = auto_callout(code, ptr, cd); |
4604 |
} |
} |
4605 |
|
|
4606 |
|
/* Process the next pattern item. */ |
4607 |
|
|
4608 |
switch(c) |
switch(c) |
4609 |
{ |
{ |
4610 |
/* ===================================================================*/ |
/* ===================================================================*/ |
4611 |
case 0: /* The branch terminates at string end */ |
case CHAR_NULL: /* The branch terminates at string end */ |
4612 |
case CHAR_VERTICAL_LINE: /* or | or ) */ |
case CHAR_VERTICAL_LINE: /* or | or ) */ |
4613 |
case CHAR_RIGHT_PARENTHESIS: |
case CHAR_RIGHT_PARENTHESIS: |
4614 |
*firstcharptr = firstchar; |
*firstcharptr = firstchar; |
5453 |
insert something before it. */ |
insert something before it. */ |
5454 |
|
|
5455 |
tempcode = previous; |
tempcode = previous; |
5456 |
|
|
5457 |
|
/* Before checking for a possessive quantifier, we must skip over |
5458 |
|
whitespace and comments in extended mode because Perl allows white space at |
5459 |
|
this point. */ |
5460 |
|
|
5461 |
|
if ((options & PCRE_EXTENDED) != 0) |
5462 |
|
{ |
5463 |
|
const pcre_uchar *p = ptr + 1; |
5464 |
|
for (;;) |
5465 |
|
{ |
5466 |
|
while (MAX_255(*p) && (cd->ctypes[*p] & ctype_space) != 0) p++; |
5467 |
|
if (*p != CHAR_NUMBER_SIGN) break; |
5468 |
|
p++; |
5469 |
|
while (*p != CHAR_NULL) |
5470 |
|
{ |
5471 |
|
if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */ |
5472 |
|
{ /* IS_NEWLINE sets cd->nllen. */ |
5473 |
|
p += cd->nllen; |
5474 |
|
break; |
5475 |
|
} |
5476 |
|
p++; |
5477 |
|
#ifdef SUPPORT_UTF |
5478 |
|
if (utf) FORWARDCHAR(p); |
5479 |
|
#endif |
5480 |
|
} /* Loop for comment characters */ |
5481 |
|
} /* Loop for multiple comments */ |
5482 |
|
ptr = p - 1; /* Character before the next significant one. */ |
5483 |
|
} |
5484 |
|
|
5485 |
/* If the next character is '+', we have a possessive quantifier. This |
/* If the next character is '+', we have a possessive quantifier. This |
5486 |
implies greediness, whatever the setting of the PCRE_UNGREEDY option. |
implies greediness, whatever the setting of the PCRE_UNGREEDY option. |
7788 |
|
|
7789 |
/* ===================================================================*/ |
/* ===================================================================*/ |
7790 |
/* Handle a literal character. It is guaranteed not to be whitespace or # |
/* Handle a literal character. It is guaranteed not to be whitespace or # |
7791 |
when the extended flag is set. If we are in UTF-8 mode, it may be a |
when the extended flag is set. If we are in a UTF mode, it may be a |
7792 |
multi-byte literal character. */ |
multi-unit literal character. */ |
7793 |
|
|
7794 |
default: |
default: |
7795 |
NORMAL_CHAR: |
NORMAL_CHAR: |
8935 |
cd->nl[0] = newline; |
cd->nl[0] = newline; |
8936 |
} |
} |
8937 |
} |
} |
8938 |
|
|
8939 |
/* Maximum back reference and backref bitmap. The bitmap records up to 31 back |
/* Maximum back reference and backref bitmap. The bitmap records up to 31 back |
8940 |
references to help in deciding whether (.*) can be treated as anchored or not. |
references to help in deciding whether (.*) can be treated as anchored or not. |
8941 |
*/ |
*/ |
8988 |
ptr += skipatstart; |
ptr += skipatstart; |
8989 |
code = cworkspace; |
code = cworkspace; |
8990 |
*code = OP_BRA; |
*code = OP_BRA; |
8991 |
|
|
8992 |
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, |
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, |
8993 |
FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, |
FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, |
8994 |
cd, &length); |
cd, &length); |