7 |
below for why this module is different). |
below for why this module is different). |
8 |
|
|
9 |
Written by Philip Hazel |
Written by Philip Hazel |
10 |
Copyright (c) 1997-2011 University of Cambridge |
Copyright (c) 1997-2012 University of Cambridge |
11 |
|
|
12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
38 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
39 |
*/ |
*/ |
40 |
|
|
|
|
|
41 |
/* This module contains the external function pcre_dfa_exec(), which is an |
/* This module contains the external function pcre_dfa_exec(), which is an |
42 |
alternative matching function that uses a sort of DFA algorithm (not a true |
alternative matching function that uses a sort of DFA algorithm (not a true |
43 |
FSM). This is NOT Perl-compatible, but it has advantages in certain |
FSM). This is NOT Perl-compatible, but it has advantages in certain |
44 |
applications. */ |
applications. */ |
45 |
|
|
46 |
|
|
112 |
the character is to be found. ***NOTE*** If the start of this table is |
the character is to be found. ***NOTE*** If the start of this table is |
113 |
modified, the three tables that follow must also be modified. */ |
modified, the three tables that follow must also be modified. */ |
114 |
|
|
115 |
static const uschar coptable[] = { |
static const pcre_uint8 coptable[] = { |
116 |
0, /* End */ |
0, /* End */ |
117 |
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ |
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ |
118 |
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ |
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ |
127 |
1, /* noti */ |
1, /* noti */ |
128 |
/* Positive single-char repeats */ |
/* Positive single-char repeats */ |
129 |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
130 |
3, 3, 3, /* upto, minupto, exact */ |
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */ |
131 |
1, 1, 1, 3, /* *+, ++, ?+, upto+ */ |
1+IMM2_SIZE, /* exact */ |
132 |
|
1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */ |
133 |
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ |
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ |
134 |
3, 3, 3, /* upto I, minupto I, exact I */ |
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */ |
135 |
1, 1, 1, 3, /* *+I, ++I, ?+I, upto+I */ |
1+IMM2_SIZE, /* exact I */ |
136 |
|
1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ |
137 |
/* Negative single-char repeats - only for chars < 256 */ |
/* Negative single-char repeats - only for chars < 256 */ |
138 |
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ |
139 |
3, 3, 3, /* NOT upto, minupto, exact */ |
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */ |
140 |
1, 1, 1, 3, /* NOT *+, ++, ?+, upto+ */ |
1+IMM2_SIZE, /* NOT exact */ |
141 |
|
1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */ |
142 |
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ |
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ |
143 |
3, 3, 3, /* NOT upto I, minupto I, exact I */ |
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */ |
144 |
1, 1, 1, 3, /* NOT *+I, ++I, ?+I, upto+I */ |
1+IMM2_SIZE, /* NOT exact I */ |
145 |
|
1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */ |
146 |
/* Positive type repeats */ |
/* Positive type repeats */ |
147 |
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ |
148 |
3, 3, 3, /* Type upto, minupto, exact */ |
1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */ |
149 |
1, 1, 1, 3, /* Type *+, ++, ?+, upto+ */ |
1+IMM2_SIZE, /* Type exact */ |
150 |
|
1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */ |
151 |
/* Character class & ref repeats */ |
/* Character class & ref repeats */ |
152 |
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */ |
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */ |
153 |
0, 0, /* CRRANGE, CRMINRANGE */ |
0, 0, /* CRRANGE, CRMINRANGE */ |
186 |
the subject is reached. ***NOTE*** If the start of this table is modified, the |
the subject is reached. ***NOTE*** If the start of this table is modified, the |
187 |
two tables that follow must also be modified. */ |
two tables that follow must also be modified. */ |
188 |
|
|
189 |
static const uschar poptable[] = { |
static const pcre_uint8 poptable[] = { |
190 |
0, /* End */ |
0, /* End */ |
191 |
0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */ |
0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */ |
192 |
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ |
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ |
253 |
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, |
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, |
254 |
and \w */ |
and \w */ |
255 |
|
|
256 |
static const uschar toptable1[] = { |
static const pcre_uint8 toptable1[] = { |
257 |
0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, |
258 |
ctype_digit, ctype_digit, |
ctype_digit, ctype_digit, |
259 |
ctype_space, ctype_space, |
ctype_space, ctype_space, |
261 |
0, 0 /* OP_ANY, OP_ALLANY */ |
0, 0 /* OP_ANY, OP_ALLANY */ |
262 |
}; |
}; |
263 |
|
|
264 |
static const uschar toptable2[] = { |
static const pcre_uint8 toptable2[] = { |
265 |
0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, |
266 |
ctype_digit, 0, |
ctype_digit, 0, |
267 |
ctype_space, 0, |
ctype_space, 0, |
281 |
int data; /* Some use extra data */ |
int data; /* Some use extra data */ |
282 |
} stateblock; |
} stateblock; |
283 |
|
|
284 |
#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int)) |
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) |
285 |
|
|
286 |
|
|
287 |
#ifdef PCRE_DEBUG |
#ifdef PCRE_DEBUG |
300 |
*/ |
*/ |
301 |
|
|
302 |
static void |
static void |
303 |
pchars(unsigned char *p, int length, FILE *f) |
pchars(const pcre_uchar *p, int length, FILE *f) |
304 |
{ |
{ |
305 |
int c; |
int c; |
306 |
while (length-- > 0) |
while (length-- > 0) |
381 |
next_new_state->count = (y); \ |
next_new_state->count = (y); \ |
382 |
next_new_state->data = (z); \ |
next_new_state->data = (z); \ |
383 |
next_new_state++; \ |
next_new_state++; \ |
384 |
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ |
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \ |
385 |
|
(x), (y), (z), __LINE__)); \ |
386 |
} \ |
} \ |
387 |
else return PCRE_ERROR_DFA_WSSIZE |
else return PCRE_ERROR_DFA_WSSIZE |
388 |
|
|
391 |
static int |
static int |
392 |
internal_dfa_exec( |
internal_dfa_exec( |
393 |
dfa_match_data *md, |
dfa_match_data *md, |
394 |
const uschar *this_start_code, |
const pcre_uchar *this_start_code, |
395 |
const uschar *current_subject, |
const pcre_uchar *current_subject, |
396 |
int start_offset, |
int start_offset, |
397 |
int *offsets, |
int *offsets, |
398 |
int offsetcount, |
int offsetcount, |
403 |
stateblock *active_states, *new_states, *temp_states; |
stateblock *active_states, *new_states, *temp_states; |
404 |
stateblock *next_active_state, *next_new_state; |
stateblock *next_active_state, *next_new_state; |
405 |
|
|
406 |
const uschar *ctypes, *lcc, *fcc; |
const pcre_uint8 *ctypes, *lcc, *fcc; |
407 |
const uschar *ptr; |
const pcre_uchar *ptr; |
408 |
const uschar *end_code, *first_op; |
const pcre_uchar *end_code, *first_op; |
409 |
|
|
410 |
dfa_recursion_info new_recursive; |
dfa_recursion_info new_recursive; |
411 |
|
|
414 |
/* Some fields in the md block are frequently referenced, so we load them into |
/* Some fields in the md block are frequently referenced, so we load them into |
415 |
independent variables in the hope that this will perform better. */ |
independent variables in the hope that this will perform better. */ |
416 |
|
|
417 |
const uschar *start_subject = md->start_subject; |
const pcre_uchar *start_subject = md->start_subject; |
418 |
const uschar *end_subject = md->end_subject; |
const pcre_uchar *end_subject = md->end_subject; |
419 |
const uschar *start_code = md->start_code; |
const pcre_uchar *start_code = md->start_code; |
420 |
|
|
421 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
422 |
BOOL utf8 = (md->poptions & PCRE_UTF8) != 0; |
BOOL utf = (md->poptions & PCRE_UTF8) != 0; |
423 |
#else |
#else |
424 |
BOOL utf8 = FALSE; |
BOOL utf = FALSE; |
425 |
#endif |
#endif |
426 |
|
|
427 |
|
BOOL reset_could_continue = FALSE; |
428 |
|
|
429 |
rlevel++; |
rlevel++; |
430 |
offsetcount &= (-2); |
offsetcount &= (-2); |
431 |
|
|
449 |
|
|
450 |
first_op = this_start_code + 1 + LINK_SIZE + |
first_op = this_start_code + 1 + LINK_SIZE + |
451 |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
452 |
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0); |
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) |
453 |
|
? IMM2_SIZE:0); |
454 |
|
|
455 |
/* The first thing in any (sub) pattern is a bracket of some sort. Push all |
/* The first thing in any (sub) pattern is a bracket of some sort. Push all |
456 |
the alternative states onto the list, and find out where the end is. This |
the alternative states onto the list, and find out where the end is. This |
478 |
/* If we can't go back the amount required for the longest lookbehind |
/* If we can't go back the amount required for the longest lookbehind |
479 |
pattern, go back as far as we can; some alternatives may still be viable. */ |
pattern, go back as far as we can; some alternatives may still be viable. */ |
480 |
|
|
481 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
482 |
/* In character mode we have to step back character by character */ |
/* In character mode we have to step back character by character */ |
483 |
|
|
484 |
if (utf8) |
if (utf) |
485 |
{ |
{ |
486 |
for (gone_back = 0; gone_back < max_back; gone_back++) |
for (gone_back = 0; gone_back < max_back; gone_back++) |
487 |
{ |
{ |
488 |
if (current_subject <= start_subject) break; |
if (current_subject <= start_subject) break; |
489 |
current_subject--; |
current_subject--; |
490 |
while (current_subject > start_subject && |
ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--); |
|
(*current_subject & 0xc0) == 0x80) |
|
|
current_subject--; |
|
491 |
} |
} |
492 |
} |
} |
493 |
else |
else |
548 |
{ |
{ |
549 |
int length = 1 + LINK_SIZE + |
int length = 1 + LINK_SIZE + |
550 |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
551 |
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? |
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) |
552 |
2:0); |
? IMM2_SIZE:0); |
553 |
do |
do |
554 |
{ |
{ |
555 |
ADD_NEW((int)(end_code - start_code + length), 0); |
ADD_NEW((int)(end_code - start_code + length), 0); |
562 |
|
|
563 |
workspace[0] = 0; /* Bit indicating which vector is current */ |
workspace[0] = 0; /* Bit indicating which vector is current */ |
564 |
|
|
565 |
DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code)); |
DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code))); |
566 |
|
|
567 |
/* Loop for scanning the subject */ |
/* Loop for scanning the subject */ |
568 |
|
|
573 |
int clen, dlen; |
int clen, dlen; |
574 |
unsigned int c, d; |
unsigned int c, d; |
575 |
int forced_fail = 0; |
int forced_fail = 0; |
576 |
BOOL could_continue = FALSE; |
BOOL partial_newline = FALSE; |
577 |
|
BOOL could_continue = reset_could_continue; |
578 |
|
reset_could_continue = FALSE; |
579 |
|
|
580 |
/* Make the new state list into the active state list and empty the |
/* Make the new state list into the active state list and empty the |
581 |
new state list. */ |
new state list. */ |
591 |
|
|
592 |
#ifdef PCRE_DEBUG |
#ifdef PCRE_DEBUG |
593 |
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); |
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); |
594 |
pchars((uschar *)ptr, strlen((char *)ptr), stdout); |
pchars(ptr, STRLEN_UC(ptr), stdout); |
595 |
printf("\"\n"); |
printf("\"\n"); |
596 |
|
|
597 |
printf("%.*sActive states: ", rlevel*2-2, SP); |
printf("%.*sActive states: ", rlevel*2-2, SP); |
611 |
|
|
612 |
if (ptr < end_subject) |
if (ptr < end_subject) |
613 |
{ |
{ |
614 |
clen = 1; /* Number of bytes in the character */ |
clen = 1; /* Number of data items in the character */ |
615 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
616 |
if (utf8) { GETCHARLEN(c, ptr, clen); } else |
if (utf) { GETCHARLEN(c, ptr, clen); } else |
617 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
618 |
c = *ptr; |
c = *ptr; |
619 |
} |
} |
620 |
else |
else |
632 |
{ |
{ |
633 |
stateblock *current_state = active_states + i; |
stateblock *current_state = active_states + i; |
634 |
BOOL caseless = FALSE; |
BOOL caseless = FALSE; |
635 |
const uschar *code; |
const pcre_uchar *code; |
636 |
int state_offset = current_state->offset; |
int state_offset = current_state->offset; |
637 |
int count, codevalue, rrc; |
int count, codevalue, rrc; |
638 |
|
|
645 |
|
|
646 |
/* A negative offset is a special case meaning "hold off going to this |
/* A negative offset is a special case meaning "hold off going to this |
647 |
(negated) state until the number of characters in the data field have |
(negated) state until the number of characters in the data field have |
648 |
been skipped". */ |
been skipped". If the could_continue flag was passed over from a previous |
649 |
|
state, arrange for it to be passed on. */ |
650 |
|
|
651 |
if (state_offset < 0) |
if (state_offset < 0) |
652 |
{ |
{ |
655 |
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); |
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); |
656 |
ADD_NEW_DATA(state_offset, current_state->count, |
ADD_NEW_DATA(state_offset, current_state->count, |
657 |
current_state->data - 1); |
current_state->data - 1); |
658 |
|
if (could_continue) reset_could_continue = TRUE; |
659 |
continue; |
continue; |
660 |
} |
} |
661 |
else |
else |
695 |
permitted. |
permitted. |
696 |
|
|
697 |
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an |
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an |
698 |
argument that is not a data character - but is always one byte long. We |
argument that is not a data character - but is always one byte long because |
699 |
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in |
the values are small. We have to take special action to deal with \P, \p, |
700 |
this case. To keep the other cases fast, convert these ones to new opcodes. |
\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert |
701 |
*/ |
these ones to new opcodes. */ |
702 |
|
|
703 |
if (coptable[codevalue] > 0) |
if (coptable[codevalue] > 0) |
704 |
{ |
{ |
705 |
dlen = 1; |
dlen = 1; |
706 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
707 |
if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else |
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else |
708 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
709 |
d = code[coptable[codevalue]]; |
d = code[coptable[codevalue]]; |
710 |
if (codevalue >= OP_TYPESTAR) |
if (codevalue >= OP_TYPESTAR) |
711 |
{ |
{ |
789 |
offsets[0] = (int)(current_subject - start_subject); |
offsets[0] = (int)(current_subject - start_subject); |
790 |
offsets[1] = (int)(ptr - start_subject); |
offsets[1] = (int)(ptr - start_subject); |
791 |
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP, |
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP, |
792 |
offsets[1] - offsets[0], current_subject)); |
offsets[1] - offsets[0], (char *)current_subject)); |
793 |
} |
} |
794 |
if ((md->moptions & PCRE_DFA_SHORTEST) != 0) |
if ((md->moptions & PCRE_DFA_SHORTEST) != 0) |
795 |
{ |
{ |
826 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
827 |
case OP_CBRA: |
case OP_CBRA: |
828 |
case OP_SCBRA: |
case OP_SCBRA: |
829 |
ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE), 0); |
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0); |
830 |
code += GET(code, 1); |
code += GET(code, 1); |
831 |
while (*code == OP_ALT) |
while (*code == OP_ALT) |
832 |
{ |
{ |
894 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
895 |
case OP_ANY: |
case OP_ANY: |
896 |
if (clen > 0 && !IS_NEWLINE(ptr)) |
if (clen > 0 && !IS_NEWLINE(ptr)) |
897 |
{ ADD_NEW(state_offset + 1, 0); } |
{ |
898 |
|
if (ptr + 1 >= md->end_subject && |
899 |
|
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 && |
900 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
901 |
|
NLBLOCK->nllen == 2 && |
902 |
|
c == NLBLOCK->nl[0]) |
903 |
|
{ |
904 |
|
could_continue = partial_newline = TRUE; |
905 |
|
} |
906 |
|
else |
907 |
|
{ |
908 |
|
ADD_NEW(state_offset + 1, 0); |
909 |
|
} |
910 |
|
} |
911 |
break; |
break; |
912 |
|
|
913 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
935 |
(ptr == end_subject - md->nllen) |
(ptr == end_subject - md->nllen) |
936 |
)) |
)) |
937 |
{ ADD_ACTIVE(state_offset + 1, 0); } |
{ ADD_ACTIVE(state_offset + 1, 0); } |
938 |
|
else if (ptr + 1 >= md->end_subject && |
939 |
|
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 && |
940 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
941 |
|
NLBLOCK->nllen == 2 && |
942 |
|
c == NLBLOCK->nl[0]) |
943 |
|
{ |
944 |
|
if ((md->moptions & PCRE_PARTIAL_HARD) != 0) |
945 |
|
{ |
946 |
|
reset_could_continue = TRUE; |
947 |
|
ADD_NEW_DATA(-(state_offset + 1), 0, 1); |
948 |
|
} |
949 |
|
else could_continue = partial_newline = TRUE; |
950 |
|
} |
951 |
} |
} |
952 |
break; |
break; |
953 |
|
|
960 |
else if (clen == 0 || |
else if (clen == 0 || |
961 |
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr))) |
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr))) |
962 |
{ ADD_ACTIVE(state_offset + 1, 0); } |
{ ADD_ACTIVE(state_offset + 1, 0); } |
963 |
|
else if (ptr + 1 >= md->end_subject && |
964 |
|
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 && |
965 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
966 |
|
NLBLOCK->nllen == 2 && |
967 |
|
c == NLBLOCK->nl[0]) |
968 |
|
{ |
969 |
|
if ((md->moptions & PCRE_PARTIAL_HARD) != 0) |
970 |
|
{ |
971 |
|
reset_could_continue = TRUE; |
972 |
|
ADD_NEW_DATA(-(state_offset + 1), 0, 1); |
973 |
|
} |
974 |
|
else could_continue = partial_newline = TRUE; |
975 |
|
} |
976 |
} |
} |
977 |
else if (IS_NEWLINE(ptr)) |
else if (IS_NEWLINE(ptr)) |
978 |
{ ADD_ACTIVE(state_offset + 1, 0); } |
{ ADD_ACTIVE(state_offset + 1, 0); } |
1005 |
|
|
1006 |
if (ptr > start_subject) |
if (ptr > start_subject) |
1007 |
{ |
{ |
1008 |
const uschar *temp = ptr - 1; |
const pcre_uchar *temp = ptr - 1; |
1009 |
if (temp < md->start_used_ptr) md->start_used_ptr = temp; |
if (temp < md->start_used_ptr) md->start_used_ptr = temp; |
1010 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
1011 |
if (utf8) BACKCHAR(temp); |
if (utf) { BACKCHAR(temp); } |
1012 |
#endif |
#endif |
1013 |
GETCHARTEST(d, temp); |
GETCHARTEST(d, temp); |
1014 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
1073 |
break; |
break; |
1074 |
|
|
1075 |
case PT_GC: |
case PT_GC: |
1076 |
OK = _pcre_ucp_gentype[prop->chartype] == code[2]; |
OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; |
1077 |
break; |
break; |
1078 |
|
|
1079 |
case PT_PC: |
case PT_PC: |
1087 |
/* These are specials for combination cases. */ |
/* These are specials for combination cases. */ |
1088 |
|
|
1089 |
case PT_ALNUM: |
case PT_ALNUM: |
1090 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1091 |
_pcre_ucp_gentype[prop->chartype] == ucp_N; |
PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
1092 |
break; |
break; |
1093 |
|
|
1094 |
case PT_SPACE: /* Perl space */ |
case PT_SPACE: /* Perl space */ |
1095 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1096 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
1097 |
break; |
break; |
1098 |
|
|
1099 |
case PT_PXSPACE: /* POSIX space */ |
case PT_PXSPACE: /* POSIX space */ |
1100 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1101 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
1102 |
c == CHAR_FF || c == CHAR_CR; |
c == CHAR_FF || c == CHAR_CR; |
1103 |
break; |
break; |
1104 |
|
|
1105 |
case PT_WORD: |
case PT_WORD: |
1106 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1107 |
_pcre_ucp_gentype[prop->chartype] == ucp_N || |
PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
1108 |
c == CHAR_UNDERSCORE; |
c == CHAR_UNDERSCORE; |
1109 |
break; |
break; |
1110 |
|
|
1135 |
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } |
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } |
1136 |
if (clen > 0) |
if (clen > 0) |
1137 |
{ |
{ |
1138 |
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
if (d == OP_ANY && ptr + 1 >= md->end_subject && |
1139 |
|
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 && |
1140 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
1141 |
|
NLBLOCK->nllen == 2 && |
1142 |
|
c == NLBLOCK->nl[0]) |
1143 |
|
{ |
1144 |
|
could_continue = partial_newline = TRUE; |
1145 |
|
} |
1146 |
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
1147 |
(c < 256 && |
(c < 256 && |
1148 |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
1149 |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
1166 |
ADD_ACTIVE(state_offset + 2, 0); |
ADD_ACTIVE(state_offset + 2, 0); |
1167 |
if (clen > 0) |
if (clen > 0) |
1168 |
{ |
{ |
1169 |
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
if (d == OP_ANY && ptr + 1 >= md->end_subject && |
1170 |
|
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 && |
1171 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
1172 |
|
NLBLOCK->nllen == 2 && |
1173 |
|
c == NLBLOCK->nl[0]) |
1174 |
|
{ |
1175 |
|
could_continue = partial_newline = TRUE; |
1176 |
|
} |
1177 |
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
1178 |
(c < 256 && |
(c < 256 && |
1179 |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
1180 |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
1196 |
ADD_ACTIVE(state_offset + 2, 0); |
ADD_ACTIVE(state_offset + 2, 0); |
1197 |
if (clen > 0) |
if (clen > 0) |
1198 |
{ |
{ |
1199 |
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
if (d == OP_ANY && ptr + 1 >= md->end_subject && |
1200 |
|
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 && |
1201 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
1202 |
|
NLBLOCK->nllen == 2 && |
1203 |
|
c == NLBLOCK->nl[0]) |
1204 |
|
{ |
1205 |
|
could_continue = partial_newline = TRUE; |
1206 |
|
} |
1207 |
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
1208 |
(c < 256 && |
(c < 256 && |
1209 |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
1210 |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
1224 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
1225 |
if (clen > 0) |
if (clen > 0) |
1226 |
{ |
{ |
1227 |
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
if (d == OP_ANY && ptr + 1 >= md->end_subject && |
1228 |
|
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 && |
1229 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
1230 |
|
NLBLOCK->nllen == 2 && |
1231 |
|
c == NLBLOCK->nl[0]) |
1232 |
|
{ |
1233 |
|
could_continue = partial_newline = TRUE; |
1234 |
|
} |
1235 |
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
1236 |
(c < 256 && |
(c < 256 && |
1237 |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
1238 |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
1239 |
{ |
{ |
1240 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
1241 |
{ ADD_NEW(state_offset + 4, 0); } |
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); } |
1242 |
else |
else |
1243 |
{ ADD_NEW(state_offset, count); } |
{ ADD_NEW(state_offset, count); } |
1244 |
} |
} |
1249 |
case OP_TYPEUPTO: |
case OP_TYPEUPTO: |
1250 |
case OP_TYPEMINUPTO: |
case OP_TYPEMINUPTO: |
1251 |
case OP_TYPEPOSUPTO: |
case OP_TYPEPOSUPTO: |
1252 |
ADD_ACTIVE(state_offset + 4, 0); |
ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); |
1253 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
1254 |
if (clen > 0) |
if (clen > 0) |
1255 |
{ |
{ |
1256 |
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
if (d == OP_ANY && ptr + 1 >= md->end_subject && |
1257 |
|
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 && |
1258 |
|
NLBLOCK->nltype == NLTYPE_FIXED && |
1259 |
|
NLBLOCK->nllen == 2 && |
1260 |
|
c == NLBLOCK->nl[0]) |
1261 |
|
{ |
1262 |
|
could_continue = partial_newline = TRUE; |
1263 |
|
} |
1264 |
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || |
1265 |
(c < 256 && |
(c < 256 && |
1266 |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
(d != OP_ANY || !IS_NEWLINE(ptr)) && |
1267 |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) |
1272 |
next_active_state--; |
next_active_state--; |
1273 |
} |
} |
1274 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
1275 |
{ ADD_NEW(state_offset + 4, 0); } |
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); } |
1276 |
else |
else |
1277 |
{ ADD_NEW(state_offset, count); } |
{ ADD_NEW(state_offset, count); } |
1278 |
} |
} |
1307 |
break; |
break; |
1308 |
|
|
1309 |
case PT_GC: |
case PT_GC: |
1310 |
OK = _pcre_ucp_gentype[prop->chartype] == code[3]; |
OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; |
1311 |
break; |
break; |
1312 |
|
|
1313 |
case PT_PC: |
case PT_PC: |
1321 |
/* These are specials for combination cases. */ |
/* These are specials for combination cases. */ |
1322 |
|
|
1323 |
case PT_ALNUM: |
case PT_ALNUM: |
1324 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1325 |
_pcre_ucp_gentype[prop->chartype] == ucp_N; |
PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
1326 |
break; |
break; |
1327 |
|
|
1328 |
case PT_SPACE: /* Perl space */ |
case PT_SPACE: /* Perl space */ |
1329 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1330 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
1331 |
break; |
break; |
1332 |
|
|
1333 |
case PT_PXSPACE: /* POSIX space */ |
case PT_PXSPACE: /* POSIX space */ |
1334 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1335 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
1336 |
c == CHAR_FF || c == CHAR_CR; |
c == CHAR_FF || c == CHAR_CR; |
1337 |
break; |
break; |
1338 |
|
|
1339 |
case PT_WORD: |
case PT_WORD: |
1340 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1341 |
_pcre_ucp_gentype[prop->chartype] == ucp_N || |
PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
1342 |
c == CHAR_UNDERSCORE; |
c == CHAR_UNDERSCORE; |
1343 |
break; |
break; |
1344 |
|
|
1368 |
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: |
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: |
1369 |
count = current_state->count; /* Already matched */ |
count = current_state->count; /* Already matched */ |
1370 |
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } |
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } |
1371 |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
if (clen > 0) |
1372 |
{ |
{ |
1373 |
const uschar *nptr = ptr + clen; |
int lgb, rgb; |
1374 |
|
const pcre_uchar *nptr = ptr + clen; |
1375 |
int ncount = 0; |
int ncount = 0; |
1376 |
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) |
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) |
1377 |
{ |
{ |
1378 |
active_count--; /* Remove non-match possibility */ |
active_count--; /* Remove non-match possibility */ |
1379 |
next_active_state--; |
next_active_state--; |
1380 |
} |
} |
1381 |
|
lgb = UCD_GRAPHBREAK(c); |
1382 |
while (nptr < end_subject) |
while (nptr < end_subject) |
1383 |
{ |
{ |
1384 |
int nd; |
dlen = 1; |
1385 |
int ndlen = 1; |
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } |
1386 |
GETCHARLEN(nd, nptr, ndlen); |
rgb = UCD_GRAPHBREAK(d); |
1387 |
if (UCD_CATEGORY(nd) != ucp_M) break; |
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
1388 |
ncount++; |
ncount++; |
1389 |
nptr += ndlen; |
lgb = rgb; |
1390 |
|
nptr += dlen; |
1391 |
} |
} |
1392 |
count++; |
count++; |
1393 |
ADD_NEW_DATA(-state_offset, count, ncount); |
ADD_NEW_DATA(-state_offset, count, ncount); |
1557 |
break; |
break; |
1558 |
|
|
1559 |
case PT_GC: |
case PT_GC: |
1560 |
OK = _pcre_ucp_gentype[prop->chartype] == code[3]; |
OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; |
1561 |
break; |
break; |
1562 |
|
|
1563 |
case PT_PC: |
case PT_PC: |
1571 |
/* These are specials for combination cases. */ |
/* These are specials for combination cases. */ |
1572 |
|
|
1573 |
case PT_ALNUM: |
case PT_ALNUM: |
1574 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1575 |
_pcre_ucp_gentype[prop->chartype] == ucp_N; |
PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
1576 |
break; |
break; |
1577 |
|
|
1578 |
case PT_SPACE: /* Perl space */ |
case PT_SPACE: /* Perl space */ |
1579 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1580 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
1581 |
break; |
break; |
1582 |
|
|
1583 |
case PT_PXSPACE: /* POSIX space */ |
case PT_PXSPACE: /* POSIX space */ |
1584 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1585 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
1586 |
c == CHAR_FF || c == CHAR_CR; |
c == CHAR_FF || c == CHAR_CR; |
1587 |
break; |
break; |
1588 |
|
|
1589 |
case PT_WORD: |
case PT_WORD: |
1590 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1591 |
_pcre_ucp_gentype[prop->chartype] == ucp_N || |
PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
1592 |
c == CHAR_UNDERSCORE; |
c == CHAR_UNDERSCORE; |
1593 |
break; |
break; |
1594 |
|
|
1627 |
QS2: |
QS2: |
1628 |
|
|
1629 |
ADD_ACTIVE(state_offset + 2, 0); |
ADD_ACTIVE(state_offset + 2, 0); |
1630 |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
if (clen > 0) |
1631 |
{ |
{ |
1632 |
const uschar *nptr = ptr + clen; |
int lgb, rgb; |
1633 |
|
const pcre_uchar *nptr = ptr + clen; |
1634 |
int ncount = 0; |
int ncount = 0; |
1635 |
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || |
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || |
1636 |
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY) |
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY) |
1638 |
active_count--; /* Remove non-match possibility */ |
active_count--; /* Remove non-match possibility */ |
1639 |
next_active_state--; |
next_active_state--; |
1640 |
} |
} |
1641 |
|
lgb = UCD_GRAPHBREAK(c); |
1642 |
while (nptr < end_subject) |
while (nptr < end_subject) |
1643 |
{ |
{ |
1644 |
int nd; |
dlen = 1; |
1645 |
int ndlen = 1; |
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } |
1646 |
GETCHARLEN(nd, nptr, ndlen); |
rgb = UCD_GRAPHBREAK(d); |
1647 |
if (UCD_CATEGORY(nd) != ucp_M) break; |
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
1648 |
ncount++; |
ncount++; |
1649 |
nptr += ndlen; |
lgb = rgb; |
1650 |
|
nptr += dlen; |
1651 |
} |
} |
1652 |
ADD_NEW_DATA(-(state_offset + count), 0, ncount); |
ADD_NEW_DATA(-(state_offset + count), 0, ncount); |
1653 |
} |
} |
1814 |
case OP_PROP_EXTRA + OP_TYPEMINUPTO: |
case OP_PROP_EXTRA + OP_TYPEMINUPTO: |
1815 |
case OP_PROP_EXTRA + OP_TYPEPOSUPTO: |
case OP_PROP_EXTRA + OP_TYPEPOSUPTO: |
1816 |
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) |
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) |
1817 |
{ ADD_ACTIVE(state_offset + 6, 0); } |
{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); } |
1818 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
1819 |
if (clen > 0) |
if (clen > 0) |
1820 |
{ |
{ |
1821 |
BOOL OK; |
BOOL OK; |
1822 |
const ucd_record * prop = GET_UCD(c); |
const ucd_record * prop = GET_UCD(c); |
1823 |
switch(code[4]) |
switch(code[1 + IMM2_SIZE + 1]) |
1824 |
{ |
{ |
1825 |
case PT_ANY: |
case PT_ANY: |
1826 |
OK = TRUE; |
OK = TRUE; |
1832 |
break; |
break; |
1833 |
|
|
1834 |
case PT_GC: |
case PT_GC: |
1835 |
OK = _pcre_ucp_gentype[prop->chartype] == code[5]; |
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; |
1836 |
break; |
break; |
1837 |
|
|
1838 |
case PT_PC: |
case PT_PC: |
1839 |
OK = prop->chartype == code[5]; |
OK = prop->chartype == code[1 + IMM2_SIZE + 2]; |
1840 |
break; |
break; |
1841 |
|
|
1842 |
case PT_SC: |
case PT_SC: |
1843 |
OK = prop->script == code[5]; |
OK = prop->script == code[1 + IMM2_SIZE + 2]; |
1844 |
break; |
break; |
1845 |
|
|
1846 |
/* These are specials for combination cases. */ |
/* These are specials for combination cases. */ |
1847 |
|
|
1848 |
case PT_ALNUM: |
case PT_ALNUM: |
1849 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1850 |
_pcre_ucp_gentype[prop->chartype] == ucp_N; |
PRIV(ucp_gentype)[prop->chartype] == ucp_N; |
1851 |
break; |
break; |
1852 |
|
|
1853 |
case PT_SPACE: /* Perl space */ |
case PT_SPACE: /* Perl space */ |
1854 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1855 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; |
1856 |
break; |
break; |
1857 |
|
|
1858 |
case PT_PXSPACE: /* POSIX space */ |
case PT_PXSPACE: /* POSIX space */ |
1859 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
1860 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
1861 |
c == CHAR_FF || c == CHAR_CR; |
c == CHAR_FF || c == CHAR_CR; |
1862 |
break; |
break; |
1863 |
|
|
1864 |
case PT_WORD: |
case PT_WORD: |
1865 |
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || |
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
1866 |
_pcre_ucp_gentype[prop->chartype] == ucp_N || |
PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
1867 |
c == CHAR_UNDERSCORE; |
c == CHAR_UNDERSCORE; |
1868 |
break; |
break; |
1869 |
|
|
1882 |
next_active_state--; |
next_active_state--; |
1883 |
} |
} |
1884 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
1885 |
{ ADD_NEW(state_offset + 6, 0); } |
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); } |
1886 |
else |
else |
1887 |
{ ADD_NEW(state_offset, count); } |
{ ADD_NEW(state_offset, count); } |
1888 |
} |
} |
1895 |
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: |
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: |
1896 |
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: |
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: |
1897 |
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) |
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) |
1898 |
{ ADD_ACTIVE(state_offset + 4, 0); } |
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } |
1899 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
1900 |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
if (clen > 0) |
1901 |
{ |
{ |
1902 |
const uschar *nptr = ptr + clen; |
int lgb, rgb; |
1903 |
|
const pcre_uchar *nptr = ptr + clen; |
1904 |
int ncount = 0; |
int ncount = 0; |
1905 |
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) |
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) |
1906 |
{ |
{ |
1907 |
active_count--; /* Remove non-match possibility */ |
active_count--; /* Remove non-match possibility */ |
1908 |
next_active_state--; |
next_active_state--; |
1909 |
} |
} |
1910 |
|
lgb = UCD_GRAPHBREAK(c); |
1911 |
while (nptr < end_subject) |
while (nptr < end_subject) |
1912 |
{ |
{ |
1913 |
int nd; |
dlen = 1; |
1914 |
int ndlen = 1; |
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } |
1915 |
GETCHARLEN(nd, nptr, ndlen); |
rgb = UCD_GRAPHBREAK(d); |
1916 |
if (UCD_CATEGORY(nd) != ucp_M) break; |
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
1917 |
ncount++; |
ncount++; |
1918 |
nptr += ndlen; |
lgb = rgb; |
1919 |
|
nptr += dlen; |
1920 |
} |
} |
1921 |
|
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) |
1922 |
|
reset_could_continue = TRUE; |
1923 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
1924 |
{ ADD_NEW_DATA(-(state_offset + 4), 0, ncount); } |
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } |
1925 |
else |
else |
1926 |
{ ADD_NEW_DATA(-state_offset, count, ncount); } |
{ ADD_NEW_DATA(-state_offset, count, ncount); } |
1927 |
} |
} |
1934 |
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO: |
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO: |
1935 |
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: |
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: |
1936 |
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) |
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) |
1937 |
{ ADD_ACTIVE(state_offset + 4, 0); } |
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } |
1938 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
1939 |
if (clen > 0) |
if (clen > 0) |
1940 |
{ |
{ |
1961 |
next_active_state--; |
next_active_state--; |
1962 |
} |
} |
1963 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
1964 |
{ ADD_NEW_DATA(-(state_offset + 4), 0, ncount); } |
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } |
1965 |
else |
else |
1966 |
{ ADD_NEW_DATA(-state_offset, count, ncount); } |
{ ADD_NEW_DATA(-state_offset, count, ncount); } |
1967 |
break; |
break; |
1978 |
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: |
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: |
1979 |
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: |
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: |
1980 |
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) |
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) |
1981 |
{ ADD_ACTIVE(state_offset + 4, 0); } |
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } |
1982 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
1983 |
if (clen > 0) |
if (clen > 0) |
1984 |
{ |
{ |
2007 |
next_active_state--; |
next_active_state--; |
2008 |
} |
} |
2009 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
2010 |
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); } |
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } |
2011 |
else |
else |
2012 |
{ ADD_NEW_DATA(-state_offset, count, 0); } |
{ ADD_NEW_DATA(-state_offset, count, 0); } |
2013 |
} |
} |
2020 |
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: |
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: |
2021 |
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: |
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: |
2022 |
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) |
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) |
2023 |
{ ADD_ACTIVE(state_offset + 4, 0); } |
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } |
2024 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
2025 |
if (clen > 0) |
if (clen > 0) |
2026 |
{ |
{ |
2062 |
next_active_state--; |
next_active_state--; |
2063 |
} |
} |
2064 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
2065 |
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); } |
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } |
2066 |
else |
else |
2067 |
{ ADD_NEW_DATA(-state_offset, count, 0); } |
{ ADD_NEW_DATA(-state_offset, count, 0); } |
2068 |
} |
} |
2084 |
case OP_CHARI: |
case OP_CHARI: |
2085 |
if (clen == 0) break; |
if (clen == 0) break; |
2086 |
|
|
2087 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2088 |
if (utf8) |
if (utf) |
2089 |
{ |
{ |
2090 |
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else |
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else |
2091 |
{ |
{ |
2092 |
unsigned int othercase; |
unsigned int othercase; |
2093 |
if (c < 128) othercase = fcc[c]; else |
if (c < 128) |
2094 |
|
othercase = fcc[c]; |
2095 |
/* If we have Unicode property support, we can use it to test the |
else |
2096 |
other case of the character. */ |
/* If we have Unicode property support, we can use it to test the |
2097 |
|
other case of the character. */ |
2098 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2099 |
othercase = UCD_OTHERCASE(c); |
othercase = UCD_OTHERCASE(c); |
2100 |
#else |
#else |
2101 |
othercase = NOTACHAR; |
othercase = NOTACHAR; |
2102 |
#endif |
#endif |
2103 |
|
|
2104 |
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } |
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } |
2105 |
} |
} |
2106 |
} |
} |
2107 |
else |
else |
2108 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
2109 |
|
/* Not UTF mode */ |
|
/* Non-UTF-8 mode */ |
|
2110 |
{ |
{ |
2111 |
if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); } |
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d)) |
2112 |
|
{ ADD_NEW(state_offset + 2, 0); } |
2113 |
} |
} |
2114 |
break; |
break; |
2115 |
|
|
2121 |
to wait for them to pass before continuing. */ |
to wait for them to pass before continuing. */ |
2122 |
|
|
2123 |
case OP_EXTUNI: |
case OP_EXTUNI: |
2124 |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
if (clen > 0) |
2125 |
{ |
{ |
2126 |
const uschar *nptr = ptr + clen; |
int lgb, rgb; |
2127 |
|
const pcre_uchar *nptr = ptr + clen; |
2128 |
int ncount = 0; |
int ncount = 0; |
2129 |
|
lgb = UCD_GRAPHBREAK(c); |
2130 |
while (nptr < end_subject) |
while (nptr < end_subject) |
2131 |
{ |
{ |
2132 |
int nclen = 1; |
dlen = 1; |
2133 |
GETCHARLEN(c, nptr, nclen); |
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } |
2134 |
if (UCD_CATEGORY(c) != ucp_M) break; |
rgb = UCD_GRAPHBREAK(d); |
2135 |
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
2136 |
ncount++; |
ncount++; |
2137 |
nptr += nclen; |
lgb = rgb; |
2138 |
|
nptr += dlen; |
2139 |
} |
} |
2140 |
|
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) |
2141 |
|
reset_could_continue = TRUE; |
2142 |
ADD_NEW_DATA(-(state_offset + 1), 0, ncount); |
ADD_NEW_DATA(-(state_offset + 1), 0, ncount); |
2143 |
} |
} |
2144 |
break; |
break; |
2164 |
break; |
break; |
2165 |
|
|
2166 |
case 0x000d: |
case 0x000d: |
2167 |
if (ptr + 1 < end_subject && ptr[1] == 0x0a) |
if (ptr + 1 >= end_subject) |
2168 |
|
{ |
2169 |
|
ADD_NEW(state_offset + 1, 0); |
2170 |
|
if ((md->moptions & PCRE_PARTIAL_HARD) != 0) |
2171 |
|
reset_could_continue = TRUE; |
2172 |
|
} |
2173 |
|
else if (ptr[1] == 0x0a) |
2174 |
{ |
{ |
2175 |
ADD_NEW_DATA(-(state_offset + 1), 0, 1); |
ADD_NEW_DATA(-(state_offset + 1), 0, 1); |
2176 |
} |
} |
2279 |
break; |
break; |
2280 |
|
|
2281 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
2282 |
/* Match a negated single character casefully. This is only used for |
/* Match a negated single character casefully. */ |
|
one-byte characters, that is, we know that d < 256. The character we are |
|
|
checking (c) can be multibyte. */ |
|
2283 |
|
|
2284 |
case OP_NOT: |
case OP_NOT: |
2285 |
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } |
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } |
2286 |
break; |
break; |
2287 |
|
|
2288 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
2289 |
/* Match a negated single character caselessly. This is only used for |
/* Match a negated single character caselessly. */ |
|
one-byte characters, that is, we know that d < 256. The character we are |
|
|
checking (c) can be multibyte. */ |
|
2290 |
|
|
2291 |
case OP_NOTI: |
case OP_NOTI: |
2292 |
if (clen > 0 && c != d && c != fcc[d]) |
if (clen > 0) |
2293 |
{ ADD_NEW(state_offset + dlen + 1, 0); } |
{ |
2294 |
|
unsigned int otherd; |
2295 |
|
#ifdef SUPPORT_UTF |
2296 |
|
if (utf && d >= 128) |
2297 |
|
{ |
2298 |
|
#ifdef SUPPORT_UCP |
2299 |
|
otherd = UCD_OTHERCASE(d); |
2300 |
|
#endif /* SUPPORT_UCP */ |
2301 |
|
} |
2302 |
|
else |
2303 |
|
#endif /* SUPPORT_UTF */ |
2304 |
|
otherd = TABLE_GET(d, fcc, d); |
2305 |
|
if (c != d && c != otherd) |
2306 |
|
{ ADD_NEW(state_offset + dlen + 1, 0); } |
2307 |
|
} |
2308 |
break; |
break; |
2309 |
|
|
2310 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
2331 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
2332 |
if (caseless) |
if (caseless) |
2333 |
{ |
{ |
2334 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2335 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2336 |
{ |
{ |
2337 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2338 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2339 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
2340 |
} |
} |
2341 |
else |
else |
2342 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
2343 |
otherd = fcc[d]; |
otherd = TABLE_GET(d, fcc, d); |
2344 |
} |
} |
2345 |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
2346 |
{ |
{ |
2378 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
2379 |
if (caseless) |
if (caseless) |
2380 |
{ |
{ |
2381 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2382 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2383 |
{ |
{ |
2384 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2385 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2386 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
2387 |
} |
} |
2388 |
else |
else |
2389 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
2390 |
otherd = fcc[d]; |
otherd = TABLE_GET(d, fcc, d); |
2391 |
} |
} |
2392 |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
2393 |
{ |
{ |
2423 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
2424 |
if (caseless) |
if (caseless) |
2425 |
{ |
{ |
2426 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2427 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2428 |
{ |
{ |
2429 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2430 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2431 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
2432 |
} |
} |
2433 |
else |
else |
2434 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
2435 |
otherd = fcc[d]; |
otherd = TABLE_GET(d, fcc, d); |
2436 |
} |
} |
2437 |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
2438 |
{ |
{ |
2460 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
2461 |
if (caseless) |
if (caseless) |
2462 |
{ |
{ |
2463 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2464 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2465 |
{ |
{ |
2466 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2467 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2468 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
2469 |
} |
} |
2470 |
else |
else |
2471 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
2472 |
otherd = fcc[d]; |
otherd = TABLE_GET(d, fcc, d); |
2473 |
} |
} |
2474 |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
2475 |
{ |
{ |
2476 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
2477 |
{ ADD_NEW(state_offset + dlen + 3, 0); } |
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } |
2478 |
else |
else |
2479 |
{ ADD_NEW(state_offset, count); } |
{ ADD_NEW(state_offset, count); } |
2480 |
} |
} |
2497 |
case OP_NOTUPTO: |
case OP_NOTUPTO: |
2498 |
case OP_NOTMINUPTO: |
case OP_NOTMINUPTO: |
2499 |
case OP_NOTPOSUPTO: |
case OP_NOTPOSUPTO: |
2500 |
ADD_ACTIVE(state_offset + dlen + 3, 0); |
ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0); |
2501 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
2502 |
if (clen > 0) |
if (clen > 0) |
2503 |
{ |
{ |
2504 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
2505 |
if (caseless) |
if (caseless) |
2506 |
{ |
{ |
2507 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2508 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2509 |
{ |
{ |
2510 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2511 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2512 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
2513 |
} |
} |
2514 |
else |
else |
2515 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
2516 |
otherd = fcc[d]; |
otherd = TABLE_GET(d, fcc, d); |
2517 |
} |
} |
2518 |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) |
2519 |
{ |
{ |
2523 |
next_active_state--; |
next_active_state--; |
2524 |
} |
} |
2525 |
if (++count >= GET2(code, 1)) |
if (++count >= GET2(code, 1)) |
2526 |
{ ADD_NEW(state_offset + dlen + 3, 0); } |
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } |
2527 |
else |
else |
2528 |
{ ADD_NEW(state_offset, count); } |
{ ADD_NEW(state_offset, count); } |
2529 |
} |
} |
2540 |
{ |
{ |
2541 |
BOOL isinclass = FALSE; |
BOOL isinclass = FALSE; |
2542 |
int next_state_offset; |
int next_state_offset; |
2543 |
const uschar *ecode; |
const pcre_uchar *ecode; |
2544 |
|
|
2545 |
/* For a simple class, there is always just a 32-byte table, and we |
/* For a simple class, there is always just a 32-byte table, and we |
2546 |
can set isinclass from it. */ |
can set isinclass from it. */ |
2547 |
|
|
2548 |
if (codevalue != OP_XCLASS) |
if (codevalue != OP_XCLASS) |
2549 |
{ |
{ |
2550 |
ecode = code + 33; |
ecode = code + 1 + (32 / sizeof(pcre_uchar)); |
2551 |
if (clen > 0) |
if (clen > 0) |
2552 |
{ |
{ |
2553 |
isinclass = (c > 255)? (codevalue == OP_NCLASS) : |
isinclass = (c > 255)? (codevalue == OP_NCLASS) : |
2554 |
((code[1 + c/8] & (1 << (c&7))) != 0); |
((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0); |
2555 |
} |
} |
2556 |
} |
} |
2557 |
|
|
2562 |
else |
else |
2563 |
{ |
{ |
2564 |
ecode = code + GET(code, 1); |
ecode = code + GET(code, 1); |
2565 |
if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE); |
if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf); |
2566 |
} |
} |
2567 |
|
|
2568 |
/* At this point, isinclass is set for all kinds of class, and ecode |
/* At this point, isinclass is set for all kinds of class, and ecode |
2596 |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
2597 |
count = current_state->count; /* Already matched */ |
count = current_state->count; /* Already matched */ |
2598 |
if (count >= GET2(ecode, 1)) |
if (count >= GET2(ecode, 1)) |
2599 |
{ ADD_ACTIVE(next_state_offset + 5, 0); } |
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } |
2600 |
if (isinclass) |
if (isinclass) |
2601 |
{ |
{ |
2602 |
int max = GET2(ecode, 3); |
int max = GET2(ecode, 1 + IMM2_SIZE); |
2603 |
if (++count >= max && max != 0) /* Max 0 => no limit */ |
if (++count >= max && max != 0) /* Max 0 => no limit */ |
2604 |
{ ADD_NEW(next_state_offset + 5, 0); } |
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } |
2605 |
else |
else |
2606 |
{ ADD_NEW(state_offset, count); } |
{ ADD_NEW(state_offset, count); } |
2607 |
} |
} |
2632 |
int rc; |
int rc; |
2633 |
int local_offsets[2]; |
int local_offsets[2]; |
2634 |
int local_workspace[1000]; |
int local_workspace[1000]; |
2635 |
const uschar *endasscode = code + GET(code, 1); |
const pcre_uchar *endasscode = code + GET(code, 1); |
2636 |
|
|
2637 |
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); |
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); |
2638 |
|
|
2669 |
if (code[LINK_SIZE+1] == OP_CALLOUT) |
if (code[LINK_SIZE+1] == OP_CALLOUT) |
2670 |
{ |
{ |
2671 |
rrc = 0; |
rrc = 0; |
2672 |
if (pcre_callout != NULL) |
if (PUBL(callout) != NULL) |
2673 |
{ |
{ |
2674 |
pcre_callout_block cb; |
PUBL(callout_block) cb; |
2675 |
cb.version = 1; /* Version 1 of the callout block */ |
cb.version = 1; /* Version 1 of the callout block */ |
2676 |
cb.callout_number = code[LINK_SIZE+2]; |
cb.callout_number = code[LINK_SIZE+2]; |
2677 |
cb.offset_vector = offsets; |
cb.offset_vector = offsets; |
2678 |
|
#ifdef COMPILE_PCRE8 |
2679 |
cb.subject = (PCRE_SPTR)start_subject; |
cb.subject = (PCRE_SPTR)start_subject; |
2680 |
|
#else |
2681 |
|
cb.subject = (PCRE_SPTR16)start_subject; |
2682 |
|
#endif |
2683 |
cb.subject_length = (int)(end_subject - start_subject); |
cb.subject_length = (int)(end_subject - start_subject); |
2684 |
cb.start_match = (int)(current_subject - start_subject); |
cb.start_match = (int)(current_subject - start_subject); |
2685 |
cb.current_position = (int)(ptr - start_subject); |
cb.current_position = (int)(ptr - start_subject); |
2689 |
cb.capture_last = -1; |
cb.capture_last = -1; |
2690 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
2691 |
cb.mark = NULL; /* No (*MARK) support */ |
cb.mark = NULL; /* No (*MARK) support */ |
2692 |
if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ |
if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ |
2693 |
} |
} |
2694 |
if (rrc > 0) break; /* Fail this thread */ |
if (rrc > 0) break; /* Fail this thread */ |
2695 |
code += _pcre_OP_lengths[OP_CALLOUT]; /* Skip callout data */ |
code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */ |
2696 |
} |
} |
2697 |
|
|
2698 |
condcode = code[LINK_SIZE+1]; |
condcode = code[LINK_SIZE+1]; |
2713 |
|
|
2714 |
else if (condcode == OP_RREF || condcode == OP_NRREF) |
else if (condcode == OP_RREF || condcode == OP_NRREF) |
2715 |
{ |
{ |
2716 |
int value = GET2(code, LINK_SIZE+2); |
int value = GET2(code, LINK_SIZE + 2); |
2717 |
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; |
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; |
2718 |
if (md->recursive != NULL) |
if (md->recursive != NULL) |
2719 |
{ ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); } |
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); } |
2720 |
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } |
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } |
2721 |
} |
} |
2722 |
|
|
2725 |
else |
else |
2726 |
{ |
{ |
2727 |
int rc; |
int rc; |
2728 |
const uschar *asscode = code + LINK_SIZE + 1; |
const pcre_uchar *asscode = code + LINK_SIZE + 1; |
2729 |
const uschar *endasscode = asscode + GET(asscode, 1); |
const pcre_uchar *endasscode = asscode + GET(asscode, 1); |
2730 |
|
|
2731 |
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); |
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); |
2732 |
|
|
2757 |
dfa_recursion_info *ri; |
dfa_recursion_info *ri; |
2758 |
int local_offsets[1000]; |
int local_offsets[1000]; |
2759 |
int local_workspace[1000]; |
int local_workspace[1000]; |
2760 |
const uschar *callpat = start_code + GET(code, 1); |
const pcre_uchar *callpat = start_code + GET(code, 1); |
2761 |
int recno = (callpat == md->start_code)? 0 : |
int recno = (callpat == md->start_code)? 0 : |
2762 |
GET2(callpat, 1 + LINK_SIZE); |
GET2(callpat, 1 + LINK_SIZE); |
2763 |
int rc; |
int rc; |
2808 |
{ |
{ |
2809 |
for (rc = rc*2 - 2; rc >= 0; rc -= 2) |
for (rc = rc*2 - 2; rc >= 0; rc -= 2) |
2810 |
{ |
{ |
|
const uschar *p = start_subject + local_offsets[rc]; |
|
|
const uschar *pp = start_subject + local_offsets[rc+1]; |
|
2811 |
int charcount = local_offsets[rc+1] - local_offsets[rc]; |
int charcount = local_offsets[rc+1] - local_offsets[rc]; |
2812 |
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; |
#ifdef SUPPORT_UTF |
2813 |
|
if (utf) |
2814 |
|
{ |
2815 |
|
const pcre_uchar *p = start_subject + local_offsets[rc]; |
2816 |
|
const pcre_uchar *pp = start_subject + local_offsets[rc+1]; |
2817 |
|
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; |
2818 |
|
} |
2819 |
|
#endif |
2820 |
if (charcount > 0) |
if (charcount > 0) |
2821 |
{ |
{ |
2822 |
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); |
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); |
2839 |
case OP_BRAPOSZERO: |
case OP_BRAPOSZERO: |
2840 |
{ |
{ |
2841 |
int charcount, matched_count; |
int charcount, matched_count; |
2842 |
const uschar *local_ptr = ptr; |
const pcre_uchar *local_ptr = ptr; |
2843 |
BOOL allow_zero; |
BOOL allow_zero; |
2844 |
|
|
2845 |
if (codevalue == OP_BRAPOSZERO) |
if (codevalue == OP_BRAPOSZERO) |
2889 |
|
|
2890 |
if (matched_count > 0 || allow_zero) |
if (matched_count > 0 || allow_zero) |
2891 |
{ |
{ |
2892 |
const uschar *end_subpattern = code; |
const pcre_uchar *end_subpattern = code; |
2893 |
int next_state_offset; |
int next_state_offset; |
2894 |
|
|
2895 |
do { end_subpattern += GET(end_subpattern, 1); } |
do { end_subpattern += GET(end_subpattern, 1); } |
2910 |
} |
} |
2911 |
else |
else |
2912 |
{ |
{ |
2913 |
const uschar *p = ptr; |
const pcre_uchar *p = ptr; |
2914 |
const uschar *pp = local_ptr; |
const pcre_uchar *pp = local_ptr; |
2915 |
charcount = pp - p; |
charcount = (int)(pp - p); |
2916 |
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; |
#ifdef SUPPORT_UTF |
2917 |
|
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; |
2918 |
|
#endif |
2919 |
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); |
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); |
2920 |
} |
} |
2921 |
} |
} |
2924 |
|
|
2925 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
2926 |
case OP_ONCE: |
case OP_ONCE: |
2927 |
case OP_ONCE_NC: |
case OP_ONCE_NC: |
2928 |
{ |
{ |
2929 |
int local_offsets[2]; |
int local_offsets[2]; |
2930 |
int local_workspace[1000]; |
int local_workspace[1000]; |
2942 |
|
|
2943 |
if (rc >= 0) |
if (rc >= 0) |
2944 |
{ |
{ |
2945 |
const uschar *end_subpattern = code; |
const pcre_uchar *end_subpattern = code; |
2946 |
int charcount = local_offsets[1] - local_offsets[0]; |
int charcount = local_offsets[1] - local_offsets[0]; |
2947 |
int next_state_offset, repeat_state_offset; |
int next_state_offset, repeat_state_offset; |
2948 |
|
|
2995 |
} |
} |
2996 |
else |
else |
2997 |
{ |
{ |
2998 |
const uschar *p = start_subject + local_offsets[0]; |
#ifdef SUPPORT_UTF |
2999 |
const uschar *pp = start_subject + local_offsets[1]; |
if (utf) |
3000 |
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; |
{ |
3001 |
|
const pcre_uchar *p = start_subject + local_offsets[0]; |
3002 |
|
const pcre_uchar *pp = start_subject + local_offsets[1]; |
3003 |
|
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; |
3004 |
|
} |
3005 |
|
#endif |
3006 |
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); |
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); |
3007 |
if (repeat_state_offset >= 0) |
if (repeat_state_offset >= 0) |
3008 |
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } |
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } |
3018 |
|
|
3019 |
case OP_CALLOUT: |
case OP_CALLOUT: |
3020 |
rrc = 0; |
rrc = 0; |
3021 |
if (pcre_callout != NULL) |
if (PUBL(callout) != NULL) |
3022 |
{ |
{ |
3023 |
pcre_callout_block cb; |
PUBL(callout_block) cb; |
3024 |
cb.version = 1; /* Version 1 of the callout block */ |
cb.version = 1; /* Version 1 of the callout block */ |
3025 |
cb.callout_number = code[1]; |
cb.callout_number = code[1]; |
3026 |
cb.offset_vector = offsets; |
cb.offset_vector = offsets; |
3027 |
|
#ifdef COMPILE_PCRE8 |
3028 |
cb.subject = (PCRE_SPTR)start_subject; |
cb.subject = (PCRE_SPTR)start_subject; |
3029 |
|
#else |
3030 |
|
cb.subject = (PCRE_SPTR16)start_subject; |
3031 |
|
#endif |
3032 |
cb.subject_length = (int)(end_subject - start_subject); |
cb.subject_length = (int)(end_subject - start_subject); |
3033 |
cb.start_match = (int)(current_subject - start_subject); |
cb.start_match = (int)(current_subject - start_subject); |
3034 |
cb.current_position = (int)(ptr - start_subject); |
cb.current_position = (int)(ptr - start_subject); |
3038 |
cb.capture_last = -1; |
cb.capture_last = -1; |
3039 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
3040 |
cb.mark = NULL; /* No (*MARK) support */ |
cb.mark = NULL; /* No (*MARK) support */ |
3041 |
if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ |
if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ |
3042 |
} |
} |
3043 |
if (rrc == 0) |
if (rrc == 0) |
3044 |
{ ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); } |
{ ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); } |
3045 |
break; |
break; |
3046 |
|
|
3047 |
|
|
3070 |
if (new_count <= 0) |
if (new_count <= 0) |
3071 |
{ |
{ |
3072 |
if (rlevel == 1 && /* Top level, and */ |
if (rlevel == 1 && /* Top level, and */ |
3073 |
could_continue && /* Some could go on */ |
could_continue && /* Some could go on, and */ |
3074 |
forced_fail != workspace[1] && /* Not all forced fail & */ |
forced_fail != workspace[1] && /* Not all forced fail & */ |
3075 |
( /* either... */ |
( /* either... */ |
3076 |
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ |
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ |
3078 |
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */ |
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */ |
3079 |
match_count < 0) /* no matches */ |
match_count < 0) /* no matches */ |
3080 |
) && /* And... */ |
) && /* And... */ |
3081 |
ptr >= end_subject && /* Reached end of subject */ |
( |
3082 |
ptr > md->start_used_ptr) /* Inspected non-empty string */ |
partial_newline || /* Either partial NL */ |
3083 |
|
( /* or ... */ |
3084 |
|
ptr >= end_subject && /* End of subject and */ |
3085 |
|
ptr > md->start_used_ptr) /* Inspected non-empty string */ |
3086 |
|
) |
3087 |
|
) |
3088 |
{ |
{ |
3089 |
if (offsetcount >= 2) |
if (offsetcount >= 2) |
3090 |
{ |
{ |
3143 |
< -1 => some kind of unexpected problem |
< -1 => some kind of unexpected problem |
3144 |
*/ |
*/ |
3145 |
|
|
3146 |
|
#ifdef COMPILE_PCRE8 |
3147 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
3148 |
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, |
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, |
3149 |
const char *subject, int length, int start_offset, int options, int *offsets, |
const char *subject, int length, int start_offset, int options, int *offsets, |
3150 |
int offsetcount, int *workspace, int wscount) |
int offsetcount, int *workspace, int wscount) |
3151 |
|
#else |
3152 |
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
3153 |
|
pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, |
3154 |
|
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, |
3155 |
|
int offsetcount, int *workspace, int wscount) |
3156 |
|
#endif |
3157 |
{ |
{ |
3158 |
real_pcre *re = (real_pcre *)argument_re; |
REAL_PCRE *re = (REAL_PCRE *)argument_re; |
3159 |
dfa_match_data match_block; |
dfa_match_data match_block; |
3160 |
dfa_match_data *md = &match_block; |
dfa_match_data *md = &match_block; |
3161 |
BOOL utf8, anchored, startline, firstline; |
BOOL utf, anchored, startline, firstline; |
3162 |
const uschar *current_subject, *end_subject, *lcc; |
const pcre_uchar *current_subject, *end_subject; |
|
|
|
|
pcre_study_data internal_study; |
|
3163 |
const pcre_study_data *study = NULL; |
const pcre_study_data *study = NULL; |
|
real_pcre internal_re; |
|
3164 |
|
|
3165 |
const uschar *req_byte_ptr; |
const pcre_uchar *req_char_ptr; |
3166 |
const uschar *start_bits = NULL; |
const pcre_uint8 *start_bits = NULL; |
3167 |
BOOL first_byte_caseless = FALSE; |
BOOL has_first_char = FALSE; |
3168 |
BOOL req_byte_caseless = FALSE; |
BOOL has_req_char = FALSE; |
3169 |
int first_byte = -1; |
pcre_uchar first_char = 0; |
3170 |
int req_byte = -1; |
pcre_uchar first_char2 = 0; |
3171 |
int req_byte2 = -1; |
pcre_uchar req_char = 0; |
3172 |
|
pcre_uchar req_char2 = 0; |
3173 |
int newline; |
int newline; |
3174 |
|
|
3175 |
/* Plausibility checks */ |
/* Plausibility checks */ |
3181 |
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; |
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; |
3182 |
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; |
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; |
3183 |
|
|
3184 |
/* We need to find the pointer to any study data before we test for byte |
/* Check that the first field in the block is the magic number. If it is not, |
3185 |
flipping, so we scan the extra_data block first. This may set two fields in the |
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to |
3186 |
match block, so we must initialize them beforehand. However, the other fields |
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which |
3187 |
in the match block must not be set until after the byte flipping. */ |
means that the pattern is likely compiled with different endianness. */ |
3188 |
|
|
3189 |
|
if (re->magic_number != MAGIC_NUMBER) |
3190 |
|
return re->magic_number == REVERSED_MAGIC_NUMBER? |
3191 |
|
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; |
3192 |
|
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; |
3193 |
|
|
3194 |
|
/* If restarting after a partial match, do some sanity checks on the contents |
3195 |
|
of the workspace. */ |
3196 |
|
|
3197 |
|
if ((options & PCRE_DFA_RESTART) != 0) |
3198 |
|
{ |
3199 |
|
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 || |
3200 |
|
workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK) |
3201 |
|
return PCRE_ERROR_DFA_BADRESTART; |
3202 |
|
} |
3203 |
|
|
3204 |
|
/* Set up study, callout, and table data */ |
3205 |
|
|
3206 |
md->tables = re->tables; |
md->tables = re->tables; |
3207 |
md->callout_data = NULL; |
md->callout_data = NULL; |
3220 |
md->tables = extra_data->tables; |
md->tables = extra_data->tables; |
3221 |
} |
} |
3222 |
|
|
|
/* Check that the first field in the block is the magic number. If it is not, |
|
|
test for a regex that was compiled on a host of opposite endianness. If this is |
|
|
the case, flipped values are put in internal_re and internal_study if there was |
|
|
study data too. */ |
|
|
|
|
|
if (re->magic_number != MAGIC_NUMBER) |
|
|
{ |
|
|
re = _pcre_try_flipped(re, &internal_re, study, &internal_study); |
|
|
if (re == NULL) return PCRE_ERROR_BADMAGIC; |
|
|
if (study != NULL) study = &internal_study; |
|
|
} |
|
|
|
|
3223 |
/* Set some local values */ |
/* Set some local values */ |
3224 |
|
|
3225 |
current_subject = (const unsigned char *)subject + start_offset; |
current_subject = (const pcre_uchar *)subject + start_offset; |
3226 |
end_subject = (const unsigned char *)subject + length; |
end_subject = (const pcre_uchar *)subject + length; |
3227 |
req_byte_ptr = current_subject - 1; |
req_char_ptr = current_subject - 1; |
3228 |
|
|
3229 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3230 |
utf8 = (re->options & PCRE_UTF8) != 0; |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
3231 |
|
utf = (re->options & PCRE_UTF8) != 0; |
3232 |
#else |
#else |
3233 |
utf8 = FALSE; |
utf = FALSE; |
3234 |
#endif |
#endif |
3235 |
|
|
3236 |
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || |
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || |
3238 |
|
|
3239 |
/* The remaining fixed data for passing around. */ |
/* The remaining fixed data for passing around. */ |
3240 |
|
|
3241 |
md->start_code = (const uschar *)argument_re + |
md->start_code = (const pcre_uchar *)argument_re + |
3242 |
re->name_table_offset + re->name_count * re->name_entry_size; |
re->name_table_offset + re->name_count * re->name_entry_size; |
3243 |
md->start_subject = (const unsigned char *)subject; |
md->start_subject = (const pcre_uchar *)subject; |
3244 |
md->end_subject = end_subject; |
md->end_subject = end_subject; |
3245 |
md->start_offset = start_offset; |
md->start_offset = start_offset; |
3246 |
md->moptions = options; |
md->moptions = options; |
3301 |
/* Check a UTF-8 string if required. Unfortunately there's no way of passing |
/* Check a UTF-8 string if required. Unfortunately there's no way of passing |
3302 |
back the character offset. */ |
back the character offset. */ |
3303 |
|
|
3304 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3305 |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) |
3306 |
{ |
{ |
3307 |
int erroroffset; |
int erroroffset; |
3308 |
int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset); |
int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset); |
3309 |
if (errorcode != 0) |
if (errorcode != 0) |
3310 |
{ |
{ |
3311 |
if (offsetcount >= 2) |
if (offsetcount >= 2) |
3317 |
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
3318 |
} |
} |
3319 |
if (start_offset > 0 && start_offset < length && |
if (start_offset > 0 && start_offset < length && |
3320 |
(((USPTR)subject)[start_offset] & 0xc0) == 0x80) |
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) |
3321 |
return PCRE_ERROR_BADUTF8_OFFSET; |
return PCRE_ERROR_BADUTF8_OFFSET; |
3322 |
} |
} |
3323 |
#endif |
#endif |
3326 |
is a feature that makes it possible to save compiled regex and re-use them |
is a feature that makes it possible to save compiled regex and re-use them |
3327 |
in other programs later. */ |
in other programs later. */ |
3328 |
|
|
3329 |
if (md->tables == NULL) md->tables = _pcre_default_tables; |
if (md->tables == NULL) md->tables = PRIV(default_tables); |
3330 |
|
|
3331 |
/* The lower casing table and the "must be at the start of a line" flag are |
/* The "must be at the start of a line" flags are used in a loop when finding |
3332 |
used in a loop when finding where to start. */ |
where to start. */ |
3333 |
|
|
|
lcc = md->tables + lcc_offset; |
|
3334 |
startline = (re->flags & PCRE_STARTLINE) != 0; |
startline = (re->flags & PCRE_STARTLINE) != 0; |
3335 |
firstline = (re->options & PCRE_FIRSTLINE) != 0; |
firstline = (re->options & PCRE_FIRSTLINE) != 0; |
3336 |
|
|
3344 |
{ |
{ |
3345 |
if ((re->flags & PCRE_FIRSTSET) != 0) |
if ((re->flags & PCRE_FIRSTSET) != 0) |
3346 |
{ |
{ |
3347 |
first_byte = re->first_byte & 255; |
has_first_char = TRUE; |
3348 |
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) |
first_char = first_char2 = (pcre_uchar)(re->first_char); |
3349 |
first_byte = lcc[first_byte]; |
if ((re->flags & PCRE_FCH_CASELESS) != 0) |
3350 |
|
{ |
3351 |
|
first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char); |
3352 |
|
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
3353 |
|
if (utf && first_char > 127) |
3354 |
|
first_char2 = UCD_OTHERCASE(first_char); |
3355 |
|
#endif |
3356 |
|
} |
3357 |
} |
} |
3358 |
else |
else |
3359 |
{ |
{ |
3368 |
|
|
3369 |
if ((re->flags & PCRE_REQCHSET) != 0) |
if ((re->flags & PCRE_REQCHSET) != 0) |
3370 |
{ |
{ |
3371 |
req_byte = re->req_byte & 255; |
has_req_char = TRUE; |
3372 |
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; |
req_char = req_char2 = (pcre_uchar)(re->req_char); |
3373 |
req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */ |
if ((re->flags & PCRE_RCH_CASELESS) != 0) |
3374 |
|
{ |
3375 |
|
req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char); |
3376 |
|
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
3377 |
|
if (utf && req_char > 127) |
3378 |
|
req_char2 = UCD_OTHERCASE(req_char); |
3379 |
|
#endif |
3380 |
|
} |
3381 |
} |
} |
3382 |
|
|
3383 |
/* Call the main matching function, looping for a non-anchored regex after a |
/* Call the main matching function, looping for a non-anchored regex after a |
3390 |
|
|
3391 |
if ((options & PCRE_DFA_RESTART) == 0) |
if ((options & PCRE_DFA_RESTART) == 0) |
3392 |
{ |
{ |
3393 |
const uschar *save_end_subject = end_subject; |
const pcre_uchar *save_end_subject = end_subject; |
3394 |
|
|
3395 |
/* If firstline is TRUE, the start of the match is constrained to the first |
/* If firstline is TRUE, the start of the match is constrained to the first |
3396 |
line of a multiline string. Implement this by temporarily adjusting |
line of a multiline string. Implement this by temporarily adjusting |
3399 |
|
|
3400 |
if (firstline) |
if (firstline) |
3401 |
{ |
{ |
3402 |
USPTR t = current_subject; |
PCRE_PUCHAR t = current_subject; |
3403 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3404 |
if (utf8) |
if (utf) |
3405 |
{ |
{ |
3406 |
while (t < md->end_subject && !IS_NEWLINE(t)) |
while (t < md->end_subject && !IS_NEWLINE(t)) |
3407 |
{ |
{ |
3408 |
t++; |
t++; |
3409 |
while (t < end_subject && (*t & 0xc0) == 0x80) t++; |
ACROSSCHAR(t < end_subject, *t, t++); |
3410 |
} |
} |
3411 |
} |
} |
3412 |
else |
else |
3423 |
|
|
3424 |
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) |
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) |
3425 |
{ |
{ |
3426 |
/* Advance to a known first byte. */ |
/* Advance to a known first char. */ |
3427 |
|
|
3428 |
if (first_byte >= 0) |
if (has_first_char) |
3429 |
{ |
{ |
3430 |
if (first_byte_caseless) |
if (first_char != first_char2) |
3431 |
while (current_subject < end_subject && |
while (current_subject < end_subject && |
3432 |
lcc[*current_subject] != first_byte) |
*current_subject != first_char && *current_subject != first_char2) |
3433 |
current_subject++; |
current_subject++; |
3434 |
else |
else |
3435 |
while (current_subject < end_subject && |
while (current_subject < end_subject && |
3436 |
*current_subject != first_byte) |
*current_subject != first_char) |
3437 |
current_subject++; |
current_subject++; |
3438 |
} |
} |
3439 |
|
|
3443 |
{ |
{ |
3444 |
if (current_subject > md->start_subject + start_offset) |
if (current_subject > md->start_subject + start_offset) |
3445 |
{ |
{ |
3446 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3447 |
if (utf8) |
if (utf) |
3448 |
{ |
{ |
3449 |
while (current_subject < end_subject && |
while (current_subject < end_subject && |
3450 |
!WAS_NEWLINE(current_subject)) |
!WAS_NEWLINE(current_subject)) |
3451 |
{ |
{ |
3452 |
current_subject++; |
current_subject++; |
3453 |
while(current_subject < end_subject && |
ACROSSCHAR(current_subject < end_subject, *current_subject, |
3454 |
(*current_subject & 0xc0) == 0x80) |
current_subject++); |
|
current_subject++; |
|
3455 |
} |
} |
3456 |
} |
} |
3457 |
else |
else |
3478 |
while (current_subject < end_subject) |
while (current_subject < end_subject) |
3479 |
{ |
{ |
3480 |
register unsigned int c = *current_subject; |
register unsigned int c = *current_subject; |
3481 |
|
#ifndef COMPILE_PCRE8 |
3482 |
|
if (c > 255) c = 255; |
3483 |
|
#endif |
3484 |
if ((start_bits[c/8] & (1 << (c&7))) == 0) |
if ((start_bits[c/8] & (1 << (c&7))) == 0) |
3485 |
{ |
{ |
3486 |
current_subject++; |
current_subject++; |
3487 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
3488 |
if (utf8) |
/* In non 8-bit mode, the iteration will stop for |
3489 |
while(current_subject < end_subject && |
characters > 255 at the beginning or not stop at all. */ |
3490 |
(*current_subject & 0xc0) == 0x80) current_subject++; |
if (utf) |
3491 |
|
ACROSSCHAR(current_subject < end_subject, *current_subject, |
3492 |
|
current_subject++); |
3493 |
#endif |
#endif |
3494 |
} |
} |
3495 |
else break; |
else break; |
3517 |
(pcre_uint32)(end_subject - current_subject) < study->minlength) |
(pcre_uint32)(end_subject - current_subject) < study->minlength) |
3518 |
return PCRE_ERROR_NOMATCH; |
return PCRE_ERROR_NOMATCH; |
3519 |
|
|
3520 |
/* If req_byte is set, we know that that character must appear in the |
/* If req_char is set, we know that that character must appear in the |
3521 |
subject for the match to succeed. If the first character is set, req_byte |
subject for the match to succeed. If the first character is set, req_char |
3522 |
must be later in the subject; otherwise the test starts at the match |
must be later in the subject; otherwise the test starts at the match |
3523 |
point. This optimization can save a huge amount of work in patterns with |
point. This optimization can save a huge amount of work in patterns with |
3524 |
nested unlimited repeats that aren't going to match. Writing separate |
nested unlimited repeats that aren't going to match. Writing separate |
3530 |
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte |
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte |
3531 |
string... so we don't do this when the string is sufficiently long. */ |
string... so we don't do this when the string is sufficiently long. */ |
3532 |
|
|
3533 |
if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX) |
if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX) |
3534 |
{ |
{ |
3535 |
register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0); |
register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0); |
3536 |
|
|
3537 |
/* We don't need to repeat the search if we haven't yet reached the |
/* We don't need to repeat the search if we haven't yet reached the |
3538 |
place we found it at last time. */ |
place we found it at last time. */ |
3539 |
|
|
3540 |
if (p > req_byte_ptr) |
if (p > req_char_ptr) |
3541 |
{ |
{ |
3542 |
if (req_byte_caseless) |
if (req_char != req_char2) |
3543 |
{ |
{ |
3544 |
while (p < end_subject) |
while (p < end_subject) |
3545 |
{ |
{ |
3546 |
register int pp = *p++; |
register int pp = *p++; |
3547 |
if (pp == req_byte || pp == req_byte2) { p--; break; } |
if (pp == req_char || pp == req_char2) { p--; break; } |
3548 |
} |
} |
3549 |
} |
} |
3550 |
else |
else |
3551 |
{ |
{ |
3552 |
while (p < end_subject) |
while (p < end_subject) |
3553 |
{ |
{ |
3554 |
if (*p++ == req_byte) { p--; break; } |
if (*p++ == req_char) { p--; break; } |
3555 |
} |
} |
3556 |
} |
} |
3557 |
|
|
3564 |
found it, so that we don't search again next time round the loop if |
found it, so that we don't search again next time round the loop if |
3565 |
the start hasn't passed this character yet. */ |
the start hasn't passed this character yet. */ |
3566 |
|
|
3567 |
req_byte_ptr = p; |
req_char_ptr = p; |
3568 |
} |
} |
3569 |
} |
} |
3570 |
} |
} |
3596 |
|
|
3597 |
if (firstline && IS_NEWLINE(current_subject)) break; |
if (firstline && IS_NEWLINE(current_subject)) break; |
3598 |
current_subject++; |
current_subject++; |
3599 |
if (utf8) |
#ifdef SUPPORT_UTF |
3600 |
|
if (utf) |
3601 |
{ |
{ |
3602 |
while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80) |
ACROSSCHAR(current_subject < end_subject, *current_subject, |
3603 |
current_subject++; |
current_subject++); |
3604 |
} |
} |
3605 |
|
#endif |
3606 |
if (current_subject > end_subject) break; |
if (current_subject > end_subject) break; |
3607 |
|
|
3608 |
/* If we have just passed a CR and we are now at a LF, and the pattern does |
/* If we have just passed a CR and we are now at a LF, and the pattern does |