6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
7 |
|
|
8 |
Written by Philip Hazel |
Written by Philip Hazel |
9 |
Copyright (c) 1997-2012 University of Cambridge |
Copyright (c) 1997-2013 University of Cambridge |
10 |
|
|
11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
56 |
#undef min |
#undef min |
57 |
#undef max |
#undef max |
58 |
|
|
59 |
|
/* The md->capture_last field uses the lower 16 bits for the last captured |
60 |
|
substring (which can never be greater than 65535) and a bit in the top half |
61 |
|
to mean "capture vector overflowed". This odd way of doing things was |
62 |
|
implemented when it was realized that preserving and restoring the overflow bit |
63 |
|
whenever the last capture number was saved/restored made for a neater |
64 |
|
interface, and doing it this way saved on (a) another variable, which would |
65 |
|
have increased the stack frame size (a big NO-NO in PCRE) and (b) another |
66 |
|
separate set of save/restore instructions. The following defines are used in |
67 |
|
implementing this. */ |
68 |
|
|
69 |
|
#define CAPLMASK 0x0000ffff /* The bits used for last_capture */ |
70 |
|
#define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */ |
71 |
|
#define OVFLBIT 0x00010000 /* The bit that is set for overflow */ |
72 |
|
|
73 |
/* Values for setting in md->match_function_type to indicate two special types |
/* Values for setting in md->match_function_type to indicate two special types |
74 |
of call to match(). We do it this way to save on using another stack variable, |
of call to match(). We do it this way to save on using another stack variable, |
75 |
as stack usage is to be discouraged. */ |
as stack usage is to be discouraged. */ |
87 |
negative to avoid the external error codes. */ |
negative to avoid the external error codes. */ |
88 |
|
|
89 |
#define MATCH_ACCEPT (-999) |
#define MATCH_ACCEPT (-999) |
90 |
#define MATCH_COMMIT (-998) |
#define MATCH_KETRPOS (-998) |
91 |
#define MATCH_KETRPOS (-997) |
#define MATCH_ONCE (-997) |
92 |
#define MATCH_ONCE (-996) |
/* The next 5 must be kept together and in sequence so that a test that checks |
93 |
|
for any one of them can use a range. */ |
94 |
|
#define MATCH_COMMIT (-996) |
95 |
#define MATCH_PRUNE (-995) |
#define MATCH_PRUNE (-995) |
96 |
#define MATCH_SKIP (-994) |
#define MATCH_SKIP (-994) |
97 |
#define MATCH_SKIP_ARG (-993) |
#define MATCH_SKIP_ARG (-993) |
98 |
#define MATCH_THEN (-992) |
#define MATCH_THEN (-992) |
99 |
|
#define MATCH_BACKTRACK_MAX MATCH_THEN |
100 |
|
#define MATCH_BACKTRACK_MIN MATCH_COMMIT |
101 |
|
|
102 |
/* Maximum number of ints of offset to save on the stack for recursive calls. |
/* Maximum number of ints of offset to save on the stack for recursive calls. |
103 |
If the offset vector is bigger, malloc is used. This should be a multiple of 3, |
If the offset vector is bigger, malloc is used. This should be a multiple of 3, |
110 |
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
111 |
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
112 |
|
|
|
|
|
|
|
|
113 |
#ifdef PCRE_DEBUG |
#ifdef PCRE_DEBUG |
114 |
/************************************************* |
/************************************************* |
115 |
* Debugging function to print chars * |
* Debugging function to print chars * |
130 |
static void |
static void |
131 |
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) |
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) |
132 |
{ |
{ |
133 |
unsigned int c; |
pcre_uint32 c; |
134 |
|
BOOL utf = md->utf; |
135 |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
136 |
while (length-- > 0) |
while (length-- > 0) |
137 |
if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c); |
if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c); |
138 |
} |
} |
139 |
#endif |
#endif |
140 |
|
|
167 |
{ |
{ |
168 |
PCRE_PUCHAR eptr_start = eptr; |
PCRE_PUCHAR eptr_start = eptr; |
169 |
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; |
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; |
170 |
|
#ifdef SUPPORT_UTF |
171 |
|
BOOL utf = md->utf; |
172 |
|
#endif |
173 |
|
|
174 |
#ifdef PCRE_DEBUG |
#ifdef PCRE_DEBUG |
175 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
197 |
{ |
{ |
198 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
199 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
200 |
if (md->utf) |
if (utf) |
201 |
{ |
{ |
202 |
/* Match characters up to the end of the reference. NOTE: the number of |
/* Match characters up to the end of the reference. NOTE: the number of |
203 |
bytes matched may differ, because there are some characters whose upper and |
data units matched may differ, because in UTF-8 there are some characters |
204 |
lower case versions code as different numbers of bytes. For example, U+023A |
whose upper and lower case versions code as different numbers of bytes. |
205 |
(2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8); |
For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 |
206 |
a sequence of 3 of the former uses 6 bytes, as does a sequence of two of |
(3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a |
207 |
the latter. It is important, therefore, to check the length along the |
sequence of two of the latter. It is important, therefore, to check the |
208 |
reference, not along the subject (earlier code did this wrong). */ |
length along the reference, not along the subject (earlier code did this |
209 |
|
wrong). */ |
210 |
|
|
211 |
PCRE_PUCHAR endptr = p + length; |
PCRE_PUCHAR endptr = p + length; |
212 |
while (p < endptr) |
while (p < endptr) |
213 |
{ |
{ |
214 |
int c, d; |
pcre_uint32 c, d; |
215 |
|
const ucd_record *ur; |
216 |
if (eptr >= md->end_subject) return -2; /* Partial match */ |
if (eptr >= md->end_subject) return -2; /* Partial match */ |
217 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
218 |
GETCHARINC(d, p); |
GETCHARINC(d, p); |
219 |
if (c != d && c != UCD_OTHERCASE(d)) return -1; |
ur = GET_UCD(d); |
220 |
|
if (c != d && c != d + ur->other_case) |
221 |
|
{ |
222 |
|
const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset; |
223 |
|
for (;;) |
224 |
|
{ |
225 |
|
if (c < *pp) return -1; |
226 |
|
if (c == *pp++) break; |
227 |
|
} |
228 |
|
} |
229 |
} |
} |
230 |
} |
} |
231 |
else |
else |
237 |
{ |
{ |
238 |
while (length-- > 0) |
while (length-- > 0) |
239 |
{ |
{ |
240 |
|
pcre_uint32 cc, cp; |
241 |
if (eptr >= md->end_subject) return -2; /* Partial match */ |
if (eptr >= md->end_subject) return -2; /* Partial match */ |
242 |
if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1; |
cc = RAWUCHARTEST(eptr); |
243 |
|
cp = RAWUCHARTEST(p); |
244 |
|
if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1; |
245 |
p++; |
p++; |
246 |
eptr++; |
eptr++; |
247 |
} |
} |
256 |
while (length-- > 0) |
while (length-- > 0) |
257 |
{ |
{ |
258 |
if (eptr >= md->end_subject) return -2; /* Partial match */ |
if (eptr >= md->end_subject) return -2; /* Partial match */ |
259 |
if (*p++ != *eptr++) return -1; |
if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1; |
260 |
} |
} |
261 |
} |
} |
262 |
|
|
312 |
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, |
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, |
313 |
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, |
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, |
314 |
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, |
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, |
315 |
RM61, RM62, RM63, RM64, RM65, RM66 }; |
RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 }; |
316 |
|
|
317 |
/* These versions of the macros use the stack, as normal. There are debugging |
/* These versions of the macros use the stack, as normal. There are debugging |
318 |
versions and production versions. Note that the "rw" argument of RMATCH isn't |
versions and production versions. Note that the "rw" argument of RMATCH isn't |
330 |
} |
} |
331 |
#define RRETURN(ra) \ |
#define RRETURN(ra) \ |
332 |
{ \ |
{ \ |
333 |
printf("match() returned %d from line %d ", ra, __LINE__); \ |
printf("match() returned %d from line %d\n", ra, __LINE__); \ |
334 |
return ra; \ |
return ra; \ |
335 |
} |
} |
336 |
#else |
#else |
421 |
|
|
422 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
423 |
int Xprop_type; |
int Xprop_type; |
424 |
int Xprop_value; |
unsigned int Xprop_value; |
425 |
int Xprop_fail_result; |
int Xprop_fail_result; |
426 |
int Xoclength; |
int Xoclength; |
427 |
pcre_uchar Xocchars[6]; |
pcre_uchar Xocchars[6]; |
434 |
int Xlength; |
int Xlength; |
435 |
int Xmax; |
int Xmax; |
436 |
int Xmin; |
int Xmin; |
437 |
int Xnumber; |
unsigned int Xnumber; |
438 |
int Xoffset; |
int Xoffset; |
439 |
int Xop; |
unsigned int Xop; |
440 |
int Xsave_capture_last; |
pcre_int32 Xsave_capture_last; |
441 |
int Xsave_offset1, Xsave_offset2, Xsave_offset3; |
int Xsave_offset1, Xsave_offset2, Xsave_offset3; |
442 |
int Xstacksave[REC_STACK_SAVE_MAX]; |
int Xstacksave[REC_STACK_SAVE_MAX]; |
443 |
|
|
522 |
|
|
523 |
register int rrc; /* Returns from recursive calls */ |
register int rrc; /* Returns from recursive calls */ |
524 |
register int i; /* Used for loops not involving calls to RMATCH() */ |
register int i; /* Used for loops not involving calls to RMATCH() */ |
525 |
register unsigned int c; /* Character values not kept over RMATCH() calls */ |
register pcre_uint32 c; /* Character values not kept over RMATCH() calls */ |
526 |
register BOOL utf; /* Local copy of UTF flag for speed */ |
register BOOL utf; /* Local copy of UTF flag for speed */ |
527 |
|
|
528 |
BOOL minimize, possessive; /* Quantifier options */ |
BOOL minimize, possessive; /* Quantifier options */ |
639 |
|
|
640 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
641 |
int prop_type; |
int prop_type; |
642 |
int prop_value; |
unsigned int prop_value; |
643 |
int prop_fail_result; |
int prop_fail_result; |
644 |
int oclength; |
int oclength; |
645 |
pcre_uchar occhars[6]; |
pcre_uchar occhars[6]; |
650 |
int length; |
int length; |
651 |
int max; |
int max; |
652 |
int min; |
int min; |
653 |
int number; |
unsigned int number; |
654 |
int offset; |
int offset; |
655 |
int op; |
unsigned int op; |
656 |
int save_capture_last; |
pcre_int32 save_capture_last; |
657 |
int save_offset1, save_offset2, save_offset3; |
int save_offset1, save_offset2, save_offset3; |
658 |
int stacksave[REC_STACK_SAVE_MAX]; |
int stacksave[REC_STACK_SAVE_MAX]; |
659 |
|
|
771 |
unaltered. */ |
unaltered. */ |
772 |
|
|
773 |
else if (rrc == MATCH_SKIP_ARG && |
else if (rrc == MATCH_SKIP_ARG && |
774 |
STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0) |
STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0) |
775 |
{ |
{ |
776 |
md->start_match_ptr = eptr; |
md->start_match_ptr = eptr; |
777 |
RRETURN(MATCH_SKIP); |
RRETURN(MATCH_SKIP); |
781 |
case OP_FAIL: |
case OP_FAIL: |
782 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
783 |
|
|
|
/* COMMIT overrides PRUNE, SKIP, and THEN */ |
|
|
|
|
784 |
case OP_COMMIT: |
case OP_COMMIT: |
785 |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
786 |
eptrb, RM52); |
eptrb, RM52); |
787 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
|
rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && |
|
|
rrc != MATCH_THEN) |
|
|
RRETURN(rrc); |
|
788 |
RRETURN(MATCH_COMMIT); |
RRETURN(MATCH_COMMIT); |
789 |
|
|
|
/* PRUNE overrides THEN */ |
|
|
|
|
790 |
case OP_PRUNE: |
case OP_PRUNE: |
791 |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
792 |
eptrb, RM51); |
eptrb, RM51); |
793 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
794 |
RRETURN(MATCH_PRUNE); |
RRETURN(MATCH_PRUNE); |
795 |
|
|
796 |
case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
800 |
eptrb, RM56); |
eptrb, RM56); |
801 |
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && |
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && |
802 |
md->mark == NULL) md->mark = ecode + 2; |
md->mark == NULL) md->mark = ecode + 2; |
803 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
804 |
RRETURN(MATCH_PRUNE); |
RRETURN(MATCH_PRUNE); |
805 |
|
|
|
/* SKIP overrides PRUNE and THEN */ |
|
|
|
|
806 |
case OP_SKIP: |
case OP_SKIP: |
807 |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
808 |
eptrb, RM53); |
eptrb, RM53); |
809 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
|
RRETURN(rrc); |
|
810 |
md->start_match_ptr = eptr; /* Pass back current position */ |
md->start_match_ptr = eptr; /* Pass back current position */ |
811 |
RRETURN(MATCH_SKIP); |
RRETURN(MATCH_SKIP); |
812 |
|
|
813 |
/* Note that, for Perl compatibility, SKIP with an argument does NOT set |
/* Note that, for Perl compatibility, SKIP with an argument does NOT set |
814 |
nomatch_mark. There is a flag that disables this opcode when re-matching a |
nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was |
815 |
pattern that ended with a SKIP for which there was not a matching MARK. */ |
not a matching mark, we have to re-run the match, ignoring the SKIP_ARG |
816 |
|
that failed and any that precede it (either they also failed, or were not |
817 |
|
triggered). To do this, we maintain a count of executed SKIP_ARGs. If a |
818 |
|
SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg |
819 |
|
set to the count of the one that failed. */ |
820 |
|
|
821 |
case OP_SKIP_ARG: |
case OP_SKIP_ARG: |
822 |
if (md->ignore_skip_arg) |
md->skip_arg_count++; |
823 |
|
if (md->skip_arg_count <= md->ignore_skip_arg) |
824 |
{ |
{ |
825 |
ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; |
ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; |
826 |
break; |
break; |
827 |
} |
} |
828 |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, |
829 |
eptrb, RM57); |
eptrb, RM57); |
830 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
831 |
RRETURN(rrc); |
|
|
|
|
832 |
/* Pass back the current skip name by overloading md->start_match_ptr and |
/* Pass back the current skip name by overloading md->start_match_ptr and |
833 |
returning the special MATCH_SKIP_ARG return code. This will either be |
returning the special MATCH_SKIP_ARG return code. This will either be |
834 |
caught by a matching MARK, or get to the top, where it causes a rematch |
caught by a matching MARK, or get to the top, where it causes a rematch |
835 |
with the md->ignore_skip_arg flag set. */ |
with md->ignore_skip_arg set to the value of md->skip_arg_count. */ |
836 |
|
|
837 |
md->start_match_ptr = ecode + 2; |
md->start_match_ptr = ecode + 2; |
838 |
RRETURN(MATCH_SKIP_ARG); |
RRETURN(MATCH_SKIP_ARG); |
1078 |
/* In all other cases, we have to make another call to match(). */ |
/* In all other cases, we have to make another call to match(). */ |
1079 |
|
|
1080 |
save_mark = md->mark; |
save_mark = md->mark; |
1081 |
|
save_capture_last = md->capture_last; |
1082 |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, |
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, |
1083 |
RM2); |
RM2); |
1084 |
|
|
1110 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
1111 |
md->mark = save_mark; |
md->mark = save_mark; |
1112 |
if (*ecode != OP_ALT) break; |
if (*ecode != OP_ALT) break; |
1113 |
|
md->capture_last = save_capture_last; |
1114 |
} |
} |
1115 |
|
|
1116 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1232 |
POSSESSIVE_NON_CAPTURE: |
POSSESSIVE_NON_CAPTURE: |
1233 |
matched_once = FALSE; |
matched_once = FALSE; |
1234 |
code_offset = (int)(ecode - md->start_code); |
code_offset = (int)(ecode - md->start_code); |
1235 |
|
save_capture_last = md->capture_last; |
1236 |
|
|
1237 |
for (;;) |
for (;;) |
1238 |
{ |
{ |
1262 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
1263 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
1264 |
if (*ecode != OP_ALT) break; |
if (*ecode != OP_ALT) break; |
1265 |
|
md->capture_last = save_capture_last; |
1266 |
} |
} |
1267 |
|
|
1268 |
if (matched_once || allow_zero) |
if (matched_once || allow_zero) |
1294 |
cb.version = 2; /* Version 2 of the callout block */ |
cb.version = 2; /* Version 2 of the callout block */ |
1295 |
cb.callout_number = ecode[LINK_SIZE+2]; |
cb.callout_number = ecode[LINK_SIZE+2]; |
1296 |
cb.offset_vector = md->offset_vector; |
cb.offset_vector = md->offset_vector; |
1297 |
#ifdef COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
1298 |
cb.subject = (PCRE_SPTR)md->start_subject; |
cb.subject = (PCRE_SPTR)md->start_subject; |
1299 |
#else |
#elif defined COMPILE_PCRE16 |
1300 |
cb.subject = (PCRE_SPTR16)md->start_subject; |
cb.subject = (PCRE_SPTR16)md->start_subject; |
1301 |
|
#elif defined COMPILE_PCRE32 |
1302 |
|
cb.subject = (PCRE_SPTR32)md->start_subject; |
1303 |
#endif |
#endif |
1304 |
cb.subject_length = (int)(md->end_subject - md->start_subject); |
cb.subject_length = (int)(md->end_subject - md->start_subject); |
1305 |
cb.start_match = (int)(mstart - md->start_subject); |
cb.start_match = (int)(mstart - md->start_subject); |
1307 |
cb.pattern_position = GET(ecode, LINK_SIZE + 3); |
cb.pattern_position = GET(ecode, LINK_SIZE + 3); |
1308 |
cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE); |
cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE); |
1309 |
cb.capture_top = offset_top/2; |
cb.capture_top = offset_top/2; |
1310 |
cb.capture_last = md->capture_last; |
cb.capture_last = md->capture_last & CAPLMASK; |
1311 |
|
/* Internal change requires this for API compatibility. */ |
1312 |
|
if (cb.capture_last == 0) cb.capture_last = -1; |
1313 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
1314 |
cb.mark = md->nomatch_mark; |
cb.mark = md->nomatch_mark; |
1315 |
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1316 |
if (rrc < 0) RRETURN(rrc); |
if (rrc < 0) RRETURN(rrc); |
1317 |
} |
} |
1318 |
ecode += PRIV(OP_lengths)[OP_CALLOUT]; |
ecode += PRIV(OP_lengths)[OP_CALLOUT]; |
1319 |
|
codelink -= PRIV(OP_lengths)[OP_CALLOUT]; |
1320 |
} |
} |
1321 |
|
|
1322 |
condcode = ecode[LINK_SIZE+1]; |
condcode = ecode[LINK_SIZE+1]; |
1332 |
} |
} |
1333 |
else |
else |
1334 |
{ |
{ |
1335 |
int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ |
unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ |
1336 |
condition = (recno == RREF_ANY || recno == md->recursive->group_num); |
condition = (recno == RREF_ANY || recno == md->recursive->group_num); |
1337 |
|
|
1338 |
/* If the test is for recursion into a specific subpattern, and it is |
/* If the test is for recursion into a specific subpattern, and it is |
1404 |
|
|
1405 |
if (!condition && condcode == OP_NCREF) |
if (!condition && condcode == OP_NCREF) |
1406 |
{ |
{ |
1407 |
int refno = offset >> 1; |
unsigned int refno = offset >> 1; |
1408 |
pcre_uchar *slotA = md->name_table; |
pcre_uchar *slotA = md->name_table; |
1409 |
|
|
1410 |
for (i = 0; i < md->name_count; i++) |
for (i = 0; i < md->name_count; i++) |
1532 |
to close any currently open capturing brackets. */ |
to close any currently open capturing brackets. */ |
1533 |
|
|
1534 |
case OP_CLOSE: |
case OP_CLOSE: |
1535 |
number = GET2(ecode, 1); |
number = GET2(ecode, 1); /* Must be less than 65536 */ |
1536 |
offset = number << 1; |
offset = number << 1; |
1537 |
|
|
1538 |
#ifdef PCRE_DEBUG |
#ifdef PCRE_DEBUG |
1540 |
printf("\n"); |
printf("\n"); |
1541 |
#endif |
#endif |
1542 |
|
|
1543 |
md->capture_last = number; |
md->capture_last = (md->capture_last & OVFLMASK) | number; |
1544 |
if (offset >= md->offset_max) md->offset_overflow = TRUE; else |
if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else |
1545 |
{ |
{ |
1546 |
md->offset_vector[offset] = |
md->offset_vector[offset] = |
1547 |
md->offset_vector[md->offset_end - number]; |
md->offset_vector[md->offset_end - number]; |
1603 |
} |
} |
1604 |
else condassert = FALSE; |
else condassert = FALSE; |
1605 |
|
|
1606 |
|
/* Loop for each branch */ |
1607 |
|
|
1608 |
do |
do |
1609 |
{ |
{ |
1610 |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4); |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4); |
1611 |
|
|
1612 |
|
/* A match means that the assertion is true; break out of the loop |
1613 |
|
that matches its alternatives. */ |
1614 |
|
|
1615 |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
1616 |
{ |
{ |
1617 |
mstart = md->start_match_ptr; /* In case \K reset it */ |
mstart = md->start_match_ptr; /* In case \K reset it */ |
1618 |
break; |
break; |
1619 |
} |
} |
1620 |
|
|
1621 |
|
/* If not matched, restore the previous mark setting. */ |
1622 |
|
|
1623 |
md->mark = save_mark; |
md->mark = save_mark; |
1624 |
|
|
1625 |
/* A COMMIT failure must fail the entire assertion, without trying any |
/* See comment in the code for capturing groups above about handling |
1626 |
subsequent branches. */ |
THEN. */ |
|
|
|
|
if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH); |
|
1627 |
|
|
1628 |
/* PCRE does not allow THEN to escape beyond an assertion; it |
if (rrc == MATCH_THEN) |
1629 |
is treated as NOMATCH. */ |
{ |
1630 |
|
next = ecode + GET(ecode,1); |
1631 |
|
if (md->start_match_ptr < next && |
1632 |
|
(*ecode == OP_ALT || *next == OP_ALT)) |
1633 |
|
rrc = MATCH_NOMATCH; |
1634 |
|
} |
1635 |
|
|
1636 |
|
/* Anything other than NOMATCH causes the entire assertion to fail, |
1637 |
|
passing back the return code. This includes COMMIT, SKIP, PRUNE and an |
1638 |
|
uncaptured THEN, which means they take their normal effect. This |
1639 |
|
consistent approach does not always have exactly the same effect as in |
1640 |
|
Perl. */ |
1641 |
|
|
1642 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
1643 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
1644 |
} |
} |
1645 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); /* Continue for next alternative */ |
1646 |
|
|
1647 |
|
/* If we have tried all the alternative branches, the assertion has |
1648 |
|
failed. If not, we broke out after a match. */ |
1649 |
|
|
1650 |
if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); |
if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); |
1651 |
|
|
1653 |
|
|
1654 |
if (condassert) RRETURN(MATCH_MATCH); |
if (condassert) RRETURN(MATCH_MATCH); |
1655 |
|
|
1656 |
/* Continue from after the assertion, updating the offsets high water |
/* Continue from after a successful assertion, updating the offsets high |
1657 |
mark, since extracts may have been taken during the assertion. */ |
water mark, since extracts may have been taken during the assertion. */ |
1658 |
|
|
1659 |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
1660 |
ecode += 1 + LINK_SIZE; |
ecode += 1 + LINK_SIZE; |
1661 |
offset_top = md->end_offset_top; |
offset_top = md->end_offset_top; |
1662 |
continue; |
continue; |
1663 |
|
|
1664 |
/* Negative assertion: all branches must fail to match. Encountering SKIP, |
/* Negative assertion: all branches must fail to match for the assertion to |
1665 |
PRUNE, or COMMIT means we must assume failure without checking subsequent |
succeed. */ |
|
branches. */ |
|
1666 |
|
|
1667 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
1668 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
1674 |
} |
} |
1675 |
else condassert = FALSE; |
else condassert = FALSE; |
1676 |
|
|
1677 |
|
/* Loop for each alternative branch. */ |
1678 |
|
|
1679 |
do |
do |
1680 |
{ |
{ |
1681 |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); |
1682 |
md->mark = save_mark; |
md->mark = save_mark; /* Always restore the mark setting */ |
1683 |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH); |
|
1684 |
if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) |
switch(rrc) |
1685 |
{ |
{ |
1686 |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
case MATCH_MATCH: /* A successful match means */ |
1687 |
break; |
case MATCH_ACCEPT: /* the assertion has failed. */ |
1688 |
} |
RRETURN(MATCH_NOMATCH); |
1689 |
|
|
1690 |
|
case MATCH_NOMATCH: /* Carry on with next branch */ |
1691 |
|
break; |
1692 |
|
|
1693 |
/* PCRE does not allow THEN to escape beyond an assertion; it is treated |
/* See comment in the code for capturing groups above about handling |
1694 |
as NOMATCH. */ |
THEN. */ |
1695 |
|
|
1696 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
case MATCH_THEN: |
1697 |
|
next = ecode + GET(ecode,1); |
1698 |
|
if (md->start_match_ptr < next && |
1699 |
|
(*ecode == OP_ALT || *next == OP_ALT)) |
1700 |
|
{ |
1701 |
|
rrc = MATCH_NOMATCH; |
1702 |
|
break; |
1703 |
|
} |
1704 |
|
/* Otherwise fall through. */ |
1705 |
|
|
1706 |
|
/* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole |
1707 |
|
assertion to fail to match, without considering any more alternatives. |
1708 |
|
Failing to match means the assertion is true. This is a consistent |
1709 |
|
approach, but does not always have the same effect as in Perl. */ |
1710 |
|
|
1711 |
|
case MATCH_COMMIT: |
1712 |
|
case MATCH_SKIP: |
1713 |
|
case MATCH_SKIP_ARG: |
1714 |
|
case MATCH_PRUNE: |
1715 |
|
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
1716 |
|
goto NEG_ASSERT_TRUE; /* Break out of alternation loop */ |
1717 |
|
|
1718 |
|
/* Anything else is an error */ |
1719 |
|
|
1720 |
|
default: |
1721 |
|
RRETURN(rrc); |
1722 |
|
} |
1723 |
|
|
1724 |
|
/* Continue with next branch */ |
1725 |
|
|
1726 |
ecode += GET(ecode,1); |
ecode += GET(ecode,1); |
1727 |
} |
} |
1728 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
1729 |
|
|
1730 |
|
/* All branches in the assertion failed to match. */ |
1731 |
|
|
1732 |
|
NEG_ASSERT_TRUE: |
1733 |
if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */ |
if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */ |
1734 |
|
ecode += 1 + LINK_SIZE; /* Continue with current branch */ |
|
ecode += 1 + LINK_SIZE; |
|
1735 |
continue; |
continue; |
1736 |
|
|
1737 |
/* Move the subject pointer back. This occurs only at the start of |
/* Move the subject pointer back. This occurs only at the start of |
1778 |
cb.version = 2; /* Version 2 of the callout block */ |
cb.version = 2; /* Version 2 of the callout block */ |
1779 |
cb.callout_number = ecode[1]; |
cb.callout_number = ecode[1]; |
1780 |
cb.offset_vector = md->offset_vector; |
cb.offset_vector = md->offset_vector; |
1781 |
#ifdef COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
1782 |
cb.subject = (PCRE_SPTR)md->start_subject; |
cb.subject = (PCRE_SPTR)md->start_subject; |
1783 |
#else |
#elif defined COMPILE_PCRE16 |
1784 |
cb.subject = (PCRE_SPTR16)md->start_subject; |
cb.subject = (PCRE_SPTR16)md->start_subject; |
1785 |
|
#elif defined COMPILE_PCRE32 |
1786 |
|
cb.subject = (PCRE_SPTR32)md->start_subject; |
1787 |
#endif |
#endif |
1788 |
cb.subject_length = (int)(md->end_subject - md->start_subject); |
cb.subject_length = (int)(md->end_subject - md->start_subject); |
1789 |
cb.start_match = (int)(mstart - md->start_subject); |
cb.start_match = (int)(mstart - md->start_subject); |
1791 |
cb.pattern_position = GET(ecode, 2); |
cb.pattern_position = GET(ecode, 2); |
1792 |
cb.next_item_length = GET(ecode, 2 + LINK_SIZE); |
cb.next_item_length = GET(ecode, 2 + LINK_SIZE); |
1793 |
cb.capture_top = offset_top/2; |
cb.capture_top = offset_top/2; |
1794 |
cb.capture_last = md->capture_last; |
cb.capture_last = md->capture_last & CAPLMASK; |
1795 |
|
/* Internal change requires this for API compatibility. */ |
1796 |
|
if (cb.capture_last == 0) cb.capture_last = -1; |
1797 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
1798 |
cb.mark = md->nomatch_mark; |
cb.mark = md->nomatch_mark; |
1799 |
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1822 |
case OP_RECURSE: |
case OP_RECURSE: |
1823 |
{ |
{ |
1824 |
recursion_info *ri; |
recursion_info *ri; |
1825 |
int recno; |
unsigned int recno; |
1826 |
|
|
1827 |
callpat = md->start_code + GET(ecode, 1); |
callpat = md->start_code + GET(ecode, 1); |
1828 |
recno = (callpat == md->start_code)? 0 : |
recno = (callpat == md->start_code)? 0 : |
1839 |
/* Add to "recursing stack" */ |
/* Add to "recursing stack" */ |
1840 |
|
|
1841 |
new_recursive.group_num = recno; |
new_recursive.group_num = recno; |
1842 |
|
new_recursive.saved_capture_last = md->capture_last; |
1843 |
new_recursive.subject_position = eptr; |
new_recursive.subject_position = eptr; |
1844 |
new_recursive.prevrec = md->recursive; |
new_recursive.prevrec = md->recursive; |
1845 |
md->recursive = &new_recursive; |
md->recursive = &new_recursive; |
1863 |
new_recursive.saved_max * sizeof(int)); |
new_recursive.saved_max * sizeof(int)); |
1864 |
|
|
1865 |
/* OK, now we can do the recursion. After processing each alternative, |
/* OK, now we can do the recursion. After processing each alternative, |
1866 |
restore the offset data. If there were nested recursions, md->recursive |
restore the offset data and the last captured value. If there were nested |
1867 |
might be changed, so reset it before looping. */ |
recursions, md->recursive might be changed, so reset it before looping. |
1868 |
|
*/ |
1869 |
|
|
1870 |
DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); |
DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); |
1871 |
cbegroup = (*callpat >= OP_SBRA); |
cbegroup = (*callpat >= OP_SBRA); |
1876 |
md, eptrb, RM6); |
md, eptrb, RM6); |
1877 |
memcpy(md->offset_vector, new_recursive.offset_save, |
memcpy(md->offset_vector, new_recursive.offset_save, |
1878 |
new_recursive.saved_max * sizeof(int)); |
new_recursive.saved_max * sizeof(int)); |
1879 |
|
md->capture_last = new_recursive.saved_capture_last; |
1880 |
md->recursive = new_recursive.prevrec; |
md->recursive = new_recursive.prevrec; |
1881 |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
1882 |
{ |
{ |
1893 |
goto RECURSION_MATCHED; /* Exit loop; end processing */ |
goto RECURSION_MATCHED; /* Exit loop; end processing */ |
1894 |
} |
} |
1895 |
|
|
1896 |
/* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it |
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a |
1897 |
is treated as NOMATCH. */ |
recursion; they cause a NOMATCH for the entire recursion. These codes |
1898 |
|
are defined in a range that can be tested for. */ |
1899 |
|
|
1900 |
|
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX) |
1901 |
|
RRETURN(MATCH_NOMATCH); |
1902 |
|
|
1903 |
|
/* Any return code other than NOMATCH is an error. */ |
1904 |
|
|
1905 |
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN && |
if (rrc != MATCH_NOMATCH) |
|
rrc != MATCH_COMMIT) |
|
1906 |
{ |
{ |
1907 |
DPRINTF(("Recursion gave error %d\n", rrc)); |
DPRINTF(("Recursion gave error %d\n", rrc)); |
1908 |
if (new_recursive.offset_save != stacksave) |
if (new_recursive.offset_save != stacksave) |
2032 |
|
|
2033 |
/* Deal with capturing */ |
/* Deal with capturing */ |
2034 |
|
|
2035 |
md->capture_last = number; |
md->capture_last = (md->capture_last & OVFLMASK) | number; |
2036 |
if (offset >= md->offset_max) md->offset_overflow = TRUE; else |
if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else |
2037 |
{ |
{ |
2038 |
/* If offset is greater than offset_top, it means that we are |
/* If offset is greater than offset_top, it means that we are |
2039 |
"skipping" a capturing group, and that group's offsets must be marked |
"skipping" a capturing group, and that group's offsets must be marked |
2184 |
eptr + 1 >= md->end_subject && |
eptr + 1 >= md->end_subject && |
2185 |
NLBLOCK->nltype == NLTYPE_FIXED && |
NLBLOCK->nltype == NLTYPE_FIXED && |
2186 |
NLBLOCK->nllen == 2 && |
NLBLOCK->nllen == 2 && |
2187 |
*eptr == NLBLOCK->nl[0]) |
RAWUCHARTEST(eptr) == NLBLOCK->nl[0]) |
2188 |
{ |
{ |
2189 |
md->hitend = TRUE; |
md->hitend = TRUE; |
2190 |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
2228 |
eptr + 1 >= md->end_subject && |
eptr + 1 >= md->end_subject && |
2229 |
NLBLOCK->nltype == NLTYPE_FIXED && |
NLBLOCK->nltype == NLTYPE_FIXED && |
2230 |
NLBLOCK->nllen == 2 && |
NLBLOCK->nllen == 2 && |
2231 |
*eptr == NLBLOCK->nl[0]) |
RAWUCHARTEST(eptr) == NLBLOCK->nl[0]) |
2232 |
{ |
{ |
2233 |
md->hitend = TRUE; |
md->hitend = TRUE; |
2234 |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
2371 |
eptr + 1 >= md->end_subject && |
eptr + 1 >= md->end_subject && |
2372 |
NLBLOCK->nltype == NLTYPE_FIXED && |
NLBLOCK->nltype == NLTYPE_FIXED && |
2373 |
NLBLOCK->nllen == 2 && |
NLBLOCK->nllen == 2 && |
2374 |
*eptr == NLBLOCK->nl[0]) |
RAWUCHARTEST(eptr) == NLBLOCK->nl[0]) |
2375 |
{ |
{ |
2376 |
md->hitend = TRUE; |
md->hitend = TRUE; |
2377 |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
2520 |
{ |
{ |
2521 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
2522 |
|
|
2523 |
case 0x000d: |
case CHAR_CR: |
2524 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
2525 |
{ |
{ |
2526 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
2527 |
} |
} |
2528 |
else if (*eptr == 0x0a) eptr++; |
else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++; |
2529 |
break; |
break; |
2530 |
|
|
2531 |
case 0x000a: |
case CHAR_LF: |
2532 |
break; |
break; |
2533 |
|
|
2534 |
case 0x000b: |
case CHAR_VT: |
2535 |
case 0x000c: |
case CHAR_FF: |
2536 |
case 0x0085: |
case CHAR_NEL: |
2537 |
|
#ifndef EBCDIC |
2538 |
case 0x2028: |
case 0x2028: |
2539 |
case 0x2029: |
case 0x2029: |
2540 |
|
#endif /* Not EBCDIC */ |
2541 |
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
2542 |
break; |
break; |
2543 |
} |
} |
2553 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2554 |
switch(c) |
switch(c) |
2555 |
{ |
{ |
2556 |
|
HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ |
2557 |
default: break; |
default: break; |
|
case 0x09: /* HT */ |
|
|
case 0x20: /* SPACE */ |
|
|
case 0xa0: /* NBSP */ |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
|
RRETURN(MATCH_NOMATCH); |
|
2558 |
} |
} |
2559 |
ecode++; |
ecode++; |
2560 |
break; |
break; |
2568 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2569 |
switch(c) |
switch(c) |
2570 |
{ |
{ |
2571 |
|
HSPACE_CASES: break; /* Byte and multibyte cases */ |
2572 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
|
case 0x09: /* HT */ |
|
|
case 0x20: /* SPACE */ |
|
|
case 0xa0: /* NBSP */ |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
|
break; |
|
2573 |
} |
} |
2574 |
ecode++; |
ecode++; |
2575 |
break; |
break; |
2583 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2584 |
switch(c) |
switch(c) |
2585 |
{ |
{ |
2586 |
|
VSPACE_CASES: RRETURN(MATCH_NOMATCH); |
2587 |
default: break; |
default: break; |
|
case 0x0a: /* LF */ |
|
|
case 0x0b: /* VT */ |
|
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
|
RRETURN(MATCH_NOMATCH); |
|
2588 |
} |
} |
2589 |
ecode++; |
ecode++; |
2590 |
break; |
break; |
2598 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2599 |
switch(c) |
switch(c) |
2600 |
{ |
{ |
2601 |
|
VSPACE_CASES: break; |
2602 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
|
case 0x0a: /* LF */ |
|
|
case 0x0b: /* VT */ |
|
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
|
break; |
|
2603 |
} |
} |
2604 |
ecode++; |
ecode++; |
2605 |
break; |
break; |
2617 |
} |
} |
2618 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2619 |
{ |
{ |
2620 |
|
const pcre_uint32 *cp; |
2621 |
const ucd_record *prop = GET_UCD(c); |
const ucd_record *prop = GET_UCD(c); |
2622 |
|
|
2623 |
switch(ecode[1]) |
switch(ecode[1]) |
2678 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
2679 |
break; |
break; |
2680 |
|
|
2681 |
|
case PT_CLIST: |
2682 |
|
cp = PRIV(ucd_caseless_sets) + ecode[2]; |
2683 |
|
for (;;) |
2684 |
|
{ |
2685 |
|
if (c < *cp) |
2686 |
|
{ if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; } |
2687 |
|
if (c == *cp++) |
2688 |
|
{ if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } } |
2689 |
|
} |
2690 |
|
break; |
2691 |
|
|
2692 |
|
case PT_UCNC: |
2693 |
|
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || |
2694 |
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || |
2695 |
|
c >= 0xe000) == (op == OP_NOTPROP)) |
2696 |
|
RRETURN(MATCH_NOMATCH); |
2697 |
|
break; |
2698 |
|
|
2699 |
/* This should never occur */ |
/* This should never occur */ |
2700 |
|
|
2701 |
default: |
default: |
2715 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
2716 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
2717 |
} |
} |
2718 |
GETCHARINCTEST(c, eptr); |
else |
|
if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
|
|
while (eptr < md->end_subject) |
|
2719 |
{ |
{ |
2720 |
int len = 1; |
int lgb, rgb; |
2721 |
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
GETCHARINCTEST(c, eptr); |
2722 |
if (UCD_CATEGORY(c) != ucp_M) break; |
lgb = UCD_GRAPHBREAK(c); |
2723 |
eptr += len; |
while (eptr < md->end_subject) |
2724 |
|
{ |
2725 |
|
int len = 1; |
2726 |
|
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
2727 |
|
rgb = UCD_GRAPHBREAK(c); |
2728 |
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
2729 |
|
lgb = rgb; |
2730 |
|
eptr += len; |
2731 |
|
} |
2732 |
} |
} |
2733 |
CHECK_PARTIAL(); |
CHECK_PARTIAL(); |
2734 |
ecode++; |
ecode++; |
2735 |
break; |
break; |
2736 |
#endif |
#endif /* SUPPORT_UCP */ |
2737 |
|
|
2738 |
|
|
2739 |
/* Match a back reference, possibly repeatedly. Look past the end of the |
/* Match a back reference, possibly repeatedly. Look past the end of the |
3242 |
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
3243 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3244 |
} |
} |
3245 |
while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); |
while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH); |
3246 |
} |
} |
3247 |
else |
else |
3248 |
#endif |
#endif |
3282 |
|
|
3283 |
if (fc < 128) |
if (fc < 128) |
3284 |
{ |
{ |
3285 |
if (md->lcc[fc] |
pcre_uint32 cc = RAWUCHAR(eptr); |
3286 |
!= TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); |
if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH); |
3287 |
ecode++; |
ecode++; |
3288 |
eptr++; |
eptr++; |
3289 |
} |
} |
3294 |
|
|
3295 |
else |
else |
3296 |
{ |
{ |
3297 |
unsigned int dc; |
pcre_uint32 dc; |
3298 |
GETCHARINC(dc, eptr); |
GETCHARINC(dc, eptr); |
3299 |
ecode += length; |
ecode += length; |
3300 |
|
|
3404 |
if (length > 1) |
if (length > 1) |
3405 |
{ |
{ |
3406 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
3407 |
unsigned int othercase; |
pcre_uint32 othercase; |
3408 |
if (op >= OP_STARI && /* Caseless */ |
if (op >= OP_STARI && /* Caseless */ |
3409 |
(othercase = UCD_OTHERCASE(fc)) != fc) |
(othercase = UCD_OTHERCASE(fc)) != fc) |
3410 |
oclength = PRIV(ord2utf)(othercase, occhars); |
oclength = PRIV(ord2utf)(othercase, occhars); |
3531 |
|
|
3532 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3533 |
{ |
{ |
3534 |
|
pcre_uint32 cc; /* Faster than pcre_uchar */ |
3535 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
3536 |
{ |
{ |
3537 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3538 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3539 |
} |
} |
3540 |
if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
cc = RAWUCHARTEST(eptr); |
3541 |
|
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); |
3542 |
eptr++; |
eptr++; |
3543 |
} |
} |
3544 |
if (min == max) continue; |
if (min == max) continue; |
3546 |
{ |
{ |
3547 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
3548 |
{ |
{ |
3549 |
|
pcre_uint32 cc; /* Faster than pcre_uchar */ |
3550 |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); |
3551 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3552 |
if (fi >= max) RRETURN(MATCH_NOMATCH); |
if (fi >= max) RRETURN(MATCH_NOMATCH); |
3555 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3556 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3557 |
} |
} |
3558 |
if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
cc = RAWUCHARTEST(eptr); |
3559 |
|
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); |
3560 |
eptr++; |
eptr++; |
3561 |
} |
} |
3562 |
/* Control never gets here */ |
/* Control never gets here */ |
3566 |
pp = eptr; |
pp = eptr; |
3567 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3568 |
{ |
{ |
3569 |
|
pcre_uint32 cc; /* Faster than pcre_uchar */ |
3570 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
3571 |
{ |
{ |
3572 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3573 |
break; |
break; |
3574 |
} |
} |
3575 |
if (fc != *eptr && foc != *eptr) break; |
cc = RAWUCHARTEST(eptr); |
3576 |
|
if (fc != cc && foc != cc) break; |
3577 |
eptr++; |
eptr++; |
3578 |
} |
} |
3579 |
|
|
3601 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3602 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3603 |
} |
} |
3604 |
if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH); |
3605 |
} |
} |
3606 |
|
|
3607 |
if (min == max) continue; |
if (min == max) continue; |
3618 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3619 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3620 |
} |
} |
3621 |
if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH); |
3622 |
} |
} |
3623 |
/* Control never gets here */ |
/* Control never gets here */ |
3624 |
} |
} |
3632 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3633 |
break; |
break; |
3634 |
} |
} |
3635 |
if (fc != *eptr) break; |
if (fc != RAWUCHARTEST(eptr)) break; |
3636 |
eptr++; |
eptr++; |
3637 |
} |
} |
3638 |
if (possessive) continue; |
if (possessive) continue; |
3661 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3662 |
if (utf) |
if (utf) |
3663 |
{ |
{ |
3664 |
register unsigned int ch, och; |
register pcre_uint32 ch, och; |
3665 |
|
|
3666 |
ecode++; |
ecode++; |
3667 |
GETCHARINC(ch, ecode); |
GETCHARINC(ch, ecode); |
3688 |
else |
else |
3689 |
#endif |
#endif |
3690 |
{ |
{ |
3691 |
register unsigned int ch = ecode[1]; |
register pcre_uint32 ch = ecode[1]; |
3692 |
c = *eptr++; |
c = *eptr++; |
3693 |
if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c)) |
if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c)) |
3694 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3802 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3803 |
if (utf) |
if (utf) |
3804 |
{ |
{ |
3805 |
register unsigned int d; |
register pcre_uint32 d; |
3806 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3807 |
{ |
{ |
3808 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
3837 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3838 |
if (utf) |
if (utf) |
3839 |
{ |
{ |
3840 |
register unsigned int d; |
register pcre_uint32 d; |
3841 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
3842 |
{ |
{ |
3843 |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); |
3882 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3883 |
if (utf) |
if (utf) |
3884 |
{ |
{ |
3885 |
register unsigned int d; |
register pcre_uint32 d; |
3886 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3887 |
{ |
{ |
3888 |
int len = 1; |
int len = 1; |
3939 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3940 |
if (utf) |
if (utf) |
3941 |
{ |
{ |
3942 |
register unsigned int d; |
register pcre_uint32 d; |
3943 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3944 |
{ |
{ |
3945 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
3973 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3974 |
if (utf) |
if (utf) |
3975 |
{ |
{ |
3976 |
register unsigned int d; |
register pcre_uint32 d; |
3977 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
3978 |
{ |
{ |
3979 |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); |
4017 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
4018 |
if (utf) |
if (utf) |
4019 |
{ |
{ |
4020 |
register unsigned int d; |
register pcre_uint32 d; |
4021 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4022 |
{ |
{ |
4023 |
int len = 1; |
int len = 1; |
4293 |
} |
} |
4294 |
break; |
break; |
4295 |
|
|
4296 |
|
case PT_CLIST: |
4297 |
|
for (i = 1; i <= min; i++) |
4298 |
|
{ |
4299 |
|
const pcre_uint32 *cp; |
4300 |
|
if (eptr >= md->end_subject) |
4301 |
|
{ |
4302 |
|
SCHECK_PARTIAL(); |
4303 |
|
RRETURN(MATCH_NOMATCH); |
4304 |
|
} |
4305 |
|
GETCHARINCTEST(c, eptr); |
4306 |
|
cp = PRIV(ucd_caseless_sets) + prop_value; |
4307 |
|
for (;;) |
4308 |
|
{ |
4309 |
|
if (c < *cp) |
4310 |
|
{ if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } } |
4311 |
|
if (c == *cp++) |
4312 |
|
{ if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; } |
4313 |
|
} |
4314 |
|
} |
4315 |
|
break; |
4316 |
|
|
4317 |
|
case PT_UCNC: |
4318 |
|
for (i = 1; i <= min; i++) |
4319 |
|
{ |
4320 |
|
if (eptr >= md->end_subject) |
4321 |
|
{ |
4322 |
|
SCHECK_PARTIAL(); |
4323 |
|
RRETURN(MATCH_NOMATCH); |
4324 |
|
} |
4325 |
|
GETCHARINCTEST(c, eptr); |
4326 |
|
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || |
4327 |
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || |
4328 |
|
c >= 0xe000) == prop_fail_result) |
4329 |
|
RRETURN(MATCH_NOMATCH); |
4330 |
|
} |
4331 |
|
break; |
4332 |
|
|
4333 |
/* This should not occur */ |
/* This should not occur */ |
4334 |
|
|
4335 |
default: |
default: |
4349 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
4350 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4351 |
} |
} |
4352 |
GETCHARINCTEST(c, eptr); |
else |
|
if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
|
|
while (eptr < md->end_subject) |
|
4353 |
{ |
{ |
4354 |
int len = 1; |
int lgb, rgb; |
4355 |
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
GETCHARINCTEST(c, eptr); |
4356 |
if (UCD_CATEGORY(c) != ucp_M) break; |
lgb = UCD_GRAPHBREAK(c); |
4357 |
eptr += len; |
while (eptr < md->end_subject) |
4358 |
|
{ |
4359 |
|
int len = 1; |
4360 |
|
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
4361 |
|
rgb = UCD_GRAPHBREAK(c); |
4362 |
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
4363 |
|
lgb = rgb; |
4364 |
|
eptr += len; |
4365 |
|
} |
4366 |
} |
} |
4367 |
CHECK_PARTIAL(); |
CHECK_PARTIAL(); |
4368 |
} |
} |
4389 |
eptr + 1 >= md->end_subject && |
eptr + 1 >= md->end_subject && |
4390 |
NLBLOCK->nltype == NLTYPE_FIXED && |
NLBLOCK->nltype == NLTYPE_FIXED && |
4391 |
NLBLOCK->nllen == 2 && |
NLBLOCK->nllen == 2 && |
4392 |
*eptr == NLBLOCK->nl[0]) |
RAWUCHAR(eptr) == NLBLOCK->nl[0]) |
4393 |
{ |
{ |
4394 |
md->hitend = TRUE; |
md->hitend = TRUE; |
4395 |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
4430 |
{ |
{ |
4431 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
4432 |
|
|
4433 |
case 0x000d: |
case CHAR_CR: |
4434 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++; |
4435 |
break; |
break; |
4436 |
|
|
4437 |
case 0x000a: |
case CHAR_LF: |
4438 |
break; |
break; |
4439 |
|
|
4440 |
case 0x000b: |
case CHAR_VT: |
4441 |
case 0x000c: |
case CHAR_FF: |
4442 |
case 0x0085: |
case CHAR_NEL: |
4443 |
|
#ifndef EBCDIC |
4444 |
case 0x2028: |
case 0x2028: |
4445 |
case 0x2029: |
case 0x2029: |
4446 |
|
#endif /* Not EBCDIC */ |
4447 |
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
4448 |
break; |
break; |
4449 |
} |
} |
4461 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
4462 |
switch(c) |
switch(c) |
4463 |
{ |
{ |
4464 |
|
HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ |
4465 |
default: break; |
default: break; |
|
case 0x09: /* HT */ |
|
|
case 0x20: /* SPACE */ |
|
|
case 0xa0: /* NBSP */ |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
|
RRETURN(MATCH_NOMATCH); |
|
4466 |
} |
} |
4467 |
} |
} |
4468 |
break; |
break; |
4478 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
4479 |
switch(c) |
switch(c) |
4480 |
{ |
{ |
4481 |
|
HSPACE_CASES: break; /* Byte and multibyte cases */ |
4482 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
|
case 0x09: /* HT */ |
|
|
case 0x20: /* SPACE */ |
|
|
case 0xa0: /* NBSP */ |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
|
break; |
|
4483 |
} |
} |
4484 |
} |
} |
4485 |
break; |
break; |
4495 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
4496 |
switch(c) |
switch(c) |
4497 |
{ |
{ |
4498 |
|
VSPACE_CASES: RRETURN(MATCH_NOMATCH); |
4499 |
default: break; |
default: break; |
|
case 0x0a: /* LF */ |
|
|
case 0x0b: /* VT */ |
|
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
|
RRETURN(MATCH_NOMATCH); |
|
4500 |
} |
} |
4501 |
} |
} |
4502 |
break; |
break; |
4512 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
4513 |
switch(c) |
switch(c) |
4514 |
{ |
{ |
4515 |
|
VSPACE_CASES: break; |
4516 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
|
case 0x0a: /* LF */ |
|
|
case 0x0b: /* VT */ |
|
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
|
break; |
|
4517 |
} |
} |
4518 |
} |
} |
4519 |
break; |
break; |
4535 |
case OP_DIGIT: |
case OP_DIGIT: |
4536 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
4537 |
{ |
{ |
4538 |
|
pcre_uint32 cc; |
4539 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
4540 |
{ |
{ |
4541 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
4542 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4543 |
} |
} |
4544 |
if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0) |
cc = RAWUCHAR(eptr); |
4545 |
|
if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0) |
4546 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4547 |
eptr++; |
eptr++; |
4548 |
/* No need to skip more bytes - we know it's a 1-byte character */ |
/* No need to skip more bytes - we know it's a 1-byte character */ |
4552 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
4553 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
4554 |
{ |
{ |
4555 |
|
pcre_uint32 cc; |
4556 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
4557 |
{ |
{ |
4558 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
4559 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4560 |
} |
} |
4561 |
if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) |
cc = RAWUCHAR(eptr); |
4562 |
|
if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0) |
4563 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4564 |
eptr++; |
eptr++; |
4565 |
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4569 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
4570 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
4571 |
{ |
{ |
4572 |
|
pcre_uint32 cc; |
4573 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
4574 |
{ |
{ |
4575 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
4576 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4577 |
} |
} |
4578 |
if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0) |
cc = RAWUCHAR(eptr); |
4579 |
|
if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0) |
4580 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4581 |
eptr++; |
eptr++; |
4582 |
/* No need to skip more bytes - we know it's a 1-byte character */ |
/* No need to skip more bytes - we know it's a 1-byte character */ |
4586 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
4587 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
4588 |
{ |
{ |
4589 |
|
pcre_uint32 cc; |
4590 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
4591 |
{ |
{ |
4592 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
4593 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4594 |
} |
} |
4595 |
if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) |
cc = RAWUCHAR(eptr); |
4596 |
|
if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0) |
4597 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4598 |
eptr++; |
eptr++; |
4599 |
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4603 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
4604 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
4605 |
{ |
{ |
4606 |
|
pcre_uint32 cc; |
4607 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
4608 |
{ |
{ |
4609 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
4610 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4611 |
} |
} |
4612 |
if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0) |
cc = RAWUCHAR(eptr); |
4613 |
|
if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0) |
4614 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4615 |
eptr++; |
eptr++; |
4616 |
/* No need to skip more bytes - we know it's a 1-byte character */ |
/* No need to skip more bytes - we know it's a 1-byte character */ |
4681 |
{ |
{ |
4682 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
4683 |
|
|
4684 |
case 0x000d: |
case CHAR_CR: |
4685 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; |
4686 |
break; |
break; |
4687 |
|
|
4688 |
case 0x000a: |
case CHAR_LF: |
4689 |
break; |
break; |
4690 |
|
|
4691 |
case 0x000b: |
case CHAR_VT: |
4692 |
case 0x000c: |
case CHAR_FF: |
4693 |
case 0x0085: |
case CHAR_NEL: |
4694 |
#ifdef COMPILE_PCRE16 |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4695 |
case 0x2028: |
case 0x2028: |
4696 |
case 0x2029: |
case 0x2029: |
4697 |
#endif |
#endif |
4712 |
switch(*eptr++) |
switch(*eptr++) |
4713 |
{ |
{ |
4714 |
default: break; |
default: break; |
4715 |
case 0x09: /* HT */ |
HSPACE_BYTE_CASES: |
4716 |
case 0x20: /* SPACE */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4717 |
case 0xa0: /* NBSP */ |
HSPACE_MULTIBYTE_CASES: |
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
4718 |
#endif |
#endif |
4719 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4720 |
} |
} |
4732 |
switch(*eptr++) |
switch(*eptr++) |
4733 |
{ |
{ |
4734 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
4735 |
case 0x09: /* HT */ |
HSPACE_BYTE_CASES: |
4736 |
case 0x20: /* SPACE */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4737 |
case 0xa0: /* NBSP */ |
HSPACE_MULTIBYTE_CASES: |
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
4738 |
#endif |
#endif |
4739 |
break; |
break; |
4740 |
} |
} |
4751 |
} |
} |
4752 |
switch(*eptr++) |
switch(*eptr++) |
4753 |
{ |
{ |
4754 |
default: break; |
VSPACE_BYTE_CASES: |
4755 |
case 0x0a: /* LF */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4756 |
case 0x0b: /* VT */ |
VSPACE_MULTIBYTE_CASES: |
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
4757 |
#endif |
#endif |
4758 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
4759 |
|
default: break; |
4760 |
} |
} |
4761 |
} |
} |
4762 |
break; |
break; |
4772 |
switch(*eptr++) |
switch(*eptr++) |
4773 |
{ |
{ |
4774 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
4775 |
case 0x0a: /* LF */ |
VSPACE_BYTE_CASES: |
4776 |
case 0x0b: /* VT */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4777 |
case 0x0c: /* FF */ |
VSPACE_MULTIBYTE_CASES: |
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
4778 |
#endif |
#endif |
4779 |
break; |
break; |
4780 |
} |
} |
5052 |
} |
} |
5053 |
/* Control never gets here */ |
/* Control never gets here */ |
5054 |
|
|
5055 |
/* This should never occur */ |
case PT_CLIST: |
5056 |
|
for (fi = min;; fi++) |
5057 |
|
{ |
5058 |
|
const pcre_uint32 *cp; |
5059 |
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM67); |
5060 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
5061 |
|
if (fi >= max) RRETURN(MATCH_NOMATCH); |
5062 |
|
if (eptr >= md->end_subject) |
5063 |
|
{ |
5064 |
|
SCHECK_PARTIAL(); |
5065 |
|
RRETURN(MATCH_NOMATCH); |
5066 |
|
} |
5067 |
|
GETCHARINCTEST(c, eptr); |
5068 |
|
cp = PRIV(ucd_caseless_sets) + prop_value; |
5069 |
|
for (;;) |
5070 |
|
{ |
5071 |
|
if (c < *cp) |
5072 |
|
{ if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } } |
5073 |
|
if (c == *cp++) |
5074 |
|
{ if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; } |
5075 |
|
} |
5076 |
|
} |
5077 |
|
/* Control never gets here */ |
5078 |
|
|
5079 |
|
case PT_UCNC: |
5080 |
|
for (fi = min;; fi++) |
5081 |
|
{ |
5082 |
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM68); |
5083 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
5084 |
|
if (fi >= max) RRETURN(MATCH_NOMATCH); |
5085 |
|
if (eptr >= md->end_subject) |
5086 |
|
{ |
5087 |
|
SCHECK_PARTIAL(); |
5088 |
|
RRETURN(MATCH_NOMATCH); |
5089 |
|
} |
5090 |
|
GETCHARINCTEST(c, eptr); |
5091 |
|
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || |
5092 |
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || |
5093 |
|
c >= 0xe000) == prop_fail_result) |
5094 |
|
RRETURN(MATCH_NOMATCH); |
5095 |
|
} |
5096 |
|
/* Control never gets here */ |
5097 |
|
|
5098 |
|
/* This should never occur */ |
5099 |
default: |
default: |
5100 |
RRETURN(PCRE_ERROR_INTERNAL); |
RRETURN(PCRE_ERROR_INTERNAL); |
5101 |
} |
} |
5116 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
5117 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
5118 |
} |
} |
5119 |
GETCHARINCTEST(c, eptr); |
else |
|
if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
|
|
while (eptr < md->end_subject) |
|
5120 |
{ |
{ |
5121 |
int len = 1; |
int lgb, rgb; |
5122 |
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
GETCHARINCTEST(c, eptr); |
5123 |
if (UCD_CATEGORY(c) != ucp_M) break; |
lgb = UCD_GRAPHBREAK(c); |
5124 |
eptr += len; |
while (eptr < md->end_subject) |
5125 |
|
{ |
5126 |
|
int len = 1; |
5127 |
|
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
5128 |
|
rgb = UCD_GRAPHBREAK(c); |
5129 |
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
5130 |
|
lgb = rgb; |
5131 |
|
eptr += len; |
5132 |
|
} |
5133 |
} |
} |
5134 |
CHECK_PARTIAL(); |
CHECK_PARTIAL(); |
5135 |
} |
} |
5175 |
switch(c) |
switch(c) |
5176 |
{ |
{ |
5177 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
5178 |
case 0x000d: |
case CHAR_CR: |
5179 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++; |
5180 |
break; |
break; |
5181 |
case 0x000a: |
|
5182 |
|
case CHAR_LF: |
5183 |
break; |
break; |
5184 |
|
|
5185 |
case 0x000b: |
case CHAR_VT: |
5186 |
case 0x000c: |
case CHAR_FF: |
5187 |
case 0x0085: |
case CHAR_NEL: |
5188 |
|
#ifndef EBCDIC |
5189 |
case 0x2028: |
case 0x2028: |
5190 |
case 0x2029: |
case 0x2029: |
5191 |
|
#endif /* Not EBCDIC */ |
5192 |
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
5193 |
break; |
break; |
5194 |
} |
} |
5197 |
case OP_NOT_HSPACE: |
case OP_NOT_HSPACE: |
5198 |
switch(c) |
switch(c) |
5199 |
{ |
{ |
5200 |
|
HSPACE_CASES: RRETURN(MATCH_NOMATCH); |
5201 |
default: break; |
default: break; |
|
case 0x09: /* HT */ |
|
|
case 0x20: /* SPACE */ |
|
|
case 0xa0: /* NBSP */ |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
|
RRETURN(MATCH_NOMATCH); |
|
5202 |
} |
} |
5203 |
break; |
break; |
5204 |
|
|
5205 |
case OP_HSPACE: |
case OP_HSPACE: |
5206 |
switch(c) |
switch(c) |
5207 |
{ |
{ |
5208 |
|
HSPACE_CASES: break; |
5209 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
|
case 0x09: /* HT */ |
|
|
case 0x20: /* SPACE */ |
|
|
case 0xa0: /* NBSP */ |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
|
break; |
|
5210 |
} |
} |
5211 |
break; |
break; |
5212 |
|
|
5213 |
case OP_NOT_VSPACE: |
case OP_NOT_VSPACE: |
5214 |
switch(c) |
switch(c) |
5215 |
{ |
{ |
5216 |
|
VSPACE_CASES: RRETURN(MATCH_NOMATCH); |
5217 |
default: break; |
default: break; |
|
case 0x0a: /* LF */ |
|
|
case 0x0b: /* VT */ |
|
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
|
RRETURN(MATCH_NOMATCH); |
|
5218 |
} |
} |
5219 |
break; |
break; |
5220 |
|
|
5221 |
case OP_VSPACE: |
case OP_VSPACE: |
5222 |
switch(c) |
switch(c) |
5223 |
{ |
{ |
5224 |
|
VSPACE_CASES: break; |
5225 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
|
case 0x0a: /* LF */ |
|
|
case 0x0b: /* VT */ |
|
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
|
break; |
|
5226 |
} |
} |
5227 |
break; |
break; |
5228 |
|
|
5300 |
switch(c) |
switch(c) |
5301 |
{ |
{ |
5302 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
5303 |
case 0x000d: |
case CHAR_CR: |
5304 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; |
5305 |
break; |
break; |
5306 |
|
|
5307 |
case 0x000a: |
case CHAR_LF: |
5308 |
break; |
break; |
5309 |
|
|
5310 |
case 0x000b: |
case CHAR_VT: |
5311 |
case 0x000c: |
case CHAR_FF: |
5312 |
case 0x0085: |
case CHAR_NEL: |
5313 |
#ifdef COMPILE_PCRE16 |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
5314 |
case 0x2028: |
case 0x2028: |
5315 |
case 0x2029: |
case 0x2029: |
5316 |
#endif |
#endif |
5323 |
switch(c) |
switch(c) |
5324 |
{ |
{ |
5325 |
default: break; |
default: break; |
5326 |
case 0x09: /* HT */ |
HSPACE_BYTE_CASES: |
5327 |
case 0x20: /* SPACE */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
5328 |
case 0xa0: /* NBSP */ |
HSPACE_MULTIBYTE_CASES: |
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
5329 |
#endif |
#endif |
5330 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
5331 |
} |
} |
5335 |
switch(c) |
switch(c) |
5336 |
{ |
{ |
5337 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
5338 |
case 0x09: /* HT */ |
HSPACE_BYTE_CASES: |
5339 |
case 0x20: /* SPACE */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
5340 |
case 0xa0: /* NBSP */ |
HSPACE_MULTIBYTE_CASES: |
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
5341 |
#endif |
#endif |
5342 |
break; |
break; |
5343 |
} |
} |
5347 |
switch(c) |
switch(c) |
5348 |
{ |
{ |
5349 |
default: break; |
default: break; |
5350 |
case 0x0a: /* LF */ |
VSPACE_BYTE_CASES: |
5351 |
case 0x0b: /* VT */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
5352 |
case 0x0c: /* FF */ |
VSPACE_MULTIBYTE_CASES: |
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
5353 |
#endif |
#endif |
5354 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
5355 |
} |
} |
5359 |
switch(c) |
switch(c) |
5360 |
{ |
{ |
5361 |
default: RRETURN(MATCH_NOMATCH); |
default: RRETURN(MATCH_NOMATCH); |
5362 |
case 0x0a: /* LF */ |
VSPACE_BYTE_CASES: |
5363 |
case 0x0b: /* VT */ |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
5364 |
case 0x0c: /* FF */ |
VSPACE_MULTIBYTE_CASES: |
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
#ifdef COMPILE_PCRE16 |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
5365 |
#endif |
#endif |
5366 |
break; |
break; |
5367 |
} |
} |
5565 |
} |
} |
5566 |
break; |
break; |
5567 |
|
|
5568 |
|
case PT_CLIST: |
5569 |
|
for (i = min; i < max; i++) |
5570 |
|
{ |
5571 |
|
const pcre_uint32 *cp; |
5572 |
|
int len = 1; |
5573 |
|
if (eptr >= md->end_subject) |
5574 |
|
{ |
5575 |
|
SCHECK_PARTIAL(); |
5576 |
|
break; |
5577 |
|
} |
5578 |
|
GETCHARLENTEST(c, eptr, len); |
5579 |
|
cp = PRIV(ucd_caseless_sets) + prop_value; |
5580 |
|
for (;;) |
5581 |
|
{ |
5582 |
|
if (c < *cp) |
5583 |
|
{ if (prop_fail_result) break; else goto GOT_MAX; } |
5584 |
|
if (c == *cp++) |
5585 |
|
{ if (prop_fail_result) goto GOT_MAX; else break; } |
5586 |
|
} |
5587 |
|
eptr += len; |
5588 |
|
} |
5589 |
|
GOT_MAX: |
5590 |
|
break; |
5591 |
|
|
5592 |
|
case PT_UCNC: |
5593 |
|
for (i = min; i < max; i++) |
5594 |
|
{ |
5595 |
|
int len = 1; |
5596 |
|
if (eptr >= md->end_subject) |
5597 |
|
{ |
5598 |
|
SCHECK_PARTIAL(); |
5599 |
|
break; |
5600 |
|
} |
5601 |
|
GETCHARLENTEST(c, eptr, len); |
5602 |
|
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || |
5603 |
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || |
5604 |
|
c >= 0xe000) == prop_fail_result) |
5605 |
|
break; |
5606 |
|
eptr += len; |
5607 |
|
} |
5608 |
|
break; |
5609 |
|
|
5610 |
default: |
default: |
5611 |
RRETURN(PCRE_ERROR_INTERNAL); |
RRETURN(PCRE_ERROR_INTERNAL); |
5612 |
} |
} |
5630 |
{ |
{ |
5631 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
5632 |
{ |
{ |
|
int len = 1; |
|
5633 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
5634 |
{ |
{ |
5635 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
5636 |
break; |
break; |
5637 |
} |
} |
5638 |
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
else |
|
if (UCD_CATEGORY(c) == ucp_M) break; |
|
|
eptr += len; |
|
|
while (eptr < md->end_subject) |
|
5639 |
{ |
{ |
5640 |
len = 1; |
int lgb, rgb; |
5641 |
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
GETCHARINCTEST(c, eptr); |
5642 |
if (UCD_CATEGORY(c) != ucp_M) break; |
lgb = UCD_GRAPHBREAK(c); |
5643 |
eptr += len; |
while (eptr < md->end_subject) |
5644 |
|
{ |
5645 |
|
int len = 1; |
5646 |
|
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
5647 |
|
rgb = UCD_GRAPHBREAK(c); |
5648 |
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; |
5649 |
|
lgb = rgb; |
5650 |
|
eptr += len; |
5651 |
|
} |
5652 |
} |
} |
5653 |
CHECK_PARTIAL(); |
CHECK_PARTIAL(); |
5654 |
} |
} |
5698 |
eptr + 1 >= md->end_subject && |
eptr + 1 >= md->end_subject && |
5699 |
NLBLOCK->nltype == NLTYPE_FIXED && |
NLBLOCK->nltype == NLTYPE_FIXED && |
5700 |
NLBLOCK->nllen == 2 && |
NLBLOCK->nllen == 2 && |
5701 |
*eptr == NLBLOCK->nl[0]) |
RAWUCHAR(eptr) == NLBLOCK->nl[0]) |
5702 |
{ |
{ |
5703 |
md->hitend = TRUE; |
md->hitend = TRUE; |
5704 |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
5724 |
eptr + 1 >= md->end_subject && |
eptr + 1 >= md->end_subject && |
5725 |
NLBLOCK->nltype == NLTYPE_FIXED && |
NLBLOCK->nltype == NLTYPE_FIXED && |
5726 |
NLBLOCK->nllen == 2 && |
NLBLOCK->nllen == 2 && |
5727 |
*eptr == NLBLOCK->nl[0]) |
RAWUCHAR(eptr) == NLBLOCK->nl[0]) |
5728 |
{ |
{ |
5729 |
md->hitend = TRUE; |
md->hitend = TRUE; |
5730 |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); |
5778 |
break; |
break; |
5779 |
} |
} |
5780 |
GETCHARLEN(c, eptr, len); |
GETCHARLEN(c, eptr, len); |
5781 |
if (c == 0x000d) |
if (c == CHAR_CR) |
5782 |
{ |
{ |
5783 |
if (++eptr >= md->end_subject) break; |
if (++eptr >= md->end_subject) break; |
5784 |
if (*eptr == 0x000a) eptr++; |
if (RAWUCHAR(eptr) == CHAR_LF) eptr++; |
5785 |
} |
} |
5786 |
else |
else |
5787 |
{ |
{ |
5788 |
if (c != 0x000a && |
if (c != CHAR_LF && |
5789 |
(md->bsr_anycrlf || |
(md->bsr_anycrlf || |
5790 |
(c != 0x000b && c != 0x000c && |
(c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL |
5791 |
c != 0x0085 && c != 0x2028 && c != 0x2029))) |
#ifndef EBCDIC |
5792 |
|
&& c != 0x2028 && c != 0x2029 |
5793 |
|
#endif /* Not EBCDIC */ |
5794 |
|
))) |
5795 |
break; |
break; |
5796 |
eptr += len; |
eptr += len; |
5797 |
} |
} |
5812 |
GETCHARLEN(c, eptr, len); |
GETCHARLEN(c, eptr, len); |
5813 |
switch(c) |
switch(c) |
5814 |
{ |
{ |
5815 |
|
HSPACE_CASES: gotspace = TRUE; break; |
5816 |
default: gotspace = FALSE; break; |
default: gotspace = FALSE; break; |
|
case 0x09: /* HT */ |
|
|
case 0x20: /* SPACE */ |
|
|
case 0xa0: /* NBSP */ |
|
|
case 0x1680: /* OGHAM SPACE MARK */ |
|
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ |
|
|
case 0x2000: /* EN QUAD */ |
|
|
case 0x2001: /* EM QUAD */ |
|
|
case 0x2002: /* EN SPACE */ |
|
|
case 0x2003: /* EM SPACE */ |
|
|
case 0x2004: /* THREE-PER-EM SPACE */ |
|
|
case 0x2005: /* FOUR-PER-EM SPACE */ |
|
|
case 0x2006: /* SIX-PER-EM SPACE */ |
|
|
case 0x2007: /* FIGURE SPACE */ |
|
|
case 0x2008: /* PUNCTUATION SPACE */ |
|
|
case 0x2009: /* THIN SPACE */ |
|
|
case 0x200A: /* HAIR SPACE */ |
|
|
case 0x202f: /* NARROW NO-BREAK SPACE */ |
|
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
|
|
case 0x3000: /* IDEOGRAPHIC SPACE */ |
|
|
gotspace = TRUE; |
|
|
break; |
|
5817 |
} |
} |
5818 |
if (gotspace == (ctype == OP_NOT_HSPACE)) break; |
if (gotspace == (ctype == OP_NOT_HSPACE)) break; |
5819 |
eptr += len; |
eptr += len; |
5834 |
GETCHARLEN(c, eptr, len); |
GETCHARLEN(c, eptr, len); |
5835 |
switch(c) |
switch(c) |
5836 |
{ |
{ |
5837 |
|
VSPACE_CASES: gotspace = TRUE; break; |
5838 |
default: gotspace = FALSE; break; |
default: gotspace = FALSE; break; |
|
case 0x0a: /* LF */ |
|
|
case 0x0b: /* VT */ |
|
|
case 0x0c: /* FF */ |
|
|
case 0x0d: /* CR */ |
|
|
case 0x85: /* NEL */ |
|
|
case 0x2028: /* LINE SEPARATOR */ |
|
|
case 0x2029: /* PARAGRAPH SEPARATOR */ |
|
|
gotspace = TRUE; |
|
|
break; |
|
5839 |
} |
} |
5840 |
if (gotspace == (ctype == OP_NOT_VSPACE)) break; |
if (gotspace == (ctype == OP_NOT_VSPACE)) break; |
5841 |
eptr += len; |
eptr += len; |
5949 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
5950 |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
5951 |
BACKCHAR(eptr); |
BACKCHAR(eptr); |
5952 |
if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' && |
if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL && |
5953 |
eptr[-1] == '\r') eptr--; |
RAWUCHAR(eptr - 1) == CHAR_CR) eptr--; |
5954 |
} |
} |
5955 |
} |
} |
5956 |
else |
else |
6001 |
break; |
break; |
6002 |
} |
} |
6003 |
c = *eptr; |
c = *eptr; |
6004 |
if (c == 0x000d) |
if (c == CHAR_CR) |
6005 |
{ |
{ |
6006 |
if (++eptr >= md->end_subject) break; |
if (++eptr >= md->end_subject) break; |
6007 |
if (*eptr == 0x000a) eptr++; |
if (*eptr == CHAR_LF) eptr++; |
6008 |
} |
} |
6009 |
else |
else |
6010 |
{ |
{ |
6011 |
if (c != 0x000a && (md->bsr_anycrlf || |
if (c != CHAR_LF && (md->bsr_anycrlf || |
6012 |
(c != 0x000b && c != 0x000c && c != 0x0085 |
(c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL |
6013 |
#ifdef COMPILE_PCRE16 |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
6014 |
&& c != 0x2028 && c != 0x2029 |
&& c != 0x2028 && c != 0x2029 |
6015 |
#endif |
#endif |
6016 |
))) break; |
))) break; |
6017 |
eptr++; |
eptr++; |
6018 |
} |
} |
6019 |
} |
} |
6027 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
6028 |
break; |
break; |
6029 |
} |
} |
6030 |
c = *eptr; |
switch(*eptr) |
6031 |
if (c == 0x09 || c == 0x20 || c == 0xa0 |
{ |
6032 |
#ifdef COMPILE_PCRE16 |
default: eptr++; break; |
6033 |
|| c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A) |
HSPACE_BYTE_CASES: |
6034 |
|| c == 0x202f || c == 0x205f || c == 0x3000 |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
6035 |
|
HSPACE_MULTIBYTE_CASES: |
6036 |
#endif |
#endif |
6037 |
) break; |
goto ENDLOOP00; |
6038 |
eptr++; |
} |
6039 |
} |
} |
6040 |
|
ENDLOOP00: |
6041 |
break; |
break; |
6042 |
|
|
6043 |
case OP_HSPACE: |
case OP_HSPACE: |
6048 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
6049 |
break; |
break; |
6050 |
} |
} |
6051 |
c = *eptr; |
switch(*eptr) |
6052 |
if (c != 0x09 && c != 0x20 && c != 0xa0 |
{ |
6053 |
#ifdef COMPILE_PCRE16 |
default: goto ENDLOOP01; |
6054 |
&& c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A) |
HSPACE_BYTE_CASES: |
6055 |
&& c != 0x202f && c != 0x205f && c != 0x3000 |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
6056 |
|
HSPACE_MULTIBYTE_CASES: |
6057 |
#endif |
#endif |
6058 |
) break; |
eptr++; break; |
6059 |
eptr++; |
} |
6060 |
} |
} |
6061 |
|
ENDLOOP01: |
6062 |
break; |
break; |
6063 |
|
|
6064 |
case OP_NOT_VSPACE: |
case OP_NOT_VSPACE: |
6069 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
6070 |
break; |
break; |
6071 |
} |
} |
6072 |
c = *eptr; |
switch(*eptr) |
6073 |
if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85 |
{ |
6074 |
#ifdef COMPILE_PCRE16 |
default: eptr++; break; |
6075 |
|| c == 0x2028 || c == 0x2029 |
VSPACE_BYTE_CASES: |
6076 |
|
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
6077 |
|
VSPACE_MULTIBYTE_CASES: |
6078 |
#endif |
#endif |
6079 |
) break; |
goto ENDLOOP02; |
6080 |
eptr++; |
} |
6081 |
} |
} |
6082 |
|
ENDLOOP02: |
6083 |
break; |
break; |
6084 |
|
|
6085 |
case OP_VSPACE: |
case OP_VSPACE: |
6090 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
6091 |
break; |
break; |
6092 |
} |
} |
6093 |
c = *eptr; |
switch(*eptr) |
6094 |
if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85 |
{ |
6095 |
#ifdef COMPILE_PCRE16 |
default: goto ENDLOOP03; |
6096 |
&& c != 0x2028 && c != 0x2029 |
VSPACE_BYTE_CASES: |
6097 |
|
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
6098 |
|
VSPACE_MULTIBYTE_CASES: |
6099 |
#endif |
#endif |
6100 |
) break; |
eptr++; break; |
6101 |
eptr++; |
} |
6102 |
} |
} |
6103 |
|
ENDLOOP03: |
6104 |
break; |
break; |
6105 |
|
|
6106 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
6197 |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM47); |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM47); |
6198 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
6199 |
eptr--; |
eptr--; |
6200 |
if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' && |
if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF && |
6201 |
eptr[-1] == '\r') eptr--; |
eptr[-1] == CHAR_CR) eptr--; |
6202 |
} |
} |
6203 |
} |
} |
6204 |
|
|
6248 |
LBL(32) LBL(34) LBL(42) LBL(46) |
LBL(32) LBL(34) LBL(42) LBL(46) |
6249 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
6250 |
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) |
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) |
6251 |
LBL(59) LBL(60) LBL(61) LBL(62) |
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68) |
6252 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
6253 |
#endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
6254 |
default: |
default: |
6255 |
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); |
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); |
|
|
|
|
printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere); |
|
|
|
|
6256 |
return PCRE_ERROR_INTERNAL; |
return PCRE_ERROR_INTERNAL; |
6257 |
} |
} |
6258 |
#undef LBL |
#undef LBL |
6364 |
< -1 => some kind of unexpected problem |
< -1 => some kind of unexpected problem |
6365 |
*/ |
*/ |
6366 |
|
|
6367 |
#ifdef COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
6368 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
6369 |
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, |
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, |
6370 |
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, |
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, |
6371 |
int offsetcount) |
int offsetcount) |
6372 |
#else |
#elif defined COMPILE_PCRE16 |
6373 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
6374 |
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, |
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, |
6375 |
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, |
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, |
6376 |
int offsetcount) |
int offsetcount) |
6377 |
|
#elif defined COMPILE_PCRE32 |
6378 |
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
6379 |
|
pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, |
6380 |
|
PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets, |
6381 |
|
int offsetcount) |
6382 |
#endif |
#endif |
6383 |
{ |
{ |
6384 |
int rc, ocount, arg_offset_max; |
int rc, ocount, arg_offset_max; |
6401 |
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset; |
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset; |
6402 |
PCRE_PUCHAR end_subject; |
PCRE_PUCHAR end_subject; |
6403 |
PCRE_PUCHAR start_partial = NULL; |
PCRE_PUCHAR start_partial = NULL; |
6404 |
|
PCRE_PUCHAR match_partial; |
6405 |
PCRE_PUCHAR req_char_ptr = start_match - 1; |
PCRE_PUCHAR req_char_ptr = start_match - 1; |
6406 |
|
|
6407 |
const pcre_study_data *study; |
const pcre_study_data *study; |
6433 |
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0)) |
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0)) |
6434 |
return PCRE_ERROR_NULL; |
return PCRE_ERROR_NULL; |
6435 |
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; |
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; |
6436 |
|
if (length < 0) return PCRE_ERROR_BADLENGTH; |
6437 |
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; |
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; |
6438 |
|
|
6439 |
/* Check that the first field in the block is the magic number. If it is not, |
/* Check that the first field in the block is the magic number. If it is not, |
6471 |
offsets[0] = erroroffset; |
offsets[0] = erroroffset; |
6472 |
offsets[1] = errorcode; |
offsets[1] = errorcode; |
6473 |
} |
} |
6474 |
#ifdef COMPILE_PCRE16 |
#if defined COMPILE_PCRE8 |
|
return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)? |
|
|
PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16; |
|
|
#else |
|
6475 |
return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)? |
return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)? |
6476 |
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
6477 |
|
#elif defined COMPILE_PCRE16 |
6478 |
|
return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)? |
6479 |
|
PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16; |
6480 |
|
#elif defined COMPILE_PCRE32 |
6481 |
|
return PCRE_ERROR_BADUTF32; |
6482 |
#endif |
#endif |
6483 |
} |
} |
6484 |
|
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 |
6485 |
/* Check that a start_offset points to the start of a UTF character. */ |
/* Check that a start_offset points to the start of a UTF character. */ |
6486 |
if (start_offset > 0 && start_offset < length && |
if (start_offset > 0 && start_offset < length && |
6487 |
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) |
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) |
6488 |
return PCRE_ERROR_BADUTF8_OFFSET; |
return PCRE_ERROR_BADUTF8_OFFSET; |
6489 |
|
#endif |
6490 |
} |
} |
6491 |
#endif |
#endif |
6492 |
|
|
6500 |
&& (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT | |
&& (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT | |
6501 |
PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT |
PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT |
6502 |
&& extra_data->executable_jit != NULL |
&& extra_data->executable_jit != NULL |
6503 |
&& (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL | |
&& (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0) |
|
PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | |
|
|
PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0) |
|
6504 |
{ |
{ |
6505 |
rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length, |
rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length, |
6506 |
start_offset, options, offsets, offsetcount); |
start_offset, options, offsets, offsetcount); |
6507 |
|
|
6508 |
/* PCRE_ERROR_NULL means that the selected normal or partial matching |
/* PCRE_ERROR_NULL means that the selected normal or partial matching |
6509 |
mode is not compiled. In this case we simply fallback to interpreter. */ |
mode is not compiled. In this case we simply fallback to interpreter. */ |
6510 |
|
|
6511 |
if (rc != PCRE_ERROR_NULL) return rc; |
if (rc != PCRE_ERROR_JIT_BADOPTION) return rc; |
6512 |
} |
} |
6513 |
#endif |
#endif |
6514 |
|
|
6570 |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
6571 |
md->use_ucp = (re->options & PCRE_UCP) != 0; |
md->use_ucp = (re->options & PCRE_UCP) != 0; |
6572 |
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
6573 |
md->ignore_skip_arg = FALSE; |
md->ignore_skip_arg = 0; |
6574 |
|
|
6575 |
/* Some options are unpacked into BOOL variables in the hope that testing |
/* Some options are unpacked into BOOL variables in the hope that testing |
6576 |
them will be faster than individual option bits. */ |
them will be faster than individual option bits. */ |
6680 |
DPRINTF(("Got memory to hold back references\n")); |
DPRINTF(("Got memory to hold back references\n")); |
6681 |
} |
} |
6682 |
else md->offset_vector = offsets; |
else md->offset_vector = offsets; |
|
|
|
6683 |
md->offset_end = ocount; |
md->offset_end = ocount; |
6684 |
md->offset_max = (2*ocount)/3; |
md->offset_max = (2*ocount)/3; |
6685 |
md->offset_overflow = FALSE; |
md->capture_last = 0; |
|
md->capture_last = -1; |
|
6686 |
|
|
6687 |
/* Reset the working variable associated with each extraction. These should |
/* Reset the working variable associated with each extraction. These should |
6688 |
never be used unless previously set, but they get saved and restored, and so we |
never be used unless previously set, but they get saved and restored, and so we |
6790 |
|
|
6791 |
if (has_first_char) |
if (has_first_char) |
6792 |
{ |
{ |
6793 |
|
pcre_uchar smc; |
6794 |
|
|
6795 |
if (first_char != first_char2) |
if (first_char != first_char2) |
6796 |
while (start_match < end_subject && |
while (start_match < end_subject && |
6797 |
*start_match != first_char && *start_match != first_char2) |
(smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2) |
6798 |
start_match++; |
start_match++; |
6799 |
else |
else |
6800 |
while (start_match < end_subject && *start_match != first_char) |
while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char) |
6801 |
start_match++; |
start_match++; |
6802 |
} |
} |
6803 |
|
|
6829 |
if (start_match[-1] == CHAR_CR && |
if (start_match[-1] == CHAR_CR && |
6830 |
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
6831 |
start_match < end_subject && |
start_match < end_subject && |
6832 |
*start_match == CHAR_NL) |
RAWUCHARTEST(start_match) == CHAR_NL) |
6833 |
start_match++; |
start_match++; |
6834 |
} |
} |
6835 |
} |
} |
6840 |
{ |
{ |
6841 |
while (start_match < end_subject) |
while (start_match < end_subject) |
6842 |
{ |
{ |
6843 |
register unsigned int c = *start_match; |
register pcre_uint32 c = RAWUCHARTEST(start_match); |
6844 |
#ifndef COMPILE_PCRE8 |
#ifndef COMPILE_PCRE8 |
6845 |
if (c > 255) c = 255; |
if (c > 255) c = 255; |
6846 |
#endif |
#endif |
6908 |
{ |
{ |
6909 |
while (p < end_subject) |
while (p < end_subject) |
6910 |
{ |
{ |
6911 |
register int pp = *p++; |
register pcre_uint32 pp = RAWUCHARINCTEST(p); |
6912 |
if (pp == req_char || pp == req_char2) { p--; break; } |
if (pp == req_char || pp == req_char2) { p--; break; } |
6913 |
} |
} |
6914 |
} |
} |
6916 |
{ |
{ |
6917 |
while (p < end_subject) |
while (p < end_subject) |
6918 |
{ |
{ |
6919 |
if (*p++ == req_char) { p--; break; } |
if (RAWUCHARINCTEST(p) == req_char) { p--; break; } |
6920 |
} |
} |
6921 |
} |
} |
6922 |
|
|
6952 |
md->match_call_count = 0; |
md->match_call_count = 0; |
6953 |
md->match_function_type = 0; |
md->match_function_type = 0; |
6954 |
md->end_offset_top = 0; |
md->end_offset_top = 0; |
6955 |
|
md->skip_arg_count = 0; |
6956 |
rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0); |
rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0); |
6957 |
if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; |
if (md->hitend && start_partial == NULL) |
6958 |
|
{ |
6959 |
|
start_partial = md->start_used_ptr; |
6960 |
|
match_partial = start_match; |
6961 |
|
} |
6962 |
|
|
6963 |
switch(rc) |
switch(rc) |
6964 |
{ |
{ |
6971 |
|
|
6972 |
case MATCH_SKIP_ARG: |
case MATCH_SKIP_ARG: |
6973 |
new_start_match = start_match; |
new_start_match = start_match; |
6974 |
md->ignore_skip_arg = TRUE; |
md->ignore_skip_arg = md->skip_arg_count; |
6975 |
break; |
break; |
6976 |
|
|
6977 |
/* SKIP passes back the next starting point explicitly, but if it is the |
/* SKIP passes back the next starting point explicitly, but if it is no |
6978 |
same as the match we have just done, treat it as NOMATCH. */ |
greater than the match we have just done, treat it as NOMATCH. */ |
6979 |
|
|
6980 |
case MATCH_SKIP: |
case MATCH_SKIP: |
6981 |
if (md->start_match_ptr != start_match) |
if (md->start_match_ptr > start_match) |
6982 |
{ |
{ |
6983 |
new_start_match = md->start_match_ptr; |
new_start_match = md->start_match_ptr; |
6984 |
break; |
break; |
6986 |
/* Fall through */ |
/* Fall through */ |
6987 |
|
|
6988 |
/* NOMATCH and PRUNE advance by one character. THEN at this level acts |
/* NOMATCH and PRUNE advance by one character. THEN at this level acts |
6989 |
exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */ |
exactly like PRUNE. Unset ignore SKIP-with-argument. */ |
6990 |
|
|
6991 |
case MATCH_NOMATCH: |
case MATCH_NOMATCH: |
6992 |
case MATCH_PRUNE: |
case MATCH_PRUNE: |
6993 |
case MATCH_THEN: |
case MATCH_THEN: |
6994 |
md->ignore_skip_arg = FALSE; |
md->ignore_skip_arg = 0; |
6995 |
new_start_match = start_match + 1; |
new_start_match = start_match + 1; |
6996 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
6997 |
if (utf) |
if (utf) |
7084 |
(arg_offset_max - 2) * sizeof(int)); |
(arg_offset_max - 2) * sizeof(int)); |
7085 |
DPRINTF(("Copied offsets from temporary memory\n")); |
DPRINTF(("Copied offsets from temporary memory\n")); |
7086 |
} |
} |
7087 |
if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE; |
if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT; |
7088 |
DPRINTF(("Freeing temporary memory\n")); |
DPRINTF(("Freeing temporary memory\n")); |
7089 |
(PUBL(free))(md->offset_vector); |
(PUBL(free))(md->offset_vector); |
7090 |
} |
} |
7092 |
/* Set the return code to the number of captured strings, or 0 if there were |
/* Set the return code to the number of captured strings, or 0 if there were |
7093 |
too many to fit into the vector. */ |
too many to fit into the vector. */ |
7094 |
|
|
7095 |
rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)? |
rc = ((md->capture_last & OVFLBIT) != 0 && |
7096 |
|
md->end_offset_top >= arg_offset_max)? |
7097 |
0 : md->end_offset_top/2; |
0 : md->end_offset_top/2; |
7098 |
|
|
7099 |
/* If there is space in the offset vector, set any unused pairs at the end of |
/* If there is space in the offset vector, set any unused pairs at the end of |
7166 |
{ |
{ |
7167 |
offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject); |
offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject); |
7168 |
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject); |
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject); |
7169 |
|
if (offsetcount > 2) |
7170 |
|
offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject); |
7171 |
} |
} |
7172 |
rc = PCRE_ERROR_PARTIAL; |
rc = PCRE_ERROR_PARTIAL; |
7173 |
} |
} |