Parent Directory
|
Revision Log
|
Patch
revision 716 by ph10, Tue Oct 4 16:38:05 2011 UTC | revision 976 by ph10, Sat Jun 16 17:53:17 2012 UTC | |
---|---|---|
# | Line 6 | Line 6 |
6 | and semantics are as close as possible to those of the Perl 5 language. | and semantics are as close as possible to those of the Perl 5 language. |
7 | ||
8 | Written by Philip Hazel | Written by Philip Hazel |
9 | Copyright (c) 1997-2011 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
10 | ||
11 | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
12 | Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without |
# | Line 37 POSSIBILITY OF SUCH DAMAGE. | Line 37 POSSIBILITY OF SUCH DAMAGE. |
37 | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
38 | */ | */ |
39 | ||
40 | /* This module contains pcre_exec(), the externally visible function that does | /* This module contains pcre_exec(), the externally visible function that does |
41 | pattern matching using an NFA algorithm, trying to mimic Perl as closely as | pattern matching using an NFA algorithm, trying to mimic Perl as closely as |
42 | possible. There are also some static supporting functions. */ | possible. There are also some static supporting functions. */ |
# | Line 82 negative to avoid the external error cod | Line 81 negative to avoid the external error cod |
81 | #define MATCH_SKIP_ARG (-993) | #define MATCH_SKIP_ARG (-993) |
82 | #define MATCH_THEN (-992) | #define MATCH_THEN (-992) |
83 | ||
/* This is a convenience macro for code that occurs many times. */ | ||
#define MRRETURN(ra) \ | ||
{ \ | ||
md->mark = markptr; \ | ||
RRETURN(ra); \ | ||
} | ||
84 | /* Maximum number of ints of offset to save on the stack for recursive calls. | /* Maximum number of ints of offset to save on the stack for recursive calls. |
85 | If the offset vector is bigger, malloc is used. This should be a multiple of 3, | If the offset vector is bigger, malloc is used. This should be a multiple of 3, |
86 | because the offset vector is always a multiple of 3 long. */ | because the offset vector is always a multiple of 3 long. */ |
# | Line 121 Returns: nothing | Line 112 Returns: nothing |
112 | */ | */ |
113 | ||
114 | static void | static void |
115 | pchars(const uschar *p, int length, BOOL is_subject, match_data *md) | pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) |
116 | { | { |
117 | unsigned int c; | unsigned int c; |
118 | if (is_subject && length > md->end_subject - p) length = md->end_subject - p; | if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
# | Line 148 Arguments: | Line 139 Arguments: |
139 | md points to match data block | md points to match data block |
140 | caseless TRUE if caseless | caseless TRUE if caseless |
141 | ||
142 | Returns: < 0 if not matched, otherwise the number of subject bytes matched | Returns: >= 0 the number of subject bytes matched |
143 | -1 no match | |
144 | -2 partial match; always given if at end subject | |
145 | */ | */ |
146 | ||
147 | static int | static int |
148 | match_ref(int offset, register USPTR eptr, int length, match_data *md, | match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md, |
149 | BOOL caseless) | BOOL caseless) |
150 | { | { |
151 | USPTR eptr_start = eptr; | PCRE_PUCHAR eptr_start = eptr; |
152 | register USPTR p = md->start_subject + md->offset_vector[offset]; | register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; |
153 | ||
154 | #ifdef PCRE_DEBUG | #ifdef PCRE_DEBUG |
155 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
# | Line 171 pchars(p, length, FALSE, md); | Line 164 pchars(p, length, FALSE, md); |
164 | printf("\n"); | printf("\n"); |
165 | #endif | #endif |
166 | ||
167 | /* Always fail if reference not set (and not JavaScript compatible). */ | /* Always fail if reference not set (and not JavaScript compatible - in that |
168 | case the length is passed as zero). */ | |
169 | ||
170 | if (length < 0) return -1; | if (length < 0) return -1; |
171 | ||
# | Line 181 ASCII characters. */ | Line 175 ASCII characters. */ |
175 | ||
176 | if (caseless) | if (caseless) |
177 | { | { |
178 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
179 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
180 | if (md->utf8) | if (md->utf) |
181 | { | { |
182 | /* Match characters up to the end of the reference. NOTE: the number of | /* Match characters up to the end of the reference. NOTE: the number of |
183 | bytes matched may differ, because there are some characters whose upper and | bytes matched may differ, because there are some characters whose upper and |
# | Line 193 if (caseless) | Line 187 if (caseless) |
187 | the latter. It is important, therefore, to check the length along the | the latter. It is important, therefore, to check the length along the |
188 | reference, not along the subject (earlier code did this wrong). */ | reference, not along the subject (earlier code did this wrong). */ |
189 | ||
190 | USPTR endptr = p + length; | PCRE_PUCHAR endptr = p + length; |
191 | while (p < endptr) | while (p < endptr) |
192 | { | { |
193 | int c, d; | int c, d; |
194 | if (eptr >= md->end_subject) return -1; | if (eptr >= md->end_subject) return -2; /* Partial match */ |
195 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
196 | GETCHARINC(d, p); | GETCHARINC(d, p); |
197 | if (c != d && c != UCD_OTHERCASE(d)) return -1; | if (c != d && c != UCD_OTHERCASE(d)) return -1; |
# | Line 210 if (caseless) | Line 204 if (caseless) |
204 | /* The same code works when not in UTF-8 mode and in UTF-8 mode when there | /* The same code works when not in UTF-8 mode and in UTF-8 mode when there |
205 | is no UCP support. */ | is no UCP support. */ |
206 | { | { |
if (eptr + length > md->end_subject) return -1; | ||
207 | while (length-- > 0) | while (length-- > 0) |
208 | { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; } | { |
209 | if (eptr >= md->end_subject) return -2; /* Partial match */ | |
210 | if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1; | |
211 | p++; | |
212 | eptr++; | |
213 | } | |
214 | } | } |
215 | } | } |
216 | ||
# | Line 221 are in UTF-8 mode. */ | Line 219 are in UTF-8 mode. */ |
219 | ||
220 | else | else |
221 | { | { |
222 | if (eptr + length > md->end_subject) return -1; | while (length-- > 0) |
223 | while (length-- > 0) if (*p++ != *eptr++) return -1; | { |
224 | if (eptr >= md->end_subject) return -2; /* Partial match */ | |
225 | if (*p++ != *eptr++) return -1; | |
226 | } | |
227 | } | } |
228 | ||
229 | return eptr - eptr_start; | return (int)(eptr - eptr_start); |
230 | } | } |
231 | ||
232 | ||
# | Line 277 enum { RM1=1, RM2, RM3, RM4, RM5, RM | Line 278 enum { RM1=1, RM2, RM3, RM4, RM5, RM |
278 | RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, | RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, |
279 | RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, | RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, |
280 | RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, | RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, |
281 | RM61, RM62, RM63 }; | RM61, RM62, RM63, RM64, RM65, RM66 }; |
282 | ||
283 | /* These versions of the macros use the stack, as normal. There are debugging | /* These versions of the macros use the stack, as normal. There are debugging |
284 | versions and production versions. Note that the "rw" argument of RMATCH isn't | versions and production versions. Note that the "rw" argument of RMATCH isn't |
# | Line 290 actually used in this definition. */ | Line 291 actually used in this definition. */ |
291 | #define RMATCH(ra,rb,rc,rd,re,rw) \ | #define RMATCH(ra,rb,rc,rd,re,rw) \ |
292 | { \ | { \ |
293 | printf("match() called in line %d\n", __LINE__); \ | printf("match() called in line %d\n", __LINE__); \ |
294 | rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \ | rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \ |
295 | printf("to line %d\n", __LINE__); \ | printf("to line %d\n", __LINE__); \ |
296 | } | } |
297 | #define RRETURN(ra) \ | #define RRETURN(ra) \ |
# | Line 300 actually used in this definition. */ | Line 301 actually used in this definition. */ |
301 | } | } |
302 | #else | #else |
303 | #define RMATCH(ra,rb,rc,rd,re,rw) \ | #define RMATCH(ra,rb,rc,rd,re,rw) \ |
304 | rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1) | rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) |
305 | #define RRETURN(ra) return ra | #define RRETURN(ra) return ra |
306 | #endif | #endif |
307 | ||
# | Line 315 argument of match(), which never changes | Line 316 argument of match(), which never changes |
316 | ||
317 | #define RMATCH(ra,rb,rc,rd,re,rw)\ | #define RMATCH(ra,rb,rc,rd,re,rw)\ |
318 | {\ | {\ |
319 | heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\ | heapframe *newframe = frame->Xnextframe;\ |
320 | if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ | if (newframe == NULL)\ |
321 | frame->Xwhere = rw; \ | {\ |
322 | newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ | |
323 | if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ | |
324 | newframe->Xnextframe = NULL;\ | |
325 | frame->Xnextframe = newframe;\ | |
326 | }\ | |
327 | frame->Xwhere = rw;\ | |
328 | newframe->Xeptr = ra;\ | newframe->Xeptr = ra;\ |
329 | newframe->Xecode = rb;\ | newframe->Xecode = rb;\ |
330 | newframe->Xmstart = mstart;\ | newframe->Xmstart = mstart;\ |
newframe->Xmarkptr = markptr;\ | ||
331 | newframe->Xoffset_top = rc;\ | newframe->Xoffset_top = rc;\ |
332 | newframe->Xeptrb = re;\ | newframe->Xeptrb = re;\ |
333 | newframe->Xrdepth = frame->Xrdepth + 1;\ | newframe->Xrdepth = frame->Xrdepth + 1;\ |
# | Line 337 argument of match(), which never changes | Line 343 argument of match(), which never changes |
343 | {\ | {\ |
344 | heapframe *oldframe = frame;\ | heapframe *oldframe = frame;\ |
345 | frame = oldframe->Xprevframe;\ | frame = oldframe->Xprevframe;\ |
(pcre_stack_free)(oldframe);\ | ||
346 | if (frame != NULL)\ | if (frame != NULL)\ |
347 | {\ | {\ |
348 | rrc = ra;\ | rrc = ra;\ |
# | Line 351 argument of match(), which never changes | Line 356 argument of match(), which never changes |
356 | ||
357 | typedef struct heapframe { | typedef struct heapframe { |
358 | struct heapframe *Xprevframe; | struct heapframe *Xprevframe; |
359 | struct heapframe *Xnextframe; | |
360 | ||
361 | /* Function arguments that may change */ | /* Function arguments that may change */ |
362 | ||
363 | USPTR Xeptr; | PCRE_PUCHAR Xeptr; |
364 | const uschar *Xecode; | const pcre_uchar *Xecode; |
365 | USPTR Xmstart; | PCRE_PUCHAR Xmstart; |
USPTR Xmarkptr; | ||
366 | int Xoffset_top; | int Xoffset_top; |
367 | eptrblock *Xeptrb; | eptrblock *Xeptrb; |
368 | unsigned int Xrdepth; | unsigned int Xrdepth; |
369 | ||
370 | /* Function local variables */ | /* Function local variables */ |
371 | ||
372 | USPTR Xcallpat; | PCRE_PUCHAR Xcallpat; |
373 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
374 | USPTR Xcharptr; | PCRE_PUCHAR Xcharptr; |
375 | #endif | #endif |
376 | USPTR Xdata; | PCRE_PUCHAR Xdata; |
377 | USPTR Xnext; | PCRE_PUCHAR Xnext; |
378 | USPTR Xpp; | PCRE_PUCHAR Xpp; |
379 | USPTR Xprev; | PCRE_PUCHAR Xprev; |
380 | USPTR Xsaved_eptr; | PCRE_PUCHAR Xsaved_eptr; |
381 | ||
382 | recursion_info Xnew_recursive; | recursion_info Xnew_recursive; |
383 | ||
# | Line 385 typedef struct heapframe { | Line 390 typedef struct heapframe { |
390 | int Xprop_value; | int Xprop_value; |
391 | int Xprop_fail_result; | int Xprop_fail_result; |
392 | int Xoclength; | int Xoclength; |
393 | uschar Xocchars[8]; | pcre_uchar Xocchars[6]; |
394 | #endif | #endif |
395 | ||
396 | int Xcodelink; | int Xcodelink; |
# | Line 427 returns a negative (error) response, the | Line 432 returns a negative (error) response, the |
432 | same response. */ | same response. */ |
433 | ||
434 | /* These macros pack up tests that are used for partial matching, and which | /* These macros pack up tests that are used for partial matching, and which |
435 | appears several times in the code. We set the "hit end" flag if the pointer is | appear several times in the code. We set the "hit end" flag if the pointer is |
436 | at the end of the subject and also past the start of the subject (i.e. | at the end of the subject and also past the start of the subject (i.e. |
437 | something has been matched). For hard partial matching, we then return | something has been matched). For hard partial matching, we then return |
438 | immediately. The second one is used when we already know we are past the end of | immediately. The second one is used when we already know we are past the end of |
# | Line 438 the subject. */ | Line 443 the subject. */ |
443 | eptr > md->start_used_ptr) \ | eptr > md->start_used_ptr) \ |
444 | { \ | { \ |
445 | md->hitend = TRUE; \ | md->hitend = TRUE; \ |
446 | if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ |
447 | } | } |
448 | ||
449 | #define SCHECK_PARTIAL()\ | #define SCHECK_PARTIAL()\ |
450 | if (md->partial != 0 && eptr > md->start_used_ptr) \ | if (md->partial != 0 && eptr > md->start_used_ptr) \ |
451 | { \ | { \ |
452 | md->hitend = TRUE; \ | md->hitend = TRUE; \ |
453 | if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ |
454 | } | } |
455 | ||
456 | ||
457 | /* Performance note: It might be tempting to extract commonly used fields from | /* Performance note: It might be tempting to extract commonly used fields from |
458 | the md structure (e.g. utf8, end_subject) into individual variables to improve | the md structure (e.g. utf, end_subject) into individual variables to improve |
459 | performance. Tests using gcc on a SPARC disproved this; in the first case, it | performance. Tests using gcc on a SPARC disproved this; in the first case, it |
460 | made performance worse. | made performance worse. |
461 | ||
# | Line 459 Arguments: | Line 464 Arguments: |
464 | ecode pointer to current position in compiled code | ecode pointer to current position in compiled code |
465 | mstart pointer to the current match start position (can be modified | mstart pointer to the current match start position (can be modified |
466 | by encountering \K) | by encountering \K) |
markptr pointer to the most recent MARK name, or NULL | ||
467 | offset_top current top pointer | offset_top current top pointer |
468 | md pointer to "static" info for the match | md pointer to "static" info for the match |
469 | eptrb pointer to chain of blocks containing eptr at start of | eptrb pointer to chain of blocks containing eptr at start of |
# | Line 474 Returns: MATCH_MATCH if matched | Line 478 Returns: MATCH_MATCH if matched |
478 | */ | */ |
479 | ||
480 | static int | static int |
481 | match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, | match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode, |
482 | const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb, | PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb, |
483 | unsigned int rdepth) | unsigned int rdepth) |
484 | { | { |
485 | /* These variables do not need to be preserved over recursion in this function, | /* These variables do not need to be preserved over recursion in this function, |
# | Line 485 so they can be ordinary variables in all | Line 489 so they can be ordinary variables in all |
489 | register int rrc; /* Returns from recursive calls */ | register int rrc; /* Returns from recursive calls */ |
490 | register int i; /* Used for loops not involving calls to RMATCH() */ | register int i; /* Used for loops not involving calls to RMATCH() */ |
491 | register unsigned int c; /* Character values not kept over RMATCH() calls */ | register unsigned int c; /* Character values not kept over RMATCH() calls */ |
492 | register BOOL utf8; /* Local copy of UTF-8 flag for speed */ | register BOOL utf; /* Local copy of UTF flag for speed */ |
493 | ||
494 | BOOL minimize, possessive; /* Quantifier options */ | BOOL minimize, possessive; /* Quantifier options */ |
495 | BOOL caseless; | BOOL caseless; |
496 | int condcode; | int condcode; |
497 | ||
498 | /* When recursion is not being used, all "local" variables that have to be | /* When recursion is not being used, all "local" variables that have to be |
499 | preserved over calls to RMATCH() are part of a "frame" which is obtained from | preserved over calls to RMATCH() are part of a "frame". We set up the top-level |
500 | heap storage. Set up the top-level frame here; others are obtained from the | frame on the stack here; subsequent instantiations are obtained from the heap |
501 | heap whenever RMATCH() does a "recursion". See the macro definitions above. */ | whenever RMATCH() does a "recursion". See the macro definitions above. Putting |
502 | the top-level on the stack rather than malloc-ing them all gives a performance | |
503 | boost in many cases where there is not much "recursion". */ | |
504 | ||
505 | #ifdef NO_RECURSE | #ifdef NO_RECURSE |
506 | heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); | heapframe *frame = (heapframe *)md->match_frames_base; |
if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); | ||
frame->Xprevframe = NULL; /* Marks the top level */ | ||
507 | ||
508 | /* Copy in the original argument variables */ | /* Copy in the original argument variables */ |
509 | ||
510 | frame->Xeptr = eptr; | frame->Xeptr = eptr; |
511 | frame->Xecode = ecode; | frame->Xecode = ecode; |
512 | frame->Xmstart = mstart; | frame->Xmstart = mstart; |
frame->Xmarkptr = markptr; | ||
513 | frame->Xoffset_top = offset_top; | frame->Xoffset_top = offset_top; |
514 | frame->Xeptrb = eptrb; | frame->Xeptrb = eptrb; |
515 | frame->Xrdepth = rdepth; | frame->Xrdepth = rdepth; |
# | Line 520 HEAP_RECURSE: | Line 523 HEAP_RECURSE: |
523 | #define eptr frame->Xeptr | #define eptr frame->Xeptr |
524 | #define ecode frame->Xecode | #define ecode frame->Xecode |
525 | #define mstart frame->Xmstart | #define mstart frame->Xmstart |
#define markptr frame->Xmarkptr | ||
526 | #define offset_top frame->Xoffset_top | #define offset_top frame->Xoffset_top |
527 | #define eptrb frame->Xeptrb | #define eptrb frame->Xeptrb |
528 | #define rdepth frame->Xrdepth | #define rdepth frame->Xrdepth |
529 | ||
530 | /* Ditto for the local variables */ | /* Ditto for the local variables */ |
531 | ||
532 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
533 | #define charptr frame->Xcharptr | #define charptr frame->Xcharptr |
534 | #endif | #endif |
535 | #define callpat frame->Xcallpat | #define callpat frame->Xcallpat |
# | Line 585 declarations can be cut out in a block. | Line 587 declarations can be cut out in a block. |
587 | below are for variables that do not have to be preserved over a recursive call | below are for variables that do not have to be preserved over a recursive call |
588 | to RMATCH(). */ | to RMATCH(). */ |
589 | ||
590 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
591 | const uschar *charptr; | const pcre_uchar *charptr; |
592 | #endif | #endif |
593 | const uschar *callpat; | const pcre_uchar *callpat; |
594 | const uschar *data; | const pcre_uchar *data; |
595 | const uschar *next; | const pcre_uchar *next; |
596 | USPTR pp; | PCRE_PUCHAR pp; |
597 | const uschar *prev; | const pcre_uchar *prev; |
598 | USPTR saved_eptr; | PCRE_PUCHAR saved_eptr; |
599 | ||
600 | recursion_info new_recursive; | recursion_info new_recursive; |
601 | ||
# | Line 606 int prop_type; | Line 608 int prop_type; |
608 | int prop_value; | int prop_value; |
609 | int prop_fail_result; | int prop_fail_result; |
610 | int oclength; | int oclength; |
611 | uschar occhars[8]; | pcre_uchar occhars[6]; |
612 | #endif | #endif |
613 | ||
614 | int codelink; | int codelink; |
# | Line 622 int save_offset1, save_offset2, save_off | Line 624 int save_offset1, save_offset2, save_off |
624 | int stacksave[REC_STACK_SAVE_MAX]; | int stacksave[REC_STACK_SAVE_MAX]; |
625 | ||
626 | eptrblock newptrb; | eptrblock newptrb; |
627 | ||
628 | /* There is a special fudge for calling match() in a way that causes it to | |
629 | measure the size of its basic stack frame when the stack is being used for | |
630 | recursion. The second argument (ecode) being NULL triggers this behaviour. It | |
631 | cannot normally ever be NULL. The return is the negated value of the frame | |
632 | size. */ | |
633 | ||
634 | if (ecode == NULL) | |
635 | { | |
636 | if (rdepth == 0) | |
637 | return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1); | |
638 | else | |
639 | { | |
640 | int len = (char *)&rdepth - (char *)eptr; | |
641 | return (len > 0)? -len : len; | |
642 | } | |
643 | } | |
644 | #endif /* NO_RECURSE */ | #endif /* NO_RECURSE */ |
645 | ||
646 | /* To save space on the stack and in the heap frame, I have doubled up on some | /* To save space on the stack and in the heap frame, I have doubled up on some |
# | Line 634 the alternative names that are used. */ | Line 653 the alternative names that are used. */ |
653 | #define code_offset codelink | #define code_offset codelink |
654 | #define condassert condition | #define condassert condition |
655 | #define matched_once prev_is_word | #define matched_once prev_is_word |
656 | #define foc number | |
657 | #define save_mark data | |
658 | ||
659 | /* These statements are here to stop the compiler complaining about uninitialized | /* These statements are here to stop the compiler complaining about uninitialized |
660 | variables. */ | variables. */ |
# | Line 659 defined). However, RMATCH isn't like a f | Line 680 defined). However, RMATCH isn't like a f |
680 | complicated macro. It has to be used in one particular way. This shouldn't, | complicated macro. It has to be used in one particular way. This shouldn't, |
681 | however, impact performance when true recursion is being used. */ | however, impact performance when true recursion is being used. */ |
682 | ||
683 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
684 | utf8 = md->utf8; /* Local copy of the flag */ | utf = md->utf; /* Local copy of the flag */ |
685 | #else | #else |
686 | utf8 = FALSE; | utf = FALSE; |
687 | #endif | #endif |
688 | ||
689 | /* First check that we haven't called match() too many times, or that we | /* First check that we haven't called match() too many times, or that we |
# | Line 701 for (;;) | Line 722 for (;;) |
722 | switch(op) | switch(op) |
723 | { | { |
724 | case OP_MARK: | case OP_MARK: |
725 | markptr = ecode + 2; | md->nomatch_mark = ecode + 2; |
726 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | md->mark = NULL; /* In case previously set by assertion */ |
727 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
728 | eptrb, RM55); | eptrb, RM55); |
729 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
730 | md->mark == NULL) md->mark = ecode + 2; | |
731 | ||
732 | /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an | /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an |
733 | argument, and we must check whether that argument matches this MARK's | argument, and we must check whether that argument matches this MARK's |
# | Line 712 for (;;) | Line 736 for (;;) |
736 | position and return MATCH_SKIP. Otherwise, pass back the return code | position and return MATCH_SKIP. Otherwise, pass back the return code |
737 | unaltered. */ | unaltered. */ |
738 | ||
739 | if (rrc == MATCH_SKIP_ARG && | else if (rrc == MATCH_SKIP_ARG && |
740 | strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0) | STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0) |
741 | { | { |
742 | md->start_match_ptr = eptr; | md->start_match_ptr = eptr; |
743 | RRETURN(MATCH_SKIP); | RRETURN(MATCH_SKIP); |
744 | } | } |
if (md->mark == NULL) md->mark = markptr; | ||
745 | RRETURN(rrc); | RRETURN(rrc); |
746 | ||
747 | case OP_FAIL: | case OP_FAIL: |
748 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
749 | ||
750 | /* COMMIT overrides PRUNE, SKIP, and THEN */ | /* COMMIT overrides PRUNE, SKIP, and THEN */ |
751 | ||
752 | case OP_COMMIT: | case OP_COMMIT: |
753 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
754 | eptrb, RM52); | eptrb, RM52); |
755 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && |
756 | rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && | rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && |
757 | rrc != MATCH_THEN) | rrc != MATCH_THEN) |
758 | RRETURN(rrc); | RRETURN(rrc); |
759 | MRRETURN(MATCH_COMMIT); | RRETURN(MATCH_COMMIT); |
760 | ||
761 | /* PRUNE overrides THEN */ | /* PRUNE overrides THEN */ |
762 | ||
763 | case OP_PRUNE: | case OP_PRUNE: |
764 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
765 | eptrb, RM51); | eptrb, RM51); |
766 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
767 | MRRETURN(MATCH_PRUNE); | RRETURN(MATCH_PRUNE); |
768 | ||
769 | case OP_PRUNE_ARG: | case OP_PRUNE_ARG: |
770 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | md->nomatch_mark = ecode + 2; |
771 | md->mark = NULL; /* In case previously set by assertion */ | |
772 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
773 | eptrb, RM56); | eptrb, RM56); |
774 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
775 | md->mark == NULL) md->mark = ecode + 2; | |
776 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
md->mark = ecode + 2; | ||
777 | RRETURN(MATCH_PRUNE); | RRETURN(MATCH_PRUNE); |
778 | ||
779 | /* SKIP overrides PRUNE and THEN */ | /* SKIP overrides PRUNE and THEN */ |
780 | ||
781 | case OP_SKIP: | case OP_SKIP: |
782 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
783 | eptrb, RM53); | eptrb, RM53); |
784 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
785 | RRETURN(rrc); | RRETURN(rrc); |
786 | md->start_match_ptr = eptr; /* Pass back current position */ | md->start_match_ptr = eptr; /* Pass back current position */ |
787 | MRRETURN(MATCH_SKIP); | RRETURN(MATCH_SKIP); |
788 | ||
789 | /* Note that, for Perl compatibility, SKIP with an argument does NOT set | |
790 | nomatch_mark. There is a flag that disables this opcode when re-matching a | |
791 | pattern that ended with a SKIP for which there was not a matching MARK. */ | |
792 | ||
793 | case OP_SKIP_ARG: | case OP_SKIP_ARG: |
794 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | if (md->ignore_skip_arg) |
795 | { | |
796 | ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; | |
797 | break; | |
798 | } | |
799 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
800 | eptrb, RM57); | eptrb, RM57); |
801 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
802 | RRETURN(rrc); | RRETURN(rrc); |
803 | ||
804 | /* Pass back the current skip name by overloading md->start_match_ptr and | /* Pass back the current skip name by overloading md->start_match_ptr and |
805 | returning the special MATCH_SKIP_ARG return code. This will either be | returning the special MATCH_SKIP_ARG return code. This will either be |
806 | caught by a matching MARK, or get to the top, where it is treated the same | caught by a matching MARK, or get to the top, where it causes a rematch |
807 | as PRUNE. */ | with the md->ignore_skip_arg flag set. */ |
808 | ||
809 | md->start_match_ptr = ecode + 2; | md->start_match_ptr = ecode + 2; |
810 | RRETURN(MATCH_SKIP_ARG); | RRETURN(MATCH_SKIP_ARG); |
# | Line 780 for (;;) | Line 814 for (;;) |
814 | match pointer to do this. */ | match pointer to do this. */ |
815 | ||
816 | case OP_THEN: | case OP_THEN: |
817 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
818 | eptrb, RM54); | eptrb, RM54); |
819 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
820 | md->start_match_ptr = ecode; | md->start_match_ptr = ecode; |
821 | MRRETURN(MATCH_THEN); | RRETURN(MATCH_THEN); |
822 | ||
823 | case OP_THEN_ARG: | case OP_THEN_ARG: |
824 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, | md->nomatch_mark = ecode + 2; |
825 | md->mark = NULL; /* In case previously set by assertion */ | |
826 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, | |
827 | md, eptrb, RM58); | md, eptrb, RM58); |
828 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
829 | md->mark == NULL) md->mark = ecode + 2; | |
830 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
831 | md->start_match_ptr = ecode; | md->start_match_ptr = ecode; |
md->mark = ecode + 2; | ||
832 | RRETURN(MATCH_THEN); | RRETURN(MATCH_THEN); |
833 | ||
834 | /* Handle an atomic group that does not contain any capturing parentheses. | |
835 | This can be handled like an assertion. Prior to 8.13, all atomic groups | |
836 | were handled this way. In 8.13, the code was changed as below for ONCE, so | |
837 | that backups pass through the group and thereby reset captured values. | |
838 | However, this uses a lot more stack, so in 8.20, atomic groups that do not | |
839 | contain any captures generate OP_ONCE_NC, which can be handled in the old, | |
840 | less stack intensive way. | |
841 | ||
842 | Check the alternative branches in turn - the matching won't pass the KET | |
843 | for this kind of subpattern. If any one branch matches, we carry on as at | |
844 | the end of a normal bracket, leaving the subject pointer, but resetting | |
845 | the start-of-match value in case it was changed by \K. */ | |
846 | ||
847 | case OP_ONCE_NC: | |
848 | prev = ecode; | |
849 | saved_eptr = eptr; | |
850 | save_mark = md->mark; | |
851 | do | |
852 | { | |
853 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); | |
854 | if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ | |
855 | { | |
856 | mstart = md->start_match_ptr; | |
857 | break; | |
858 | } | |
859 | if (rrc == MATCH_THEN) | |
860 | { | |
861 | next = ecode + GET(ecode,1); | |
862 | if (md->start_match_ptr < next && | |
863 | (*ecode == OP_ALT || *next == OP_ALT)) | |
864 | rrc = MATCH_NOMATCH; | |
865 | } | |
866 | ||
867 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | |
868 | ecode += GET(ecode,1); | |
869 | md->mark = save_mark; | |
870 | } | |
871 | while (*ecode == OP_ALT); | |
872 | ||
873 | /* If hit the end of the group (which could be repeated), fail */ | |
874 | ||
875 | if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH); | |
876 | ||
877 | /* Continue as from after the group, updating the offsets high water | |
878 | mark, since extracts may have been taken. */ | |
879 | ||
880 | do ecode += GET(ecode, 1); while (*ecode == OP_ALT); | |
881 | ||
882 | offset_top = md->end_offset_top; | |
883 | eptr = md->end_match_ptr; | |
884 | ||
885 | /* For a non-repeating ket, just continue at this level. This also | |
886 | happens for a repeating ket if no characters were matched in the group. | |
887 | This is the forcible breaking of infinite loops as implemented in Perl | |
888 | 5.005. */ | |
889 | ||
890 | if (*ecode == OP_KET || eptr == saved_eptr) | |
891 | { | |
892 | ecode += 1+LINK_SIZE; | |
893 | break; | |
894 | } | |
895 | ||
896 | /* The repeating kets try the rest of the pattern or restart from the | |
897 | preceding bracket, in the appropriate order. The second "call" of match() | |
898 | uses tail recursion, to avoid using another stack frame. */ | |
899 | ||
900 | if (*ecode == OP_KETRMIN) | |
901 | { | |
902 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65); | |
903 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | |
904 | ecode = prev; | |
905 | goto TAIL_RECURSE; | |
906 | } | |
907 | else /* OP_KETRMAX */ | |
908 | { | |
909 | RMATCH(eptr, prev, offset_top, md, eptrb, RM66); | |
910 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | |
911 | ecode += 1 + LINK_SIZE; | |
912 | goto TAIL_RECURSE; | |
913 | } | |
914 | /* Control never gets here */ | |
915 | ||
916 | /* Handle a capturing bracket, other than those that are possessive with an | /* Handle a capturing bracket, other than those that are possessive with an |
917 | unlimited repeat. If there is space in the offset vector, save the current | unlimited repeat. If there is space in the offset vector, save the current |
918 | subject position in the working slot at the top of the vector. We mustn't | subject position in the working slot at the top of the vector. We mustn't |
# | Line 826 for (;;) | Line 945 for (;;) |
945 | save_offset2 = md->offset_vector[offset+1]; | save_offset2 = md->offset_vector[offset+1]; |
946 | save_offset3 = md->offset_vector[md->offset_end - number]; | save_offset3 = md->offset_vector[md->offset_end - number]; |
947 | save_capture_last = md->capture_last; | save_capture_last = md->capture_last; |
948 | save_mark = md->mark; | |
949 | ||
950 | DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); | DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); |
951 | md->offset_vector[md->offset_end - number] = | md->offset_vector[md->offset_end - number] = |
# | Line 834 for (;;) | Line 954 for (;;) |
954 | for (;;) | for (;;) |
955 | { | { |
956 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
957 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
958 | eptrb, RM1); | eptrb, RM1); |
959 | if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ | if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ |
960 | ||
961 | /* If we backed up to a THEN, check whether it is within the current | /* If we backed up to a THEN, check whether it is within the current |
962 | branch by comparing the address of the THEN that is passed back with | branch by comparing the address of the THEN that is passed back with |
963 | the end of the branch. If it is within the current branch, and the | the end of the branch. If it is within the current branch, and the |
964 | branch is one of two or more alternatives (it either starts or ends | branch is one of two or more alternatives (it either starts or ends |
965 | with OP_ALT), we have reached the limit of THEN's action, so convert | with OP_ALT), we have reached the limit of THEN's action, so convert |
966 | the return code to NOMATCH, which will cause normal backtracking to | the return code to NOMATCH, which will cause normal backtracking to |
967 | happen from now on. Otherwise, THEN is passed back to an outer | happen from now on. Otherwise, THEN is passed back to an outer |
968 | alternative. This implements Perl's treatment of parenthesized groups, | alternative. This implements Perl's treatment of parenthesized groups, |
969 | where a group not containing | does not affect the current alternative, | where a group not containing | does not affect the current alternative, |
970 | that is, (X) is NOT the same as (X|(*F)). */ | that is, (X) is NOT the same as (X|(*F)). */ |
971 | ||
972 | if (rrc == MATCH_THEN) | if (rrc == MATCH_THEN) |
973 | { | { |
974 | next = ecode + GET(ecode,1); | next = ecode + GET(ecode,1); |
975 | if (md->start_match_ptr < next && | if (md->start_match_ptr < next && |
976 | (*ecode == OP_ALT || *next == OP_ALT)) | (*ecode == OP_ALT || *next == OP_ALT)) |
977 | rrc = MATCH_NOMATCH; | rrc = MATCH_NOMATCH; |
978 | } | } |
979 | ||
980 | /* Anything other than NOMATCH is passed back. */ | /* Anything other than NOMATCH is passed back. */ |
981 | ||
982 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
983 | md->capture_last = save_capture_last; | md->capture_last = save_capture_last; |
984 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
985 | md->mark = save_mark; | |
986 | if (*ecode != OP_ALT) break; | if (*ecode != OP_ALT) break; |
987 | } | } |
988 | ||
# | Line 872 for (;;) | Line 993 for (;;) |
993 | ||
994 | /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ | /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ |
995 | ||
if (md->mark == NULL) md->mark = markptr; | ||
996 | RRETURN(rrc); | RRETURN(rrc); |
997 | } | } |
998 | ||
# | Line 888 for (;;) | Line 1008 for (;;) |
1008 | /* VVVVVVVVVVVVVVVVVVVVVVVVV */ | /* VVVVVVVVVVVVVVVVVVVVVVVVV */ |
1009 | ||
1010 | /* Non-capturing or atomic group, except for possessive with unlimited | /* Non-capturing or atomic group, except for possessive with unlimited |
1011 | repeat. Loop for all the alternatives. | repeat and ONCE group with no captures. Loop for all the alternatives. |
1012 | ||
1013 | When we get to the final alternative within the brackets, we used to return | When we get to the final alternative within the brackets, we used to return |
1014 | the result of a recursive call to match() whatever happened so it was | the result of a recursive call to match() whatever happened so it was |
# | Line 914 for (;;) | Line 1034 for (;;) |
1034 | ||
1035 | for (;;) | for (;;) |
1036 | { | { |
1037 | if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA || op == OP_ONCE) |
1038 | md->match_function_type = MATCH_CBEGROUP; | |
1039 | ||
1040 | /* If this is not a possibly empty group, and there are no (*THEN)s in | /* If this is not a possibly empty group, and there are no (*THEN)s in |
1041 | the pattern, and this is the final alternative, optimize as described | the pattern, and this is the final alternative, optimize as described |
# | Line 922 for (;;) | Line 1043 for (;;) |
1043 | ||
1044 | else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) | else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) |
1045 | { | { |
1046 | ecode += _pcre_OP_lengths[*ecode]; | ecode += PRIV(OP_lengths)[*ecode]; |
1047 | goto TAIL_RECURSE; | goto TAIL_RECURSE; |
1048 | } | } |
1049 | ||
1050 | /* In all other cases, we have to make another call to match(). */ | /* In all other cases, we have to make another call to match(). */ |
1051 | ||
1052 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, | save_mark = md->mark; |
1053 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, | |
1054 | RM2); | RM2); |
1055 | ||
1056 | /* See comment in the code for capturing groups above about handling | /* See comment in the code for capturing groups above about handling |
1057 | THEN. */ | THEN. */ |
1058 | ||
1059 | if (rrc == MATCH_THEN) | if (rrc == MATCH_THEN) |
1060 | { | { |
1061 | next = ecode + GET(ecode,1); | next = ecode + GET(ecode,1); |
1062 | if (md->start_match_ptr < next && | if (md->start_match_ptr < next && |
1063 | (*ecode == OP_ALT || *next == OP_ALT)) | (*ecode == OP_ALT || *next == OP_ALT)) |
1064 | rrc = MATCH_NOMATCH; | rrc = MATCH_NOMATCH; |
1065 | } | } |
1066 | ||
1067 | if (rrc != MATCH_NOMATCH) | if (rrc != MATCH_NOMATCH) |
1068 | { | { |
1069 | if (rrc == MATCH_ONCE) | if (rrc == MATCH_ONCE) |
1070 | { | { |
1071 | const uschar *scode = ecode; | const pcre_uchar *scode = ecode; |
1072 | if (*scode != OP_ONCE) /* If not at start, find it */ | if (*scode != OP_ONCE) /* If not at start, find it */ |
1073 | { | { |
1074 | while (*scode == OP_ALT) scode += GET(scode, 1); | while (*scode == OP_ALT) scode += GET(scode, 1); |
# | Line 957 for (;;) | Line 1079 for (;;) |
1079 | RRETURN(rrc); | RRETURN(rrc); |
1080 | } | } |
1081 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
1082 | md->mark = save_mark; | |
1083 | if (*ecode != OP_ALT) break; | if (*ecode != OP_ALT) break; |
1084 | } | } |
1085 | ||
if (md->mark == NULL) md->mark = markptr; | ||
1086 | RRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1087 | ||
1088 | /* Handle possessive capturing brackets with an unlimited repeat. We come | /* Handle possessive capturing brackets with an unlimited repeat. We come |
# | Line 989 for (;;) | Line 1111 for (;;) |
1111 | if (offset < md->offset_max) | if (offset < md->offset_max) |
1112 | { | { |
1113 | matched_once = FALSE; | matched_once = FALSE; |
1114 | code_offset = ecode - md->start_code; | code_offset = (int)(ecode - md->start_code); |
1115 | ||
1116 | save_offset1 = md->offset_vector[offset]; | save_offset1 = md->offset_vector[offset]; |
1117 | save_offset2 = md->offset_vector[offset+1]; | save_offset2 = md->offset_vector[offset+1]; |
# | Line 1012 for (;;) | Line 1134 for (;;) |
1134 | md->offset_vector[md->offset_end - number] = | md->offset_vector[md->offset_end - number] = |
1135 | (int)(eptr - md->start_subject); | (int)(eptr - md->start_subject); |
1136 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
1137 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
1138 | eptrb, RM63); | eptrb, RM63); |
1139 | if (rrc == MATCH_KETRPOS) | if (rrc == MATCH_KETRPOS) |
1140 | { | { |
# | Line 1023 for (;;) | Line 1145 for (;;) |
1145 | matched_once = TRUE; | matched_once = TRUE; |
1146 | continue; | continue; |
1147 | } | } |
1148 | ||
1149 | /* See comment in the code for capturing groups above about handling | /* See comment in the code for capturing groups above about handling |
1150 | THEN. */ | THEN. */ |
1151 | ||
1152 | if (rrc == MATCH_THEN) | if (rrc == MATCH_THEN) |
1153 | { | { |
1154 | next = ecode + GET(ecode,1); | next = ecode + GET(ecode,1); |
1155 | if (md->start_match_ptr < next && | if (md->start_match_ptr < next && |
1156 | (*ecode == OP_ALT || *next == OP_ALT)) | (*ecode == OP_ALT || *next == OP_ALT)) |
1157 | rrc = MATCH_NOMATCH; | rrc = MATCH_NOMATCH; |
1158 | } | } |
1159 | ||
1160 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
1161 | md->capture_last = save_capture_last; | md->capture_last = save_capture_last; |
# | Line 1048 for (;;) | Line 1170 for (;;) |
1170 | md->offset_vector[md->offset_end - number] = save_offset3; | md->offset_vector[md->offset_end - number] = save_offset3; |
1171 | } | } |
1172 | ||
if (md->mark == NULL) md->mark = markptr; | ||
1173 | if (allow_zero || matched_once) | if (allow_zero || matched_once) |
1174 | { | { |
1175 | ecode += 1 + LINK_SIZE; | ecode += 1 + LINK_SIZE; |
# | Line 1080 for (;;) | Line 1201 for (;;) |
1201 | ||
1202 | POSSESSIVE_NON_CAPTURE: | POSSESSIVE_NON_CAPTURE: |
1203 | matched_once = FALSE; | matched_once = FALSE; |
1204 | code_offset = ecode - md->start_code; | code_offset = (int)(ecode - md->start_code); |
1205 | ||
1206 | for (;;) | for (;;) |
1207 | { | { |
1208 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
1209 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
1210 | eptrb, RM48); | eptrb, RM48); |
1211 | if (rrc == MATCH_KETRPOS) | if (rrc == MATCH_KETRPOS) |
1212 | { | { |
# | Line 1095 for (;;) | Line 1216 for (;;) |
1216 | matched_once = TRUE; | matched_once = TRUE; |
1217 | continue; | continue; |
1218 | } | } |
1219 | ||
1220 | /* See comment in the code for capturing groups above about handling | /* See comment in the code for capturing groups above about handling |
1221 | THEN. */ | THEN. */ |
1222 | ||
1223 | if (rrc == MATCH_THEN) | if (rrc == MATCH_THEN) |
1224 | { | { |
1225 | next = ecode + GET(ecode,1); | next = ecode + GET(ecode,1); |
1226 | if (md->start_match_ptr < next && | if (md->start_match_ptr < next && |
1227 | (*ecode == OP_ALT || *next == OP_ALT)) | (*ecode == OP_ALT || *next == OP_ALT)) |
1228 | rrc = MATCH_NOMATCH; | rrc = MATCH_NOMATCH; |
1229 | } | } |
1230 | ||
1231 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
1232 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
# | Line 1135 for (;;) | Line 1256 for (;;) |
1256 | ||
1257 | if (ecode[LINK_SIZE+1] == OP_CALLOUT) | if (ecode[LINK_SIZE+1] == OP_CALLOUT) |
1258 | { | { |
1259 | if (pcre_callout != NULL) | if (PUBL(callout) != NULL) |
1260 | { | { |
1261 | pcre_callout_block cb; | PUBL(callout_block) cb; |
1262 | cb.version = 2; /* Version 2 of the callout block */ | cb.version = 2; /* Version 2 of the callout block */ |
1263 | cb.callout_number = ecode[LINK_SIZE+2]; | cb.callout_number = ecode[LINK_SIZE+2]; |
1264 | cb.offset_vector = md->offset_vector; | cb.offset_vector = md->offset_vector; |
1265 | #ifdef COMPILE_PCRE8 | |
1266 | cb.subject = (PCRE_SPTR)md->start_subject; | cb.subject = (PCRE_SPTR)md->start_subject; |
1267 | #else | |
1268 | cb.subject = (PCRE_SPTR16)md->start_subject; | |
1269 | #endif | |
1270 | cb.subject_length = (int)(md->end_subject - md->start_subject); | cb.subject_length = (int)(md->end_subject - md->start_subject); |
1271 | cb.start_match = (int)(mstart - md->start_subject); | cb.start_match = (int)(mstart - md->start_subject); |
1272 | cb.current_position = (int)(eptr - md->start_subject); | cb.current_position = (int)(eptr - md->start_subject); |
# | Line 1150 for (;;) | Line 1275 for (;;) |
1275 | cb.capture_top = offset_top/2; | cb.capture_top = offset_top/2; |
1276 | cb.capture_last = md->capture_last; | cb.capture_last = md->capture_last; |
1277 | cb.callout_data = md->callout_data; | cb.callout_data = md->callout_data; |
1278 | cb.mark = markptr; | cb.mark = md->nomatch_mark; |
1279 | if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); | if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1280 | if (rrc < 0) RRETURN(rrc); | if (rrc < 0) RRETURN(rrc); |
1281 | } | } |
1282 | ecode += _pcre_OP_lengths[OP_CALLOUT]; | ecode += PRIV(OP_lengths)[OP_CALLOUT]; |
1283 | } | } |
1284 | ||
1285 | condcode = ecode[LINK_SIZE+1]; | condcode = ecode[LINK_SIZE+1]; |
# | Line 1171 for (;;) | Line 1296 for (;;) |
1296 | else | else |
1297 | { | { |
1298 | int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ | int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ |
1299 | condition = (recno == RREF_ANY || recno == md->recursive->group_num); | condition = (recno == RREF_ANY || recno == md->recursive->group_num); |
1300 | ||
1301 | /* If the test is for recursion into a specific subpattern, and it is | /* If the test is for recursion into a specific subpattern, and it is |
1302 | false, but the test was set up by name, scan the table to see if the | false, but the test was set up by name, scan the table to see if the |
1303 | name refers to any other numbers, and test them. The condition is true | name refers to any other numbers, and test them. The condition is true |
1304 | if any one is set. */ | if any one is set. */ |
1305 | ||
1306 | if (!condition && condcode == OP_NRREF && recno != RREF_ANY) | if (!condition && condcode == OP_NRREF) |
1307 | { | { |
1308 | uschar *slotA = md->name_table; | pcre_uchar *slotA = md->name_table; |
1309 | for (i = 0; i < md->name_count; i++) | for (i = 0; i < md->name_count; i++) |
1310 | { | { |
1311 | if (GET2(slotA, 0) == recno) break; | if (GET2(slotA, 0) == recno) break; |
# | Line 1193 for (;;) | Line 1318 for (;;) |
1318 | ||
1319 | if (i < md->name_count) | if (i < md->name_count) |
1320 | { | { |
1321 | uschar *slotB = slotA; | pcre_uchar *slotB = slotA; |
1322 | while (slotB > md->name_table) | while (slotB > md->name_table) |
1323 | { | { |
1324 | slotB -= md->name_entry_size; | slotB -= md->name_entry_size; |
1325 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1326 | { | { |
1327 | condition = GET2(slotB, 0) == md->recursive->group_num; | condition = GET2(slotB, 0) == md->recursive->group_num; |
1328 | if (condition) break; | if (condition) break; |
# | Line 1213 for (;;) | Line 1338 for (;;) |
1338 | for (i++; i < md->name_count; i++) | for (i++; i < md->name_count; i++) |
1339 | { | { |
1340 | slotB += md->name_entry_size; | slotB += md->name_entry_size; |
1341 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1342 | { | { |
1343 | condition = GET2(slotB, 0) == md->recursive->group_num; | condition = GET2(slotB, 0) == md->recursive->group_num; |
1344 | if (condition) break; | if (condition) break; |
# | Line 1226 for (;;) | Line 1351 for (;;) |
1351 | ||
1352 | /* Choose branch according to the condition */ | /* Choose branch according to the condition */ |
1353 | ||
1354 | ecode += condition? 3 : GET(ecode, 1); | ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); |
1355 | } | } |
1356 | } | } |
1357 | ||
# | Line 1243 for (;;) | Line 1368 for (;;) |
1368 | if (!condition && condcode == OP_NCREF) | if (!condition && condcode == OP_NCREF) |
1369 | { | { |
1370 | int refno = offset >> 1; | int refno = offset >> 1; |
1371 | uschar *slotA = md->name_table; | pcre_uchar *slotA = md->name_table; |
1372 | ||
1373 | for (i = 0; i < md->name_count; i++) | for (i = 0; i < md->name_count; i++) |
1374 | { | { |
# | Line 1257 for (;;) | Line 1382 for (;;) |
1382 | ||
1383 | if (i < md->name_count) | if (i < md->name_count) |
1384 | { | { |
1385 | uschar *slotB = slotA; | pcre_uchar *slotB = slotA; |
1386 | while (slotB > md->name_table) | while (slotB > md->name_table) |
1387 | { | { |
1388 | slotB -= md->name_entry_size; | slotB -= md->name_entry_size; |
1389 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1390 | { | { |
1391 | offset = GET2(slotB, 0) << 1; | offset = GET2(slotB, 0) << 1; |
1392 | condition = offset < offset_top && | condition = offset < offset_top && |
# | Line 1279 for (;;) | Line 1404 for (;;) |
1404 | for (i++; i < md->name_count; i++) | for (i++; i < md->name_count; i++) |
1405 | { | { |
1406 | slotB += md->name_entry_size; | slotB += md->name_entry_size; |
1407 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1408 | { | { |
1409 | offset = GET2(slotB, 0) << 1; | offset = GET2(slotB, 0) << 1; |
1410 | condition = offset < offset_top && | condition = offset < offset_top && |
# | Line 1294 for (;;) | Line 1419 for (;;) |
1419 | ||
1420 | /* Choose branch according to the condition */ | /* Choose branch according to the condition */ |
1421 | ||
1422 | ecode += condition? 3 : GET(ecode, 1); | ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); |
1423 | } | } |
1424 | ||
1425 | else if (condcode == OP_DEF) /* DEFINE - always false */ | else if (condcode == OP_DEF) /* DEFINE - always false */ |
# | Line 1319 for (;;) | Line 1444 for (;;) |
1444 | ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); | ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); |
1445 | while (*ecode == OP_ALT) ecode += GET(ecode, 1); | while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
1446 | } | } |
1447 | ||
1448 | /* PCRE doesn't allow the effect of (*THEN) to escape beyond an | /* PCRE doesn't allow the effect of (*THEN) to escape beyond an |
1449 | assertion; it is therefore treated as NOMATCH. */ | assertion; it is therefore treated as NOMATCH. */ |
1450 | ||
1451 | else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) | else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) |
1452 | { | { |
1453 | RRETURN(rrc); /* Need braces because of following else */ | RRETURN(rrc); /* Need braces because of following else */ |
1454 | } | } |
# | Line 1351 for (;;) | Line 1476 for (;;) |
1476 | ecode += 1 + LINK_SIZE; | ecode += 1 + LINK_SIZE; |
1477 | goto TAIL_RECURSE; | goto TAIL_RECURSE; |
1478 | } | } |
1479 | ||
1480 | md->match_function_type = MATCH_CBEGROUP; | md->match_function_type = MATCH_CBEGROUP; |
1481 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49); | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49); |
1482 | RRETURN(rrc); | RRETURN(rrc); |
# | Line 1386 for (;;) | Line 1511 for (;;) |
1511 | md->offset_vector[offset+1] = (int)(eptr - md->start_subject); | md->offset_vector[offset+1] = (int)(eptr - md->start_subject); |
1512 | if (offset_top <= offset) offset_top = offset + 2; | if (offset_top <= offset) offset_top = offset + 2; |
1513 | } | } |
1514 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
1515 | break; | break; |
1516 | ||
1517 | ||
# | Line 1406 for (;;) | Line 1531 for (;;) |
1531 | (md->notempty || | (md->notempty || |
1532 | (md->notempty_atstart && | (md->notempty_atstart && |
1533 | mstart == md->start_subject + md->start_offset))) | mstart == md->start_subject + md->start_offset))) |
1534 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1535 | ||
1536 | /* Otherwise, we have a match. */ | /* Otherwise, we have a match. */ |
1537 | ||
# | Line 1415 for (;;) | Line 1540 for (;;) |
1540 | md->start_match_ptr = mstart; /* and the start (\K can modify) */ | md->start_match_ptr = mstart; /* and the start (\K can modify) */ |
1541 | ||
1542 | /* For some reason, the macros don't work properly if an expression is | /* For some reason, the macros don't work properly if an expression is |
1543 | given as the argument to MRRETURN when the heap is in use. */ | given as the argument to RRETURN when the heap is in use. */ |
1544 | ||
1545 | rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; | rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; |
1546 | MRRETURN(rrc); | RRETURN(rrc); |
1547 | ||
1548 | /* Assertion brackets. Check the alternative branches in turn - the | /* Assertion brackets. Check the alternative branches in turn - the |
1549 | matching won't pass the KET for an assertion. If any one branch matches, | matching won't pass the KET for an assertion. If any one branch matches, |
# | Line 1433 for (;;) | Line 1558 for (;;) |
1558 | ||
1559 | case OP_ASSERT: | case OP_ASSERT: |
1560 | case OP_ASSERTBACK: | case OP_ASSERTBACK: |
1561 | save_mark = md->mark; | |
1562 | if (md->match_function_type == MATCH_CONDASSERT) | if (md->match_function_type == MATCH_CONDASSERT) |
1563 | { | { |
1564 | condassert = TRUE; | condassert = TRUE; |
# | Line 1446 for (;;) | Line 1572 for (;;) |
1572 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
1573 | { | { |
1574 | mstart = md->start_match_ptr; /* In case \K reset it */ | mstart = md->start_match_ptr; /* In case \K reset it */ |
markptr = md->mark; | ||
1575 | break; | break; |
1576 | } | } |
1577 | md->mark = save_mark; | |
1578 | /* PCRE does not allow THEN to escape beyond an assertion; it is treated | |
1579 | as NOMATCH. */ | /* A COMMIT failure must fail the entire assertion, without trying any |
1580 | subsequent branches. */ | |
1581 | ||
1582 | if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH); | |
1583 | ||
1584 | /* PCRE does not allow THEN to escape beyond an assertion; it | |
1585 | is treated as NOMATCH. */ | |
1586 | ||
1587 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
1588 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
1589 | } | } |
1590 | while (*ecode == OP_ALT); | while (*ecode == OP_ALT); |
1591 | ||
1592 | if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH); | if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); |
1593 | ||
1594 | /* If checking an assertion for a condition, return MATCH_MATCH. */ | /* If checking an assertion for a condition, return MATCH_MATCH. */ |
1595 | ||
# | Line 1478 for (;;) | Line 1609 for (;;) |
1609 | ||
1610 | case OP_ASSERT_NOT: | case OP_ASSERT_NOT: |
1611 | case OP_ASSERTBACK_NOT: | case OP_ASSERTBACK_NOT: |
1612 | save_mark = md->mark; | |
1613 | if (md->match_function_type == MATCH_CONDASSERT) | if (md->match_function_type == MATCH_CONDASSERT) |
1614 | { | { |
1615 | condassert = TRUE; | condassert = TRUE; |
# | Line 1488 for (;;) | Line 1620 for (;;) |
1620 | do | do |
1621 | { | { |
1622 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); |
1623 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH); | md->mark = save_mark; |
1624 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH); | |
1625 | if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) | if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) |
1626 | { | { |
1627 | do ecode += GET(ecode,1); while (*ecode == OP_ALT); | do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
1628 | break; | break; |
1629 | } | } |
1630 | ||
1631 | /* PCRE does not allow THEN to escape beyond an assertion; it is treated | /* PCRE does not allow THEN to escape beyond an assertion; it is treated |
1632 | as NOMATCH. */ | as NOMATCH. */ |
1633 | ||
1634 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
# | Line 1514 for (;;) | Line 1647 for (;;) |
1647 | back a number of characters, not bytes. */ | back a number of characters, not bytes. */ |
1648 | ||
1649 | case OP_REVERSE: | case OP_REVERSE: |
1650 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
1651 | if (utf8) | if (utf) |
1652 | { | { |
1653 | i = GET(ecode, 1); | i = GET(ecode, 1); |
1654 | while (i-- > 0) | while (i-- > 0) |
1655 | { | { |
1656 | eptr--; | eptr--; |
1657 | if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
1658 | BACKCHAR(eptr); | BACKCHAR(eptr); |
1659 | } | } |
1660 | } | } |
# | Line 1532 for (;;) | Line 1665 for (;;) |
1665 | ||
1666 | { | { |
1667 | eptr -= GET(ecode, 1); | eptr -= GET(ecode, 1); |
1668 | if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
1669 | } | } |
1670 | ||
1671 | /* Save the earliest consulted character, then skip to next op code */ | /* Save the earliest consulted character, then skip to next op code */ |
# | Line 1546 for (;;) | Line 1679 for (;;) |
1679 | function is able to force a failure. */ | function is able to force a failure. */ |
1680 | ||
1681 | case OP_CALLOUT: | case OP_CALLOUT: |
1682 | if (pcre_callout != NULL) | if (PUBL(callout) != NULL) |
1683 | { | { |
1684 | pcre_callout_block cb; | PUBL(callout_block) cb; |
1685 | cb.version = 2; /* Version 2 of the callout block */ | cb.version = 2; /* Version 2 of the callout block */ |
1686 | cb.callout_number = ecode[1]; | cb.callout_number = ecode[1]; |
1687 | cb.offset_vector = md->offset_vector; | cb.offset_vector = md->offset_vector; |
1688 | #ifdef COMPILE_PCRE8 | |
1689 | cb.subject = (PCRE_SPTR)md->start_subject; | cb.subject = (PCRE_SPTR)md->start_subject; |
1690 | #else | |
1691 | cb.subject = (PCRE_SPTR16)md->start_subject; | |
1692 | #endif | |
1693 | cb.subject_length = (int)(md->end_subject - md->start_subject); | cb.subject_length = (int)(md->end_subject - md->start_subject); |
1694 | cb.start_match = (int)(mstart - md->start_subject); | cb.start_match = (int)(mstart - md->start_subject); |
1695 | cb.current_position = (int)(eptr - md->start_subject); | cb.current_position = (int)(eptr - md->start_subject); |
# | Line 1561 for (;;) | Line 1698 for (;;) |
1698 | cb.capture_top = offset_top/2; | cb.capture_top = offset_top/2; |
1699 | cb.capture_last = md->capture_last; | cb.capture_last = md->capture_last; |
1700 | cb.callout_data = md->callout_data; | cb.callout_data = md->callout_data; |
1701 | cb.mark = markptr; | cb.mark = md->nomatch_mark; |
1702 | if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); | if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1703 | if (rrc < 0) RRETURN(rrc); | if (rrc < 0) RRETURN(rrc); |
1704 | } | } |
1705 | ecode += 2 + 2*LINK_SIZE; | ecode += 2 + 2*LINK_SIZE; |
# | Line 1621 for (;;) | Line 1758 for (;;) |
1758 | else | else |
1759 | { | { |
1760 | new_recursive.offset_save = | new_recursive.offset_save = |
1761 | (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); | (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int)); |
1762 | if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); | if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
1763 | } | } |
1764 | memcpy(new_recursive.offset_save, md->offset_vector, | memcpy(new_recursive.offset_save, md->offset_vector, |
# | Line 1636 for (;;) | Line 1773 for (;;) |
1773 | do | do |
1774 | { | { |
1775 | if (cbegroup) md->match_function_type = MATCH_CBEGROUP; | if (cbegroup) md->match_function_type = MATCH_CBEGROUP; |
1776 | RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top, | RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, |
1777 | md, eptrb, RM6); | md, eptrb, RM6); |
1778 | memcpy(md->offset_vector, new_recursive.offset_save, | memcpy(md->offset_vector, new_recursive.offset_save, |
1779 | new_recursive.saved_max * sizeof(int)); | new_recursive.saved_max * sizeof(int)); |
# | Line 1645 for (;;) | Line 1782 for (;;) |
1782 | { | { |
1783 | DPRINTF(("Recursion matched\n")); | DPRINTF(("Recursion matched\n")); |
1784 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1785 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1786 | ||
1787 | /* Set where we got to in the subject, and reset the start in case | /* Set where we got to in the subject, and reset the start in case |
1788 | it was changed by \K. This *is* propagated back out of a recursion, | it was changed by \K. This *is* propagated back out of a recursion, |
# | Line 1656 for (;;) | Line 1793 for (;;) |
1793 | goto RECURSION_MATCHED; /* Exit loop; end processing */ | goto RECURSION_MATCHED; /* Exit loop; end processing */ |
1794 | } | } |
1795 | ||
1796 | /* PCRE does not allow THEN to escape beyond a recursion; it is treated | /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it |
1797 | as NOMATCH. */ | is treated as NOMATCH. */ |
1798 | ||
1799 | else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) | else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN && |
1800 | rrc != MATCH_COMMIT) | |
1801 | { | { |
1802 | DPRINTF(("Recursion gave error %d\n", rrc)); | DPRINTF(("Recursion gave error %d\n", rrc)); |
1803 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1804 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1805 | RRETURN(rrc); | RRETURN(rrc); |
1806 | } | } |
1807 | ||
# | Line 1675 for (;;) | Line 1813 for (;;) |
1813 | DPRINTF(("Recursion didn't match\n")); | DPRINTF(("Recursion didn't match\n")); |
1814 | md->recursive = new_recursive.prevrec; | md->recursive = new_recursive.prevrec; |
1815 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1816 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1817 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1818 | } | } |
1819 | ||
1820 | RECURSION_MATCHED: | RECURSION_MATCHED: |
# | Line 1745 for (;;) | Line 1883 for (;;) |
1883 | } | } |
1884 | else saved_eptr = NULL; | else saved_eptr = NULL; |
1885 | ||
1886 | /* If we are at the end of an assertion group, stop matching and return | /* If we are at the end of an assertion group or a non-capturing atomic |
1887 | MATCH_MATCH, but record the current high water mark for use by positive | group, stop matching and return MATCH_MATCH, but record the current high |
1888 | assertions. We also need to record the match start in case it was changed | water mark for use by positive assertions. We also need to record the match |
1889 | by \K. */ | start in case it was changed by \K. */ |
1890 | ||
1891 | if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || | if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) || |
1892 | *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT) | *prev == OP_ONCE_NC) |
1893 | { | { |
1894 | md->end_match_ptr = eptr; /* For ONCE */ | md->end_match_ptr = eptr; /* For ONCE_NC */ |
1895 | md->end_offset_top = offset_top; | md->end_offset_top = offset_top; |
1896 | md->start_match_ptr = mstart; | md->start_match_ptr = mstart; |
1897 | MRRETURN(MATCH_MATCH); /* Sets md->mark */ | RRETURN(MATCH_MATCH); |
1898 | } | } |
1899 | ||
1900 | /* For capturing groups we have to check the group number back at the start | /* For capturing groups we have to check the group number back at the start |
# | Line 1821 for (;;) | Line 1959 for (;;) |
1959 | /* For an ordinary non-repeating ket, just continue at this level. This | /* For an ordinary non-repeating ket, just continue at this level. This |
1960 | also happens for a repeating ket if no characters were matched in the | also happens for a repeating ket if no characters were matched in the |
1961 | group. This is the forcible breaking of infinite loops as implemented in | group. This is the forcible breaking of infinite loops as implemented in |
1962 | Perl 5.005. For a non-repeating atomic group, establish a backup point by | Perl 5.005. For a non-repeating atomic group that includes captures, |
1963 | processing the rest of the pattern at a lower level. If this results in a | establish a backup point by processing the rest of the pattern at a lower |
1964 | NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby | level. If this results in a NOMATCH return, pass MATCH_ONCE back to the |
1965 | bypassing intermediate backup points, but resetting any captures that | original OP_ONCE level, thereby bypassing intermediate backup points, but |
1966 | happened along the way. */ | resetting any captures that happened along the way. */ |
1967 | ||
1968 | if (*ecode == OP_KET || eptr == saved_eptr) | if (*ecode == OP_KET || eptr == saved_eptr) |
1969 | { | { |
# | Line 1870 for (;;) | Line 2008 for (;;) |
2008 | } | } |
2009 | if (*prev >= OP_SBRA) /* Could match an empty string */ | if (*prev >= OP_SBRA) /* Could match an empty string */ |
2010 | { | { |
md->match_function_type = MATCH_CBEGROUP; | ||
2011 | RMATCH(eptr, prev, offset_top, md, eptrb, RM50); | RMATCH(eptr, prev, offset_top, md, eptrb, RM50); |
2012 | RRETURN(rrc); | RRETURN(rrc); |
2013 | } | } |
# | Line 1879 for (;;) | Line 2016 for (;;) |
2016 | } | } |
2017 | else /* OP_KETRMAX */ | else /* OP_KETRMAX */ |
2018 | { | { |
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | ||
2019 | RMATCH(eptr, prev, offset_top, md, eptrb, RM13); | RMATCH(eptr, prev, offset_top, md, eptrb, RM13); |
2020 | if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; | if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; |
2021 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
# | Line 1898 for (;;) | Line 2034 for (;;) |
2034 | /* Not multiline mode: start of subject assertion, unless notbol. */ | /* Not multiline mode: start of subject assertion, unless notbol. */ |
2035 | ||
2036 | case OP_CIRC: | case OP_CIRC: |
2037 | if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH); | if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); |
2038 | ||
2039 | /* Start of subject assertion */ | /* Start of subject assertion */ |
2040 | ||
2041 | case OP_SOD: | case OP_SOD: |
2042 | if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); |
2043 | ecode++; | ecode++; |
2044 | break; | break; |
2045 | ||
2046 | /* Multiline mode: start of subject unless notbol, or after any newline. */ | /* Multiline mode: start of subject unless notbol, or after any newline. */ |
2047 | ||
2048 | case OP_CIRCM: | case OP_CIRCM: |
2049 | if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH); | if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); |
2050 | if (eptr != md->start_subject && | if (eptr != md->start_subject && |
2051 | (eptr == md->end_subject || !WAS_NEWLINE(eptr))) | (eptr == md->end_subject || !WAS_NEWLINE(eptr))) |
2052 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2053 | ecode++; | ecode++; |
2054 | break; | break; |
2055 | ||
2056 | /* Start of match assertion */ | /* Start of match assertion */ |
2057 | ||
2058 | case OP_SOM: | case OP_SOM: |
2059 | if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH); | if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); |
2060 | ecode++; | ecode++; |
2061 | break; | break; |
2062 | ||
# | Line 1936 for (;;) | Line 2072 for (;;) |
2072 | ||
2073 | case OP_DOLLM: | case OP_DOLLM: |
2074 | if (eptr < md->end_subject) | if (eptr < md->end_subject) |
2075 | { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); } | { |
2076 | if (!IS_NEWLINE(eptr)) | |
2077 | { | |
2078 | if (md->partial != 0 && | |
2079 | eptr + 1 >= md->end_subject && | |
2080 | NLBLOCK->nltype == NLTYPE_FIXED && | |
2081 | NLBLOCK->nllen == 2 && | |
2082 | *eptr == NLBLOCK->nl[0]) | |
2083 | { | |
2084 | md->hitend = TRUE; | |
2085 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2086 | } | |
2087 | RRETURN(MATCH_NOMATCH); | |
2088 | } | |
2089 | } | |
2090 | else | else |
2091 | { | { |
2092 | if (md->noteol) MRRETURN(MATCH_NOMATCH); | if (md->noteol) RRETURN(MATCH_NOMATCH); |
2093 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2094 | } | } |
2095 | ecode++; | ecode++; |
# | Line 1949 for (;;) | Line 2099 for (;;) |
2099 | subject unless noteol is set. */ | subject unless noteol is set. */ |
2100 | ||
2101 | case OP_DOLL: | case OP_DOLL: |
2102 | if (md->noteol) MRRETURN(MATCH_NOMATCH); | if (md->noteol) RRETURN(MATCH_NOMATCH); |
2103 | if (!md->endonly) goto ASSERT_NL_OR_EOS; | if (!md->endonly) goto ASSERT_NL_OR_EOS; |
2104 | ||
2105 | /* ... else fall through for endonly */ | /* ... else fall through for endonly */ |
# | Line 1957 for (;;) | Line 2107 for (;;) |
2107 | /* End of subject assertion (\z) */ | /* End of subject assertion (\z) */ |
2108 | ||
2109 | case OP_EOD: | case OP_EOD: |
2110 | if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); |
2111 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2112 | ecode++; | ecode++; |
2113 | break; | break; |
# | Line 1968 for (;;) | Line 2118 for (;;) |
2118 | ASSERT_NL_OR_EOS: | ASSERT_NL_OR_EOS: |
2119 | if (eptr < md->end_subject && | if (eptr < md->end_subject && |
2120 | (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) | (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) |
2121 | MRRETURN(MATCH_NOMATCH); | { |
2122 | if (md->partial != 0 && | |
2123 | eptr + 1 >= md->end_subject && | |
2124 | NLBLOCK->nltype == NLTYPE_FIXED && | |
2125 | NLBLOCK->nllen == 2 && | |
2126 | *eptr == NLBLOCK->nl[0]) | |
2127 | { | |
2128 | md->hitend = TRUE; | |
2129 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2130 | } | |
2131 | RRETURN(MATCH_NOMATCH); | |
2132 | } | |
2133 | ||
2134 | /* Either at end of string or \n before end. */ | /* Either at end of string or \n before end. */ |
2135 | ||
# | Line 1987 for (;;) | Line 2148 for (;;) |
2148 | be "non-word" characters. Remember the earliest consulted character for | be "non-word" characters. Remember the earliest consulted character for |
2149 | partial matching. */ | partial matching. */ |
2150 | ||
2151 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2152 | if (utf8) | if (utf) |
2153 | { | { |
2154 | /* Get status of previous character */ | /* Get status of previous character */ |
2155 | ||
2156 | if (eptr == md->start_subject) prev_is_word = FALSE; else | if (eptr == md->start_subject) prev_is_word = FALSE; else |
2157 | { | { |
2158 | USPTR lastptr = eptr - 1; | PCRE_PUCHAR lastptr = eptr - 1; |
2159 | while((*lastptr & 0xc0) == 0x80) lastptr--; | BACKCHAR(lastptr); |
2160 | if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; | if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; |
2161 | GETCHAR(c, lastptr); | GETCHAR(c, lastptr); |
2162 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
# | Line 2060 for (;;) | Line 2221 for (;;) |
2221 | } | } |
2222 | else | else |
2223 | #endif | #endif |
2224 | prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); | prev_is_word = MAX_255(eptr[-1]) |
2225 | && ((md->ctypes[eptr[-1]] & ctype_word) != 0); | |
2226 | } | } |
2227 | ||
2228 | /* Get status of next character */ | /* Get status of next character */ |
# | Line 2083 for (;;) | Line 2245 for (;;) |
2245 | } | } |
2246 | else | else |
2247 | #endif | #endif |
2248 | cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); | cur_is_word = MAX_255(*eptr) |
2249 | && ((md->ctypes[*eptr] & ctype_word) != 0); | |
2250 | } | } |
2251 | ||
2252 | /* Now see if the situation is what we want */ | /* Now see if the situation is what we want */ |
2253 | ||
2254 | if ((*ecode++ == OP_WORD_BOUNDARY)? | if ((*ecode++ == OP_WORD_BOUNDARY)? |
2255 | cur_is_word == prev_is_word : cur_is_word != prev_is_word) | cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
2256 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2257 | } | } |
2258 | break; | break; |
2259 | ||
2260 | /* Match a single character type; inline for speed */ | /* Match any single character except newline; have to take care with |
2261 | CRLF newlines and partial matching. */ | |
2262 | ||
2263 | case OP_ANY: | case OP_ANY: |
2264 | if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
2265 | if (md->partial != 0 && | |
2266 | eptr + 1 >= md->end_subject && | |
2267 | NLBLOCK->nltype == NLTYPE_FIXED && | |
2268 | NLBLOCK->nllen == 2 && | |
2269 | *eptr == NLBLOCK->nl[0]) | |
2270 | { | |
2271 | md->hitend = TRUE; | |
2272 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2273 | } | |
2274 | ||
2275 | /* Fall through */ | /* Fall through */ |
2276 | ||
2277 | /* Match any single character whatsoever. */ | |
2278 | ||
2279 | case OP_ALLANY: | case OP_ALLANY: |
2280 | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
2281 | { /* not be updated before SCHECK_PARTIAL. */ | { /* not be updated before SCHECK_PARTIAL. */ |
2282 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2283 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2284 | } | } |
2285 | eptr++; | eptr++; |
2286 | if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | #ifdef SUPPORT_UTF |
2287 | if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
2288 | #endif | |
2289 | ecode++; | ecode++; |
2290 | break; | break; |
2291 | ||
# | Line 2118 for (;;) | Line 2296 for (;;) |
2296 | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
2297 | { /* not be updated before SCHECK_PARTIAL. */ | { /* not be updated before SCHECK_PARTIAL. */ |
2298 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2299 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2300 | } | } |
2301 | eptr++; | eptr++; |
2302 | ecode++; | ecode++; |
# | Line 2128 for (;;) | Line 2306 for (;;) |
2306 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2307 | { | { |
2308 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2309 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2310 | } | } |
2311 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2312 | if ( | if ( |
2313 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2314 | c < 256 && | c < 256 && |
2315 | #endif | #endif |
2316 | (md->ctypes[c] & ctype_digit) != 0 | (md->ctypes[c] & ctype_digit) != 0 |
2317 | ) | ) |
2318 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2319 | ecode++; | ecode++; |
2320 | break; | break; |
2321 | ||
# | Line 2145 for (;;) | Line 2323 for (;;) |
2323 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2324 | { | { |
2325 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2326 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2327 | } | } |
2328 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2329 | if ( | if ( |
2330 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2331 | c >= 256 || | c > 255 || |
2332 | #endif | #endif |
2333 | (md->ctypes[c] & ctype_digit) == 0 | (md->ctypes[c] & ctype_digit) == 0 |
2334 | ) | ) |
2335 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2336 | ecode++; | ecode++; |
2337 | break; | break; |
2338 | ||
# | Line 2162 for (;;) | Line 2340 for (;;) |
2340 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2341 | { | { |
2342 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2343 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2344 | } | } |
2345 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2346 | if ( | if ( |
2347 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2348 | c < 256 && | c < 256 && |
2349 | #endif | #endif |
2350 | (md->ctypes[c] & ctype_space) != 0 | (md->ctypes[c] & ctype_space) != 0 |
2351 | ) | ) |
2352 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2353 | ecode++; | ecode++; |
2354 | break; | break; |
2355 | ||
# | Line 2179 for (;;) | Line 2357 for (;;) |
2357 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2358 | { | { |
2359 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2360 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2361 | } | } |
2362 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2363 | if ( | if ( |
2364 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2365 | c >= 256 || | c > 255 || |
2366 | #endif | #endif |
2367 | (md->ctypes[c] & ctype_space) == 0 | (md->ctypes[c] & ctype_space) == 0 |
2368 | ) | ) |
2369 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2370 | ecode++; | ecode++; |
2371 | break; | break; |
2372 | ||
# | Line 2196 for (;;) | Line 2374 for (;;) |
2374 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2375 | { | { |
2376 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2377 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2378 | } | } |
2379 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2380 | if ( | if ( |
2381 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2382 | c < 256 && | c < 256 && |
2383 | #endif | #endif |
2384 | (md->ctypes[c] & ctype_word) != 0 | (md->ctypes[c] & ctype_word) != 0 |
2385 | ) | ) |
2386 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2387 | ecode++; | ecode++; |
2388 | break; | break; |
2389 | ||
# | Line 2213 for (;;) | Line 2391 for (;;) |
2391 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2392 | { | { |
2393 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2394 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2395 | } | } |
2396 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2397 | if ( | if ( |
2398 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2399 | c >= 256 || | c > 255 || |
2400 | #endif | #endif |
2401 | (md->ctypes[c] & ctype_word) == 0 | (md->ctypes[c] & ctype_word) == 0 |
2402 | ) | ) |
2403 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2404 | ecode++; | ecode++; |
2405 | break; | break; |
2406 | ||
# | Line 2230 for (;;) | Line 2408 for (;;) |
2408 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2409 | { | { |
2410 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2411 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2412 | } | } |
2413 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2414 | switch(c) | switch(c) |
2415 | { | { |
2416 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
2417 | ||
2418 | case 0x000d: | case 0x000d: |
2419 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr >= md->end_subject) |
2420 | { | |
2421 | SCHECK_PARTIAL(); | |
2422 | } | |
2423 | else if (*eptr == 0x0a) eptr++; | |
2424 | break; | break; |
2425 | ||
2426 | case 0x000a: | case 0x000a: |
# | Line 2249 for (;;) | Line 2431 for (;;) |
2431 | case 0x0085: | case 0x0085: |
2432 | case 0x2028: | case 0x2028: |
2433 | case 0x2029: | case 0x2029: |
2434 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
2435 | break; | break; |
2436 | } | } |
2437 | ecode++; | ecode++; |
# | Line 2259 for (;;) | Line 2441 for (;;) |
2441 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2442 | { | { |
2443 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2444 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2445 | } | } |
2446 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2447 | switch(c) | switch(c) |
# | Line 2284 for (;;) | Line 2466 for (;;) |
2466 | case 0x202f: /* NARROW NO-BREAK SPACE */ | case 0x202f: /* NARROW NO-BREAK SPACE */ |
2467 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
2468 | case 0x3000: /* IDEOGRAPHIC SPACE */ | case 0x3000: /* IDEOGRAPHIC SPACE */ |
2469 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2470 | } | } |
2471 | ecode++; | ecode++; |
2472 | break; | break; |
# | Line 2293 for (;;) | Line 2475 for (;;) |
2475 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2476 | { | { |
2477 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2478 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2479 | } | } |
2480 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2481 | switch(c) | switch(c) |
2482 | { | { |
2483 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
2484 | case 0x09: /* HT */ | case 0x09: /* HT */ |
2485 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
2486 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
# | Line 2327 for (;;) | Line 2509 for (;;) |
2509 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2510 | { | { |
2511 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2512 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2513 | } | } |
2514 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2515 | switch(c) | switch(c) |
# | Line 2340 for (;;) | Line 2522 for (;;) |
2522 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
2523 | case 0x2028: /* LINE SEPARATOR */ | case 0x2028: /* LINE SEPARATOR */ |
2524 | case 0x2029: /* PARAGRAPH SEPARATOR */ | case 0x2029: /* PARAGRAPH SEPARATOR */ |
2525 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2526 | } | } |
2527 | ecode++; | ecode++; |
2528 | break; | break; |
# | Line 2349 for (;;) | Line 2531 for (;;) |
2531 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2532 | { | { |
2533 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2534 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2535 | } | } |
2536 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2537 | switch(c) | switch(c) |
2538 | { | { |
2539 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
2540 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
2541 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
2542 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
# | Line 2376 for (;;) | Line 2558 for (;;) |
2558 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2559 | { | { |
2560 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2561 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2562 | } | } |
2563 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2564 | { | { |
# | Line 2385 for (;;) | Line 2567 for (;;) |
2567 | switch(ecode[1]) | switch(ecode[1]) |
2568 | { | { |
2569 | case PT_ANY: | case PT_ANY: |
2570 | if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH); | if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH); |
2571 | break; | break; |
2572 | ||
2573 | case PT_LAMP: | case PT_LAMP: |
2574 | if ((prop->chartype == ucp_Lu || | if ((prop->chartype == ucp_Lu || |
2575 | prop->chartype == ucp_Ll || | prop->chartype == ucp_Ll || |
2576 | prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) | prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) |
2577 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2578 | break; | break; |
2579 | ||
2580 | case PT_GC: | case PT_GC: |
2581 | if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP)) | if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) |
2582 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2583 | break; | break; |
2584 | ||
2585 | case PT_PC: | case PT_PC: |
2586 | if ((ecode[2] != prop->chartype) == (op == OP_PROP)) | if ((ecode[2] != prop->chartype) == (op == OP_PROP)) |
2587 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2588 | break; | break; |
2589 | ||
2590 | case PT_SC: | case PT_SC: |
2591 | if ((ecode[2] != prop->script) == (op == OP_PROP)) | if ((ecode[2] != prop->script) == (op == OP_PROP)) |
2592 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2593 | break; | break; |
2594 | ||
2595 | /* These are specials */ | /* These are specials */ |
2596 | ||
2597 | case PT_ALNUM: | case PT_ALNUM: |
2598 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
2599 | _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) | PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) |
2600 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2601 | break; | break; |
2602 | ||
2603 | case PT_SPACE: /* Perl space */ | case PT_SPACE: /* Perl space */ |
2604 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
2605 | c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) | c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) |
2606 | == (op == OP_NOTPROP)) | == (op == OP_NOTPROP)) |
2607 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2608 | break; | break; |
2609 | ||
2610 | case PT_PXSPACE: /* POSIX space */ | case PT_PXSPACE: /* POSIX space */ |
2611 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
2612 | c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || | c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
2613 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
2614 | == (op == OP_NOTPROP)) | == (op == OP_NOTPROP)) |
2615 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2616 | break; | break; |
2617 | ||
2618 | case PT_WORD: | case PT_WORD: |
2619 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
2620 | _pcre_ucp_gentype[prop->chartype] == ucp_N || | PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
2621 | c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) | c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) |
2622 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2623 | break; | break; |
2624 | ||
2625 | /* This should never occur */ | /* This should never occur */ |
# | Line 2457 for (;;) | Line 2639 for (;;) |
2639 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2640 | { | { |
2641 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2642 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2643 | } | } |
2644 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2645 | if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); | if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
2646 | while (eptr < md->end_subject) | while (eptr < md->end_subject) |
2647 | { | { |
2648 | int len = 1; | int len = 1; |
2649 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
2650 | if (UCD_CATEGORY(c) != ucp_M) break; | if (UCD_CATEGORY(c) != ucp_M) break; |
2651 | eptr += len; | eptr += len; |
2652 | } | } |
2653 | CHECK_PARTIAL(); | |
2654 | ecode++; | ecode++; |
2655 | break; | break; |
2656 | #endif | #endif |
# | Line 2485 for (;;) | Line 2668 for (;;) |
2668 | case OP_REFI: | case OP_REFI: |
2669 | caseless = op == OP_REFI; | caseless = op == OP_REFI; |
2670 | offset = GET2(ecode, 1) << 1; /* Doubled ref number */ | offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
2671 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
2672 | ||
2673 | /* If the reference is unset, there are two possibilities: | /* If the reference is unset, there are two possibilities: |
2674 | ||
# | Line 2525 for (;;) | Line 2708 for (;;) |
2708 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
2709 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
2710 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
2711 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
2712 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
2713 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
2714 | break; | break; |
2715 | ||
2716 | default: /* No repeat follows */ | default: /* No repeat follows */ |
2717 | if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) |
2718 | { | { |
2719 | if (length == -2) eptr = md->end_subject; /* Partial match */ | |
2720 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2721 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2722 | } | } |
2723 | eptr += length; | eptr += length; |
2724 | continue; /* With the main loop */ | continue; /* With the main loop */ |
2725 | } | } |
2726 | ||
2727 | /* Handle repeated back references. If the length of the reference is | /* Handle repeated back references. If the length of the reference is |
2728 | zero, just continue with the main loop. */ | zero, just continue with the main loop. If the length is negative, it |
2729 | means the reference is unset in non-Java-compatible mode. If the minimum is | |
2730 | zero, we can continue at the same level without recursion. For any other | |
2731 | minimum, carrying on will result in NOMATCH. */ | |
2732 | ||
2733 | if (length == 0) continue; | if (length == 0) continue; |
2734 | if (length < 0 && min == 0) continue; | |
2735 | ||
2736 | /* First, ensure the minimum number of matches are present. We get back | /* First, ensure the minimum number of matches are present. We get back |
2737 | the length of the reference string explicitly rather than passing the | the length of the reference string explicitly rather than passing the |
# | Line 2554 for (;;) | Line 2742 for (;;) |
2742 | int slength; | int slength; |
2743 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2744 | { | { |
2745 | if (slength == -2) eptr = md->end_subject; /* Partial match */ | |
2746 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2747 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2748 | } | } |
2749 | eptr += slength; | eptr += slength; |
2750 | } | } |
# | Line 2574 for (;;) | Line 2763 for (;;) |
2763 | int slength; | int slength; |
2764 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); |
2765 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2766 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2767 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2768 | { | { |
2769 | if (slength == -2) eptr = md->end_subject; /* Partial match */ | |
2770 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2771 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2772 | } | } |
2773 | eptr += slength; | eptr += slength; |
2774 | } | } |
# | Line 2595 for (;;) | Line 2785 for (;;) |
2785 | int slength; | int slength; |
2786 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2787 | { | { |
2788 | CHECK_PARTIAL(); | /* Can't use CHECK_PARTIAL because we don't want to update eptr in |
2789 | the soft partial matching case. */ | |
2790 | ||
2791 | if (slength == -2 && md->partial != 0 && | |
2792 | md->end_subject > md->start_used_ptr) | |
2793 | { | |
2794 | md->hitend = TRUE; | |
2795 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2796 | } | |
2797 | break; | break; |
2798 | } | } |
2799 | eptr += slength; | eptr += slength; |
2800 | } | } |
2801 | ||
2802 | while (eptr >= pp) | while (eptr >= pp) |
2803 | { | { |
2804 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM15); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM15); |
2805 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2806 | eptr -= length; | eptr -= length; |
2807 | } | } |
2808 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2809 | } | } |
2810 | /* Control never gets here */ | /* Control never gets here */ |
2811 | ||
# | Line 2624 for (;;) | Line 2823 for (;;) |
2823 | case OP_NCLASS: | case OP_NCLASS: |
2824 | case OP_CLASS: | case OP_CLASS: |
2825 | { | { |
2826 | /* The data variable is saved across frames, so the byte map needs to | |
2827 | be stored there. */ | |
2828 | #define BYTE_MAP ((pcre_uint8 *)data) | |
2829 | data = ecode + 1; /* Save for matching */ | data = ecode + 1; /* Save for matching */ |
2830 | ecode += 33; /* Advance past the item */ | ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */ |
2831 | ||
2832 | switch (*ecode) | switch (*ecode) |
2833 | { | { |
# | Line 2646 for (;;) | Line 2848 for (;;) |
2848 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
2849 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
2850 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
2851 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
2852 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
2853 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
2854 | break; | break; |
2855 | ||
2856 | default: /* No repeat follows */ | default: /* No repeat follows */ |
# | Line 2658 for (;;) | Line 2860 for (;;) |
2860 | ||
2861 | /* First, ensure the minimum number of matches are present. */ | /* First, ensure the minimum number of matches are present. */ |
2862 | ||
2863 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2864 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2865 | { | { |
2866 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
2867 | { | { |
2868 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2869 | { | { |
2870 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2871 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2872 | } | } |
2873 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
2874 | if (c > 255) | if (c > 255) |
2875 | { | { |
2876 | if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH); | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
2877 | } | } |
2878 | else | else |
2879 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | ||
} | ||
2880 | } | } |
2881 | } | } |
2882 | else | else |
2883 | #endif | #endif |
2884 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2885 | { | { |
2886 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
2887 | { | { |
2888 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2889 | { | { |
2890 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2891 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2892 | } | } |
2893 | c = *eptr++; | c = *eptr++; |
2894 | if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | #ifndef COMPILE_PCRE8 |
2895 | if (c > 255) | |
2896 | { | |
2897 | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); | |
2898 | } | |
2899 | else | |
2900 | #endif | |
2901 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); | |
2902 | } | } |
2903 | } | } |
2904 | ||
# | Line 2706 for (;;) | Line 2912 for (;;) |
2912 | ||
2913 | if (minimize) | if (minimize) |
2914 | { | { |
2915 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2916 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2917 | { | { |
2918 | for (fi = min;; fi++) | for (fi = min;; fi++) |
2919 | { | { |
2920 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); |
2921 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2922 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2923 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2924 | { | { |
2925 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2926 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2927 | } | } |
2928 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
2929 | if (c > 255) | if (c > 255) |
2930 | { | { |
2931 | if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH); | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
2932 | } | } |
2933 | else | else |
2934 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | ||
} | ||
2935 | } | } |
2936 | } | } |
2937 | else | else |
2938 | #endif | #endif |
2939 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2940 | { | { |
2941 | for (fi = min;; fi++) | for (fi = min;; fi++) |
2942 | { | { |
2943 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); |
2944 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2945 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2946 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2947 | { | { |
2948 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2949 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2950 | } | } |
2951 | c = *eptr++; | c = *eptr++; |
2952 | if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | #ifndef COMPILE_PCRE8 |
2953 | if (c > 255) | |
2954 | { | |
2955 | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); | |
2956 | } | |
2957 | else | |
2958 | #endif | |
2959 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); | |
2960 | } | } |
2961 | } | } |
2962 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 2758 for (;;) | Line 2968 for (;;) |
2968 | { | { |
2969 | pp = eptr; | pp = eptr; |
2970 | ||
2971 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2972 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2973 | { | { |
2974 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
2975 | { | { |
# | Line 2776 for (;;) | Line 2985 for (;;) |
2985 | if (op == OP_CLASS) break; | if (op == OP_CLASS) break; |
2986 | } | } |
2987 | else | else |
2988 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; |
if ((data[c/8] & (1 << (c&7))) == 0) break; | ||
} | ||
2989 | eptr += len; | eptr += len; |
2990 | } | } |
2991 | for (;;) | for (;;) |
# | Line 2791 for (;;) | Line 2998 for (;;) |
2998 | } | } |
2999 | else | else |
3000 | #endif | #endif |
3001 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3002 | { | { |
3003 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3004 | { | { |
# | Line 2801 for (;;) | Line 3008 for (;;) |
3008 | break; | break; |
3009 | } | } |
3010 | c = *eptr; | c = *eptr; |
3011 | if ((data[c/8] & (1 << (c&7))) == 0) break; | #ifndef COMPILE_PCRE8 |
3012 | if (c > 255) | |
3013 | { | |
3014 | if (op == OP_CLASS) break; | |
3015 | } | |
3016 | else | |
3017 | #endif | |
3018 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; | |
3019 | eptr++; | eptr++; |
3020 | } | } |
3021 | while (eptr >= pp) | while (eptr >= pp) |
# | Line 2812 for (;;) | Line 3026 for (;;) |
3026 | } | } |
3027 | } | } |
3028 | ||
3029 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3030 | } | } |
3031 | #undef BYTE_MAP | |
3032 | } | } |
3033 | /* Control never gets here */ | /* Control never gets here */ |
3034 | ||
# | Line 2822 for (;;) | Line 3037 for (;;) |
3037 | when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8 | when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8 |
3038 | mode, because Unicode properties are supported in non-UTF-8 mode. */ | mode, because Unicode properties are supported in non-UTF-8 mode. */ |
3039 | ||
3040 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
3041 | case OP_XCLASS: | case OP_XCLASS: |
3042 | { | { |
3043 | data = ecode + 1 + LINK_SIZE; /* Save for matching */ | data = ecode + 1 + LINK_SIZE; /* Save for matching */ |
# | Line 2847 for (;;) | Line 3062 for (;;) |
3062 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
3063 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
3064 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
3065 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
3066 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
3067 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
3068 | break; | break; |
3069 | ||
3070 | default: /* No repeat follows */ | default: /* No repeat follows */ |
# | Line 2864 for (;;) | Line 3079 for (;;) |
3079 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3080 | { | { |
3081 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3082 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3083 | } | } |
3084 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
3085 | if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH); | if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); |
3086 | } | } |
3087 | ||
3088 | /* If max == min we can continue with the main loop without the | /* If max == min we can continue with the main loop without the |
# | Line 2884 for (;;) | Line 3099 for (;;) |
3099 | { | { |
3100 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); |
3101 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3102 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3103 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3104 | { | { |
3105 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3106 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3107 | } | } |
3108 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
3109 | if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH); | if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); |
3110 | } | } |
3111 | /* Control never gets here */ | /* Control never gets here */ |
3112 | } | } |
# | Line 2909 for (;;) | Line 3124 for (;;) |
3124 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3125 | break; | break; |
3126 | } | } |
3127 | #ifdef SUPPORT_UTF | |
3128 | GETCHARLENTEST(c, eptr, len); | GETCHARLENTEST(c, eptr, len); |
3129 | if (!_pcre_xclass(c, data)) break; | #else |
3130 | c = *eptr; | |
3131 | #endif | |
3132 | if (!PRIV(xclass)(c, data, utf)) break; | |
3133 | eptr += len; | eptr += len; |
3134 | } | } |
3135 | for(;;) | for(;;) |
# | Line 2918 for (;;) | Line 3137 for (;;) |
3137 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); |
3138 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3139 | if (eptr-- == pp) break; /* Stop if tried at original pos */ | if (eptr-- == pp) break; /* Stop if tried at original pos */ |
3140 | if (utf8) BACKCHAR(eptr); | #ifdef SUPPORT_UTF |
3141 | if (utf) BACKCHAR(eptr); | |
3142 | #endif | |
3143 | } | } |
3144 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3145 | } | } |
3146 | ||
3147 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 2930 for (;;) | Line 3151 for (;;) |
3151 | /* Match a single character, casefully */ | /* Match a single character, casefully */ |
3152 | ||
3153 | case OP_CHAR: | case OP_CHAR: |
3154 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3155 | if (utf8) | if (utf) |
3156 | { | { |
3157 | length = 1; | length = 1; |
3158 | ecode++; | ecode++; |
# | Line 2939 for (;;) | Line 3160 for (;;) |
3160 | if (length > md->end_subject - eptr) | if (length > md->end_subject - eptr) |
3161 | { | { |
3162 | CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ | CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
3163 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3164 | } | } |
3165 | while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH); | while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); |
3166 | } | } |
3167 | else | else |
3168 | #endif | #endif |
3169 | /* Not UTF mode */ | |
/* Non-UTF-8 mode */ | ||
3170 | { | { |
3171 | if (md->end_subject - eptr < 1) | if (md->end_subject - eptr < 1) |
3172 | { | { |
3173 | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
3174 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3175 | } | } |
3176 | if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH); | if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); |
3177 | ecode += 2; | ecode += 2; |
3178 | } | } |
3179 | break; | break; |
3180 | ||
3181 | /* Match a single character, caselessly */ | /* Match a single character, caselessly. If we are at the end of the |
3182 | subject, give up immediately. */ | |
3183 | ||
3184 | case OP_CHARI: | case OP_CHARI: |
3185 | #ifdef SUPPORT_UTF8 | if (eptr >= md->end_subject) |
3186 | if (utf8) | { |
3187 | SCHECK_PARTIAL(); | |
3188 | RRETURN(MATCH_NOMATCH); | |
3189 | } | |
3190 | ||
3191 | #ifdef SUPPORT_UTF | |
3192 | if (utf) | |
3193 | { | { |
3194 | length = 1; | length = 1; |
3195 | ecode++; | ecode++; |
3196 | GETCHARLEN(fc, ecode, length); | GETCHARLEN(fc, ecode, length); |
3197 | ||
if (length > md->end_subject - eptr) | ||
{ | ||
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ | ||
MRRETURN(MATCH_NOMATCH); | ||
} | ||
3198 | /* If the pattern character's value is < 128, we have only one byte, and | /* If the pattern character's value is < 128, we have only one byte, and |
3199 | can use the fast lookup table. */ | we know that its other case must also be one byte long, so we can use the |
3200 | fast lookup table. We know that there is at least one byte left in the | |
3201 | subject. */ | |
3202 | ||
3203 | if (fc < 128) | if (fc < 128) |
3204 | { | { |
3205 | if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (md->lcc[fc] |
3206 | != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); | |
3207 | ecode++; | |
3208 | eptr++; | |
3209 | } | } |
3210 | ||
3211 | /* Otherwise we must pick up the subject character */ | /* Otherwise we must pick up the subject character. Note that we cannot |
3212 | use the value of "length" to check for sufficient bytes left, because the | |
3213 | other case of the character may have more or fewer bytes. */ | |
3214 | ||
3215 | else | else |
3216 | { | { |
# | Line 2998 for (;;) | Line 3226 for (;;) |
3226 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3227 | if (dc != UCD_OTHERCASE(fc)) | if (dc != UCD_OTHERCASE(fc)) |
3228 | #endif | #endif |
3229 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3230 | } | } |
3231 | } | } |
3232 | } | } |
3233 | else | else |
3234 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
3235 | ||
3236 | /* Non-UTF-8 mode */ | /* Not UTF mode */ |
3237 | { | { |
3238 | if (md->end_subject - eptr < 1) | if (TABLE_GET(ecode[1], md->lcc, ecode[1]) |
3239 | { | != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); |
3240 | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ | eptr++; |
MRRETURN(MATCH_NOMATCH); | ||
} | ||
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | ||
3241 | ecode += 2; | ecode += 2; |
3242 | } | } |
3243 | break; | break; |
# | Line 3022 for (;;) | Line 3247 for (;;) |
3247 | case OP_EXACT: | case OP_EXACT: |
3248 | case OP_EXACTI: | case OP_EXACTI: |
3249 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3250 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3251 | goto REPEATCHAR; | goto REPEATCHAR; |
3252 | ||
3253 | case OP_POSUPTO: | case OP_POSUPTO: |
# | Line 3037 for (;;) | Line 3262 for (;;) |
3262 | min = 0; | min = 0; |
3263 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3264 | minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; | minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; |
3265 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3266 | goto REPEATCHAR; | goto REPEATCHAR; |
3267 | ||
3268 | case OP_POSSTAR: | case OP_POSSTAR: |
# | Line 3085 for (;;) | Line 3310 for (;;) |
3310 | /* Common code for all repeated single-character matches. */ | /* Common code for all repeated single-character matches. */ |
3311 | ||
3312 | REPEATCHAR: | REPEATCHAR: |
3313 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3314 | if (utf8) | if (utf) |
3315 | { | { |
3316 | length = 1; | length = 1; |
3317 | charptr = ecode; | charptr = ecode; |
# | Line 3102 for (;;) | Line 3327 for (;;) |
3327 | unsigned int othercase; | unsigned int othercase; |
3328 | if (op >= OP_STARI && /* Caseless */ | if (op >= OP_STARI && /* Caseless */ |
3329 | (othercase = UCD_OTHERCASE(fc)) != fc) | (othercase = UCD_OTHERCASE(fc)) != fc) |
3330 | oclength = _pcre_ord2utf8(othercase, occhars); | oclength = PRIV(ord2utf)(othercase, occhars); |
3331 | else oclength = 0; | else oclength = 0; |
3332 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3333 | ||
3334 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3335 | { | { |
3336 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3337 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3338 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3339 | else if (oclength > 0 && | else if (oclength > 0 && |
3340 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3341 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3342 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3343 | else | else |
3344 | { | { |
3345 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
3346 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3347 | } | } |
3348 | } | } |
3349 | ||
# | Line 3130 for (;;) | Line 3355 for (;;) |
3355 | { | { |
3356 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); |
3357 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3358 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3359 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3360 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3361 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3362 | else if (oclength > 0 && | else if (oclength > 0 && |
3363 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3364 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3365 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3366 | else | else |
3367 | { | { |
3368 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
3369 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3370 | } | } |
3371 | } | } |
3372 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3153 for (;;) | Line 3378 for (;;) |
3378 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3379 | { | { |
3380 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3381 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3382 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3383 | else if (oclength > 0 && | else if (oclength > 0 && |
3384 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3385 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3386 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3387 | else | else |
3388 | { | { |
# | Line 3172 for (;;) | Line 3397 for (;;) |
3397 | { | { |
3398 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); |
3399 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3400 | if (eptr == pp) { MRRETURN(MATCH_NOMATCH); } | if (eptr == pp) { RRETURN(MATCH_NOMATCH); } |
3401 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3402 | eptr--; | eptr--; |
3403 | BACKCHAR(eptr); | BACKCHAR(eptr); |
# | Line 3189 for (;;) | Line 3414 for (;;) |
3414 | value of fc will always be < 128. */ | value of fc will always be < 128. */ |
3415 | } | } |
3416 | else | else |
3417 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
3418 | /* When not in UTF-8 mode, load a single-byte character. */ | |
3419 | fc = *ecode++; | |
3420 | ||
3421 | /* When not in UTF-8 mode, load a single-byte character. */ | /* The value of fc at this point is always one character, though we may |
3422 | or may not be in UTF mode. The code is duplicated for the caseless and | |
fc = *ecode++; | ||
/* The value of fc at this point is always less than 256, though we may or | ||
may not be in UTF-8 mode. The code is duplicated for the caseless and | ||
3423 | caseful cases, for speed, since matching characters is likely to be quite | caseful cases, for speed, since matching characters is likely to be quite |
3424 | common. First, ensure the minimum number of matches are present. If min = | common. First, ensure the minimum number of matches are present. If min = |
3425 | max, continue at the same level without recursing. Otherwise, if | max, continue at the same level without recursing. Otherwise, if |
# | Line 3205 for (;;) | Line 3428 for (;;) |
3428 | maximizing, find the maximum number of characters and work backwards. */ | maximizing, find the maximum number of characters and work backwards. */ |
3429 | ||
3430 | DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, | DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, |
3431 | max, eptr)); | max, (char *)eptr)); |
3432 | ||
3433 | if (op >= OP_STARI) /* Caseless */ | if (op >= OP_STARI) /* Caseless */ |
3434 | { | { |
3435 | fc = md->lcc[fc]; | #ifdef COMPILE_PCRE8 |
3436 | /* fc must be < 128 if UTF is enabled. */ | |
3437 | foc = md->fcc[fc]; | |
3438 | #else | |
3439 | #ifdef SUPPORT_UTF | |
3440 | #ifdef SUPPORT_UCP | |
3441 | if (utf && fc > 127) | |
3442 | foc = UCD_OTHERCASE(fc); | |
3443 | #else | |
3444 | if (utf && fc > 127) | |
3445 | foc = fc; | |
3446 | #endif /* SUPPORT_UCP */ | |
3447 | else | |
3448 | #endif /* SUPPORT_UTF */ | |
3449 | foc = TABLE_GET(fc, md->fcc, fc); | |
3450 | #endif /* COMPILE_PCRE8 */ | |
3451 | ||
3452 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3453 | { | { |
3454 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3455 | { | { |
3456 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3457 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3458 | } | } |
3459 | if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
3460 | eptr++; | |
3461 | } | } |
3462 | if (min == max) continue; | if (min == max) continue; |
3463 | if (minimize) | if (minimize) |
# | Line 3226 for (;;) | Line 3466 for (;;) |
3466 | { | { |
3467 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); |
3468 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3469 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3470 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3471 | { | { |
3472 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3473 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3474 | } | } |
3475 | if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
3476 | eptr++; | |
3477 | } | } |
3478 | /* Control never gets here */ | /* Control never gets here */ |
3479 | } | } |
# | Line 3246 for (;;) | Line 3487 for (;;) |
3487 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3488 | break; | break; |
3489 | } | } |
3490 | if (fc != md->lcc[*eptr]) break; | if (fc != *eptr && foc != *eptr) break; |
3491 | eptr++; | eptr++; |
3492 | } | } |
3493 | ||
# | Line 3258 for (;;) | Line 3499 for (;;) |
3499 | eptr--; | eptr--; |
3500 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3501 | } | } |
3502 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3503 | } | } |
3504 | /* Control never gets here */ | /* Control never gets here */ |
3505 | } | } |
# | Line 3272 for (;;) | Line 3513 for (;;) |
3513 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3514 | { | { |
3515 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3516 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3517 | } | } |
3518 | if (fc != *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
3519 | } | } |
3520 | ||
3521 | if (min == max) continue; | if (min == max) continue; |
# | Line 3285 for (;;) | Line 3526 for (;;) |
3526 | { | { |
3527 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); |
3528 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3529 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3530 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3531 | { | { |
3532 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3533 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3534 | } | } |
3535 | if (fc != *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
3536 | } | } |
3537 | /* Control never gets here */ | /* Control never gets here */ |
3538 | } | } |
# | Line 3316 for (;;) | Line 3557 for (;;) |
3557 | eptr--; | eptr--; |
3558 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3559 | } | } |
3560 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3561 | } | } |
3562 | } | } |
3563 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3329 for (;;) | Line 3570 for (;;) |
3570 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3571 | { | { |
3572 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3573 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3574 | } | } |
3575 | ecode++; | #ifdef SUPPORT_UTF |
3576 | GETCHARINCTEST(c, eptr); | if (utf) |
if (op == OP_NOTI) /* The caseless case */ | ||
3577 | { | { |
3578 | #ifdef SUPPORT_UTF8 | register unsigned int ch, och; |
3579 | if (c < 256) | |
3580 | #endif | ecode++; |
3581 | c = md->lcc[c]; | GETCHARINC(ch, ecode); |
3582 | if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH); | GETCHARINC(c, eptr); |
3583 | ||
3584 | if (op == OP_NOT) | |
3585 | { | |
3586 | if (ch == c) RRETURN(MATCH_NOMATCH); | |
3587 | } | |
3588 | else | |
3589 | { | |
3590 | #ifdef SUPPORT_UCP | |
3591 | if (ch > 127) | |
3592 | och = UCD_OTHERCASE(ch); | |
3593 | #else | |
3594 | if (ch > 127) | |
3595 | och = ch; | |
3596 | #endif /* SUPPORT_UCP */ | |
3597 | else | |
3598 | och = TABLE_GET(ch, md->fcc, ch); | |
3599 | if (ch == c || och == c) RRETURN(MATCH_NOMATCH); | |
3600 | } | |
3601 | } | } |
3602 | else /* Caseful */ | else |
3603 | #endif | |
3604 | { | { |
3605 | if (*ecode++ == c) MRRETURN(MATCH_NOMATCH); | register unsigned int ch = ecode[1]; |
3606 | c = *eptr++; | |
3607 | if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c)) | |
3608 | RRETURN(MATCH_NOMATCH); | |
3609 | ecode += 2; | |
3610 | } | } |
3611 | break; | break; |
3612 | ||
# | Line 3357 for (;;) | Line 3620 for (;;) |
3620 | case OP_NOTEXACT: | case OP_NOTEXACT: |
3621 | case OP_NOTEXACTI: | case OP_NOTEXACTI: |
3622 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3623 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3624 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3625 | ||
3626 | case OP_NOTUPTO: | case OP_NOTUPTO: |
# | Line 3367 for (;;) | Line 3630 for (;;) |
3630 | min = 0; | min = 0; |
3631 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3632 | minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; | minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; |
3633 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3634 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3635 | ||
3636 | case OP_NOTPOSSTAR: | case OP_NOTPOSSTAR: |
# | Line 3399 for (;;) | Line 3662 for (;;) |
3662 | possessive = TRUE; | possessive = TRUE; |
3663 | min = 0; | min = 0; |
3664 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3665 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3666 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3667 | ||
3668 | case OP_NOTSTAR: | case OP_NOTSTAR: |
# | Line 3423 for (;;) | Line 3686 for (;;) |
3686 | /* Common code for all repeated single-byte matches. */ | /* Common code for all repeated single-byte matches. */ |
3687 | ||
3688 | REPEATNOTCHAR: | REPEATNOTCHAR: |
3689 | fc = *ecode++; | GETCHARINCTEST(fc, ecode); |
3690 | ||
3691 | /* The code is duplicated for the caseless and caseful cases, for speed, | /* The code is duplicated for the caseless and caseful cases, for speed, |
3692 | since matching characters is likely to be quite common. First, ensure the | since matching characters is likely to be quite common. First, ensure the |
# | Line 3434 for (;;) | Line 3697 for (;;) |
3697 | characters and work backwards. */ | characters and work backwards. */ |
3698 | ||
3699 | DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, | DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, |
3700 | max, eptr)); | max, (char *)eptr)); |
3701 | ||
3702 | if (op >= OP_NOTSTARI) /* Caseless */ | if (op >= OP_NOTSTARI) /* Caseless */ |
3703 | { | { |
3704 | fc = md->lcc[fc]; | #ifdef SUPPORT_UTF |
3705 | #ifdef SUPPORT_UCP | |
3706 | if (utf && fc > 127) | |
3707 | foc = UCD_OTHERCASE(fc); | |
3708 | #else | |
3709 | if (utf && fc > 127) | |
3710 | foc = fc; | |
3711 | #endif /* SUPPORT_UCP */ | |
3712 | else | |
3713 | #endif /* SUPPORT_UTF */ | |
3714 | foc = TABLE_GET(fc, md->fcc, fc); | |
3715 | ||
3716 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3717 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3718 | { | { |
3719 | register unsigned int d; | register unsigned int d; |
3720 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 3450 for (;;) | Line 3722 for (;;) |
3722 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3723 | { | { |
3724 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3725 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3726 | } | } |
3727 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3728 | if (d < 256) d = md->lcc[d]; | if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); |
if (fc == d) MRRETURN(MATCH_NOMATCH); | ||
3729 | } | } |
3730 | } | } |
3731 | else | else |
3732 | #endif | #endif |
3733 | /* Not UTF mode */ | |
/* Not UTF-8 mode */ | ||
3734 | { | { |
3735 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3736 | { | { |
3737 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3738 | { | { |
3739 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3740 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3741 | } | } |
3742 | if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); |
3743 | eptr++; | |
3744 | } | } |
3745 | } | } |
3746 | ||
# | Line 3477 for (;;) | Line 3748 for (;;) |
3748 | ||
3749 | if (minimize) | if (minimize) |
3750 | { | { |
3751 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3752 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3753 | { | { |
3754 | register unsigned int d; | register unsigned int d; |
3755 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3756 | { | { |
3757 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); |
3758 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3759 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3760 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3761 | { | { |
3762 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3763 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3764 | } | } |
3765 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3766 | if (d < 256) d = md->lcc[d]; | if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); |
if (fc == d) MRRETURN(MATCH_NOMATCH); | ||
3767 | } | } |
3768 | } | } |
3769 | else | else |
3770 | #endif | #endif |
3771 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3772 | { | { |
3773 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3774 | { | { |
3775 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); |
3776 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3777 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3778 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3779 | { | { |
3780 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3781 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3782 | } | } |
3783 | if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); |
3784 | eptr++; | |
3785 | } | } |
3786 | } | } |
3787 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3523 for (;;) | Line 3793 for (;;) |
3793 | { | { |
3794 | pp = eptr; | pp = eptr; |
3795 | ||
3796 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3797 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3798 | { | { |
3799 | register unsigned int d; | register unsigned int d; |
3800 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
# | Line 3537 for (;;) | Line 3806 for (;;) |
3806 | break; | break; |
3807 | } | } |
3808 | GETCHARLEN(d, eptr, len); | GETCHARLEN(d, eptr, len); |
3809 | if (d < 256) d = md->lcc[d]; | if (fc == d || (unsigned int)foc == d) break; |
if (fc == d) break; | ||
3810 | eptr += len; | eptr += len; |
3811 | } | } |
3812 | if (possessive) continue; | if (possessive) continue; |
3813 | for(;;) | for(;;) |
3814 | { | { |
3815 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); |
3816 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
# | Line 3552 for (;;) | Line 3820 for (;;) |
3820 | } | } |
3821 | else | else |
3822 | #endif | #endif |
3823 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3824 | { | { |
3825 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3826 | { | { |
# | Line 3561 for (;;) | Line 3829 for (;;) |
3829 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3830 | break; | break; |
3831 | } | } |
3832 | if (fc == md->lcc[*eptr]) break; | if (fc == *eptr || foc == *eptr) break; |
3833 | eptr++; | eptr++; |
3834 | } | } |
3835 | if (possessive) continue; | if (possessive) continue; |
# | Line 3573 for (;;) | Line 3841 for (;;) |
3841 | } | } |
3842 | } | } |
3843 | ||
3844 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3845 | } | } |
3846 | /* Control never gets here */ | /* Control never gets here */ |
3847 | } | } |
# | Line 3582 for (;;) | Line 3850 for (;;) |
3850 | ||
3851 | else | else |
3852 | { | { |
3853 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3854 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3855 | { | { |
3856 | register unsigned int d; | register unsigned int d; |
3857 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 3592 for (;;) | Line 3859 for (;;) |
3859 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3860 | { | { |
3861 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3862 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3863 | } | } |
3864 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3865 | if (fc == d) MRRETURN(MATCH_NOMATCH); | if (fc == d) RRETURN(MATCH_NOMATCH); |
3866 | } | } |
3867 | } | } |
3868 | else | else |
3869 | #endif | #endif |
3870 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3871 | { | { |
3872 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3873 | { | { |
3874 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3875 | { | { |
3876 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3877 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3878 | } | } |
3879 | if (fc == *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr++) RRETURN(MATCH_NOMATCH); |
3880 | } | } |
3881 | } | } |
3882 | ||
# | Line 3617 for (;;) | Line 3884 for (;;) |
3884 | ||
3885 | if (minimize) | if (minimize) |
3886 | { | { |
3887 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3888 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3889 | { | { |
3890 | register unsigned int d; | register unsigned int d; |
3891 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3892 | { | { |
3893 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); |
3894 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3895 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3896 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3897 | { | { |
3898 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3899 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3900 | } | } |
3901 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3902 | if (fc == d) MRRETURN(MATCH_NOMATCH); | if (fc == d) RRETURN(MATCH_NOMATCH); |
3903 | } | } |
3904 | } | } |
3905 | else | else |
3906 | #endif | #endif |
3907 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3908 | { | { |
3909 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3910 | { | { |
3911 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); |
3912 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3913 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3914 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3915 | { | { |
3916 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3917 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3918 | } | } |
3919 | if (fc == *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr++) RRETURN(MATCH_NOMATCH); |
3920 | } | } |
3921 | } | } |
3922 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3662 for (;;) | Line 3928 for (;;) |
3928 | { | { |
3929 | pp = eptr; | pp = eptr; |
3930 | ||
3931 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3932 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3933 | { | { |
3934 | register unsigned int d; | register unsigned int d; |
3935 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
# | Line 3690 for (;;) | Line 3955 for (;;) |
3955 | } | } |
3956 | else | else |
3957 | #endif | #endif |
3958 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3959 | { | { |
3960 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3961 | { | { |
# | Line 3711 for (;;) | Line 3976 for (;;) |
3976 | } | } |
3977 | } | } |
3978 | ||
3979 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3980 | } | } |
3981 | } | } |
3982 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3723 for (;;) | Line 3988 for (;;) |
3988 | case OP_TYPEEXACT: | case OP_TYPEEXACT: |
3989 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3990 | minimize = TRUE; | minimize = TRUE; |
3991 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3992 | goto REPEATTYPE; | goto REPEATTYPE; |
3993 | ||
3994 | case OP_TYPEUPTO: | case OP_TYPEUPTO: |
# | Line 3731 for (;;) | Line 3996 for (;;) |
3996 | min = 0; | min = 0; |
3997 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3998 | minimize = *ecode == OP_TYPEMINUPTO; | minimize = *ecode == OP_TYPEMINUPTO; |
3999 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
4000 | goto REPEATTYPE; | goto REPEATTYPE; |
4001 | ||
4002 | case OP_TYPEPOSSTAR: | case OP_TYPEPOSSTAR: |
# | Line 3759 for (;;) | Line 4024 for (;;) |
4024 | possessive = TRUE; | possessive = TRUE; |
4025 | min = 0; | min = 0; |
4026 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
4027 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
4028 | goto REPEATTYPE; | goto REPEATTYPE; |
4029 | ||
4030 | case OP_TYPESTAR: | case OP_TYPESTAR: |
# | Line 3805 for (;;) | Line 4070 for (;;) |
4070 | switch(prop_type) | switch(prop_type) |
4071 | { | { |
4072 | case PT_ANY: | case PT_ANY: |
4073 | if (prop_fail_result) MRRETURN(MATCH_NOMATCH); | if (prop_fail_result) RRETURN(MATCH_NOMATCH); |
4074 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
4075 | { | { |
4076 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4077 | { | { |
4078 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4079 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4080 | } | } |
4081 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4082 | } | } |
# | Line 3824 for (;;) | Line 4089 for (;;) |
4089 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4090 | { | { |
4091 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4092 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4093 | } | } |
4094 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4095 | chartype = UCD_CHARTYPE(c); | chartype = UCD_CHARTYPE(c); |
4096 | if ((chartype == ucp_Lu || | if ((chartype == ucp_Lu || |
4097 | chartype == ucp_Ll || | chartype == ucp_Ll || |
4098 | chartype == ucp_Lt) == prop_fail_result) | chartype == ucp_Lt) == prop_fail_result) |
4099 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4100 | } | } |
4101 | break; | break; |
4102 | ||
# | Line 3841 for (;;) | Line 4106 for (;;) |
4106 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4107 | { | { |
4108 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4109 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4110 | } | } |
4111 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4112 | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
4113 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4114 | } | } |
4115 | break; | break; |
4116 | ||
# | Line 3855 for (;;) | Line 4120 for (;;) |
4120 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4121 | { | { |
4122 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4123 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4124 | } | } |
4125 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4126 | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
4127 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4128 | } | } |
4129 | break; | break; |
4130 | ||
# | Line 3869 for (;;) | Line 4134 for (;;) |
4134 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4135 | { | { |
4136 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4137 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4138 | } | } |
4139 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4140 | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
4141 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4142 | } | } |
4143 | break; | break; |
4144 | ||
# | Line 3884 for (;;) | Line 4149 for (;;) |
4149 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4150 | { | { |
4151 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4152 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4153 | } | } |
4154 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4155 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4156 | if ((category == ucp_L || category == ucp_N) == prop_fail_result) | if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
4157 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4158 | } | } |
4159 | break; | break; |
4160 | ||
# | Line 3899 for (;;) | Line 4164 for (;;) |
4164 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4165 | { | { |
4166 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4167 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4168 | } | } |
4169 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4170 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4171 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
4172 | == prop_fail_result) | == prop_fail_result) |
4173 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4174 | } | } |
4175 | break; | break; |
4176 | ||
# | Line 3915 for (;;) | Line 4180 for (;;) |
4180 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4181 | { | { |
4182 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4183 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4184 | } | } |
4185 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4186 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4187 | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
4188 | == prop_fail_result) | == prop_fail_result) |
4189 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4190 | } | } |
4191 | break; | break; |
4192 | ||
# | Line 3932 for (;;) | Line 4197 for (;;) |
4197 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4198 | { | { |
4199 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4200 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4201 | } | } |
4202 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4203 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4204 | if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) | if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) |
4205 | == prop_fail_result) | == prop_fail_result) |
4206 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4207 | } | } |
4208 | break; | break; |
4209 | ||
# | Line 3959 for (;;) | Line 4224 for (;;) |
4224 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4225 | { | { |
4226 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4227 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4228 | } | } |
4229 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4230 | if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); | if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
4231 | while (eptr < md->end_subject) | while (eptr < md->end_subject) |
4232 | { | { |
4233 | int len = 1; | int len = 1; |
4234 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
4235 | if (UCD_CATEGORY(c) != ucp_M) break; | if (UCD_CATEGORY(c) != ucp_M) break; |
4236 | eptr += len; | eptr += len; |
4237 | } | } |
4238 | CHECK_PARTIAL(); | |
4239 | } | } |
4240 | } | } |
4241 | ||
# | Line 3978 for (;;) | Line 4244 for (;;) |
4244 | ||
4245 | /* Handle all other cases when the coding is UTF-8 */ | /* Handle all other cases when the coding is UTF-8 */ |
4246 | ||
4247 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
4248 | if (utf8) switch(ctype) | if (utf) switch(ctype) |
4249 | { | { |
4250 | case OP_ANY: | case OP_ANY: |
4251 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 3987 for (;;) | Line 4253 for (;;) |
4253 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4254 | { | { |
4255 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4256 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4257 | } | |
4258 | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); | |
4259 | if (md->partial != 0 && | |
4260 | eptr + 1 >= md->end_subject && | |
4261 | NLBLOCK->nltype == NLTYPE_FIXED && | |
4262 | NLBLOCK->nllen == 2 && | |
4263 | *eptr == NLBLOCK->nl[0]) | |
4264 | { | |
4265 | md->hitend = TRUE; | |
4266 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
4267 | } | } |
if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | ||
4268 | eptr++; | eptr++; |
4269 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4270 | } | } |
4271 | break; | break; |
4272 | ||
# | Line 4001 for (;;) | Line 4276 for (;;) |
4276 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4277 | { | { |
4278 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4279 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4280 | } | } |
4281 | eptr++; | eptr++; |
4282 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4283 | } | } |
4284 | break; | break; |
4285 | ||
4286 | case OP_ANYBYTE: | case OP_ANYBYTE: |
4287 | if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH); | if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); |
4288 | eptr += min; | eptr += min; |
4289 | break; | break; |
4290 | ||
# | Line 4019 for (;;) | Line 4294 for (;;) |
4294 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4295 | { | { |
4296 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4297 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4298 | } | } |
4299 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4300 | switch(c) | switch(c) |
4301 | { | { |
4302 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4303 | ||
4304 | case 0x000d: | case 0x000d: |
4305 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
# | Line 4038 for (;;) | Line 4313 for (;;) |
4313 | case 0x0085: | case 0x0085: |
4314 | case 0x2028: | case 0x2028: |
4315 | case 0x2029: | case 0x2029: |
4316 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
4317 | break; | break; |
4318 | } | } |
4319 | } | } |
# | Line 4050 for (;;) | Line 4325 for (;;) |
4325 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4326 | { | { |
4327 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4328 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4329 | } | } |
4330 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4331 | switch(c) | switch(c) |
# | Line 4075 for (;;) | Line 4350 for (;;) |
4350 | case 0x202f: /* NARROW NO-BREAK SPACE */ | case 0x202f: /* NARROW NO-BREAK SPACE */ |
4351 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
4352 | case 0x3000: /* IDEOGRAPHIC SPACE */ | case 0x3000: /* IDEOGRAPHIC SPACE */ |
4353 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4354 | } | } |
4355 | } | } |
4356 | break; | break; |
# | Line 4086 for (;;) | Line 4361 for (;;) |
4361 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4362 | { | { |
4363 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4364 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4365 | } | } |
4366 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4367 | switch(c) | switch(c) |
4368 | { | { |
4369 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4370 | case 0x09: /* HT */ | case 0x09: /* HT */ |
4371 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
4372 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
# | Line 4122 for (;;) | Line 4397 for (;;) |
4397 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4398 | { | { |
4399 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4400 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4401 | } | } |
4402 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4403 | switch(c) | switch(c) |
# | Line 4135 for (;;) | Line 4410 for (;;) |
4410 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
4411 | case 0x2028: /* LINE SEPARATOR */ | case 0x2028: /* LINE SEPARATOR */ |
4412 | case 0x2029: /* PARAGRAPH SEPARATOR */ | case 0x2029: /* PARAGRAPH SEPARATOR */ |
4413 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4414 | } | } |
4415 | } | } |
4416 | break; | break; |
# | Line 4146 for (;;) | Line 4421 for (;;) |
4421 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4422 | { | { |
4423 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4424 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4425 | } | } |
4426 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4427 | switch(c) | switch(c) |
4428 | { | { |
4429 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4430 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
4431 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
4432 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
# | Line 4170 for (;;) | Line 4445 for (;;) |
4445 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4446 | { | { |
4447 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4448 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4449 | } | } |
4450 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4451 | if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) | if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) |
4452 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4453 | } | } |
4454 | break; | break; |
4455 | ||
# | Line 4184 for (;;) | Line 4459 for (;;) |
4459 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4460 | { | { |
4461 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4462 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4463 | } | } |
4464 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0) |
4465 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4466 | eptr++; | |
4467 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4468 | } | } |
4469 | break; | break; |
# | Line 4198 for (;;) | Line 4474 for (;;) |
4474 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4475 | { | { |
4476 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4477 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4478 | } | } |
4479 | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) |
4480 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4481 | while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); | eptr++; |
4482 | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
4483 | } | } |
4484 | break; | break; |
4485 | ||
# | Line 4212 for (;;) | Line 4489 for (;;) |
4489 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4490 | { | { |
4491 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4492 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4493 | } | } |
4494 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0) |
4495 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4496 | eptr++; | |
4497 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4498 | } | } |
4499 | break; | break; |
# | Line 4226 for (;;) | Line 4504 for (;;) |
4504 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4505 | { | { |
4506 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4507 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4508 | } | } |
4509 | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) |
4510 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4511 | while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); | eptr++; |
4512 | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
4513 | } | } |
4514 | break; | break; |
4515 | ||
# | Line 4240 for (;;) | Line 4519 for (;;) |
4519 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4520 | { | { |
4521 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4522 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4523 | } | } |
4524 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0) |
4525 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4526 | eptr++; | |
4527 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4528 | } | } |
4529 | break; | break; |
# | Line 4253 for (;;) | Line 4533 for (;;) |
4533 | } /* End switch(ctype) */ | } /* End switch(ctype) */ |
4534 | ||
4535 | else | else |
4536 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
4537 | ||
4538 | /* Code for the non-UTF-8 case for minimum matching of operators other | /* Code for the non-UTF-8 case for minimum matching of operators other |
4539 | than OP_PROP and OP_NOTPROP. */ | than OP_PROP and OP_NOTPROP. */ |
# | Line 4266 for (;;) | Line 4546 for (;;) |
4546 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4547 | { | { |
4548 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4549 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4550 | } | |
4551 | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); | |
4552 | if (md->partial != 0 && | |
4553 | eptr + 1 >= md->end_subject && | |
4554 | NLBLOCK->nltype == NLTYPE_FIXED && | |
4555 | NLBLOCK->nllen == 2 && | |
4556 | *eptr == NLBLOCK->nl[0]) | |
4557 | { | |
4558 | md->hitend = TRUE; | |
4559 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
4560 | } | } |
if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | ||
4561 | eptr++; | eptr++; |
4562 | } | } |
4563 | break; | break; |
# | Line 4277 for (;;) | Line 4566 for (;;) |
4566 | if (eptr > md->end_subject - min) | if (eptr > md->end_subject - min) |
4567 | { | { |
4568 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4569 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4570 | } | } |
4571 | eptr += min; | eptr += min; |
4572 | break; | break; |
# | Line 4286 for (;;) | Line 4575 for (;;) |
4575 | if (eptr > md->end_subject - min) | if (eptr > md->end_subject - min) |
4576 | { | { |
4577 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4578 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4579 | } | } |
4580 | eptr += min; | eptr += min; |
4581 | break; | break; |
# | Line 4297 for (;;) | Line 4586 for (;;) |
4586 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4587 | { | { |
4588 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4589 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4590 | } | } |
4591 | switch(*eptr++) | switch(*eptr++) |
4592 | { | { |
4593 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4594 | ||
4595 | case 0x000d: | case 0x000d: |
4596 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
# | Line 4313 for (;;) | Line 4602 for (;;) |
4602 | case 0x000b: | case 0x000b: |
4603 | case 0x000c: | case 0x000c: |
4604 | case 0x0085: | case 0x0085: |
4605 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
4606 | case 0x2028: | |
4607 | case 0x2029: | |
4608 | #endif | |
4609 | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); | |
4610 | break; | break; |
4611 | } | } |
4612 | } | } |
# | Line 4325 for (;;) | Line 4618 for (;;) |
4618 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4619 | { | { |
4620 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4621 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4622 | } | } |
4623 | switch(*eptr++) | switch(*eptr++) |
4624 | { | { |
# | Line 4333 for (;;) | Line 4626 for (;;) |
4626 | case 0x09: /* HT */ | case 0x09: /* HT */ |
4627 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
4628 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
4629 | MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
4630 | case 0x1680: /* OGHAM SPACE MARK */ | |
4631 | case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | |
4632 | case 0x2000: /* EN QUAD */ | |
4633 | case 0x2001: /* EM QUAD */ | |
4634 | case 0x2002: /* EN SPACE */ | |
4635 | case 0x2003: /* EM SPACE */ | |
4636 | case 0x2004: /* THREE-PER-EM SPACE */ | |
4637 | case 0x2005: /* FOUR-PER-EM SPACE */ | |
4638 | case 0x2006: /* SIX-PER-EM SPACE */ | |
4639 | case 0x2007: /* FIGURE SPACE */ | |
4640 | case 0x2008: /* PUNCTUATION SPACE */ | |
4641 | case 0x2009: /* THIN SPACE */ | |
4642 | case 0x200A: /* HAIR SPACE */ | |
4643 | case 0x202f: /* NARROW NO-BREAK SPACE */ | |
4644 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | |
4645 | case 0x3000: /* IDEOGRAPHIC SPACE */ | |
4646 | #endif | |
4647 | RRETURN(MATCH_NOMATCH); | |
4648 | } | } |
4649 | } | } |
4650 | break; | break; |
# | Line 4344 for (;;) | Line 4655 for (;;) |
4655 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4656 | { | { |
4657 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4658 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4659 | } | } |
4660 | switch(*eptr++) | switch(*eptr++) |
4661 | { | { |
4662 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4663 | case 0x09: /* HT */ | case 0x09: /* HT */ |
4664 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
4665 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
4666 | #ifdef COMPILE_PCRE16 | |
4667 | case 0x1680: /* OGHAM SPACE MARK */ | |
4668 | case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | |
4669 | case 0x2000: /* EN QUAD */ | |
4670 | case 0x2001: /* EM QUAD */ | |
4671 | case 0x2002: /* EN SPACE */ | |
4672 | case 0x2003: /* EM SPACE */ | |
4673 | case 0x2004: /* THREE-PER-EM SPACE */ | |
4674 | case 0x2005: /* FOUR-PER-EM SPACE */ | |
4675 | case 0x2006: /* SIX-PER-EM SPACE */ | |
4676 | case 0x2007: /* FIGURE SPACE */ | |
4677 | case 0x2008: /* PUNCTUATION SPACE */ | |
4678 | case 0x2009: /* THIN SPACE */ | |
4679 | case 0x200A: /* HAIR SPACE */ | |
4680 | case 0x202f: /* NARROW NO-BREAK SPACE */ | |
4681 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | |
4682 | case 0x3000: /* IDEOGRAPHIC SPACE */ | |
4683 | #endif | |
4684 | break; | break; |
4685 | } | } |
4686 | } | } |
# | Line 4363 for (;;) | Line 4692 for (;;) |
4692 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4693 | { | { |
4694 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4695 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4696 | } | } |
4697 | switch(*eptr++) | switch(*eptr++) |
4698 | { | { |
# | Line 4373 for (;;) | Line 4702 for (;;) |
4702 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
4703 | case 0x0d: /* CR */ | case 0x0d: /* CR */ |
4704 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
4705 | MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
4706 | case 0x2028: /* LINE SEPARATOR */ | |
4707 | case 0x2029: /* PARAGRAPH SEPARATOR */ | |
4708 | #endif | |
4709 | RRETURN(MATCH_NOMATCH); | |
4710 | } | } |
4711 | } | } |
4712 | break; | break; |
# | Line 4384 for (;;) | Line 4717 for (;;) |
4717 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4718 | { | { |
4719 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4720 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4721 | } | } |
4722 | switch(*eptr++) | switch(*eptr++) |
4723 | { | { |
4724 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4725 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
4726 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
4727 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
4728 | case 0x0d: /* CR */ | case 0x0d: /* CR */ |
4729 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
4730 | #ifdef COMPILE_PCRE16 | |
4731 | case 0x2028: /* LINE SEPARATOR */ | |
4732 | case 0x2029: /* PARAGRAPH SEPARATOR */ | |
4733 | #endif | |
4734 | break; | break; |
4735 | } | } |
4736 | } | } |
# | Line 4405 for (;;) | Line 4742 for (;;) |
4742 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4743 | { | { |
4744 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4745 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4746 | } | } |
4747 | if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) |
4748 | RRETURN(MATCH_NOMATCH); | |
4749 | eptr++; | |
4750 | } | } |
4751 | break; | break; |
4752 | ||
# | Line 4417 for (;;) | Line 4756 for (;;) |
4756 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4757 | { | { |
4758 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4759 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4760 | } | } |
4761 | if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) |
4762 | RRETURN(MATCH_NOMATCH); | |
4763 | eptr++; | |
4764 | } | } |
4765 | break; | break; |
4766 | ||
# | Line 4429 for (;;) | Line 4770 for (;;) |
4770 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4771 | { | { |
4772 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4773 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4774 | } | } |
4775 | if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) |
4776 | RRETURN(MATCH_NOMATCH); | |
4777 | eptr++; | |
4778 | } | } |
4779 | break; | break; |
4780 | ||
# | Line 4441 for (;;) | Line 4784 for (;;) |
4784 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4785 | { | { |
4786 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4787 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4788 | } | } |
4789 | if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) |
4790 | RRETURN(MATCH_NOMATCH); | |
4791 | eptr++; | |
4792 | } | } |
4793 | break; | break; |
4794 | ||
# | Line 4453 for (;;) | Line 4798 for (;;) |
4798 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4799 | { | { |
4800 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4801 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4802 | } | } |
4803 | if ((md->ctypes[*eptr++] & ctype_word) != 0) | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) |
4804 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4805 | eptr++; | |
4806 | } | } |
4807 | break; | break; |
4808 | ||
# | Line 4466 for (;;) | Line 4812 for (;;) |
4812 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4813 | { | { |
4814 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4815 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4816 | } | } |
4817 | if ((md->ctypes[*eptr++] & ctype_word) == 0) | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) |
4818 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4819 | eptr++; | |
4820 | } | } |
4821 | break; | break; |
4822 | ||
# | Line 4498 for (;;) | Line 4845 for (;;) |
4845 | { | { |
4846 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); |
4847 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4848 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4849 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4850 | { | { |
4851 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4852 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4853 | } | } |
4854 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4855 | if (prop_fail_result) MRRETURN(MATCH_NOMATCH); | if (prop_fail_result) RRETURN(MATCH_NOMATCH); |
4856 | } | } |
4857 | /* Control never gets here */ | /* Control never gets here */ |
4858 | ||
# | Line 4515 for (;;) | Line 4862 for (;;) |
4862 | int chartype; | int chartype; |
4863 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); |
4864 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4865 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4866 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4867 | { | { |
4868 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4869 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4870 | } | } |
4871 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4872 | chartype = UCD_CHARTYPE(c); | chartype = UCD_CHARTYPE(c); |
4873 | if ((chartype == ucp_Lu || | if ((chartype == ucp_Lu || |
4874 | chartype == ucp_Ll || | chartype == ucp_Ll || |
4875 | chartype == ucp_Lt) == prop_fail_result) | chartype == ucp_Lt) == prop_fail_result) |
4876 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4877 | } | } |
4878 | /* Control never gets here */ | /* Control never gets here */ |
4879 | ||
# | Line 4535 for (;;) | Line 4882 for (;;) |
4882 | { | { |
4883 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); |
4884 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4885 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4886 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4887 | { | { |
4888 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4889 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4890 | } | } |
4891 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4892 | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
4893 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4894 | } | } |
4895 | /* Control never gets here */ | /* Control never gets here */ |
4896 | ||
# | Line 4552 for (;;) | Line 4899 for (;;) |
4899 | { | { |
4900 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); |
4901 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4902 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4903 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4904 | { | { |
4905 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4906 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4907 | } | } |
4908 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4909 | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
4910 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4911 | } | } |
4912 | /* Control never gets here */ | /* Control never gets here */ |
4913 | ||
# | Line 4569 for (;;) | Line 4916 for (;;) |
4916 | { | { |
4917 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); |
4918 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4919 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4920 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4921 | { | { |
4922 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4923 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4924 | } | } |
4925 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4926 | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
4927 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4928 | } | } |
4929 | /* Control never gets here */ | /* Control never gets here */ |
4930 | ||
# | Line 4587 for (;;) | Line 4934 for (;;) |
4934 | int category; | int category; |
4935 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); |
4936 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4937 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4938 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4939 | { | { |
4940 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4941 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4942 | } | } |
4943 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4944 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4945 | if ((category == ucp_L || category == ucp_N) == prop_fail_result) | if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
4946 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4947 | } | } |
4948 | /* Control never gets here */ | /* Control never gets here */ |
4949 | ||
# | Line 4605 for (;;) | Line 4952 for (;;) |
4952 | { | { |
4953 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); |
4954 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4955 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4956 |   |