Parent Directory
|
Revision Log
|
Patch
revision 835 by ph10, Wed Dec 28 16:10:09 2011 UTC | revision 888 by ph10, Tue Jan 17 14:43:23 2012 UTC | |
---|---|---|
# | Line 6 | Line 6 |
6 | and semantics are as close as possible to those of the Perl 5 language. | and semantics are as close as possible to those of the Perl 5 language. |
7 | ||
8 | Written by Philip Hazel | Written by Philip Hazel |
9 | Copyright (c) 1997-2011 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
10 | ||
11 | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
12 | Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without |
# | Line 82 negative to avoid the external error cod | Line 82 negative to avoid the external error cod |
82 | #define MATCH_SKIP_ARG (-993) | #define MATCH_SKIP_ARG (-993) |
83 | #define MATCH_THEN (-992) | #define MATCH_THEN (-992) |
84 | ||
/* This is a convenience macro for code that occurs many times. */ | ||
#define MRRETURN(ra) \ | ||
{ \ | ||
md->mark = markptr; \ | ||
RRETURN(ra); \ | ||
} | ||
85 | /* Maximum number of ints of offset to save on the stack for recursive calls. | /* Maximum number of ints of offset to save on the stack for recursive calls. |
86 | If the offset vector is bigger, malloc is used. This should be a multiple of 3, | If the offset vector is bigger, malloc is used. This should be a multiple of 3, |
87 | because the offset vector is always a multiple of 3 long. */ | because the offset vector is always a multiple of 3 long. */ |
# | Line 121 Returns: nothing | Line 113 Returns: nothing |
113 | */ | */ |
114 | ||
115 | static void | static void |
116 | pchars(const uschar *p, int length, BOOL is_subject, match_data *md) | pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) |
117 | { | { |
118 | unsigned int c; | unsigned int c; |
119 | if (is_subject && length > md->end_subject - p) length = md->end_subject - p; | if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
# | Line 152 Returns: < 0 if not matched, otherw | Line 144 Returns: < 0 if not matched, otherw |
144 | */ | */ |
145 | ||
146 | static int | static int |
147 | match_ref(int offset, register USPTR eptr, int length, match_data *md, | match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md, |
148 | BOOL caseless) | BOOL caseless) |
149 | { | { |
150 | USPTR eptr_start = eptr; | PCRE_PUCHAR eptr_start = eptr; |
151 | register USPTR p = md->start_subject + md->offset_vector[offset]; | register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; |
152 | ||
153 | #ifdef PCRE_DEBUG | #ifdef PCRE_DEBUG |
154 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
# | Line 181 ASCII characters. */ | Line 173 ASCII characters. */ |
173 | ||
174 | if (caseless) | if (caseless) |
175 | { | { |
176 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
177 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
178 | if (md->utf8) | if (md->utf) |
179 | { | { |
180 | /* Match characters up to the end of the reference. NOTE: the number of | /* Match characters up to the end of the reference. NOTE: the number of |
181 | bytes matched may differ, because there are some characters whose upper and | bytes matched may differ, because there are some characters whose upper and |
# | Line 193 if (caseless) | Line 185 if (caseless) |
185 | the latter. It is important, therefore, to check the length along the | the latter. It is important, therefore, to check the length along the |
186 | reference, not along the subject (earlier code did this wrong). */ | reference, not along the subject (earlier code did this wrong). */ |
187 | ||
188 | USPTR endptr = p + length; | PCRE_PUCHAR endptr = p + length; |
189 | while (p < endptr) | while (p < endptr) |
190 | { | { |
191 | int c, d; | int c, d; |
# | Line 212 if (caseless) | Line 204 if (caseless) |
204 | { | { |
205 | if (eptr + length > md->end_subject) return -1; | if (eptr + length > md->end_subject) return -1; |
206 | while (length-- > 0) | while (length-- > 0) |
207 | { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; } | { |
208 | if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1; | |
209 | p++; | |
210 | eptr++; | |
211 | } | |
212 | } | } |
213 | } | } |
214 | ||
# | Line 225 else | Line 221 else |
221 | while (length-- > 0) if (*p++ != *eptr++) return -1; | while (length-- > 0) if (*p++ != *eptr++) return -1; |
222 | } | } |
223 | ||
224 | return eptr - eptr_start; | return (int)(eptr - eptr_start); |
225 | } | } |
226 | ||
227 | ||
# | Line 290 actually used in this definition. */ | Line 286 actually used in this definition. */ |
286 | #define RMATCH(ra,rb,rc,rd,re,rw) \ | #define RMATCH(ra,rb,rc,rd,re,rw) \ |
287 | { \ | { \ |
288 | printf("match() called in line %d\n", __LINE__); \ | printf("match() called in line %d\n", __LINE__); \ |
289 | rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \ | rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \ |
290 | printf("to line %d\n", __LINE__); \ | printf("to line %d\n", __LINE__); \ |
291 | } | } |
292 | #define RRETURN(ra) \ | #define RRETURN(ra) \ |
# | Line 300 actually used in this definition. */ | Line 296 actually used in this definition. */ |
296 | } | } |
297 | #else | #else |
298 | #define RMATCH(ra,rb,rc,rd,re,rw) \ | #define RMATCH(ra,rb,rc,rd,re,rw) \ |
299 | rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1) | rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) |
300 | #define RRETURN(ra) return ra | #define RRETURN(ra) return ra |
301 | #endif | #endif |
302 | ||
# | Line 315 argument of match(), which never changes | Line 311 argument of match(), which never changes |
311 | ||
312 | #define RMATCH(ra,rb,rc,rd,re,rw)\ | #define RMATCH(ra,rb,rc,rd,re,rw)\ |
313 | {\ | {\ |
314 | heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\ | heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ |
315 | if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ | if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ |
316 | frame->Xwhere = rw; \ | frame->Xwhere = rw; \ |
317 | newframe->Xeptr = ra;\ | newframe->Xeptr = ra;\ |
318 | newframe->Xecode = rb;\ | newframe->Xecode = rb;\ |
319 | newframe->Xmstart = mstart;\ | newframe->Xmstart = mstart;\ |
newframe->Xmarkptr = markptr;\ | ||
320 | newframe->Xoffset_top = rc;\ | newframe->Xoffset_top = rc;\ |
321 | newframe->Xeptrb = re;\ | newframe->Xeptrb = re;\ |
322 | newframe->Xrdepth = frame->Xrdepth + 1;\ | newframe->Xrdepth = frame->Xrdepth + 1;\ |
# | Line 337 argument of match(), which never changes | Line 332 argument of match(), which never changes |
332 | {\ | {\ |
333 | heapframe *oldframe = frame;\ | heapframe *oldframe = frame;\ |
334 | frame = oldframe->Xprevframe;\ | frame = oldframe->Xprevframe;\ |
335 | (pcre_stack_free)(oldframe);\ | (PUBL(stack_free))(oldframe);\ |
336 | if (frame != NULL)\ | if (frame != NULL)\ |
337 | {\ | {\ |
338 | rrc = ra;\ | rrc = ra;\ |
# | Line 354 typedef struct heapframe { | Line 349 typedef struct heapframe { |
349 | ||
350 | /* Function arguments that may change */ | /* Function arguments that may change */ |
351 | ||
352 | USPTR Xeptr; | PCRE_PUCHAR Xeptr; |
353 | const uschar *Xecode; | const pcre_uchar *Xecode; |
354 | USPTR Xmstart; | PCRE_PUCHAR Xmstart; |
USPTR Xmarkptr; | ||
355 | int Xoffset_top; | int Xoffset_top; |
356 | eptrblock *Xeptrb; | eptrblock *Xeptrb; |
357 | unsigned int Xrdepth; | unsigned int Xrdepth; |
358 | ||
359 | /* Function local variables */ | /* Function local variables */ |
360 | ||
361 | USPTR Xcallpat; | PCRE_PUCHAR Xcallpat; |
362 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
363 | USPTR Xcharptr; | PCRE_PUCHAR Xcharptr; |
364 | #endif | #endif |
365 | USPTR Xdata; | PCRE_PUCHAR Xdata; |
366 | USPTR Xnext; | PCRE_PUCHAR Xnext; |
367 | USPTR Xpp; | PCRE_PUCHAR Xpp; |
368 | USPTR Xprev; | PCRE_PUCHAR Xprev; |
369 | USPTR Xsaved_eptr; | PCRE_PUCHAR Xsaved_eptr; |
370 | ||
371 | recursion_info Xnew_recursive; | recursion_info Xnew_recursive; |
372 | ||
# | Line 385 typedef struct heapframe { | Line 379 typedef struct heapframe { |
379 | int Xprop_value; | int Xprop_value; |
380 | int Xprop_fail_result; | int Xprop_fail_result; |
381 | int Xoclength; | int Xoclength; |
382 | uschar Xocchars[8]; | pcre_uchar Xocchars[6]; |
383 | #endif | #endif |
384 | ||
385 | int Xcodelink; | int Xcodelink; |
# | Line 427 returns a negative (error) response, the | Line 421 returns a negative (error) response, the |
421 | same response. */ | same response. */ |
422 | ||
423 | /* These macros pack up tests that are used for partial matching, and which | /* These macros pack up tests that are used for partial matching, and which |
424 | appears several times in the code. We set the "hit end" flag if the pointer is | appear several times in the code. We set the "hit end" flag if the pointer is |
425 | at the end of the subject and also past the start of the subject (i.e. | at the end of the subject and also past the start of the subject (i.e. |
426 | something has been matched). For hard partial matching, we then return | something has been matched). For hard partial matching, we then return |
427 | immediately. The second one is used when we already know we are past the end of | immediately. The second one is used when we already know we are past the end of |
# | Line 438 the subject. */ | Line 432 the subject. */ |
432 | eptr > md->start_used_ptr) \ | eptr > md->start_used_ptr) \ |
433 | { \ | { \ |
434 | md->hitend = TRUE; \ | md->hitend = TRUE; \ |
435 | if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ |
436 | } | } |
437 | ||
438 | #define SCHECK_PARTIAL()\ | #define SCHECK_PARTIAL()\ |
439 | if (md->partial != 0 && eptr > md->start_used_ptr) \ | if (md->partial != 0 && eptr > md->start_used_ptr) \ |
440 | { \ | { \ |
441 | md->hitend = TRUE; \ | md->hitend = TRUE; \ |
442 | if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ |
443 | } | } |
444 | ||
445 | ||
446 | /* Performance note: It might be tempting to extract commonly used fields from | /* Performance note: It might be tempting to extract commonly used fields from |
447 | the md structure (e.g. utf8, end_subject) into individual variables to improve | the md structure (e.g. utf, end_subject) into individual variables to improve |
448 | performance. Tests using gcc on a SPARC disproved this; in the first case, it | performance. Tests using gcc on a SPARC disproved this; in the first case, it |
449 | made performance worse. | made performance worse. |
450 | ||
# | Line 459 Arguments: | Line 453 Arguments: |
453 | ecode pointer to current position in compiled code | ecode pointer to current position in compiled code |
454 | mstart pointer to the current match start position (can be modified | mstart pointer to the current match start position (can be modified |
455 | by encountering \K) | by encountering \K) |
markptr pointer to the most recent MARK name, or NULL | ||
456 | offset_top current top pointer | offset_top current top pointer |
457 | md pointer to "static" info for the match | md pointer to "static" info for the match |
458 | eptrb pointer to chain of blocks containing eptr at start of | eptrb pointer to chain of blocks containing eptr at start of |
# | Line 474 Returns: MATCH_MATCH if matched | Line 467 Returns: MATCH_MATCH if matched |
467 | */ | */ |
468 | ||
469 | static int | static int |
470 | match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, | match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode, |
471 | const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb, | PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb, |
472 | unsigned int rdepth) | unsigned int rdepth) |
473 | { | { |
474 | /* These variables do not need to be preserved over recursion in this function, | /* These variables do not need to be preserved over recursion in this function, |
# | Line 485 so they can be ordinary variables in all | Line 478 so they can be ordinary variables in all |
478 | register int rrc; /* Returns from recursive calls */ | register int rrc; /* Returns from recursive calls */ |
479 | register int i; /* Used for loops not involving calls to RMATCH() */ | register int i; /* Used for loops not involving calls to RMATCH() */ |
480 | register unsigned int c; /* Character values not kept over RMATCH() calls */ | register unsigned int c; /* Character values not kept over RMATCH() calls */ |
481 | register BOOL utf8; /* Local copy of UTF-8 flag for speed */ | register BOOL utf; /* Local copy of UTF flag for speed */ |
482 | ||
483 | BOOL minimize, possessive; /* Quantifier options */ | BOOL minimize, possessive; /* Quantifier options */ |
484 | BOOL caseless; | BOOL caseless; |
# | Line 497 heap storage. Set up the top-level frame | Line 490 heap storage. Set up the top-level frame |
490 | heap whenever RMATCH() does a "recursion". See the macro definitions above. */ | heap whenever RMATCH() does a "recursion". See the macro definitions above. */ |
491 | ||
492 | #ifdef NO_RECURSE | #ifdef NO_RECURSE |
493 | heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); | heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe)); |
494 | if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); | if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
495 | frame->Xprevframe = NULL; /* Marks the top level */ | frame->Xprevframe = NULL; /* Marks the top level */ |
496 | ||
# | Line 506 frame->Xprevframe = NULL; /* | Line 499 frame->Xprevframe = NULL; /* |
499 | frame->Xeptr = eptr; | frame->Xeptr = eptr; |
500 | frame->Xecode = ecode; | frame->Xecode = ecode; |
501 | frame->Xmstart = mstart; | frame->Xmstart = mstart; |
frame->Xmarkptr = markptr; | ||
502 | frame->Xoffset_top = offset_top; | frame->Xoffset_top = offset_top; |
503 | frame->Xeptrb = eptrb; | frame->Xeptrb = eptrb; |
504 | frame->Xrdepth = rdepth; | frame->Xrdepth = rdepth; |
# | Line 520 HEAP_RECURSE: | Line 512 HEAP_RECURSE: |
512 | #define eptr frame->Xeptr | #define eptr frame->Xeptr |
513 | #define ecode frame->Xecode | #define ecode frame->Xecode |
514 | #define mstart frame->Xmstart | #define mstart frame->Xmstart |
#define markptr frame->Xmarkptr | ||
515 | #define offset_top frame->Xoffset_top | #define offset_top frame->Xoffset_top |
516 | #define eptrb frame->Xeptrb | #define eptrb frame->Xeptrb |
517 | #define rdepth frame->Xrdepth | #define rdepth frame->Xrdepth |
518 | ||
519 | /* Ditto for the local variables */ | /* Ditto for the local variables */ |
520 | ||
521 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
522 | #define charptr frame->Xcharptr | #define charptr frame->Xcharptr |
523 | #endif | #endif |
524 | #define callpat frame->Xcallpat | #define callpat frame->Xcallpat |
# | Line 585 declarations can be cut out in a block. | Line 576 declarations can be cut out in a block. |
576 | below are for variables that do not have to be preserved over a recursive call | below are for variables that do not have to be preserved over a recursive call |
577 | to RMATCH(). */ | to RMATCH(). */ |
578 | ||
579 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
580 | const uschar *charptr; | const pcre_uchar *charptr; |
581 | #endif | #endif |
582 | const uschar *callpat; | const pcre_uchar *callpat; |
583 | const uschar *data; | const pcre_uchar *data; |
584 | const uschar *next; | const pcre_uchar *next; |
585 | USPTR pp; | PCRE_PUCHAR pp; |
586 | const uschar *prev; | const pcre_uchar *prev; |
587 | USPTR saved_eptr; | PCRE_PUCHAR saved_eptr; |
588 | ||
589 | recursion_info new_recursive; | recursion_info new_recursive; |
590 | ||
# | Line 606 int prop_type; | Line 597 int prop_type; |
597 | int prop_value; | int prop_value; |
598 | int prop_fail_result; | int prop_fail_result; |
599 | int oclength; | int oclength; |
600 | uschar occhars[8]; | pcre_uchar occhars[6]; |
601 | #endif | #endif |
602 | ||
603 | int codelink; | int codelink; |
# | Line 634 the alternative names that are used. */ | Line 625 the alternative names that are used. */ |
625 | #define code_offset codelink | #define code_offset codelink |
626 | #define condassert condition | #define condassert condition |
627 | #define matched_once prev_is_word | #define matched_once prev_is_word |
628 | #define foc number | |
629 | #define save_mark data | |
630 | ||
631 | /* These statements are here to stop the compiler complaining about uninitialized | /* These statements are here to stop the compiler complaining about uninitialized |
632 | variables. */ | variables. */ |
# | Line 659 defined). However, RMATCH isn't like a f | Line 652 defined). However, RMATCH isn't like a f |
652 | complicated macro. It has to be used in one particular way. This shouldn't, | complicated macro. It has to be used in one particular way. This shouldn't, |
653 | however, impact performance when true recursion is being used. */ | however, impact performance when true recursion is being used. */ |
654 | ||
655 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
656 | utf8 = md->utf8; /* Local copy of the flag */ | utf = md->utf; /* Local copy of the flag */ |
657 | #else | #else |
658 | utf8 = FALSE; | utf = FALSE; |
659 | #endif | #endif |
660 | ||
661 | /* First check that we haven't called match() too many times, or that we | /* First check that we haven't called match() too many times, or that we |
# | Line 701 for (;;) | Line 694 for (;;) |
694 | switch(op) | switch(op) |
695 | { | { |
696 | case OP_MARK: | case OP_MARK: |
697 | markptr = ecode + 2; | md->nomatch_mark = ecode + 2; |
698 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | md->mark = NULL; /* In case previously set by assertion */ |
699 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
700 | eptrb, RM55); | eptrb, RM55); |
701 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
702 | md->mark == NULL) md->mark = ecode + 2; | |
703 | ||
704 | /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an | /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an |
705 | argument, and we must check whether that argument matches this MARK's | argument, and we must check whether that argument matches this MARK's |
# | Line 712 for (;;) | Line 708 for (;;) |
708 | position and return MATCH_SKIP. Otherwise, pass back the return code | position and return MATCH_SKIP. Otherwise, pass back the return code |
709 | unaltered. */ | unaltered. */ |
710 | ||
711 | if (rrc == MATCH_SKIP_ARG && | else if (rrc == MATCH_SKIP_ARG && |
712 | strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0) | STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0) |
713 | { | { |
714 | md->start_match_ptr = eptr; | md->start_match_ptr = eptr; |
715 | RRETURN(MATCH_SKIP); | RRETURN(MATCH_SKIP); |
716 | } | } |
if (md->mark == NULL) md->mark = markptr; | ||
717 | RRETURN(rrc); | RRETURN(rrc); |
718 | ||
719 | case OP_FAIL: | case OP_FAIL: |
720 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
721 | ||
722 | /* COMMIT overrides PRUNE, SKIP, and THEN */ | /* COMMIT overrides PRUNE, SKIP, and THEN */ |
723 | ||
724 | case OP_COMMIT: | case OP_COMMIT: |
725 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
726 | eptrb, RM52); | eptrb, RM52); |
727 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && |
728 | rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && | rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && |
729 | rrc != MATCH_THEN) | rrc != MATCH_THEN) |
730 | RRETURN(rrc); | RRETURN(rrc); |
731 | MRRETURN(MATCH_COMMIT); | RRETURN(MATCH_COMMIT); |
732 | ||
733 | /* PRUNE overrides THEN */ | /* PRUNE overrides THEN */ |
734 | ||
735 | case OP_PRUNE: | case OP_PRUNE: |
736 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
737 | eptrb, RM51); | eptrb, RM51); |
738 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
739 | MRRETURN(MATCH_PRUNE); | RRETURN(MATCH_PRUNE); |
740 | ||
741 | case OP_PRUNE_ARG: | case OP_PRUNE_ARG: |
742 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | md->nomatch_mark = ecode + 2; |
743 | md->mark = NULL; /* In case previously set by assertion */ | |
744 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
745 | eptrb, RM56); | eptrb, RM56); |
746 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
747 | md->mark == NULL) md->mark = ecode + 2; | |
748 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
md->mark = ecode + 2; | ||
749 | RRETURN(MATCH_PRUNE); | RRETURN(MATCH_PRUNE); |
750 | ||
751 | /* SKIP overrides PRUNE and THEN */ | /* SKIP overrides PRUNE and THEN */ |
752 | ||
753 | case OP_SKIP: | case OP_SKIP: |
754 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
755 | eptrb, RM53); | eptrb, RM53); |
756 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
757 | RRETURN(rrc); | RRETURN(rrc); |
758 | md->start_match_ptr = eptr; /* Pass back current position */ | md->start_match_ptr = eptr; /* Pass back current position */ |
759 | MRRETURN(MATCH_SKIP); | RRETURN(MATCH_SKIP); |
760 | ||
761 | /* Note that, for Perl compatibility, SKIP with an argument does NOT set | |
762 | nomatch_mark. There is a flag that disables this opcode when re-matching a | |
763 | pattern that ended with a SKIP for which there was not a matching MARK. */ | |
764 | ||
765 | case OP_SKIP_ARG: | case OP_SKIP_ARG: |
766 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | if (md->ignore_skip_arg) |
767 | { | |
768 | ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; | |
769 | break; | |
770 | } | |
771 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
772 | eptrb, RM57); | eptrb, RM57); |
773 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
774 | RRETURN(rrc); | RRETURN(rrc); |
775 | ||
776 | /* Pass back the current skip name by overloading md->start_match_ptr and | /* Pass back the current skip name by overloading md->start_match_ptr and |
777 | returning the special MATCH_SKIP_ARG return code. This will either be | returning the special MATCH_SKIP_ARG return code. This will either be |
778 | caught by a matching MARK, or get to the top, where it is treated the same | caught by a matching MARK, or get to the top, where it causes a rematch |
779 | as PRUNE. */ | with the md->ignore_skip_arg flag set. */ |
780 | ||
781 | md->start_match_ptr = ecode + 2; | md->start_match_ptr = ecode + 2; |
782 | RRETURN(MATCH_SKIP_ARG); | RRETURN(MATCH_SKIP_ARG); |
# | Line 780 for (;;) | Line 786 for (;;) |
786 | match pointer to do this. */ | match pointer to do this. */ |
787 | ||
788 | case OP_THEN: | case OP_THEN: |
789 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
790 | eptrb, RM54); | eptrb, RM54); |
791 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
792 | md->start_match_ptr = ecode; | md->start_match_ptr = ecode; |
793 | MRRETURN(MATCH_THEN); | RRETURN(MATCH_THEN); |
794 | ||
795 | case OP_THEN_ARG: | case OP_THEN_ARG: |
796 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, | md->nomatch_mark = ecode + 2; |
797 | md->mark = NULL; /* In case previously set by assertion */ | |
798 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, | |
799 | md, eptrb, RM58); | md, eptrb, RM58); |
800 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
801 | md->mark == NULL) md->mark = ecode + 2; | |
802 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
803 | md->start_match_ptr = ecode; | md->start_match_ptr = ecode; |
md->mark = ecode + 2; | ||
804 | RRETURN(MATCH_THEN); | RRETURN(MATCH_THEN); |
805 | ||
806 | /* Handle an atomic group that does not contain any capturing parentheses. | /* Handle an atomic group that does not contain any capturing parentheses. |
# | Line 810 for (;;) | Line 819 for (;;) |
819 | case OP_ONCE_NC: | case OP_ONCE_NC: |
820 | prev = ecode; | prev = ecode; |
821 | saved_eptr = eptr; | saved_eptr = eptr; |
822 | save_mark = md->mark; | |
823 | do | do |
824 | { | { |
825 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); |
826 | if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ | if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ |
827 | { | { |
828 | mstart = md->start_match_ptr; | mstart = md->start_match_ptr; |
markptr = md->mark; | ||
829 | break; | break; |
830 | } | } |
831 | if (rrc == MATCH_THEN) | if (rrc == MATCH_THEN) |
# | Line 829 for (;;) | Line 838 for (;;) |
838 | ||
839 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
840 | ecode += GET(ecode,1); | ecode += GET(ecode,1); |
841 | md->mark = save_mark; | |
842 | } | } |
843 | while (*ecode == OP_ALT); | while (*ecode == OP_ALT); |
844 | ||
# | Line 908 for (;;) | Line 918 for (;;) |
918 | save_offset2 = md->offset_vector[offset+1]; | save_offset2 = md->offset_vector[offset+1]; |
919 | save_offset3 = md->offset_vector[md->offset_end - number]; | save_offset3 = md->offset_vector[md->offset_end - number]; |
920 | save_capture_last = md->capture_last; | save_capture_last = md->capture_last; |
921 | save_mark = md->mark; | |
922 | ||
923 | DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); | DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); |
924 | md->offset_vector[md->offset_end - number] = | md->offset_vector[md->offset_end - number] = |
# | Line 916 for (;;) | Line 927 for (;;) |
927 | for (;;) | for (;;) |
928 | { | { |
929 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
930 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
931 | eptrb, RM1); | eptrb, RM1); |
932 | if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ | if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ |
933 | ||
# | Line 944 for (;;) | Line 955 for (;;) |
955 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
956 | md->capture_last = save_capture_last; | md->capture_last = save_capture_last; |
957 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
958 | md->mark = save_mark; | |
959 | if (*ecode != OP_ALT) break; | if (*ecode != OP_ALT) break; |
960 | } | } |
961 | ||
# | Line 954 for (;;) | Line 966 for (;;) |
966 | ||
967 | /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ | /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ |
968 | ||
if (md->mark == NULL) md->mark = markptr; | ||
969 | RRETURN(rrc); | RRETURN(rrc); |
970 | } | } |
971 | ||
# | Line 1004 for (;;) | Line 1015 for (;;) |
1015 | ||
1016 | else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) | else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) |
1017 | { | { |
1018 | ecode += _pcre_OP_lengths[*ecode]; | ecode += PRIV(OP_lengths)[*ecode]; |
1019 | goto TAIL_RECURSE; | goto TAIL_RECURSE; |
1020 | } | } |
1021 | ||
1022 | /* In all other cases, we have to make another call to match(). */ | /* In all other cases, we have to make another call to match(). */ |
1023 | ||
1024 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, | save_mark = md->mark; |
1025 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, | |
1026 | RM2); | RM2); |
1027 | ||
1028 | /* See comment in the code for capturing groups above about handling | /* See comment in the code for capturing groups above about handling |
1029 | THEN. */ | THEN. */ |
1030 | ||
# | Line 1028 for (;;) | Line 1040 for (;;) |
1040 | { | { |
1041 | if (rrc == MATCH_ONCE) | if (rrc == MATCH_ONCE) |
1042 | { | { |
1043 | const uschar *scode = ecode; | const pcre_uchar *scode = ecode; |
1044 | if (*scode != OP_ONCE) /* If not at start, find it */ | if (*scode != OP_ONCE) /* If not at start, find it */ |
1045 | { | { |
1046 | while (*scode == OP_ALT) scode += GET(scode, 1); | while (*scode == OP_ALT) scode += GET(scode, 1); |
# | Line 1039 for (;;) | Line 1051 for (;;) |
1051 | RRETURN(rrc); | RRETURN(rrc); |
1052 | } | } |
1053 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
1054 | md->mark = save_mark; | |
1055 | if (*ecode != OP_ALT) break; | if (*ecode != OP_ALT) break; |
1056 | } | } |
1057 | ||
if (md->mark == NULL) md->mark = markptr; | ||
1058 | RRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1059 | ||
1060 | /* Handle possessive capturing brackets with an unlimited repeat. We come | /* Handle possessive capturing brackets with an unlimited repeat. We come |
# | Line 1071 for (;;) | Line 1083 for (;;) |
1083 | if (offset < md->offset_max) | if (offset < md->offset_max) |
1084 | { | { |
1085 | matched_once = FALSE; | matched_once = FALSE; |
1086 | code_offset = ecode - md->start_code; | code_offset = (int)(ecode - md->start_code); |
1087 | ||
1088 | save_offset1 = md->offset_vector[offset]; | save_offset1 = md->offset_vector[offset]; |
1089 | save_offset2 = md->offset_vector[offset+1]; | save_offset2 = md->offset_vector[offset+1]; |
# | Line 1094 for (;;) | Line 1106 for (;;) |
1106 | md->offset_vector[md->offset_end - number] = | md->offset_vector[md->offset_end - number] = |
1107 | (int)(eptr - md->start_subject); | (int)(eptr - md->start_subject); |
1108 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
1109 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
1110 | eptrb, RM63); | eptrb, RM63); |
1111 | if (rrc == MATCH_KETRPOS) | if (rrc == MATCH_KETRPOS) |
1112 | { | { |
# | Line 1130 for (;;) | Line 1142 for (;;) |
1142 | md->offset_vector[md->offset_end - number] = save_offset3; | md->offset_vector[md->offset_end - number] = save_offset3; |
1143 | } | } |
1144 | ||
if (md->mark == NULL) md->mark = markptr; | ||
1145 | if (allow_zero || matched_once) | if (allow_zero || matched_once) |
1146 | { | { |
1147 | ecode += 1 + LINK_SIZE; | ecode += 1 + LINK_SIZE; |
# | Line 1162 for (;;) | Line 1173 for (;;) |
1173 | ||
1174 | POSSESSIVE_NON_CAPTURE: | POSSESSIVE_NON_CAPTURE: |
1175 | matched_once = FALSE; | matched_once = FALSE; |
1176 | code_offset = ecode - md->start_code; | code_offset = (int)(ecode - md->start_code); |
1177 | ||
1178 | for (;;) | for (;;) |
1179 | { | { |
1180 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
1181 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
1182 | eptrb, RM48); | eptrb, RM48); |
1183 | if (rrc == MATCH_KETRPOS) | if (rrc == MATCH_KETRPOS) |
1184 | { | { |
# | Line 1217 for (;;) | Line 1228 for (;;) |
1228 | ||
1229 | if (ecode[LINK_SIZE+1] == OP_CALLOUT) | if (ecode[LINK_SIZE+1] == OP_CALLOUT) |
1230 | { | { |
1231 | if (pcre_callout != NULL) | if (PUBL(callout) != NULL) |
1232 | { | { |
1233 | pcre_callout_block cb; | PUBL(callout_block) cb; |
1234 | cb.version = 2; /* Version 2 of the callout block */ | cb.version = 2; /* Version 2 of the callout block */ |
1235 | cb.callout_number = ecode[LINK_SIZE+2]; | cb.callout_number = ecode[LINK_SIZE+2]; |
1236 | cb.offset_vector = md->offset_vector; | cb.offset_vector = md->offset_vector; |
1237 | #ifdef COMPILE_PCRE8 | |
1238 | cb.subject = (PCRE_SPTR)md->start_subject; | cb.subject = (PCRE_SPTR)md->start_subject; |
1239 | #else | |
1240 | cb.subject = (PCRE_SPTR16)md->start_subject; | |
1241 | #endif | |
1242 | cb.subject_length = (int)(md->end_subject - md->start_subject); | cb.subject_length = (int)(md->end_subject - md->start_subject); |
1243 | cb.start_match = (int)(mstart - md->start_subject); | cb.start_match = (int)(mstart - md->start_subject); |
1244 | cb.current_position = (int)(eptr - md->start_subject); | cb.current_position = (int)(eptr - md->start_subject); |
# | Line 1232 for (;;) | Line 1247 for (;;) |
1247 | cb.capture_top = offset_top/2; | cb.capture_top = offset_top/2; |
1248 | cb.capture_last = md->capture_last; | cb.capture_last = md->capture_last; |
1249 | cb.callout_data = md->callout_data; | cb.callout_data = md->callout_data; |
1250 | cb.mark = markptr; | cb.mark = md->nomatch_mark; |
1251 | if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); | if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1252 | if (rrc < 0) RRETURN(rrc); | if (rrc < 0) RRETURN(rrc); |
1253 | } | } |
1254 | ecode += _pcre_OP_lengths[OP_CALLOUT]; | ecode += PRIV(OP_lengths)[OP_CALLOUT]; |
1255 | } | } |
1256 | ||
1257 | condcode = ecode[LINK_SIZE+1]; | condcode = ecode[LINK_SIZE+1]; |
# | Line 1262 for (;;) | Line 1277 for (;;) |
1277 | ||
1278 | if (!condition && condcode == OP_NRREF) | if (!condition && condcode == OP_NRREF) |
1279 | { | { |
1280 | uschar *slotA = md->name_table; | pcre_uchar *slotA = md->name_table; |
1281 | for (i = 0; i < md->name_count; i++) | for (i = 0; i < md->name_count; i++) |
1282 | { | { |
1283 | if (GET2(slotA, 0) == recno) break; | if (GET2(slotA, 0) == recno) break; |
# | Line 1275 for (;;) | Line 1290 for (;;) |
1290 | ||
1291 | if (i < md->name_count) | if (i < md->name_count) |
1292 | { | { |
1293 | uschar *slotB = slotA; | pcre_uchar *slotB = slotA; |
1294 | while (slotB > md->name_table) | while (slotB > md->name_table) |
1295 | { | { |
1296 | slotB -= md->name_entry_size; | slotB -= md->name_entry_size; |
1297 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1298 | { | { |
1299 | condition = GET2(slotB, 0) == md->recursive->group_num; | condition = GET2(slotB, 0) == md->recursive->group_num; |
1300 | if (condition) break; | if (condition) break; |
# | Line 1295 for (;;) | Line 1310 for (;;) |
1310 | for (i++; i < md->name_count; i++) | for (i++; i < md->name_count; i++) |
1311 | { | { |
1312 | slotB += md->name_entry_size; | slotB += md->name_entry_size; |
1313 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1314 | { | { |
1315 | condition = GET2(slotB, 0) == md->recursive->group_num; | condition = GET2(slotB, 0) == md->recursive->group_num; |
1316 | if (condition) break; | if (condition) break; |
# | Line 1308 for (;;) | Line 1323 for (;;) |
1323 | ||
1324 | /* Choose branch according to the condition */ | /* Choose branch according to the condition */ |
1325 | ||
1326 | ecode += condition? 3 : GET(ecode, 1); | ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); |
1327 | } | } |
1328 | } | } |
1329 | ||
# | Line 1325 for (;;) | Line 1340 for (;;) |
1340 | if (!condition && condcode == OP_NCREF) | if (!condition && condcode == OP_NCREF) |
1341 | { | { |
1342 | int refno = offset >> 1; | int refno = offset >> 1; |
1343 | uschar *slotA = md->name_table; | pcre_uchar *slotA = md->name_table; |
1344 | ||
1345 | for (i = 0; i < md->name_count; i++) | for (i = 0; i < md->name_count; i++) |
1346 | { | { |
# | Line 1339 for (;;) | Line 1354 for (;;) |
1354 | ||
1355 | if (i < md->name_count) | if (i < md->name_count) |
1356 | { | { |
1357 | uschar *slotB = slotA; | pcre_uchar *slotB = slotA; |
1358 | while (slotB > md->name_table) | while (slotB > md->name_table) |
1359 | { | { |
1360 | slotB -= md->name_entry_size; | slotB -= md->name_entry_size; |
1361 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1362 | { | { |
1363 | offset = GET2(slotB, 0) << 1; | offset = GET2(slotB, 0) << 1; |
1364 | condition = offset < offset_top && | condition = offset < offset_top && |
# | Line 1361 for (;;) | Line 1376 for (;;) |
1376 | for (i++; i < md->name_count; i++) | for (i++; i < md->name_count; i++) |
1377 | { | { |
1378 | slotB += md->name_entry_size; | slotB += md->name_entry_size; |
1379 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1380 | { | { |
1381 | offset = GET2(slotB, 0) << 1; | offset = GET2(slotB, 0) << 1; |
1382 | condition = offset < offset_top && | condition = offset < offset_top && |
# | Line 1376 for (;;) | Line 1391 for (;;) |
1391 | ||
1392 | /* Choose branch according to the condition */ | /* Choose branch according to the condition */ |
1393 | ||
1394 | ecode += condition? 3 : GET(ecode, 1); | ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); |
1395 | } | } |
1396 | ||
1397 | else if (condcode == OP_DEF) /* DEFINE - always false */ | else if (condcode == OP_DEF) /* DEFINE - always false */ |
# | Line 1468 for (;;) | Line 1483 for (;;) |
1483 | md->offset_vector[offset+1] = (int)(eptr - md->start_subject); | md->offset_vector[offset+1] = (int)(eptr - md->start_subject); |
1484 | if (offset_top <= offset) offset_top = offset + 2; | if (offset_top <= offset) offset_top = offset + 2; |
1485 | } | } |
1486 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
1487 | break; | break; |
1488 | ||
1489 | ||
# | Line 1488 for (;;) | Line 1503 for (;;) |
1503 | (md->notempty || | (md->notempty || |
1504 | (md->notempty_atstart && | (md->notempty_atstart && |
1505 | mstart == md->start_subject + md->start_offset))) | mstart == md->start_subject + md->start_offset))) |
1506 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1507 | ||
1508 | /* Otherwise, we have a match. */ | /* Otherwise, we have a match. */ |
1509 | ||
# | Line 1497 for (;;) | Line 1512 for (;;) |
1512 | md->start_match_ptr = mstart; /* and the start (\K can modify) */ | md->start_match_ptr = mstart; /* and the start (\K can modify) */ |
1513 | ||
1514 | /* For some reason, the macros don't work properly if an expression is | /* For some reason, the macros don't work properly if an expression is |
1515 | given as the argument to MRRETURN when the heap is in use. */ | given as the argument to RRETURN when the heap is in use. */ |
1516 | ||
1517 | rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; | rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; |
1518 | MRRETURN(rrc); | RRETURN(rrc); |
1519 | ||
1520 | /* Assertion brackets. Check the alternative branches in turn - the | /* Assertion brackets. Check the alternative branches in turn - the |
1521 | matching won't pass the KET for an assertion. If any one branch matches, | matching won't pass the KET for an assertion. If any one branch matches, |
# | Line 1515 for (;;) | Line 1530 for (;;) |
1530 | ||
1531 | case OP_ASSERT: | case OP_ASSERT: |
1532 | case OP_ASSERTBACK: | case OP_ASSERTBACK: |
1533 | save_mark = md->mark; | |
1534 | if (md->match_function_type == MATCH_CONDASSERT) | if (md->match_function_type == MATCH_CONDASSERT) |
1535 | { | { |
1536 | condassert = TRUE; | condassert = TRUE; |
# | Line 1528 for (;;) | Line 1544 for (;;) |
1544 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
1545 | { | { |
1546 | mstart = md->start_match_ptr; /* In case \K reset it */ | mstart = md->start_match_ptr; /* In case \K reset it */ |
markptr = md->mark; | ||
1547 | break; | break; |
1548 | } | } |
1549 | ||
# | Line 1537 for (;;) | Line 1552 for (;;) |
1552 | ||
1553 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
1554 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
1555 | md->mark = save_mark; | |
1556 | } | } |
1557 | while (*ecode == OP_ALT); | while (*ecode == OP_ALT); |
1558 | ||
1559 | if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH); | if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); |
1560 | ||
1561 | /* If checking an assertion for a condition, return MATCH_MATCH. */ | /* If checking an assertion for a condition, return MATCH_MATCH. */ |
1562 | ||
# | Line 1560 for (;;) | Line 1576 for (;;) |
1576 | ||
1577 | case OP_ASSERT_NOT: | case OP_ASSERT_NOT: |
1578 | case OP_ASSERTBACK_NOT: | case OP_ASSERTBACK_NOT: |
1579 | save_mark = md->mark; | |
1580 | if (md->match_function_type == MATCH_CONDASSERT) | if (md->match_function_type == MATCH_CONDASSERT) |
1581 | { | { |
1582 | condassert = TRUE; | condassert = TRUE; |
# | Line 1570 for (;;) | Line 1587 for (;;) |
1587 | do | do |
1588 | { | { |
1589 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); |
1590 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH); | md->mark = save_mark; |
1591 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH); | |
1592 | if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) | if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) |
1593 | { | { |
1594 | do ecode += GET(ecode,1); while (*ecode == OP_ALT); | do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
# | Line 1596 for (;;) | Line 1614 for (;;) |
1614 | back a number of characters, not bytes. */ | back a number of characters, not bytes. */ |
1615 | ||
1616 | case OP_REVERSE: | case OP_REVERSE: |
1617 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
1618 | if (utf8) | if (utf) |
1619 | { | { |
1620 | i = GET(ecode, 1); | i = GET(ecode, 1); |
1621 | while (i-- > 0) | while (i-- > 0) |
1622 | { | { |
1623 | eptr--; | eptr--; |
1624 | if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
1625 | BACKCHAR(eptr); | BACKCHAR(eptr); |
1626 | } | } |
1627 | } | } |
# | Line 1614 for (;;) | Line 1632 for (;;) |
1632 | ||
1633 | { | { |
1634 | eptr -= GET(ecode, 1); | eptr -= GET(ecode, 1); |
1635 | if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
1636 | } | } |
1637 | ||
1638 | /* Save the earliest consulted character, then skip to next op code */ | /* Save the earliest consulted character, then skip to next op code */ |
# | Line 1628 for (;;) | Line 1646 for (;;) |
1646 | function is able to force a failure. */ | function is able to force a failure. */ |
1647 | ||
1648 | case OP_CALLOUT: | case OP_CALLOUT: |
1649 | if (pcre_callout != NULL) | if (PUBL(callout) != NULL) |
1650 | { | { |
1651 | pcre_callout_block cb; | PUBL(callout_block) cb; |
1652 | cb.version = 2; /* Version 2 of the callout block */ | cb.version = 2; /* Version 2 of the callout block */ |
1653 | cb.callout_number = ecode[1]; | cb.callout_number = ecode[1]; |
1654 | cb.offset_vector = md->offset_vector; | cb.offset_vector = md->offset_vector; |
1655 | #ifdef COMPILE_PCRE8 | |
1656 | cb.subject = (PCRE_SPTR)md->start_subject; | cb.subject = (PCRE_SPTR)md->start_subject; |
1657 | #else | |
1658 | cb.subject = (PCRE_SPTR16)md->start_subject; | |
1659 | #endif | |
1660 | cb.subject_length = (int)(md->end_subject - md->start_subject); | cb.subject_length = (int)(md->end_subject - md->start_subject); |
1661 | cb.start_match = (int)(mstart - md->start_subject); | cb.start_match = (int)(mstart - md->start_subject); |
1662 | cb.current_position = (int)(eptr - md->start_subject); | cb.current_position = (int)(eptr - md->start_subject); |
# | Line 1643 for (;;) | Line 1665 for (;;) |
1665 | cb.capture_top = offset_top/2; | cb.capture_top = offset_top/2; |
1666 | cb.capture_last = md->capture_last; | cb.capture_last = md->capture_last; |
1667 | cb.callout_data = md->callout_data; | cb.callout_data = md->callout_data; |
1668 | cb.mark = markptr; | cb.mark = md->nomatch_mark; |
1669 | if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); | if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1670 | if (rrc < 0) RRETURN(rrc); | if (rrc < 0) RRETURN(rrc); |
1671 | } | } |
1672 | ecode += 2 + 2*LINK_SIZE; | ecode += 2 + 2*LINK_SIZE; |
# | Line 1703 for (;;) | Line 1725 for (;;) |
1725 | else | else |
1726 | { | { |
1727 | new_recursive.offset_save = | new_recursive.offset_save = |
1728 | (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); | (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int)); |
1729 | if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); | if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
1730 | } | } |
1731 | memcpy(new_recursive.offset_save, md->offset_vector, | memcpy(new_recursive.offset_save, md->offset_vector, |
# | Line 1718 for (;;) | Line 1740 for (;;) |
1740 | do | do |
1741 | { | { |
1742 | if (cbegroup) md->match_function_type = MATCH_CBEGROUP; | if (cbegroup) md->match_function_type = MATCH_CBEGROUP; |
1743 | RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top, | RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, |
1744 | md, eptrb, RM6); | md, eptrb, RM6); |
1745 | memcpy(md->offset_vector, new_recursive.offset_save, | memcpy(md->offset_vector, new_recursive.offset_save, |
1746 | new_recursive.saved_max * sizeof(int)); | new_recursive.saved_max * sizeof(int)); |
# | Line 1727 for (;;) | Line 1749 for (;;) |
1749 | { | { |
1750 | DPRINTF(("Recursion matched\n")); | DPRINTF(("Recursion matched\n")); |
1751 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1752 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1753 | ||
1754 | /* Set where we got to in the subject, and reset the start in case | /* Set where we got to in the subject, and reset the start in case |
1755 | it was changed by \K. This *is* propagated back out of a recursion, | it was changed by \K. This *is* propagated back out of a recursion, |
# | Line 1745 for (;;) | Line 1767 for (;;) |
1767 | { | { |
1768 | DPRINTF(("Recursion gave error %d\n", rrc)); | DPRINTF(("Recursion gave error %d\n", rrc)); |
1769 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1770 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1771 | RRETURN(rrc); | RRETURN(rrc); |
1772 | } | } |
1773 | ||
# | Line 1757 for (;;) | Line 1779 for (;;) |
1779 | DPRINTF(("Recursion didn't match\n")); | DPRINTF(("Recursion didn't match\n")); |
1780 | md->recursive = new_recursive.prevrec; | md->recursive = new_recursive.prevrec; |
1781 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1782 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1783 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1784 | } | } |
1785 | ||
1786 | RECURSION_MATCHED: | RECURSION_MATCHED: |
# | Line 1838 for (;;) | Line 1860 for (;;) |
1860 | md->end_match_ptr = eptr; /* For ONCE_NC */ | md->end_match_ptr = eptr; /* For ONCE_NC */ |
1861 | md->end_offset_top = offset_top; | md->end_offset_top = offset_top; |
1862 | md->start_match_ptr = mstart; | md->start_match_ptr = mstart; |
1863 | MRRETURN(MATCH_MATCH); /* Sets md->mark */ | RRETURN(MATCH_MATCH); /* Sets md->mark */ |
1864 | } | } |
1865 | ||
1866 | /* For capturing groups we have to check the group number back at the start | /* For capturing groups we have to check the group number back at the start |
# | Line 1980 for (;;) | Line 2002 for (;;) |
2002 | /* Not multiline mode: start of subject assertion, unless notbol. */ | /* Not multiline mode: start of subject assertion, unless notbol. */ |
2003 | ||
2004 | case OP_CIRC: | case OP_CIRC: |
2005 | if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH); | if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); |
2006 | ||
2007 | /* Start of subject assertion */ | /* Start of subject assertion */ |
2008 | ||
2009 | case OP_SOD: | case OP_SOD: |
2010 | if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); |
2011 | ecode++; | ecode++; |
2012 | break; | break; |
2013 | ||
2014 | /* Multiline mode: start of subject unless notbol, or after any newline. */ | /* Multiline mode: start of subject unless notbol, or after any newline. */ |
2015 | ||
2016 | case OP_CIRCM: | case OP_CIRCM: |
2017 | if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH); | if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); |
2018 | if (eptr != md->start_subject && | if (eptr != md->start_subject && |
2019 | (eptr == md->end_subject || !WAS_NEWLINE(eptr))) | (eptr == md->end_subject || !WAS_NEWLINE(eptr))) |
2020 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2021 | ecode++; | ecode++; |
2022 | break; | break; |
2023 | ||
2024 | /* Start of match assertion */ | /* Start of match assertion */ |
2025 | ||
2026 | case OP_SOM: | case OP_SOM: |
2027 | if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH); | if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); |
2028 | ecode++; | ecode++; |
2029 | break; | break; |
2030 | ||
# | Line 2018 for (;;) | Line 2040 for (;;) |
2040 | ||
2041 | case OP_DOLLM: | case OP_DOLLM: |
2042 | if (eptr < md->end_subject) | if (eptr < md->end_subject) |
2043 | { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); } | { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); } |
2044 | else | else |
2045 | { | { |
2046 | if (md->noteol) MRRETURN(MATCH_NOMATCH); | if (md->noteol) RRETURN(MATCH_NOMATCH); |
2047 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2048 | } | } |
2049 | ecode++; | ecode++; |
# | Line 2031 for (;;) | Line 2053 for (;;) |
2053 | subject unless noteol is set. */ | subject unless noteol is set. */ |
2054 | ||
2055 | case OP_DOLL: | case OP_DOLL: |
2056 | if (md->noteol) MRRETURN(MATCH_NOMATCH); | if (md->noteol) RRETURN(MATCH_NOMATCH); |
2057 | if (!md->endonly) goto ASSERT_NL_OR_EOS; | if (!md->endonly) goto ASSERT_NL_OR_EOS; |
2058 | ||
2059 | /* ... else fall through for endonly */ | /* ... else fall through for endonly */ |
# | Line 2039 for (;;) | Line 2061 for (;;) |
2061 | /* End of subject assertion (\z) */ | /* End of subject assertion (\z) */ |
2062 | ||
2063 | case OP_EOD: | case OP_EOD: |
2064 | if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); |
2065 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2066 | ecode++; | ecode++; |
2067 | break; | break; |
# | Line 2050 for (;;) | Line 2072 for (;;) |
2072 | ASSERT_NL_OR_EOS: | ASSERT_NL_OR_EOS: |
2073 | if (eptr < md->end_subject && | if (eptr < md->end_subject && |
2074 | (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) | (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) |
2075 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2076 | ||
2077 | /* Either at end of string or \n before end. */ | /* Either at end of string or \n before end. */ |
2078 | ||
# | Line 2069 for (;;) | Line 2091 for (;;) |
2091 | be "non-word" characters. Remember the earliest consulted character for | be "non-word" characters. Remember the earliest consulted character for |
2092 | partial matching. */ | partial matching. */ |
2093 | ||
2094 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2095 | if (utf8) | if (utf) |
2096 | { | { |
2097 | /* Get status of previous character */ | /* Get status of previous character */ |
2098 | ||
2099 | if (eptr == md->start_subject) prev_is_word = FALSE; else | if (eptr == md->start_subject) prev_is_word = FALSE; else |
2100 | { | { |
2101 | USPTR lastptr = eptr - 1; | PCRE_PUCHAR lastptr = eptr - 1; |
2102 | while((*lastptr & 0xc0) == 0x80) lastptr--; | BACKCHAR(lastptr); |
2103 | if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; | if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; |
2104 | GETCHAR(c, lastptr); | GETCHAR(c, lastptr); |
2105 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
# | Line 2142 for (;;) | Line 2164 for (;;) |
2164 | } | } |
2165 | else | else |
2166 | #endif | #endif |
2167 | prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); | prev_is_word = MAX_255(eptr[-1]) |
2168 | && ((md->ctypes[eptr[-1]] & ctype_word) != 0); | |
2169 | } | } |
2170 | ||
2171 | /* Get status of next character */ | /* Get status of next character */ |
# | Line 2165 for (;;) | Line 2188 for (;;) |
2188 | } | } |
2189 | else | else |
2190 | #endif | #endif |
2191 | cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); | cur_is_word = MAX_255(*eptr) |
2192 | && ((md->ctypes[*eptr] & ctype_word) != 0); | |
2193 | } | } |
2194 | ||
2195 | /* Now see if the situation is what we want */ | /* Now see if the situation is what we want */ |
2196 | ||
2197 | if ((*ecode++ == OP_WORD_BOUNDARY)? | if ((*ecode++ == OP_WORD_BOUNDARY)? |
2198 | cur_is_word == prev_is_word : cur_is_word != prev_is_word) | cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
2199 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2200 | } | } |
2201 | break; | break; |
2202 | ||
2203 | /* Match a single character type; inline for speed */ | /* Match a single character type; inline for speed */ |
2204 | ||
2205 | case OP_ANY: | case OP_ANY: |
2206 | if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
2207 | /* Fall through */ | /* Fall through */ |
2208 | ||
2209 | case OP_ALLANY: | case OP_ALLANY: |
2210 | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
2211 | { /* not be updated before SCHECK_PARTIAL. */ | { /* not be updated before SCHECK_PARTIAL. */ |
2212 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2213 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2214 | } | } |
2215 | eptr++; | eptr++; |
2216 | if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | #ifdef SUPPORT_UTF |
2217 | if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
2218 | #endif | |
2219 | ecode++; | ecode++; |
2220 | break; | break; |
2221 | ||
# | Line 2200 for (;;) | Line 2226 for (;;) |
2226 | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
2227 | { /* not be updated before SCHECK_PARTIAL. */ | { /* not be updated before SCHECK_PARTIAL. */ |
2228 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2229 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2230 | } | } |
2231 | eptr++; | eptr++; |
2232 | ecode++; | ecode++; |
# | Line 2210 for (;;) | Line 2236 for (;;) |
2236 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2237 | { | { |
2238 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2239 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2240 | } | } |
2241 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2242 | if ( | if ( |
2243 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2244 | c < 256 && | c < 256 && |
2245 | #endif | #endif |
2246 | (md->ctypes[c] & ctype_digit) != 0 | (md->ctypes[c] & ctype_digit) != 0 |
2247 | ) | ) |
2248 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2249 | ecode++; | ecode++; |
2250 | break; | break; |
2251 | ||
# | Line 2227 for (;;) | Line 2253 for (;;) |
2253 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2254 | { | { |
2255 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2256 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2257 | } | } |
2258 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2259 | if ( | if ( |
2260 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2261 | c >= 256 || | c > 255 || |
2262 | #endif | #endif |
2263 | (md->ctypes[c] & ctype_digit) == 0 | (md->ctypes[c] & ctype_digit) == 0 |
2264 | ) | ) |
2265 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2266 | ecode++; | ecode++; |
2267 | break; | break; |
2268 | ||
# | Line 2244 for (;;) | Line 2270 for (;;) |
2270 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2271 | { | { |
2272 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2273 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2274 | } | } |
2275 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2276 | if ( | if ( |
2277 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2278 | c < 256 && | c < 256 && |
2279 | #endif | #endif |
2280 | (md->ctypes[c] & ctype_space) != 0 | (md->ctypes[c] & ctype_space) != 0 |
2281 | ) | ) |
2282 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2283 | ecode++; | ecode++; |
2284 | break; | break; |
2285 | ||
# | Line 2261 for (;;) | Line 2287 for (;;) |
2287 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2288 | { | { |
2289 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2290 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2291 | } | } |
2292 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2293 | if ( | if ( |
2294 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2295 | c >= 256 || | c > 255 || |
2296 | #endif | #endif |
2297 | (md->ctypes[c] & ctype_space) == 0 | (md->ctypes[c] & ctype_space) == 0 |
2298 | ) | ) |
2299 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2300 | ecode++; | ecode++; |
2301 | break; | break; |
2302 | ||
# | Line 2278 for (;;) | Line 2304 for (;;) |
2304 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2305 | { | { |
2306 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2307 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2308 | } | } |
2309 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2310 | if ( | if ( |
2311 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2312 | c < 256 && | c < 256 && |
2313 | #endif | #endif |
2314 | (md->ctypes[c] & ctype_word) != 0 | (md->ctypes[c] & ctype_word) != 0 |
2315 | ) | ) |
2316 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2317 | ecode++; | ecode++; |
2318 | break; | break; |
2319 | ||
# | Line 2295 for (;;) | Line 2321 for (;;) |
2321 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2322 | { | { |
2323 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2324 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2325 | } | } |
2326 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2327 | if ( | if ( |
2328 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2329 | c >= 256 || | c > 255 || |
2330 | #endif | #endif |
2331 | (md->ctypes[c] & ctype_word) == 0 | (md->ctypes[c] & ctype_word) == 0 |
2332 | ) | ) |
2333 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2334 | ecode++; | ecode++; |
2335 | break; | break; |
2336 | ||
# | Line 2312 for (;;) | Line 2338 for (;;) |
2338 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2339 | { | { |
2340 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2341 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2342 | } | } |
2343 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2344 | switch(c) | switch(c) |
2345 | { | { |
2346 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
2347 | ||
2348 | case 0x000d: | case 0x000d: |
2349 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
# | Line 2331 for (;;) | Line 2357 for (;;) |
2357 | case 0x0085: | case 0x0085: |
2358 | case 0x2028: | case 0x2028: |
2359 | case 0x2029: | case 0x2029: |
2360 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
2361 | break; | break; |
2362 | } | } |
2363 | ecode++; | ecode++; |
# | Line 2341 for (;;) | Line 2367 for (;;) |
2367 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2368 | { | { |
2369 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2370 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2371 | } | } |
2372 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2373 | switch(c) | switch(c) |
# | Line 2366 for (;;) | Line 2392 for (;;) |
2392 | case 0x202f: /* NARROW NO-BREAK SPACE */ | case 0x202f: /* NARROW NO-BREAK SPACE */ |
2393 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
2394 | case 0x3000: /* IDEOGRAPHIC SPACE */ | case 0x3000: /* IDEOGRAPHIC SPACE */ |
2395 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2396 | } | } |
2397 | ecode++; | ecode++; |
2398 | break; | break; |
# | Line 2375 for (;;) | Line 2401 for (;;) |
2401 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2402 | { | { |
2403 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2404 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2405 | } | } |
2406 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2407 | switch(c) | switch(c) |
2408 | { | { |
2409 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
2410 | case 0x09: /* HT */ | case 0x09: /* HT */ |
2411 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
2412 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
# | Line 2409 for (;;) | Line 2435 for (;;) |
2435 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2436 | { | { |
2437 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2438 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2439 | } | } |
2440 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2441 | switch(c) | switch(c) |
# | Line 2422 for (;;) | Line 2448 for (;;) |
2448 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
2449 | case 0x2028: /* LINE SEPARATOR */ | case 0x2028: /* LINE SEPARATOR */ |
2450 | case 0x2029: /* PARAGRAPH SEPARATOR */ | case 0x2029: /* PARAGRAPH SEPARATOR */ |
2451 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2452 | } | } |
2453 | ecode++; | ecode++; |
2454 | break; | break; |
# | Line 2431 for (;;) | Line 2457 for (;;) |
2457 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2458 | { | { |
2459 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2460 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2461 | } | } |
2462 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2463 | switch(c) | switch(c) |
2464 | { | { |
2465 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
2466 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
2467 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
2468 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
# | Line 2458 for (;;) | Line 2484 for (;;) |
2484 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2485 | { | { |
2486 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2487 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2488 | } | } |
2489 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2490 | { | { |
# | Line 2467 for (;;) | Line 2493 for (;;) |
2493 | switch(ecode[1]) | switch(ecode[1]) |
2494 | { | { |
2495 | case PT_ANY: | case PT_ANY: |
2496 | if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH); | if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH); |
2497 | break; | break; |
2498 | ||
2499 | case PT_LAMP: | case PT_LAMP: |
2500 | if ((prop->chartype == ucp_Lu || | if ((prop->chartype == ucp_Lu || |
2501 | prop->chartype == ucp_Ll || | prop->chartype == ucp_Ll || |
2502 | prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) | prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) |
2503 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2504 | break; | break; |
2505 | ||
2506 | case PT_GC: | case PT_GC: |
2507 | if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP)) | if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) |
2508 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2509 | break; | break; |
2510 | ||
2511 | case PT_PC: | case PT_PC: |
2512 | if ((ecode[2] != prop->chartype) == (op == OP_PROP)) | if ((ecode[2] != prop->chartype) == (op == OP_PROP)) |
2513 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2514 | break; | break; |
2515 | ||
2516 | case PT_SC: | case PT_SC: |
2517 | if ((ecode[2] != prop->script) == (op == OP_PROP)) | if ((ecode[2] != prop->script) == (op == OP_PROP)) |
2518 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2519 | break; | break; |
2520 | ||
2521 | /* These are specials */ | /* These are specials */ |
2522 | ||
2523 | case PT_ALNUM: | case PT_ALNUM: |
2524 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
2525 | _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) | PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) |
2526 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2527 | break; | break; |
2528 | ||
2529 | case PT_SPACE: /* Perl space */ | case PT_SPACE: /* Perl space */ |
2530 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
2531 | c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) | c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) |
2532 | == (op == OP_NOTPROP)) | == (op == OP_NOTPROP)) |
2533 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2534 | break; | break; |
2535 | ||
2536 | case PT_PXSPACE: /* POSIX space */ | case PT_PXSPACE: /* POSIX space */ |
2537 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
2538 | c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || | c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
2539 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
2540 | == (op == OP_NOTPROP)) | == (op == OP_NOTPROP)) |
2541 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2542 | break; | break; |
2543 | ||
2544 | case PT_WORD: | case PT_WORD: |
2545 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
2546 | _pcre_ucp_gentype[prop->chartype] == ucp_N || | PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
2547 | c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) | c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) |
2548 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2549 | break; | break; |
2550 | ||
2551 | /* This should never occur */ | /* This should never occur */ |
# | Line 2539 for (;;) | Line 2565 for (;;) |
2565 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2566 | { | { |
2567 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2568 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2569 | } | } |
2570 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2571 | if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); | if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
2572 | while (eptr < md->end_subject) | while (eptr < md->end_subject) |
2573 | { | { |
2574 | int len = 1; | int len = 1; |
2575 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
2576 | if (UCD_CATEGORY(c) != ucp_M) break; | if (UCD_CATEGORY(c) != ucp_M) break; |
2577 | eptr += len; | eptr += len; |
2578 | } | } |
# | Line 2567 for (;;) | Line 2593 for (;;) |
2593 | case OP_REFI: | case OP_REFI: |
2594 | caseless = op == OP_REFI; | caseless = op == OP_REFI; |
2595 | offset = GET2(ecode, 1) << 1; /* Doubled ref number */ | offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
2596 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
2597 | ||
2598 | /* If the reference is unset, there are two possibilities: | /* If the reference is unset, there are two possibilities: |
2599 | ||
# | Line 2607 for (;;) | Line 2633 for (;;) |
2633 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
2634 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
2635 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
2636 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
2637 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
2638 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
2639 | break; | break; |
2640 | ||
2641 | default: /* No repeat follows */ | default: /* No repeat follows */ |
2642 | if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) |
2643 | { | { |
2644 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2645 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2646 | } | } |
2647 | eptr += length; | eptr += length; |
2648 | continue; /* With the main loop */ | continue; /* With the main loop */ |
2649 | } | } |
2650 | ||
2651 | /* Handle repeated back references. If the length of the reference is | /* Handle repeated back references. If the length of the reference is |
2652 | zero, just continue with the main loop. */ | zero, just continue with the main loop. If the length is negative, it |
2653 | means the reference is unset in non-Java-compatible mode. If the minimum is | |
2654 | zero, we can continue at the same level without recursion. For any other | |
2655 | minimum, carrying on will result in NOMATCH. */ | |
2656 | ||
2657 | if (length == 0) continue; | if (length == 0) continue; |
2658 | if (length < 0 && min == 0) continue; | |
2659 | ||
2660 | /* First, ensure the minimum number of matches are present. We get back | /* First, ensure the minimum number of matches are present. We get back |
2661 | the length of the reference string explicitly rather than passing the | the length of the reference string explicitly rather than passing the |
# | Line 2637 for (;;) | Line 2667 for (;;) |
2667 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2668 | { | { |
2669 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2670 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2671 | } | } |
2672 | eptr += slength; | eptr += slength; |
2673 | } | } |
# | Line 2656 for (;;) | Line 2686 for (;;) |
2686 | int slength; | int slength; |
2687 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); |
2688 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2689 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2690 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2691 | { | { |
2692 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2693 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2694 | } | } |
2695 | eptr += slength; | eptr += slength; |
2696 | } | } |
# | Line 2688 for (;;) | Line 2718 for (;;) |
2718 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2719 | eptr -= length; | eptr -= length; |
2720 | } | } |
2721 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2722 | } | } |
2723 | /* Control never gets here */ | /* Control never gets here */ |
2724 | ||
# | Line 2706 for (;;) | Line 2736 for (;;) |
2736 | case OP_NCLASS: | case OP_NCLASS: |
2737 | case OP_CLASS: | case OP_CLASS: |
2738 | { | { |
2739 | /* The data variable is saved across frames, so the byte map needs to | |
2740 | be stored there. */ | |
2741 | #define BYTE_MAP ((pcre_uint8 *)data) | |
2742 | data = ecode + 1; /* Save for matching */ | data = ecode + 1; /* Save for matching */ |
2743 | ecode += 33; /* Advance past the item */ | ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */ |
2744 | ||
2745 | switch (*ecode) | switch (*ecode) |
2746 | { | { |
# | Line 2728 for (;;) | Line 2761 for (;;) |
2761 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
2762 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
2763 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
2764 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
2765 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
2766 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
2767 | break; | break; |
2768 | ||
2769 | default: /* No repeat follows */ | default: /* No repeat follows */ |
# | Line 2740 for (;;) | Line 2773 for (;;) |
2773 | ||
2774 | /* First, ensure the minimum number of matches are present. */ | /* First, ensure the minimum number of matches are present. */ |
2775 | ||
2776 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2777 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2778 | { | { |
2779 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
2780 | { | { |
2781 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2782 | { | { |
2783 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2784 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2785 | } | } |
2786 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
2787 | if (c > 255) | if (c > 255) |
2788 | { | { |
2789 | if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH); | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
2790 | } | } |
2791 | else | else |
2792 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | ||
} | ||
2793 | } | } |
2794 | } | } |
2795 | else | else |
2796 | #endif | #endif |
2797 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2798 | { | { |
2799 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
2800 | { | { |
2801 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2802 | { | { |
2803 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2804 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2805 | } | } |
2806 | c = *eptr++; | c = *eptr++; |
2807 | if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | #ifndef COMPILE_PCRE8 |
2808 | if (c > 255) | |
2809 | { | |
2810 | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); | |
2811 | } | |
2812 | else | |
2813 | #endif | |
2814 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); | |
2815 | } | } |
2816 | } | } |
2817 | ||
# | Line 2788 for (;;) | Line 2825 for (;;) |
2825 | ||
2826 | if (minimize) | if (minimize) |
2827 | { | { |
2828 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2829 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2830 | { | { |
2831 | for (fi = min;; fi++) | for (fi = min;; fi++) |
2832 | { | { |
2833 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); |
2834 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2835 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2836 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2837 | { | { |
2838 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2839 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2840 | } | } |
2841 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
2842 | if (c > 255) | if (c > 255) |
2843 | { | { |
2844 | if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH); | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
2845 | } | } |
2846 | else | else |
2847 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | ||
} | ||
2848 | } | } |
2849 | } | } |
2850 | else | else |
2851 | #endif | #endif |
2852 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2853 | { | { |
2854 | for (fi = min;; fi++) | for (fi = min;; fi++) |
2855 | { | { |
2856 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); |
2857 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2858 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2859 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2860 | { | { |
2861 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2862 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2863 | } | } |
2864 | c = *eptr++; | c = *eptr++; |
2865 | if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | #ifndef COMPILE_PCRE8 |
2866 | if (c > 255) | |
2867 | { | |
2868 | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); | |
2869 | } | |
2870 | else | |
2871 | #endif | |
2872 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); | |
2873 | } | } |
2874 | } | } |
2875 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 2840 for (;;) | Line 2881 for (;;) |
2881 | { | { |
2882 | pp = eptr; | pp = eptr; |
2883 | ||
2884 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2885 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2886 | { | { |
2887 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
2888 | { | { |
# | Line 2858 for (;;) | Line 2898 for (;;) |
2898 | if (op == OP_CLASS) break; | if (op == OP_CLASS) break; |
2899 | } | } |
2900 | else | else |
2901 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; |
if ((data[c/8] & (1 << (c&7))) == 0) break; | ||
} | ||
2902 | eptr += len; | eptr += len; |
2903 | } | } |
2904 | for (;;) | for (;;) |
# | Line 2873 for (;;) | Line 2911 for (;;) |
2911 | } | } |
2912 | else | else |
2913 | #endif | #endif |
2914 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2915 | { | { |
2916 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
2917 | { | { |
# | Line 2883 for (;;) | Line 2921 for (;;) |
2921 | break; | break; |
2922 | } | } |
2923 | c = *eptr; | c = *eptr; |
2924 | if ((data[c/8] & (1 << (c&7))) == 0) break; | #ifndef COMPILE_PCRE8 |
2925 | if (c > 255) | |
2926 | { | |
2927 | if (op == OP_CLASS) break; | |
2928 | } | |
2929 | else | |
2930 | #endif | |
2931 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; | |
2932 | eptr++; | eptr++; |
2933 | } | } |
2934 | while (eptr >= pp) | while (eptr >= pp) |
# | Line 2894 for (;;) | Line 2939 for (;;) |
2939 | } | } |
2940 | } | } |
2941 | ||
2942 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2943 | } | } |
2944 | #undef BYTE_MAP | |
2945 | } | } |
2946 | /* Control never gets here */ | /* Control never gets here */ |
2947 | ||
# | Line 2904 for (;;) | Line 2950 for (;;) |
2950 | when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8 | when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8 |
2951 | mode, because Unicode properties are supported in non-UTF-8 mode. */ | mode, because Unicode properties are supported in non-UTF-8 mode. */ |
2952 | ||
2953 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
2954 | case OP_XCLASS: | case OP_XCLASS: |
2955 | { | { |
2956 | data = ecode + 1 + LINK_SIZE; /* Save for matching */ | data = ecode + 1 + LINK_SIZE; /* Save for matching */ |
# | Line 2929 for (;;) | Line 2975 for (;;) |
2975 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
2976 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
2977 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
2978 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
2979 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
2980 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
2981 | break; | break; |
2982 | ||
2983 | default: /* No repeat follows */ | default: /* No repeat follows */ |
# | Line 2946 for (;;) | Line 2992 for (;;) |
2992 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2993 | { | { |
2994 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2995 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2996 | } | } |
2997 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2998 | if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH); | if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); |
2999 | } | } |
3000 | ||
3001 | /* If max == min we can continue with the main loop without the | /* If max == min we can continue with the main loop without the |
# | Line 2966 for (;;) | Line 3012 for (;;) |
3012 | { | { |
3013 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); |
3014 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3015 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3016 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3017 | { | { |
3018 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3019 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3020 | } | } |
3021 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
3022 | if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH); | if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); |
3023 | } | } |
3024 | /* Control never gets here */ | /* Control never gets here */ |
3025 | } | } |
# | Line 2991 for (;;) | Line 3037 for (;;) |
3037 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3038 | break; | break; |
3039 | } | } |
3040 | #ifdef SUPPORT_UTF | |
3041 | GETCHARLENTEST(c, eptr, len); | GETCHARLENTEST(c, eptr, len); |
3042 | if (!_pcre_xclass(c, data)) break; | #else |
3043 | c = *eptr; | |
3044 | #endif | |
3045 | if (!PRIV(xclass)(c, data, utf)) break; | |
3046 | eptr += len; | eptr += len; |
3047 | } | } |
3048 | for(;;) | for(;;) |
# | Line 3000 for (;;) | Line 3050 for (;;) |
3050 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); |
3051 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3052 | if (eptr-- == pp) break; /* Stop if tried at original pos */ | if (eptr-- == pp) break; /* Stop if tried at original pos */ |
3053 | if (utf8) BACKCHAR(eptr); | #ifdef SUPPORT_UTF |
3054 | if (utf) BACKCHAR(eptr); | |
3055 | #endif | |
3056 | } | } |
3057 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3058 | } | } |
3059 | ||
3060 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3012 for (;;) | Line 3064 for (;;) |
3064 | /* Match a single character, casefully */ | /* Match a single character, casefully */ |
3065 | ||
3066 | case OP_CHAR: | case OP_CHAR: |
3067 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3068 | if (utf8) | if (utf) |
3069 | { | { |
3070 | length = 1; | length = 1; |
3071 | ecode++; | ecode++; |
# | Line 3021 for (;;) | Line 3073 for (;;) |
3073 | if (length > md->end_subject - eptr) | if (length > md->end_subject - eptr) |
3074 | { | { |
3075 | CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ | CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
3076 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3077 | } | } |
3078 | while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH); | while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); |
3079 | } | } |
3080 | else | else |
3081 | #endif | #endif |
3082 | /* Not UTF mode */ | |
/* Non-UTF-8 mode */ | ||
3083 | { | { |
3084 | if (md->end_subject - eptr < 1) | if (md->end_subject - eptr < 1) |
3085 | { | { |
3086 | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
3087 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3088 | } | } |
3089 | if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH); | if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); |
3090 | ecode += 2; | ecode += 2; |
3091 | } | } |
3092 | break; | break; |
3093 | ||
3094 | /* Match a single character, caselessly */ | /* Match a single character, caselessly. If we are at the end of the |
3095 | subject, give up immediately. */ | |
3096 | ||
3097 | case OP_CHARI: | case OP_CHARI: |
3098 | #ifdef SUPPORT_UTF8 | if (eptr >= md->end_subject) |
3099 | if (utf8) | { |
3100 | SCHECK_PARTIAL(); | |
3101 | RRETURN(MATCH_NOMATCH); | |
3102 | } | |
3103 | ||
3104 | #ifdef SUPPORT_UTF | |
3105 | if (utf) | |
3106 | { | { |
3107 | length = 1; | length = 1; |
3108 | ecode++; | ecode++; |
3109 | GETCHARLEN(fc, ecode, length); | GETCHARLEN(fc, ecode, length); |
3110 | ||
if (length > md->end_subject - eptr) | ||
{ | ||
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ | ||
MRRETURN(MATCH_NOMATCH); | ||
} | ||
3111 | /* If the pattern character's value is < 128, we have only one byte, and | /* If the pattern character's value is < 128, we have only one byte, and |
3112 | can use the fast lookup table. */ | we know that its other case must also be one byte long, so we can use the |
3113 | fast lookup table. We know that there is at least one byte left in the | |
3114 | subject. */ | |
3115 | ||
3116 | if (fc < 128) | if (fc < 128) |
3117 | { | { |
3118 | if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (md->lcc[fc] |
3119 | != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); | |
3120 | ecode++; | |
3121 | eptr++; | |
3122 | } | } |
3123 | ||
3124 | /* Otherwise we must pick up the subject character */ | /* Otherwise we must pick up the subject character. Note that we cannot |
3125 | use the value of "length" to check for sufficient bytes left, because the | |
3126 | other case of the character may have more or fewer bytes. */ | |
3127 | ||
3128 | else | else |
3129 | { | { |
# | Line 3080 for (;;) | Line 3139 for (;;) |
3139 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3140 | if (dc != UCD_OTHERCASE(fc)) | if (dc != UCD_OTHERCASE(fc)) |
3141 | #endif | #endif |
3142 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3143 | } | } |
3144 | } | } |
3145 | } | } |
3146 | else | else |
3147 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
3148 | ||
3149 | /* Non-UTF-8 mode */ | /* Not UTF mode */ |
3150 | { | { |
3151 | if (md->end_subject - eptr < 1) | if (TABLE_GET(ecode[1], md->lcc, ecode[1]) |
3152 | { | != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); |
3153 | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ | eptr++; |
MRRETURN(MATCH_NOMATCH); | ||
} | ||
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | ||
3154 | ecode += 2; | ecode += 2; |
3155 | } | } |
3156 | break; | break; |
# | Line 3104 for (;;) | Line 3160 for (;;) |
3160 | case OP_EXACT: | case OP_EXACT: |
3161 | case OP_EXACTI: | case OP_EXACTI: |
3162 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3163 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3164 | goto REPEATCHAR; | goto REPEATCHAR; |
3165 | ||
3166 | case OP_POSUPTO: | case OP_POSUPTO: |
# | Line 3119 for (;;) | Line 3175 for (;;) |
3175 | min = 0; | min = 0; |
3176 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3177 | minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; | minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; |
3178 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3179 | goto REPEATCHAR; | goto REPEATCHAR; |
3180 | ||
3181 | case OP_POSSTAR: | case OP_POSSTAR: |
# | Line 3167 for (;;) | Line 3223 for (;;) |
3223 | /* Common code for all repeated single-character matches. */ | /* Common code for all repeated single-character matches. */ |
3224 | ||
3225 | REPEATCHAR: | REPEATCHAR: |
3226 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3227 | if (utf8) | if (utf) |
3228 | { | { |
3229 | length = 1; | length = 1; |
3230 | charptr = ecode; | charptr = ecode; |
# | Line 3184 for (;;) | Line 3240 for (;;) |
3240 | unsigned int othercase; | unsigned int othercase; |
3241 | if (op >= OP_STARI && /* Caseless */ | if (op >= OP_STARI && /* Caseless */ |
3242 | (othercase = UCD_OTHERCASE(fc)) != fc) | (othercase = UCD_OTHERCASE(fc)) != fc) |
3243 | oclength = _pcre_ord2utf8(othercase, occhars); | oclength = PRIV(ord2utf)(othercase, occhars); |
3244 | else oclength = 0; | else oclength = 0; |
3245 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3246 | ||
3247 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3248 | { | { |
3249 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3250 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3251 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3252 | else if (oclength > 0 && | else if (oclength > 0 && |
3253 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3254 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3255 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3256 | else | else |
3257 | { | { |
3258 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
3259 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3260 | } | } |
3261 | } | } |
3262 | ||
# | Line 3212 for (;;) | Line 3268 for (;;) |
3268 | { | { |
3269 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); |
3270 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3271 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3272 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3273 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3274 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3275 | else if (oclength > 0 && | else if (oclength > 0 && |
3276 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3277 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3278 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3279 | else | else |
3280 | { | { |
3281 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
3282 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3283 | } | } |
3284 | } | } |
3285 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3235 for (;;) | Line 3291 for (;;) |
3291 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3292 | { | { |
3293 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3294 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3295 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3296 | else if (oclength > 0 && | else if (oclength > 0 && |
3297 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3298 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3299 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3300 | else | else |
3301 | { | { |
# | Line 3254 for (;;) | Line 3310 for (;;) |
3310 | { | { |
3311 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); |
3312 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3313 | if (eptr == pp) { MRRETURN(MATCH_NOMATCH); } | if (eptr == pp) { RRETURN(MATCH_NOMATCH); } |
3314 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3315 | eptr--; | eptr--; |
3316 | BACKCHAR(eptr); | BACKCHAR(eptr); |
# | Line 3271 for (;;) | Line 3327 for (;;) |
3327 | value of fc will always be < 128. */ | value of fc will always be < 128. */ |
3328 | } | } |
3329 | else | else |
3330 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
3331 | /* When not in UTF-8 mode, load a single-byte character. */ | |
3332 | /* When not in UTF-8 mode, load a single-byte character. */ | fc = *ecode++; |
fc = *ecode++; | ||
3333 | ||
3334 | /* The value of fc at this point is always less than 256, though we may or | /* The value of fc at this point is always one character, though we may |
3335 | may not be in UTF-8 mode. The code is duplicated for the caseless and | or may not be in UTF mode. The code is duplicated for the caseless and |
3336 | caseful cases, for speed, since matching characters is likely to be quite | caseful cases, for speed, since matching characters is likely to be quite |
3337 | common. First, ensure the minimum number of matches are present. If min = | common. First, ensure the minimum number of matches are present. If min = |
3338 | max, continue at the same level without recursing. Otherwise, if | max, continue at the same level without recursing. Otherwise, if |
# | Line 3291 for (;;) | Line 3345 for (;;) |
3345 | ||
3346 | if (op >= OP_STARI) /* Caseless */ | if (op >= OP_STARI) /* Caseless */ |
3347 | { | { |
3348 | fc = md->lcc[fc]; | #ifdef COMPILE_PCRE8 |
3349 | /* fc must be < 128 if UTF is enabled. */ | |
3350 | foc = md->fcc[fc]; | |
3351 | #else | |
3352 | #ifdef SUPPORT_UTF | |
3353 | #ifdef SUPPORT_UCP | |
3354 | if (utf && fc > 127) | |
3355 | foc = UCD_OTHERCASE(fc); | |
3356 | #else | |
3357 | if (utf && fc > 127) | |
3358 | foc = fc; | |
3359 | #endif /* SUPPORT_UCP */ | |
3360 | else | |
3361 | #endif /* SUPPORT_UTF */ | |
3362 | foc = TABLE_GET(fc, md->fcc, fc); | |
3363 | #endif /* COMPILE_PCRE8 */ | |
3364 | ||
3365 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3366 | { | { |
3367 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3368 | { | { |
3369 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3370 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3371 | } | } |
3372 | if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
3373 | eptr++; | |
3374 | } | } |
3375 | if (min == max) continue; | if (min == max) continue; |
3376 | if (minimize) | if (minimize) |
# | Line 3308 for (;;) | Line 3379 for (;;) |
3379 | { | { |
3380 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); |
3381 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3382 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3383 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3384 | { | { |
3385 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3386 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3387 | } | } |
3388 | if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
3389 | eptr++; | |
3390 | } | } |
3391 | /* Control never gets here */ | /* Control never gets here */ |
3392 | } | } |
# | Line 3328 for (;;) | Line 3400 for (;;) |
3400 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3401 | break; | break; |
3402 | } | } |
3403 | if (fc != md->lcc[*eptr]) break; | if (fc != *eptr && foc != *eptr) break; |
3404 | eptr++; | eptr++; |
3405 | } | } |
3406 | ||
# | Line 3340 for (;;) | Line 3412 for (;;) |
3412 | eptr--; | eptr--; |
3413 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3414 | } | } |
3415 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3416 | } | } |
3417 | /* Control never gets here */ | /* Control never gets here */ |
3418 | } | } |
# | Line 3354 for (;;) | Line 3426 for (;;) |
3426 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3427 | { | { |
3428 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3429 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3430 | } | } |
3431 | if (fc != *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
3432 | } | } |
3433 | ||
3434 | if (min == max) continue; | if (min == max) continue; |
# | Line 3367 for (;;) | Line 3439 for (;;) |
3439 | { | { |
3440 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); |
3441 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3442 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3443 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3444 | { | { |
3445 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3446 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3447 | } | } |
3448 | if (fc != *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
3449 | } | } |
3450 | /* Control never gets here */ | /* Control never gets here */ |
3451 | } | } |
# | Line 3398 for (;;) | Line 3470 for (;;) |
3470 | eptr--; | eptr--; |
3471 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3472 | } | } |
3473 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3474 | } | } |
3475 | } | } |
3476 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3411 for (;;) | Line 3483 for (;;) |
3483 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3484 | { | { |
3485 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3486 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3487 | } | } |
3488 | ecode++; | ecode++; |
3489 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
3490 | if (op == OP_NOTI) /* The caseless case */ | if (op == OP_NOTI) /* The caseless case */ |
3491 | { | { |
3492 | #ifdef SUPPORT_UTF8 | register int ch, och; |
3493 | if (c < 256) | ch = *ecode++; |
3494 | #endif | #ifdef COMPILE_PCRE8 |
3495 | c = md->lcc[c]; | /* ch must be < 128 if UTF is enabled. */ |
3496 | if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH); | och = md->fcc[ch]; |
3497 | #else | |
3498 | #ifdef SUPPORT_UTF | |
3499 | #ifdef SUPPORT_UCP | |
3500 | if (utf && ch > 127) | |
3501 | och = UCD_OTHERCASE(ch); | |
3502 | #else | |
3503 | if (utf && ch > 127) | |
3504 | och = ch; | |
3505 | #endif /* SUPPORT_UCP */ | |
3506 | else | |
3507 | #endif /* SUPPORT_UTF */ | |
3508 | och = TABLE_GET(ch, md->fcc, ch); | |
3509 | #endif /* COMPILE_PCRE8 */ | |
3510 | if (ch == c || och == c) RRETURN(MATCH_NOMATCH); | |
3511 | } | } |
3512 | else /* Caseful */ | else /* Caseful */ |
3513 | { | { |
3514 | if (*ecode++ == c) MRRETURN(MATCH_NOMATCH); | if (*ecode++ == c) RRETURN(MATCH_NOMATCH); |
3515 | } | } |
3516 | break; | break; |
3517 | ||
# | Line 3439 for (;;) | Line 3525 for (;;) |
3525 | case OP_NOTEXACT: | case OP_NOTEXACT: |
3526 | case OP_NOTEXACTI: | case OP_NOTEXACTI: |
3527 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3528 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3529 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3530 | ||
3531 | case OP_NOTUPTO: | case OP_NOTUPTO: |
# | Line 3449 for (;;) | Line 3535 for (;;) |
3535 | min = 0; | min = 0; |
3536 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3537 | minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; | minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; |
3538 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3539 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3540 | ||
3541 | case OP_NOTPOSSTAR: | case OP_NOTPOSSTAR: |
# | Line 3481 for (;;) | Line 3567 for (;;) |
3567 | possessive = TRUE; | possessive = TRUE; |
3568 | min = 0; | min = 0; |
3569 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3570 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3571 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3572 | ||
3573 | case OP_NOTSTAR: | case OP_NOTSTAR: |
# | Line 3520 for (;;) | Line 3606 for (;;) |
3606 | ||
3607 | if (op >= OP_NOTSTARI) /* Caseless */ | if (op >= OP_NOTSTARI) /* Caseless */ |
3608 | { | { |
3609 | fc = md->lcc[fc]; | #ifdef COMPILE_PCRE8 |
3610 | /* fc must be < 128 if UTF is enabled. */ | |
3611 | foc = md->fcc[fc]; | |
3612 | #else | |
3613 | #ifdef SUPPORT_UTF | |
3614 | #ifdef SUPPORT_UCP | |
3615 | if (utf && fc > 127) | |
3616 | foc = UCD_OTHERCASE(fc); | |
3617 | #else | |
3618 | if (utf && fc > 127) | |
3619 | foc = fc; | |
3620 | #endif /* SUPPORT_UCP */ | |
3621 | else | |
3622 | #endif /* SUPPORT_UTF */ | |
3623 | foc = TABLE_GET(fc, md->fcc, fc); | |
3624 | #endif /* COMPILE_PCRE8 */ | |
3625 | ||
3626 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3627 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3628 | { | { |
3629 | register unsigned int d; | register unsigned int d; |
3630 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 3532 for (;;) | Line 3632 for (;;) |
3632 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3633 | { | { |
3634 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3635 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3636 | } | } |
3637 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3638 | if (d < 256) d = md->lcc[d]; | if (fc == d || foc == d) RRETURN(MATCH_NOMATCH); |
if (fc == d) MRRETURN(MATCH_NOMATCH); | ||
3639 | } | } |
3640 | } | } |
3641 | else | else |
3642 | #endif | #endif |
3643 | /* Not UTF mode */ | |
/* Not UTF-8 mode */ | ||
3644 | { | { |
3645 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3646 | { | { |
3647 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3648 | { | { |
3649 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3650 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3651 | } | } |
3652 | if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); |
3653 | eptr++; | |
3654 | } | } |
3655 | } | } |
3656 | ||
# | Line 3559 for (;;) | Line 3658 for (;;) |
3658 | ||
3659 | if (minimize) | if (minimize) |
3660 | { | { |
3661 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3662 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3663 | { | { |
3664 | register unsigned int d; | register unsigned int d; |
3665 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3666 | { | { |
3667 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); |
3668 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3669 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3670 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3671 | { | { |
3672 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3673 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3674 | } | } |
3675 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3676 | if (d < 256) d = md->lcc[d]; | if (fc == d || foc == d) RRETURN(MATCH_NOMATCH); |
if (fc == d) MRRETURN(MATCH_NOMATCH); | ||
3677 | } | } |
3678 | } | } |
3679 | else | else |
3680 | #endif | #endif |
3681 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3682 | { | { |
3683 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3684 | { | { |
3685 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); |
3686 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3687 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3688 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3689 | { | { |
3690 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3691 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3692 | } | } |
3693 | if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); |
3694 | eptr++; | |
3695 | } | } |
3696 | } | } |
3697 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3605 for (;;) | Line 3703 for (;;) |
3703 | { | { |
3704 | pp = eptr; | pp = eptr; |
3705 | ||
3706 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3707 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3708 | { | { |
3709 | register unsigned int d; | register unsigned int d; |
3710 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
# | Line 3619 for (;;) | Line 3716 for (;;) |
3716 | break; | break; |
3717 | } | } |
3718 | GETCHARLEN(d, eptr, len); | GETCHARLEN(d, eptr, len); |
3719 | if (d < 256) d = md->lcc[d]; | if (fc == d || foc == d) break; |
if (fc == d) break; | ||
3720 | eptr += len; | eptr += len; |
3721 | } | } |
3722 | if (possessive) continue; | if (possessive) continue; |
# | Line 3634 for (;;) | Line 3730 for (;;) |
3730 | } | } |
3731 | else | else |
3732 | #endif | #endif |
3733 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3734 | { | { |
3735 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3736 | { | { |
# | Line 3643 for (;;) | Line 3739 for (;;) |
3739 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3740 | break; | break; |
3741 | } | } |
3742 | if (fc == md->lcc[*eptr]) break; | if (fc == *eptr || foc == *eptr) break; |
3743 | eptr++; | eptr++; |
3744 | } | } |
3745 | if (possessive) continue; | if (possessive) continue; |
# | Line 3655 for (;;) | Line 3751 for (;;) |
3751 | } | } |
3752 | } | } |
3753 | ||
3754 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3755 | } | } |
3756 | /* Control never gets here */ | /* Control never gets here */ |
3757 | } | } |
# | Line 3664 for (;;) | Line 3760 for (;;) |
3760 | ||
3761 | else | else |
3762 | { | { |
3763 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3764 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3765 | { | { |
3766 | register unsigned int d; | register unsigned int d; |
3767 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 3674 for (;;) | Line 3769 for (;;) |
3769 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3770 | { | { |
3771 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3772 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3773 | } | } |
3774 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3775 | if (fc == d) MRRETURN(MATCH_NOMATCH); | if (fc == d) RRETURN(MATCH_NOMATCH); |
3776 | } | } |
3777 | } | } |
3778 | else | else |
3779 | #endif | #endif |
3780 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3781 | { | { |
3782 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3783 | { | { |
3784 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3785 | { | { |
3786 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3787 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3788 | } | } |
3789 | if (fc == *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr++) RRETURN(MATCH_NOMATCH); |
3790 | } | } |
3791 | } | } |
3792 | ||
# | Line 3699 for (;;) | Line 3794 for (;;) |
3794 | ||
3795 | if (minimize) | if (minimize) |
3796 | { | { |
3797 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3798 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3799 | { | { |
3800 | register unsigned int d; | register unsigned int d; |
3801 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3802 | { | { |
3803 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); |
3804 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3805 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3806 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3807 | { | { |
3808 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3809 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3810 | } | } |
3811 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3812 | if (fc == d) MRRETURN(MATCH_NOMATCH); | if (fc == d) RRETURN(MATCH_NOMATCH); |
3813 | } | } |
3814 | } | } |
3815 | else | else |
3816 | #endif | #endif |
3817 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3818 | { | { |
3819 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3820 | { | { |
3821 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); |
3822 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3823 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3824 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3825 | { | { |
3826 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3827 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3828 | } | } |
3829 | if (fc == *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr++) RRETURN(MATCH_NOMATCH); |
3830 | } | } |
3831 | } | } |
3832 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3744 for (;;) | Line 3838 for (;;) |
3838 | { | { |
3839 | pp = eptr; | pp = eptr; |
3840 | ||
3841 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3842 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3843 | { | { |
3844 | register unsigned int d; | register unsigned int d; |
3845 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
# | Line 3772 for (;;) | Line 3865 for (;;) |
3865 | } | } |
3866 | else | else |
3867 | #endif | #endif |
3868 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3869 | { | { |
3870 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3871 | { | { |
# | Line 3793 for (;;) | Line 3886 for (;;) |
3886 | } | } |
3887 | } | } |
3888 | ||
3889 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3890 | } | } |
3891 | } | } |
3892 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3805 for (;;) | Line 3898 for (;;) |
3898 | case OP_TYPEEXACT: | case OP_TYPEEXACT: |
3899 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3900 | minimize = TRUE; | minimize = TRUE; |
3901 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3902 | goto REPEATTYPE; | goto REPEATTYPE; |
3903 | ||
3904 | case OP_TYPEUPTO: | case OP_TYPEUPTO: |
# | Line 3813 for (;;) | Line 3906 for (;;) |
3906 | min = 0; | min = 0; |
3907 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3908 | minimize = *ecode == OP_TYPEMINUPTO; | minimize = *ecode == OP_TYPEMINUPTO; |
3909 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3910 | goto REPEATTYPE; | goto REPEATTYPE; |
3911 | ||
3912 | case OP_TYPEPOSSTAR: | case OP_TYPEPOSSTAR: |
# | Line 3841 for (;;) | Line 3934 for (;;) |
3934 | possessive = TRUE; | possessive = TRUE; |
3935 | min = 0; | min = 0; |
3936 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3937 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3938 | goto REPEATTYPE; | goto REPEATTYPE; |
3939 | ||
3940 | case OP_TYPESTAR: | case OP_TYPESTAR: |
# | Line 3887 for (;;) | Line 3980 for (;;) |
3980 | switch(prop_type) | switch(prop_type) |
3981 | { | { |
3982 | case PT_ANY: | case PT_ANY: |
3983 | if (prop_fail_result) MRRETURN(MATCH_NOMATCH); | if (prop_fail_result) RRETURN(MATCH_NOMATCH); |
3984 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3985 | { | { |
3986 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3987 | { | { |
3988 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3989 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3990 | } | } |
3991 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
3992 | } | } |
# | Line 3906 for (;;) | Line 3999 for (;;) |
3999 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4000 | { | { |
4001 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4002 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4003 | } | } |
4004 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4005 | chartype = UCD_CHARTYPE(c); | chartype = UCD_CHARTYPE(c); |
4006 | if ((chartype == ucp_Lu || | if ((chartype == ucp_Lu || |
4007 | chartype == ucp_Ll || | chartype == ucp_Ll || |
4008 | chartype == ucp_Lt) == prop_fail_result) | chartype == ucp_Lt) == prop_fail_result) |
4009 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4010 | } | } |
4011 | break; | break; |
4012 | ||
# | Line 3923 for (;;) | Line 4016 for (;;) |
4016 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4017 | { | { |
4018 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4019 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4020 | } | } |
4021 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4022 | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
4023 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4024 | } | } |
4025 | break; | break; |
4026 | ||
# | Line 3937 for (;;) | Line 4030 for (;;) |
4030 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4031 | { | { |
4032 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4033 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4034 | } | } |
4035 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4036 | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
4037 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4038 | } | } |
4039 | break; | break; |
4040 | ||
# | Line 3951 for (;;) | Line 4044 for (;;) |
4044 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4045 | { | { |
4046 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4047 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4048 | } | } |
4049 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4050 | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
4051 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4052 | } | } |
4053 | break; | break; |
4054 | ||
# | Line 3966 for (;;) | Line 4059 for (;;) |
4059 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4060 | { | { |
4061 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4062 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4063 | } | } |
4064 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4065 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4066 | if ((category == ucp_L || category == ucp_N) == prop_fail_result) | if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
4067 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4068 | } | } |
4069 | break; | break; |
4070 | ||
# | Line 3981 for (;;) | Line 4074 for (;;) |
4074 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4075 | { | { |
4076 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4077 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4078 | } | } |
4079 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4080 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4081 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
4082 | == prop_fail_result) | == prop_fail_result) |
4083 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4084 | } | } |
4085 | break; | break; |
4086 | ||
# | Line 3997 for (;;) | Line 4090 for (;;) |
4090 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4091 | { | { |
4092 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4093 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4094 | } | } |
4095 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4096 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4097 | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
4098 | == prop_fail_result) | == prop_fail_result) |
4099 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4100 | } | } |
4101 | break; | break; |
4102 | ||
# | Line 4014 for (;;) | Line 4107 for (;;) |
4107 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4108 | { | { |
4109 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4110 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4111 | } | } |
4112 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4113 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4114 | if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) | if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) |
4115 | == prop_fail_result) | == prop_fail_result) |
4116 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4117 | } | } |
4118 | break; | break; |
4119 | ||
# | Line 4041 for (;;) | Line 4134 for (;;) |
4134 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4135 | { | { |
4136 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4137 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4138 | } | } |
4139 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4140 | if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); | if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
4141 | while (eptr < md->end_subject) | while (eptr < md->end_subject) |
4142 | { | { |
4143 | int len = 1; | int len = 1; |
4144 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
4145 | if (UCD_CATEGORY(c) != ucp_M) break; | if (UCD_CATEGORY(c) != ucp_M) break; |
4146 | eptr += len; | eptr += len; |
4147 | } | } |
# | Line 4060 for (;;) | Line 4153 for (;;) |
4153 | ||
4154 | /* Handle all other cases when the coding is UTF-8 */ | /* Handle all other cases when the coding is UTF-8 */ |
4155 | ||
4156 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
4157 | if (utf8) switch(ctype) | if (utf) switch(ctype) |
4158 | { | { |
4159 | case OP_ANY: | case OP_ANY: |
4160 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 4069 for (;;) | Line 4162 for (;;) |
4162 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4163 | { | { |
4164 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4165 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4166 | } | } |
4167 | if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
4168 | eptr++; | eptr++; |
4169 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4170 | } | } |
4171 | break; | break; |
4172 | ||
# | Line 4083 for (;;) | Line 4176 for (;;) |
4176 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4177 | { | { |
4178 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4179 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4180 | } | } |
4181 | eptr++; | eptr++; |
4182 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4183 | } | } |
4184 | break; | break; |
4185 | ||
4186 | case OP_ANYBYTE: | case OP_ANYBYTE: |
4187 | if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH); | if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); |
4188 | eptr += min; | eptr += min; |
4189 | break; | break; |
4190 | ||
# | Line 4101 for (;;) | Line 4194 for (;;) |
4194 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4195 | { | { |
4196 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4197 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4198 | } | } |
4199 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4200 | switch(c) | switch(c) |
4201 | { | { |
4202 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4203 | ||
4204 | case 0x000d: | case 0x000d: |
4205 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
# | Line 4120 for (;;) | Line 4213 for (;;) |
4213 | case 0x0085: | case 0x0085: |
4214 | case 0x2028: | case 0x2028: |
4215 | case 0x2029: | case 0x2029: |
4216 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
4217 | break; | break; |
4218 | } | } |
4219 | } | } |
# | Line 4132 for (;;) | Line 4225 for (;;) |
4225 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4226 | { | { |
4227 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4228 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4229 | } | } |
4230 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4231 | switch(c) | switch(c) |
# | Line 4157 for (;;) | Line 4250 for (;;) |
4250 | case 0x202f: /* NARROW NO-BREAK SPACE */ | case 0x202f: /* NARROW NO-BREAK SPACE */ |
4251 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
4252 | case 0x3000: /* IDEOGRAPHIC SPACE */ | case 0x3000: /* IDEOGRAPHIC SPACE */ |
4253 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4254 | } | } |
4255 | } | } |
4256 | break; | break; |
# | Line 4168 for (;;) | Line 4261 for (;;) |
4261 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4262 | { | { |
4263 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4264 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4265 | } | } |
4266 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4267 | switch(c) | switch(c) |
4268 | { | { |
4269 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4270 | case 0x09: /* HT */ | case 0x09: /* HT */ |
4271 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
4272 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
# | Line 4204 for (;;) | Line 4297 for (;;) |
4297 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4298 | { | { |
4299 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4300 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4301 | } | } |
4302 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4303 | switch(c) | switch(c) |
# | Line 4217 for (;;) | Line 4310 for (;;) |
4310 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
4311 | case 0x2028: /* LINE SEPARATOR */ | case 0x2028: /* LINE SEPARATOR */ |
4312 | case 0x2029: /* PARAGRAPH SEPARATOR */ | case 0x2029: /* PARAGRAPH SEPARATOR */ |
4313 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4314 | } | } |
4315 | } | } |
4316 | break; | break; |
# | Line 4228 for (;;) | Line 4321 for (;;) |
4321 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4322 | { | { |
4323 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4324 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4325 | } | } |
4326 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4327 | switch(c) | switch(c) |
4328 | { | { |
4329 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4330 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
4331 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
4332 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
# | Line 4252 for (;;) | Line 4345 for (;;) |
4345 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4346 | { | { |
4347 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4348 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4349 | } | } |
4350 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4351 | if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) | if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) |
4352 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4353 | } | } |
4354 | break; | break; |
4355 | ||
# | Line 4266 for (;;) | Line 4359 for (;;) |
4359 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4360 | { | { |
4361 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4362 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4363 | } | } |
4364 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0) |
4365 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4366 | eptr++; | |
4367 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4368 | } | } |
4369 | break; | break; |
# | Line 4280 for (;;) | Line 4374 for (;;) |
4374 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4375 | { | { |
4376 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4377 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4378 | } | } |
4379 | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) |
4380 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4381 | while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); | eptr++; |
4382 | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
4383 | } | } |
4384 | break; | break; |
4385 | ||
# | Line 4294 for (;;) | Line 4389 for (;;) |
4389 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4390 | { | { |
4391 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4392 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4393 | } | } |
4394 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0) |
4395 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4396 | eptr++; | |
4397 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4398 | } | } |
4399 | break; | break; |
# | Line 4308 for (;;) | Line 4404 for (;;) |
4404 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4405 | { | { |
4406 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4407 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4408 | } | } |
4409 | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) |
4410 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4411 | while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); | eptr++; |
4412 | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
4413 | } | } |
4414 | break; | break; |
4415 | ||
# | Line 4322 for (;;) | Line 4419 for (;;) |
4419 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4420 | { | { |
4421 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4422 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4423 | } | } |
4424 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0) |
4425 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4426 | eptr++; | |
4427 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4428 | } | } |
4429 | break; | break; |
# | Line 4335 for (;;) | Line 4433 for (;;) |
4433 | } /* End switch(ctype) */ | } /* End switch(ctype) */ |
4434 | ||
4435 | else | else |
4436 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
4437 | ||
4438 | /* Code for the non-UTF-8 case for minimum matching of operators other | /* Code for the non-UTF-8 case for minimum matching of operators other |
4439 | than OP_PROP and OP_NOTPROP. */ | than OP_PROP and OP_NOTPROP. */ |
# | Line 4348 for (;;) | Line 4446 for (;;) |
4446 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4447 | { | { |
4448 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4449 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4450 | } | } |
4451 | if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
4452 | eptr++; | eptr++; |
4453 | } | } |
4454 | break; | break; |
# | Line 4359 for (;;) | Line 4457 for (;;) |
4457 | if (eptr > md->end_subject - min) | if (eptr > md->end_subject - min) |
4458 | { | { |
4459 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4460 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4461 | } | } |
4462 | eptr += min; | eptr += min; |
4463 | break; | break; |
# | Line 4368 for (;;) | Line 4466 for (;;) |
4466 | if (eptr > md->end_subject - min) | if (eptr > md->end_subject - min) |
4467 | { | { |
4468 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4469 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4470 | } | } |
4471 | eptr += min; | eptr += min; |
4472 | break; | break; |
# | Line 4379 for (;;) | Line 4477 for (;;) |
4477 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4478 | { | { |
4479 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4480 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4481 | } | } |
4482 | switch(*eptr++) | switch(*eptr++) |
4483 | { | { |
4484 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4485 | ||
4486 | case 0x000d: | case 0x000d: |
4487 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
# | Line 4395 for (;;) | Line 4493 for (;;) |
4493 | case 0x000b: | case 0x000b: |
4494 | case 0x000c: | case 0x000c: |
4495 | case 0x0085: | case 0x0085: |
4496 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
4497 | case 0x2028: | |
4498 | case 0x2029: | |
4499 | #endif | |
4500 | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); | |
4501 | break; | break; |
4502 | } | } |
4503 | } | } |
# | Line 4407 for (;;) | Line 4509 for (;;) |
4509 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4510 | { | { |
4511 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4512 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4513 | } | } |
4514 | switch(*eptr++) | switch(*eptr++) |
4515 | { | { |
# | Line 4415 for (;;) | Line 4517 for (;;) |
4517 | case 0x09: /* HT */ | case 0x09: /* HT */ |
4518 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
4519 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
4520 | MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
4521 | case 0x1680: /* OGHAM SPACE MARK */ | |
4522 | case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | |
4523 | case 0x2000: /* EN QUAD */ | |
4524 | case 0x2001: /* EM QUAD */ | |
4525 | case 0x2002: /* EN SPACE */ | |
4526 | case 0x2003: /* EM SPACE */ | |
4527 | case 0x2004: /* THREE-PER-EM SPACE */ | |
4528 | case 0x2005: /* FOUR-PER-EM SPACE */ | |
4529 | case 0x2006: /* SIX-PER-EM SPACE */ | |
4530 | case 0x2007: /* FIGURE SPACE */ | |
4531 | case 0x2008: /* PUNCTUATION SPACE */ | |
4532 | case 0x2009: /* THIN SPACE */ | |
4533 | case 0x200A: /* HAIR SPACE */ | |
4534 | case 0x202f: /* NARROW NO-BREAK SPACE */ | |
4535 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | |
4536 | case 0x3000: /* IDEOGRAPHIC SPACE */ | |
4537 | #endif | |
4538 | RRETURN(MATCH_NOMATCH); | |
4539 | } | } |
4540 | } | } |
4541 | break; | break; |
# | Line 4426 for (;;) | Line 4546 for (;;) |
4546 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4547 | { | { |
4548 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4549 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4550 | } | } |
4551 | switch(*eptr++) | switch(*eptr++) |
4552 | { | { |
4553 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4554 | case 0x09: /* HT */ | case 0x09: /* HT */ |
4555 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
4556 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
4557 | #ifdef COMPILE_PCRE16 | |
4558 | case 0x1680: /* OGHAM SPACE MARK */ | |
4559 | case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | |
4560 | case 0x2000: /* EN QUAD */ | |
4561 | case 0x2001: /* EM QUAD */ | |
4562 | case 0x2002: /* EN SPACE */ | |
4563 | case 0x2003: /* EM SPACE */ | |
4564 | case 0x2004: /* THREE-PER-EM SPACE */ | |
4565 | case 0x2005: /* FOUR-PER-EM SPACE */ | |
4566 | case 0x2006: /* SIX-PER-EM SPACE */ | |
4567 | case 0x2007: /* FIGURE SPACE */ | |
4568 | case 0x2008: /* PUNCTUATION SPACE */ | |
4569 | case 0x2009: /* THIN SPACE */ | |
4570 | case 0x200A: /* HAIR SPACE */ | |
4571 | case 0x202f: /* NARROW NO-BREAK SPACE */ | |
4572 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | |
4573 | case 0x3000: /* IDEOGRAPHIC SPACE */ | |
4574 | #endif | |
4575 | break; | break; |
4576 | } | } |
4577 | } | } |
# | Line 4445 for (;;) | Line 4583 for (;;) |
4583 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4584 | { | { |
4585 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4586 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4587 | } | } |
4588 | switch(*eptr++) | switch(*eptr++) |
4589 | { | { |
# | Line 4455 for (;;) | Line 4593 for (;;) |
4593 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
4594 | case 0x0d: /* CR */ | case 0x0d: /* CR */ |
4595 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
4596 | MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
4597 | case 0x2028: /* LINE SEPARATOR */ | |
4598 | case 0x2029: /* PARAGRAPH SEPARATOR */ | |
4599 | #endif | |
4600 | RRETURN(MATCH_NOMATCH); | |
4601 | } | } |
4602 | } | } |
4603 | break; | break; |
# | Line 4466 for (;;) | Line 4608 for (;;) |
4608 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4609 | { | { |
4610 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4611 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4612 | } | } |
4613 | switch(*eptr++) | switch(*eptr++) |
4614 | { | { |
4615 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4616 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
4617 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
4618 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
4619 | case 0x0d: /* CR */ | case 0x0d: /* CR */ |
4620 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
4621 | #ifdef COMPILE_PCRE16 | |
4622 | case 0x2028: /* LINE SEPARATOR */ | |
4623 | case 0x2029: /* PARAGRAPH SEPARATOR */ | |
4624 | #endif | |
4625 | break; | break; |
4626 | } | } |
4627 | } | } |
# | Line 4487 for (;;) | Line 4633 for (;;) |
4633 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4634 | { | { |
4635 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4636 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4637 | } | } |
4638 | if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) |
4639 | RRETURN(MATCH_NOMATCH); | |
4640 | eptr++; | |
4641 | } | } |
4642 | break; | break; |
4643 | ||
# | Line 4499 for (;;) | Line 4647 for (;;) |
4647 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4648 | { | { |
4649 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4650 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4651 | } | } |
4652 | if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) |
4653 | RRETURN(MATCH_NOMATCH); | |
4654 | eptr++; | |
4655 | } | } |
4656 | break; | break; |
4657 | ||
# | Line 4511 for (;;) | Line 4661 for (;;) |
4661 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4662 | { | { |
4663 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4664 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4665 | } | } |
4666 | if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) |
4667 | RRETURN(MATCH_NOMATCH); | |
4668 | eptr++; | |
4669 | } | } |
4670 | break; | break; |
4671 | ||
# | Line 4523 for (;;) | Line 4675 for (;;) |
4675 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4676 | { | { |
4677 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4678 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4679 | } | } |
4680 | if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) |
4681 | RRETURN(MATCH_NOMATCH); | |
4682 | eptr++; | |
4683 | } | } |
4684 | break; | break; |
4685 | ||
# | Line 4535 for (;;) | Line 4689 for (;;) |
4689 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4690 | { | { |
4691 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4692 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4693 | } | } |
4694 | if ((md->ctypes[*eptr++] & ctype_word) != 0) | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) |
4695 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4696 | eptr++; | |
4697 | } | } |
4698 | break; | break; |
4699 | ||
# | Line 4548 for (;;) | Line 4703 for (;;) |
4703 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4704 | { | { |
4705 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4706 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4707 | } | } |
4708 | if ((md->ctypes[*eptr++] & ctype_word) == 0) | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) |
4709 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4710 | eptr++; | |
4711 | } | } |
4712 | break; | break; |
4713 | ||
# | Line 4580 for (;;) | Line 4736 for (;;) |
4736 | { | { |
4737 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); |
4738 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4739 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4740 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4741 | { | { |
4742 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4743 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4744 | } | } |
4745 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4746 | if (prop_fail_result) MRRETURN(MATCH_NOMATCH); | if (prop_fail_result) RRETURN(MATCH_NOMATCH); |
4747 | } | } |
4748 | /* Control never gets here */ | /* Control never gets here */ |
4749 | ||
# | Line 4597 for (;;) | Line 4753 for (;;) |
4753 | int chartype; | int chartype; |
4754 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); |
4755 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4756 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4757 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4758 | { | { |
4759 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4760 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4761 | } | } |
4762 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4763 | chartype = UCD_CHARTYPE(c); | chartype = UCD_CHARTYPE(c); |
4764 | if ((chartype == ucp_Lu || | if ((chartype == ucp_Lu || |
4765 | chartype == ucp_Ll || | chartype == ucp_Ll || |
4766 | chartype == ucp_Lt) == prop_fail_result) | chartype == ucp_Lt) == prop_fail_result) |
4767 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4768 | } | } |
4769 | /* Control never gets here */ | /* Control never gets here */ |
4770 | ||
# | Line 4617 for (;;) | Line 4773 for (;;) |
4773 | { | { |
4774 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); |
4775 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4776 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4777 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4778 | { | { |
4779 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4780 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4781 | } | } |
4782 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4783 | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
4784 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4785 | } | } |
4786 | /* Control never gets here */ | /* Control never gets here */ |
4787 | ||
# | Line 4634 for (;;) | Line 4790 for (;;) |
4790 | { | { |
4791 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); |
4792 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4793 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4794 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4795 | { | { |
4796 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4797 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4798 | } | } |
4799 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4800 | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
4801 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4802 | } | } |
4803 | /* Control never gets here */ | /* Control never gets here */ |
4804 | ||
# | Line 4651 for (;;) | Line 4807 for (;;) |
4807 | { | { |
4808 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); |
4809 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4810 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4811 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4812 | { | { |
4813 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4814 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4815 | } | } |
4816 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4817 | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
4818 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4819 | } | } |
4820 | /* Control never gets here */ | /* Control never gets here */ |
4821 | ||
# | Line 4669 for (;;) | Line 4825 for (;;) |
4825 | int category; | int category; |
4826 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); |
4827 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4828 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4829 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4830 | { | { |
4831 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4832 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4833 | } | } |
4834 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4835 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4836 | if ((category == ucp_L || category == ucp_N) == prop_fail_result) | if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
4837 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4838 | } | } |
4839 | /* Control never gets here */ | /* Control never gets here */ |
4840 | ||
# | Line 4687 for (;;) | Line 4843 for (;;) |
4843 | { | { |
4844 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); |
4845 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4846 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4847 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4848 | { | { |
4849 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4850 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4851 | } | } |
4852 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4853 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4854 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
4855 | == prop_fail_result) | == prop_fail_result) |
4856 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4857 | } | } |
4858 | /* Control never gets here */ | /* Control never gets here */ |
4859 | ||
# | Line 4706 for (;;) | Line 4862 for (;;) |
4862 | { | { |
4863 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM61); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM61); |
4864 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4865 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4866 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4867 | { | { |
4868 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4869 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4870 | } | } |
4871 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4872 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4873 | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
4874 | == prop_fail_result) | == prop_fail_result) |
4875 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4876 | } | } |
4877 | /* Control never gets here */ | /* Control never gets here */ |
4878 | ||
# | Line 4726 for (;;) | Line 4882 for (;;) |
4882 | int category; | int category; |
4883 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); |
4884 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4885 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4886 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4887 | { | { |
4888 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4889 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4890 | } | } |
4891 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4892 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
# | Line 4738 for (;;) | Line 4894 for (;;) |
4894 | category == ucp_N || | category == ucp_N || |
4895 | c == CHAR_UNDERSCORE) | c == CHAR_UNDERSCORE) |
4896 | == prop_fail_result) | == prop_fail_result) |
4897 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4898 | } | } |
4899 | /* Control never gets here */ | /* Control never gets here */ |
4900 | ||
# | Line 4758 for (;;) | Line 4914 for (;;) |
4914 | { | { |
4915 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM41); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM41); |
4916 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4917 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4918 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4919 | { | { |
4920 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4921 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4922 | } | } |
4923 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4924 | if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); | if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH); |
4925 | while (eptr < md->end_subject) | while (eptr < md->end_subject) |
4926 | { | { |
4927 | int len = 1; | int len = 1; |
4928 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
4929 | if (UCD_CATEGORY(c) != ucp_M) break; | if (UCD_CATEGORY(c) != ucp_M) break; |
4930 | eptr += len; | eptr += len; |
4931 | } | } |
# | Line 4778 for (;;) | Line 4934 for (;;) |
4934 | else | else |
4935 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
4936 | ||
4937 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
4938 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
4939 | { | { |
4940 | for (fi = min;; fi++) | for (fi = min;; fi++) |
4941 | { | { |
4942 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM42); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM42); |
4943 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4944 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4945 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4946 | { | { |
4947 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4948 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4949 | } | } |
4950 | if (ctype == OP_ANY && IS_NEWLINE(eptr)) | if (ctype == OP_ANY && IS_NEWLINE(eptr)) |
4951 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4952 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4953 | switch(ctype) | switch(ctype) |
4954 | { | { |
# | Line 4805 for (;;) | Line 4960 for (;;) |
4960 | case OP_ANYNL: | case OP_ANYNL: |
4961 | switch(c) | switch(c) |
4962 | { | { |
4963 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4964 | case 0x000d: | case 0x000d: |
4965 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
4966 | break; | break; |
# | Line 4817 for (;;) | Line 4972 for (;;) |
4972 | case 0x0085: | case 0x0085: |
4973 | case 0x2028: | case 0x2028: |
4974 | case 0x2029: | case 0x2029: |
4975 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
4976 | break; | break; |
4977 | } | } |
4978 | break; | break; |
# | Line 4845 for (;;) | Line 5000 for (;;) |
5000 | case 0x202f: /* NARROW NO-BREAK SPACE */ | case 0x202f: /* NARROW NO-BREAK SPACE */ |
5001 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ |
5002 | case 0x3000: /* IDEOGRAPHIC SPACE */ | case 0x3000: /* IDEOGRAPHIC SPACE */ |
5003 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5004 | } | } |
5005 | break; | break; |
5006 | ||
5007 | case OP_HSPACE: | case OP_HSPACE: |
5008 | switch(c) | switch(c) |
5009 | { | { |
5010 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
5011 | case 0x09: /* HT */ | case 0x09: /* HT */ |
5012 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
5013 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
# | Line 4887 for (;;) | Line 5042 for (;;) |
5042 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
5043 | case 0x2028: /* LINE SEPARATOR */ | case 0x2028: /* LINE SEPARATOR */ |
5044 | case 0x2029: /* PARAGRAPH SEPARATOR */ | case 0x2029: /* PARAGRAPH SEPARATOR */ |
5045 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5046 | } | } |
5047 | break; | break; |
5048 | ||
5049 | case OP_VSPACE: | case OP_VSPACE: |
5050 | switch(c) | switch(c) |
5051 | { | { |
5052 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
5053 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
5054 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
5055 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
# | Line 4908 for (;;) | Line 5063 for (;;) |
5063 | ||
5064 | case OP_NOT_DIGIT: | case OP_NOT_DIGIT: |
5065 | if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) | if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) |
5066 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5067 | break; | break; |
5068 | ||
5069 | case OP_DIGIT: | case OP_DIGIT: |
5070 | if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) | if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) |
5071 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5072 | break; | break; |
5073 | ||
5074 | case OP_NOT_WHITESPACE: | case OP_NOT_WHITESPACE: |
5075 | if (c < 256 && (md->ctypes[c] & ctype_space) != 0) | if (c < 256 && (md->ctypes[c] & ctype_space) != 0) |
5076 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5077 | break; | break; |
5078 | ||
5079 | case OP_WHITESPACE: | case OP_WHITESPACE: |
5080 | if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) | if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) |
5081 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5082 | break; | break; |
5083 | ||
5084 | case OP_NOT_WORDCHAR: | case OP_NOT_WORDCHAR: |
5085 | if (c < 256 && (md->ctypes[c] & ctype_word) != 0) | if (c < 256 && (md->ctypes[c] & ctype_word) != 0) |
5086 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5087 | break; | break; |
5088 | ||
5089 | case OP_WORDCHAR: | case OP_WORDCHAR: |
5090 | if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) | if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) |
5091 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5092 | break; | break; |
5093 | ||
5094 | default: | default: |
# | Line 4943 for (;;) | Line 5098 for (;;) |
5098 | } | } |
5099 | else | else |
5100 | #endif | #endif |
5101 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
5102 | { | { |
5103 | for (fi = min;; fi++) | for (fi = min;; fi++) |
5104 | { | { |
5105 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM43); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM43); |
5106 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
5107 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
5108 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
5109 | { | { |
5110 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
5111 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5112 | } | } |
5113 | if (ctype == OP_ANY && IS_NEWLINE(eptr)) | if (ctype == OP_ANY && IS_NEWLINE(eptr)) |
5114 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
5115 | c = *eptr++; | c = *eptr++; |
5116 | switch(ctype) | switch(ctype) |
5117 | { | { |
# | Line 4968 for (;;) | Line 5123 for (;;) |
5123 | case OP_ANYNL: | case OP_ANYNL: |
5124 | switch(c) | switch(c) |
5125 | { | { |
5126 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
5127 | case 0x000d: | case 0x000d: |
5128 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
5129 | break; | break; |
# | Line 4979 for (;;) | Line 5134 for (;;) |
5134 | case 0x000b: | case 0x000b: |
5135 | case 0x000c: | case 0x000c: |
5136 | case 0x0085: | case 0x0085: |
5137 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
5138 | case 0x2028: | |
5139 | case 0x2029: | |
5140 | #endif | |
5141 | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); | |
5142 | break; | break; |
5143 | } | } |
5144 | break; | break; |
# | Line 4991 for (;;) | Line 5150 for (;;) |
5150 | case 0x09: /* HT */ | case 0x09: /* HT */ |
5151 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
5152 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
5153 | MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
5154 | case 0x1680: /* OGHAM SPACE MARK */ | |
5155 | case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | |
5156 | case 0x2000: /* EN QUAD */ | |
5157 | case 0x2001: /* EM QUAD */ | |
5158 | case 0x2002: /* EN SPACE */ | |
5159 | case 0x2003: /* EM SPACE */ | |
5160 | case 0x2004: /* THREE-PER-EM SPACE */ | |
5161 | case 0x2005: /* FOUR-PER-EM SPACE */ | |
5162 | case 0x2006: /* SIX-PER-EM SPACE */ | |
5163 | case 0x2007: /* FIGURE SPACE */ | |
5164 | case 0x2008: /* PUNCTUATION SPACE */ | |
5165 | case 0x2009: /* THIN SPACE */ | |
5166 | case 0x200A: /* HAIR SPACE */ | |
5167 | case 0x202f: /* NARROW NO-BREAK SPACE */ | |
5168 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | |
5169 | case 0x3000: /* IDEOGRAPHIC SPACE */ | |
5170 | #endif | |
5171 | RRETURN(MATCH_NOMATCH); | |
5172 | } | } |
5173 | break; | break; |
5174 | ||
5175 | case OP_HSPACE: | case OP_HSPACE: |
5176 | switch(c) | switch(c) |
5177 | { | { |
5178 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
5179 | case 0x09: /* HT */ | case 0x09: /* HT */ |
5180 | case 0x20: /* SPACE */ | case 0x20: /* SPACE */ |
5181 | case 0xa0: /* NBSP */ | case 0xa0: /* NBSP */ |
5182 | #ifdef COMPILE_PCRE16 | |
5183 | case 0x1680: /* OGHAM SPACE MARK */ | |
5184 | case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | |
5185 | case 0x2000: /* EN QUAD */ | |
5186 | case 0x2001: /* EM QUAD */ | |
5187 | case 0x2002: /* EN SPACE */ | |
5188 | case 0x2003: /* EM SPACE */ | |
5189 | case 0x2004: /* THREE-PER-EM SPACE */ | |
5190 | case 0x2005: /* FOUR-PER-EM SPACE */ | |
5191 | case 0x2006: /* SIX-PER-EM SPACE */ | |
5192 | case 0x2007: /* FIGURE SPACE */ | |
5193 | case 0x2008: /* PUNCTUATION SPACE */ | |
5194 | case 0x2009: /* THIN SPACE */ | |
5195 | case 0x200A: /* HAIR SPACE */ | |
5196 | case 0x202f: /* NARROW NO-BREAK SPACE */ | |
5197 | case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | |
5198 | case 0x3000: /* IDEOGRAPHIC SPACE */ | |
5199 | #endif | |
5200 | break; | break; |
5201 | } | } |
5202 | break; | break; |
# | Line 5015 for (;;) | Line 5210 for (;;) |
5210 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
5211 | case 0x0d: /* CR */ | case 0x0d: /* CR */ |
5212 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
5213 | MRRETURN(MATCH_NOMATCH); | #ifdef COMPILE_PCRE16 |
5214 | case 0x2028: /* LINE SEPARATOR */ | |
5215 | case 0x2029: /* PARAGRAPH SEPARATOR */ | |
5216 | #endif | |
5217 | RRETURN(MATCH_NOMATCH); | |
5218 | } | } |
5219 | break; | break; |
5220 | ||
5221 | case OP_VSPACE: | case OP_VSPACE: |
5222 | switch(c) | switch(c) |
5223 | { | { |
5224 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
5225 | case 0x0a: /* LF */ | case 0x0a: /* LF */ |
5226 | case 0x0b: /* VT */ | case 0x0b: /* VT */ |
5227 | case 0x0c: /* FF */ | case 0x0c: /* FF */ |
5228 | case 0x0d: /* CR */ | case 0x0d: /* CR */ |
5229 | case 0x85: /* NEL */ | case 0x85: /* NEL */ |
5230 | #ifdef COMPILE_PCRE16 | |
5231 | case 0x2028: /* LINE SEPARATOR */ | |
5232 | case 0x2029: /* PARAGRAPH SEPARATOR */ | |
5233 | #endif | |
5234 | break; | break; |
5235 | } | } |
5236 | break; | break; |
5237 | ||
5238 | case OP_NOT_DIGIT: | case OP_NOT_DIGIT: |
5239 | if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); |
5240 | break; | break; |
5241 | ||
5242 | case OP_DIGIT: | case OP_DIGIT: |
5243 | if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); |
5244 | break; | break; |
5245 | ||
5246 | case OP_NOT_WHITESPACE: | case OP_NOT_WHITESPACE: |
5247 | if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); |
5248 | break; | break; |
5249 | ||
5250 | case OP_WHITESPACE: | case OP_WHITESPACE: |
5251 | if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); |
5252 | break; | break; |
5253 | ||
5254 | case OP_NOT_WORDCHAR: | case OP_NOT_WORDCHAR: |
5255 | if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); |
5256 | break; | break; |
5257 | ||
5258 | case OP_WORDCHAR: | case OP_WORDCHAR: |
5259 | if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); |
5260 | break; | break; |
5261 | ||
5262 | default: | default: |
# | Line 5242 for (;;) | Line 5445 for (;;) |
5445 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); |
5446 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
5447 | if (eptr-- == pp) break; /* Stop if tried at original pos */ | if (eptr-- == pp) break; /* Stop if tried at original pos */ |
5448 | if (utf8) BACKCHAR(eptr); | if (utf) BACKCHAR(eptr); |
5449 | } | } |
5450 | } | } |
5451 | ||
# | Line 5259 for (;;) | Line 5462 for (;;) |
5462 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
5463 | break; | break; |
5464 | } | } |
5465 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
5466 | if (UCD_CATEGORY(c) == ucp_M) break; | if (UCD_CATEGORY(c) == ucp_M) break; |
5467 | eptr += len; | eptr += len; |
5468 | while (eptr < md->end_subject) | while (eptr < md->end_subject) |
5469 | { | { |
5470 | len = 1; | len = 1; |
5471 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
5472 | if (UCD_CATEGORY(c) != ucp_M) break; | if (UCD_CATEGORY(c) != ucp_M) break; |
5473 | eptr += len; | eptr += len; |
5474 | } | } |
# | Line 5282 for (;;) | Line 5485 for (;;) |
5485 | if (eptr-- == pp) break; /* Stop if tried at original pos */ | if (eptr-- == pp) break; /* Stop if tried at original pos */ |
5486 | for (;;) /* Move back over one extended */ | for (;;) /* Move back over one extended */ |
5487 | { | { |
5488 | if (!utf8) c = *eptr; else | if (!utf) c = *eptr; else |
5489 | { | { |
5490 | BACKCHAR(eptr); | BACKCHAR(eptr); |
5491 | GETCHAR(c, eptr); | GETCHAR(c, eptr); |
# | Line 5296 for (;;) | Line 5499 for (;;) |
5499 | else | else |
5500 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
5501 | ||
5502 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
5503 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
5504 | { | { |
5505 | switch(ctype) | switch(ctype) |
5506 | { | { |
# | Line 5315 for (;;) | Line 5516 for (;;) |
5516 | } | } |
5517 | if (IS_NEWLINE(eptr)) break; | if (IS_NEWLINE(eptr)) break; |
5518 | eptr++; | eptr++; |
5519 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
5520 | } | } |
5521 | } | } |
5522 | ||
# | Line 5332 for (;;) | Line 5533 for (;;) |
5533 | } | } |
5534 | if (IS_NEWLINE(eptr)) break; | if (IS_NEWLINE(eptr)) break; |
5535 | eptr++; | eptr++; |
5536 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
5537 | } | } |
5538 | } | } |
5539 | break; | break; |
# | Line 5348 for (;;) | Line 5549 for (;;) |
5549 | break; | break; |
5550 | } | } |
5551 | eptr++; | eptr++; |
5552 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
5553 | } | } |
5554 | } | } |
5555 |