Parent Directory
|
Revision Log
|
Patch
revision 835 by ph10, Wed Dec 28 16:10:09 2011 UTC | revision 1092 by chpe, Tue Oct 16 15:55:55 2012 UTC | |
---|---|---|
# | Line 6 | Line 6 |
6 | and semantics are as close as possible to those of the Perl 5 language. | and semantics are as close as possible to those of the Perl 5 language. |
7 | ||
8 | Written by Philip Hazel | Written by Philip Hazel |
9 | Copyright (c) 1997-2011 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
10 | ||
11 | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
12 | Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without |
# | Line 37 POSSIBILITY OF SUCH DAMAGE. | Line 37 POSSIBILITY OF SUCH DAMAGE. |
37 | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
38 | */ | */ |
39 | ||
40 | /* This module contains pcre_exec(), the externally visible function that does | /* This module contains pcre_exec(), the externally visible function that does |
41 | pattern matching using an NFA algorithm, trying to mimic Perl as closely as | pattern matching using an NFA algorithm, trying to mimic Perl as closely as |
42 | possible. There are also some static supporting functions. */ | possible. There are also some static supporting functions. */ |
# | Line 82 negative to avoid the external error cod | Line 81 negative to avoid the external error cod |
81 | #define MATCH_SKIP_ARG (-993) | #define MATCH_SKIP_ARG (-993) |
82 | #define MATCH_THEN (-992) | #define MATCH_THEN (-992) |
83 | ||
/* This is a convenience macro for code that occurs many times. */ | ||
#define MRRETURN(ra) \ | ||
{ \ | ||
md->mark = markptr; \ | ||
RRETURN(ra); \ | ||
} | ||
84 | /* Maximum number of ints of offset to save on the stack for recursive calls. | /* Maximum number of ints of offset to save on the stack for recursive calls. |
85 | If the offset vector is bigger, malloc is used. This should be a multiple of 3, | If the offset vector is bigger, malloc is used. This should be a multiple of 3, |
86 | because the offset vector is always a multiple of 3 long. */ | because the offset vector is always a multiple of 3 long. */ |
# | Line 121 Returns: nothing | Line 112 Returns: nothing |
112 | */ | */ |
113 | ||
114 | static void | static void |
115 | pchars(const uschar *p, int length, BOOL is_subject, match_data *md) | pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) |
116 | { | { |
117 | unsigned int c; | unsigned int c; |
118 | if (is_subject && length > md->end_subject - p) length = md->end_subject - p; | if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
# | Line 148 Arguments: | Line 139 Arguments: |
139 | md points to match data block | md points to match data block |
140 | caseless TRUE if caseless | caseless TRUE if caseless |
141 | ||
142 | Returns: < 0 if not matched, otherwise the number of subject bytes matched | Returns: >= 0 the number of subject bytes matched |
143 | -1 no match | |
144 | -2 partial match; always given if at end subject | |
145 | */ | */ |
146 | ||
147 | static int | static int |
148 | match_ref(int offset, register USPTR eptr, int length, match_data *md, | match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md, |
149 | BOOL caseless) | BOOL caseless) |
150 | { | { |
151 | USPTR eptr_start = eptr; | PCRE_PUCHAR eptr_start = eptr; |
152 | register USPTR p = md->start_subject + md->offset_vector[offset]; | register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; |
153 | ||
154 | #ifdef PCRE_DEBUG | #ifdef PCRE_DEBUG |
155 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
# | Line 171 pchars(p, length, FALSE, md); | Line 164 pchars(p, length, FALSE, md); |
164 | printf("\n"); | printf("\n"); |
165 | #endif | #endif |
166 | ||
167 | /* Always fail if reference not set (and not JavaScript compatible). */ | /* Always fail if reference not set (and not JavaScript compatible - in that |
168 | case the length is passed as zero). */ | |
169 | ||
170 | if (length < 0) return -1; | if (length < 0) return -1; |
171 | ||
# | Line 181 ASCII characters. */ | Line 175 ASCII characters. */ |
175 | ||
176 | if (caseless) | if (caseless) |
177 | { | { |
178 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
179 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
180 | if (md->utf8) | if (md->utf) |
181 | { | { |
182 | /* Match characters up to the end of the reference. NOTE: the number of | /* Match characters up to the end of the reference. NOTE: the number of |
183 | bytes matched may differ, because there are some characters whose upper and | data units matched may differ, because in UTF-8 there are some characters |
184 | lower case versions code as different numbers of bytes. For example, U+023A | whose upper and lower case versions have different numbers of bytes. |
185 | (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8); | For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 |
186 | a sequence of 3 of the former uses 6 bytes, as does a sequence of two of | (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a |
187 | the latter. It is important, therefore, to check the length along the | sequence of two of the latter. It is important, therefore, to check the |
188 | reference, not along the subject (earlier code did this wrong). */ | length along the reference, not along the subject (earlier code did this |
189 | wrong). */ | |
190 | ||
191 | USPTR endptr = p + length; | PCRE_PUCHAR endptr = p + length; |
192 | while (p < endptr) | while (p < endptr) |
193 | { | { |
194 | int c, d; | pcre_uint32 c, d; |
195 | if (eptr >= md->end_subject) return -1; | const ucd_record *ur; |
196 | if (eptr >= md->end_subject) return -2; /* Partial match */ | |
197 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
198 | GETCHARINC(d, p); | GETCHARINC(d, p); |
199 | if (c != d && c != UCD_OTHERCASE(d)) return -1; | ur = GET_UCD(d); |
200 | if (c != d && c != d + ur->other_case) | |
201 | { | |
202 | const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset; | |
203 | for (;;) | |
204 | { | |
205 | if (c < *pp) return -1; | |
206 | if (c == *pp++) break; | |
207 | } | |
208 | } | |
209 | } | } |
210 | } | } |
211 | else | else |
# | Line 210 if (caseless) | Line 215 if (caseless) |
215 | /* The same code works when not in UTF-8 mode and in UTF-8 mode when there | /* The same code works when not in UTF-8 mode and in UTF-8 mode when there |
216 | is no UCP support. */ | is no UCP support. */ |
217 | { | { |
if (eptr + length > md->end_subject) return -1; | ||
218 | while (length-- > 0) | while (length-- > 0) |
219 | { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; } | { |
220 | if (eptr >= md->end_subject) return -2; /* Partial match */ | |
221 | if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1; | |
222 | p++; | |
223 | eptr++; | |
224 | } | |
225 | } | } |
226 | } | } |
227 | ||
# | Line 221 are in UTF-8 mode. */ | Line 230 are in UTF-8 mode. */ |
230 | ||
231 | else | else |
232 | { | { |
233 | if (eptr + length > md->end_subject) return -1; | while (length-- > 0) |
234 | while (length-- > 0) if (*p++ != *eptr++) return -1; | { |
235 | if (eptr >= md->end_subject) return -2; /* Partial match */ | |
236 | if (*p++ != *eptr++) return -1; | |
237 | } | |
238 | } | } |
239 | ||
240 | return eptr - eptr_start; | return (int)(eptr - eptr_start); |
241 | } | } |
242 | ||
243 | ||
# | Line 290 actually used in this definition. */ | Line 302 actually used in this definition. */ |
302 | #define RMATCH(ra,rb,rc,rd,re,rw) \ | #define RMATCH(ra,rb,rc,rd,re,rw) \ |
303 | { \ | { \ |
304 | printf("match() called in line %d\n", __LINE__); \ | printf("match() called in line %d\n", __LINE__); \ |
305 | rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \ | rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \ |
306 | printf("to line %d\n", __LINE__); \ | printf("to line %d\n", __LINE__); \ |
307 | } | } |
308 | #define RRETURN(ra) \ | #define RRETURN(ra) \ |
# | Line 300 actually used in this definition. */ | Line 312 actually used in this definition. */ |
312 | } | } |
313 | #else | #else |
314 | #define RMATCH(ra,rb,rc,rd,re,rw) \ | #define RMATCH(ra,rb,rc,rd,re,rw) \ |
315 | rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1) | rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) |
316 | #define RRETURN(ra) return ra | #define RRETURN(ra) return ra |
317 | #endif | #endif |
318 | ||
# | Line 315 argument of match(), which never changes | Line 327 argument of match(), which never changes |
327 | ||
328 | #define RMATCH(ra,rb,rc,rd,re,rw)\ | #define RMATCH(ra,rb,rc,rd,re,rw)\ |
329 | {\ | {\ |
330 | heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\ | heapframe *newframe = frame->Xnextframe;\ |
331 | if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ | if (newframe == NULL)\ |
332 | frame->Xwhere = rw; \ | {\ |
333 | newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ | |
334 | if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ | |
335 | newframe->Xnextframe = NULL;\ | |
336 | frame->Xnextframe = newframe;\ | |
337 | }\ | |
338 | frame->Xwhere = rw;\ | |
339 | newframe->Xeptr = ra;\ | newframe->Xeptr = ra;\ |
340 | newframe->Xecode = rb;\ | newframe->Xecode = rb;\ |
341 | newframe->Xmstart = mstart;\ | newframe->Xmstart = mstart;\ |
newframe->Xmarkptr = markptr;\ | ||
342 | newframe->Xoffset_top = rc;\ | newframe->Xoffset_top = rc;\ |
343 | newframe->Xeptrb = re;\ | newframe->Xeptrb = re;\ |
344 | newframe->Xrdepth = frame->Xrdepth + 1;\ | newframe->Xrdepth = frame->Xrdepth + 1;\ |
# | Line 337 argument of match(), which never changes | Line 354 argument of match(), which never changes |
354 | {\ | {\ |
355 | heapframe *oldframe = frame;\ | heapframe *oldframe = frame;\ |
356 | frame = oldframe->Xprevframe;\ | frame = oldframe->Xprevframe;\ |
(pcre_stack_free)(oldframe);\ | ||
357 | if (frame != NULL)\ | if (frame != NULL)\ |
358 | {\ | {\ |
359 | rrc = ra;\ | rrc = ra;\ |
# | Line 351 argument of match(), which never changes | Line 367 argument of match(), which never changes |
367 | ||
368 | typedef struct heapframe { | typedef struct heapframe { |
369 | struct heapframe *Xprevframe; | struct heapframe *Xprevframe; |
370 | struct heapframe *Xnextframe; | |
371 | ||
372 | /* Function arguments that may change */ | /* Function arguments that may change */ |
373 | ||
374 | USPTR Xeptr; | PCRE_PUCHAR Xeptr; |
375 | const uschar *Xecode; | const pcre_uchar *Xecode; |
376 | USPTR Xmstart; | PCRE_PUCHAR Xmstart; |
USPTR Xmarkptr; | ||
377 | int Xoffset_top; | int Xoffset_top; |
378 | eptrblock *Xeptrb; | eptrblock *Xeptrb; |
379 | unsigned int Xrdepth; | unsigned int Xrdepth; |
380 | ||
381 | /* Function local variables */ | /* Function local variables */ |
382 | ||
383 | USPTR Xcallpat; | PCRE_PUCHAR Xcallpat; |
384 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
385 | USPTR Xcharptr; | PCRE_PUCHAR Xcharptr; |
386 | #endif | #endif |
387 | USPTR Xdata; | PCRE_PUCHAR Xdata; |
388 | USPTR Xnext; | PCRE_PUCHAR Xnext; |
389 | USPTR Xpp; | PCRE_PUCHAR Xpp; |
390 | USPTR Xprev; | PCRE_PUCHAR Xprev; |
391 | USPTR Xsaved_eptr; | PCRE_PUCHAR Xsaved_eptr; |
392 | ||
393 | recursion_info Xnew_recursive; | recursion_info Xnew_recursive; |
394 | ||
# | Line 385 typedef struct heapframe { | Line 401 typedef struct heapframe { |
401 | int Xprop_value; | int Xprop_value; |
402 | int Xprop_fail_result; | int Xprop_fail_result; |
403 | int Xoclength; | int Xoclength; |
404 | uschar Xocchars[8]; | pcre_uchar Xocchars[6]; |
405 | #endif | #endif |
406 | ||
407 | int Xcodelink; | int Xcodelink; |
# | Line 427 returns a negative (error) response, the | Line 443 returns a negative (error) response, the |
443 | same response. */ | same response. */ |
444 | ||
445 | /* These macros pack up tests that are used for partial matching, and which | /* These macros pack up tests that are used for partial matching, and which |
446 | appears several times in the code. We set the "hit end" flag if the pointer is | appear several times in the code. We set the "hit end" flag if the pointer is |
447 | at the end of the subject and also past the start of the subject (i.e. | at the end of the subject and also past the start of the subject (i.e. |
448 | something has been matched). For hard partial matching, we then return | something has been matched). For hard partial matching, we then return |
449 | immediately. The second one is used when we already know we are past the end of | immediately. The second one is used when we already know we are past the end of |
# | Line 438 the subject. */ | Line 454 the subject. */ |
454 | eptr > md->start_used_ptr) \ | eptr > md->start_used_ptr) \ |
455 | { \ | { \ |
456 | md->hitend = TRUE; \ | md->hitend = TRUE; \ |
457 | if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ |
458 | } | } |
459 | ||
460 | #define SCHECK_PARTIAL()\ | #define SCHECK_PARTIAL()\ |
461 | if (md->partial != 0 && eptr > md->start_used_ptr) \ | if (md->partial != 0 && eptr > md->start_used_ptr) \ |
462 | { \ | { \ |
463 | md->hitend = TRUE; \ | md->hitend = TRUE; \ |
464 | if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ |
465 | } | } |
466 | ||
467 | ||
468 | /* Performance note: It might be tempting to extract commonly used fields from | /* Performance note: It might be tempting to extract commonly used fields from |
469 | the md structure (e.g. utf8, end_subject) into individual variables to improve | the md structure (e.g. utf, end_subject) into individual variables to improve |
470 | performance. Tests using gcc on a SPARC disproved this; in the first case, it | performance. Tests using gcc on a SPARC disproved this; in the first case, it |
471 | made performance worse. | made performance worse. |
472 | ||
# | Line 459 Arguments: | Line 475 Arguments: |
475 | ecode pointer to current position in compiled code | ecode pointer to current position in compiled code |
476 | mstart pointer to the current match start position (can be modified | mstart pointer to the current match start position (can be modified |
477 | by encountering \K) | by encountering \K) |
markptr pointer to the most recent MARK name, or NULL | ||
478 | offset_top current top pointer | offset_top current top pointer |
479 | md pointer to "static" info for the match | md pointer to "static" info for the match |
480 | eptrb pointer to chain of blocks containing eptr at start of | eptrb pointer to chain of blocks containing eptr at start of |
# | Line 474 Returns: MATCH_MATCH if matched | Line 489 Returns: MATCH_MATCH if matched |
489 | */ | */ |
490 | ||
491 | static int | static int |
492 | match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, | match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode, |
493 | const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb, | PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb, |
494 | unsigned int rdepth) | unsigned int rdepth) |
495 | { | { |
496 | /* These variables do not need to be preserved over recursion in this function, | /* These variables do not need to be preserved over recursion in this function, |
# | Line 484 so they can be ordinary variables in all | Line 499 so they can be ordinary variables in all |
499 | ||
500 | register int rrc; /* Returns from recursive calls */ | register int rrc; /* Returns from recursive calls */ |
501 | register int i; /* Used for loops not involving calls to RMATCH() */ | register int i; /* Used for loops not involving calls to RMATCH() */ |
502 | register unsigned int c; /* Character values not kept over RMATCH() calls */ | register pcre_uint32 c; /* Character values not kept over RMATCH() calls */ |
503 | register BOOL utf8; /* Local copy of UTF-8 flag for speed */ | register BOOL utf; /* Local copy of UTF flag for speed */ |
504 | ||
505 | BOOL minimize, possessive; /* Quantifier options */ | BOOL minimize, possessive; /* Quantifier options */ |
506 | BOOL caseless; | BOOL caseless; |
507 | int condcode; | int condcode; |
508 | ||
509 | /* When recursion is not being used, all "local" variables that have to be | /* When recursion is not being used, all "local" variables that have to be |
510 | preserved over calls to RMATCH() are part of a "frame" which is obtained from | preserved over calls to RMATCH() are part of a "frame". We set up the top-level |
511 | heap storage. Set up the top-level frame here; others are obtained from the | frame on the stack here; subsequent instantiations are obtained from the heap |
512 | heap whenever RMATCH() does a "recursion". See the macro definitions above. */ | whenever RMATCH() does a "recursion". See the macro definitions above. Putting |
513 | the top-level on the stack rather than malloc-ing them all gives a performance | |
514 | boost in many cases where there is not much "recursion". */ | |
515 | ||
516 | #ifdef NO_RECURSE | #ifdef NO_RECURSE |
517 | heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); | heapframe *frame = (heapframe *)md->match_frames_base; |
if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); | ||
frame->Xprevframe = NULL; /* Marks the top level */ | ||
518 | ||
519 | /* Copy in the original argument variables */ | /* Copy in the original argument variables */ |
520 | ||
521 | frame->Xeptr = eptr; | frame->Xeptr = eptr; |
522 | frame->Xecode = ecode; | frame->Xecode = ecode; |
523 | frame->Xmstart = mstart; | frame->Xmstart = mstart; |
frame->Xmarkptr = markptr; | ||
524 | frame->Xoffset_top = offset_top; | frame->Xoffset_top = offset_top; |
525 | frame->Xeptrb = eptrb; | frame->Xeptrb = eptrb; |
526 | frame->Xrdepth = rdepth; | frame->Xrdepth = rdepth; |
# | Line 520 HEAP_RECURSE: | Line 534 HEAP_RECURSE: |
534 | #define eptr frame->Xeptr | #define eptr frame->Xeptr |
535 | #define ecode frame->Xecode | #define ecode frame->Xecode |
536 | #define mstart frame->Xmstart | #define mstart frame->Xmstart |
#define markptr frame->Xmarkptr | ||
537 | #define offset_top frame->Xoffset_top | #define offset_top frame->Xoffset_top |
538 | #define eptrb frame->Xeptrb | #define eptrb frame->Xeptrb |
539 | #define rdepth frame->Xrdepth | #define rdepth frame->Xrdepth |
540 | ||
541 | /* Ditto for the local variables */ | /* Ditto for the local variables */ |
542 | ||
543 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
544 | #define charptr frame->Xcharptr | #define charptr frame->Xcharptr |
545 | #endif | #endif |
546 | #define callpat frame->Xcallpat | #define callpat frame->Xcallpat |
# | Line 585 declarations can be cut out in a block. | Line 598 declarations can be cut out in a block. |
598 | below are for variables that do not have to be preserved over a recursive call | below are for variables that do not have to be preserved over a recursive call |
599 | to RMATCH(). */ | to RMATCH(). */ |
600 | ||
601 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
602 | const uschar *charptr; | const pcre_uchar *charptr; |
603 | #endif | #endif |
604 | const uschar *callpat; | const pcre_uchar *callpat; |
605 | const uschar *data; | const pcre_uchar *data; |
606 | const uschar *next; | const pcre_uchar *next; |
607 | USPTR pp; | PCRE_PUCHAR pp; |
608 | const uschar *prev; | const pcre_uchar *prev; |
609 | USPTR saved_eptr; | PCRE_PUCHAR saved_eptr; |
610 | ||
611 | recursion_info new_recursive; | recursion_info new_recursive; |
612 | ||
# | Line 606 int prop_type; | Line 619 int prop_type; |
619 | int prop_value; | int prop_value; |
620 | int prop_fail_result; | int prop_fail_result; |
621 | int oclength; | int oclength; |
622 | uschar occhars[8]; | pcre_uchar occhars[6]; |
623 | #endif | #endif |
624 | ||
625 | int codelink; | int codelink; |
# | Line 616 int max; | Line 629 int max; |
629 | int min; | int min; |
630 | int number; | int number; |
631 | int offset; | int offset; |
632 | int op; | pcre_uchar op; |
633 | int save_capture_last; | int save_capture_last; |
634 | int save_offset1, save_offset2, save_offset3; | int save_offset1, save_offset2, save_offset3; |
635 | int stacksave[REC_STACK_SAVE_MAX]; | int stacksave[REC_STACK_SAVE_MAX]; |
636 | ||
637 | eptrblock newptrb; | eptrblock newptrb; |
638 | ||
639 | /* There is a special fudge for calling match() in a way that causes it to | |
640 | measure the size of its basic stack frame when the stack is being used for | |
641 | recursion. The second argument (ecode) being NULL triggers this behaviour. It | |
642 | cannot normally ever be NULL. The return is the negated value of the frame | |
643 | size. */ | |
644 | ||
645 | if (ecode == NULL) | |
646 | { | |
647 | if (rdepth == 0) | |
648 | return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1); | |
649 | else | |
650 | { | |
651 | int len = (char *)&rdepth - (char *)eptr; | |
652 | return (len > 0)? -len : len; | |
653 | } | |
654 | } | |
655 | #endif /* NO_RECURSE */ | #endif /* NO_RECURSE */ |
656 | ||
657 | /* To save space on the stack and in the heap frame, I have doubled up on some | /* To save space on the stack and in the heap frame, I have doubled up on some |
# | Line 634 the alternative names that are used. */ | Line 664 the alternative names that are used. */ |
664 | #define code_offset codelink | #define code_offset codelink |
665 | #define condassert condition | #define condassert condition |
666 | #define matched_once prev_is_word | #define matched_once prev_is_word |
667 | #define foc number | |
668 | #define save_mark data | |
669 | ||
670 | /* These statements are here to stop the compiler complaining about uninitialized | /* These statements are here to stop the compiler complaining about uninitialized |
671 | variables. */ | variables. */ |
# | Line 659 defined). However, RMATCH isn't like a f | Line 691 defined). However, RMATCH isn't like a f |
691 | complicated macro. It has to be used in one particular way. This shouldn't, | complicated macro. It has to be used in one particular way. This shouldn't, |
692 | however, impact performance when true recursion is being used. */ | however, impact performance when true recursion is being used. */ |
693 | ||
694 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
695 | utf8 = md->utf8; /* Local copy of the flag */ | utf = md->utf; /* Local copy of the flag */ |
696 | #else | #else |
697 | utf8 = FALSE; | utf = FALSE; |
698 | #endif | #endif |
699 | ||
700 | /* First check that we haven't called match() too many times, or that we | /* First check that we haven't called match() too many times, or that we |
# | Line 701 for (;;) | Line 733 for (;;) |
733 | switch(op) | switch(op) |
734 | { | { |
735 | case OP_MARK: | case OP_MARK: |
736 | markptr = ecode + 2; | md->nomatch_mark = ecode + 2; |
737 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | md->mark = NULL; /* In case previously set by assertion */ |
738 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
739 | eptrb, RM55); | eptrb, RM55); |
740 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
741 | md->mark == NULL) md->mark = ecode + 2; | |
742 | ||
743 | /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an | /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an |
744 | argument, and we must check whether that argument matches this MARK's | argument, and we must check whether that argument matches this MARK's |
# | Line 712 for (;;) | Line 747 for (;;) |
747 | position and return MATCH_SKIP. Otherwise, pass back the return code | position and return MATCH_SKIP. Otherwise, pass back the return code |
748 | unaltered. */ | unaltered. */ |
749 | ||
750 | if (rrc == MATCH_SKIP_ARG && | else if (rrc == MATCH_SKIP_ARG && |
751 | strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0) | STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0) |
752 | { | { |
753 | md->start_match_ptr = eptr; | md->start_match_ptr = eptr; |
754 | RRETURN(MATCH_SKIP); | RRETURN(MATCH_SKIP); |
755 | } | } |
if (md->mark == NULL) md->mark = markptr; | ||
756 | RRETURN(rrc); | RRETURN(rrc); |
757 | ||
758 | case OP_FAIL: | case OP_FAIL: |
759 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
760 | ||
761 | /* COMMIT overrides PRUNE, SKIP, and THEN */ | /* COMMIT overrides PRUNE, SKIP, and THEN */ |
762 | ||
763 | case OP_COMMIT: | case OP_COMMIT: |
764 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
765 | eptrb, RM52); | eptrb, RM52); |
766 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && |
767 | rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && | rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && |
768 | rrc != MATCH_THEN) | rrc != MATCH_THEN) |
769 | RRETURN(rrc); | RRETURN(rrc); |
770 | MRRETURN(MATCH_COMMIT); | RRETURN(MATCH_COMMIT); |
771 | ||
772 | /* PRUNE overrides THEN */ | /* PRUNE overrides THEN */ |
773 | ||
774 | case OP_PRUNE: | case OP_PRUNE: |
775 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
776 | eptrb, RM51); | eptrb, RM51); |
777 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
778 | MRRETURN(MATCH_PRUNE); | RRETURN(MATCH_PRUNE); |
779 | ||
780 | case OP_PRUNE_ARG: | case OP_PRUNE_ARG: |
781 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | md->nomatch_mark = ecode + 2; |
782 | md->mark = NULL; /* In case previously set by assertion */ | |
783 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
784 | eptrb, RM56); | eptrb, RM56); |
785 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
786 | md->mark == NULL) md->mark = ecode + 2; | |
787 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
md->mark = ecode + 2; | ||
788 | RRETURN(MATCH_PRUNE); | RRETURN(MATCH_PRUNE); |
789 | ||
790 | /* SKIP overrides PRUNE and THEN */ | /* SKIP overrides PRUNE and THEN */ |
791 | ||
792 | case OP_SKIP: | case OP_SKIP: |
793 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
794 | eptrb, RM53); | eptrb, RM53); |
795 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
796 | RRETURN(rrc); | RRETURN(rrc); |
797 | md->start_match_ptr = eptr; /* Pass back current position */ | md->start_match_ptr = eptr; /* Pass back current position */ |
798 | MRRETURN(MATCH_SKIP); | RRETURN(MATCH_SKIP); |
799 | ||
800 | /* Note that, for Perl compatibility, SKIP with an argument does NOT set | |
801 | nomatch_mark. There is a flag that disables this opcode when re-matching a | |
802 | pattern that ended with a SKIP for which there was not a matching MARK. */ | |
803 | ||
804 | case OP_SKIP_ARG: | case OP_SKIP_ARG: |
805 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, | if (md->ignore_skip_arg) |
806 | { | |
807 | ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; | |
808 | break; | |
809 | } | |
810 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, | |
811 | eptrb, RM57); | eptrb, RM57); |
812 | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) | if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
813 | RRETURN(rrc); | RRETURN(rrc); |
814 | ||
815 | /* Pass back the current skip name by overloading md->start_match_ptr and | /* Pass back the current skip name by overloading md->start_match_ptr and |
816 | returning the special MATCH_SKIP_ARG return code. This will either be | returning the special MATCH_SKIP_ARG return code. This will either be |
817 | caught by a matching MARK, or get to the top, where it is treated the same | caught by a matching MARK, or get to the top, where it causes a rematch |
818 | as PRUNE. */ | with the md->ignore_skip_arg flag set. */ |
819 | ||
820 | md->start_match_ptr = ecode + 2; | md->start_match_ptr = ecode + 2; |
821 | RRETURN(MATCH_SKIP_ARG); | RRETURN(MATCH_SKIP_ARG); |
# | Line 780 for (;;) | Line 825 for (;;) |
825 | match pointer to do this. */ | match pointer to do this. */ |
826 | ||
827 | case OP_THEN: | case OP_THEN: |
828 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
829 | eptrb, RM54); | eptrb, RM54); |
830 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
831 | md->start_match_ptr = ecode; | md->start_match_ptr = ecode; |
832 | MRRETURN(MATCH_THEN); | RRETURN(MATCH_THEN); |
833 | ||
834 | case OP_THEN_ARG: | case OP_THEN_ARG: |
835 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, | md->nomatch_mark = ecode + 2; |
836 | md->mark = NULL; /* In case previously set by assertion */ | |
837 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, | |
838 | md, eptrb, RM58); | md, eptrb, RM58); |
839 | if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && | |
840 | md->mark == NULL) md->mark = ecode + 2; | |
841 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
842 | md->start_match_ptr = ecode; | md->start_match_ptr = ecode; |
md->mark = ecode + 2; | ||
843 | RRETURN(MATCH_THEN); | RRETURN(MATCH_THEN); |
844 | ||
845 | /* Handle an atomic group that does not contain any capturing parentheses. | /* Handle an atomic group that does not contain any capturing parentheses. |
# | Line 810 for (;;) | Line 858 for (;;) |
858 | case OP_ONCE_NC: | case OP_ONCE_NC: |
859 | prev = ecode; | prev = ecode; |
860 | saved_eptr = eptr; | saved_eptr = eptr; |
861 | save_mark = md->mark; | |
862 | do | do |
863 | { | { |
864 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); |
865 | if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ | if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ |
866 | { | { |
867 | mstart = md->start_match_ptr; | mstart = md->start_match_ptr; |
markptr = md->mark; | ||
868 | break; | break; |
869 | } | } |
870 | if (rrc == MATCH_THEN) | if (rrc == MATCH_THEN) |
# | Line 829 for (;;) | Line 877 for (;;) |
877 | ||
878 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
879 | ecode += GET(ecode,1); | ecode += GET(ecode,1); |
880 | md->mark = save_mark; | |
881 | } | } |
882 | while (*ecode == OP_ALT); | while (*ecode == OP_ALT); |
883 | ||
# | Line 868 for (;;) | Line 917 for (;;) |
917 | } | } |
918 | else /* OP_KETRMAX */ | else /* OP_KETRMAX */ |
919 | { | { |
md->match_function_type = MATCH_CBEGROUP; | ||
920 | RMATCH(eptr, prev, offset_top, md, eptrb, RM66); | RMATCH(eptr, prev, offset_top, md, eptrb, RM66); |
921 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
922 | ecode += 1 + LINK_SIZE; | ecode += 1 + LINK_SIZE; |
# | Line 908 for (;;) | Line 956 for (;;) |
956 | save_offset2 = md->offset_vector[offset+1]; | save_offset2 = md->offset_vector[offset+1]; |
957 | save_offset3 = md->offset_vector[md->offset_end - number]; | save_offset3 = md->offset_vector[md->offset_end - number]; |
958 | save_capture_last = md->capture_last; | save_capture_last = md->capture_last; |
959 | save_mark = md->mark; | |
960 | ||
961 | DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); | DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); |
962 | md->offset_vector[md->offset_end - number] = | md->offset_vector[md->offset_end - number] = |
# | Line 916 for (;;) | Line 965 for (;;) |
965 | for (;;) | for (;;) |
966 | { | { |
967 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
968 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
969 | eptrb, RM1); | eptrb, RM1); |
970 | if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ | if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ |
971 | ||
# | Line 944 for (;;) | Line 993 for (;;) |
993 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
994 | md->capture_last = save_capture_last; | md->capture_last = save_capture_last; |
995 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
996 | md->mark = save_mark; | |
997 | if (*ecode != OP_ALT) break; | if (*ecode != OP_ALT) break; |
998 | } | } |
999 | ||
# | Line 954 for (;;) | Line 1004 for (;;) |
1004 | ||
1005 | /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ | /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ |
1006 | ||
if (md->mark == NULL) md->mark = markptr; | ||
1007 | RRETURN(rrc); | RRETURN(rrc); |
1008 | } | } |
1009 | ||
# | Line 996 for (;;) | Line 1045 for (;;) |
1045 | ||
1046 | for (;;) | for (;;) |
1047 | { | { |
1048 | if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA || op == OP_ONCE) |
1049 | md->match_function_type = MATCH_CBEGROUP; | |
1050 | ||
1051 | /* If this is not a possibly empty group, and there are no (*THEN)s in | /* If this is not a possibly empty group, and there are no (*THEN)s in |
1052 | the pattern, and this is the final alternative, optimize as described | the pattern, and this is the final alternative, optimize as described |
# | Line 1004 for (;;) | Line 1054 for (;;) |
1054 | ||
1055 | else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) | else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) |
1056 | { | { |
1057 | ecode += _pcre_OP_lengths[*ecode]; | ecode += PRIV(OP_lengths)[*ecode]; |
1058 | goto TAIL_RECURSE; | goto TAIL_RECURSE; |
1059 | } | } |
1060 | ||
1061 | /* In all other cases, we have to make another call to match(). */ | /* In all other cases, we have to make another call to match(). */ |
1062 | ||
1063 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, | save_mark = md->mark; |
1064 | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, | |
1065 | RM2); | RM2); |
1066 | ||
1067 | /* See comment in the code for capturing groups above about handling | /* See comment in the code for capturing groups above about handling |
# | Line 1028 for (;;) | Line 1079 for (;;) |
1079 | { | { |
1080 | if (rrc == MATCH_ONCE) | if (rrc == MATCH_ONCE) |
1081 | { | { |
1082 | const uschar *scode = ecode; | const pcre_uchar *scode = ecode; |
1083 | if (*scode != OP_ONCE) /* If not at start, find it */ | if (*scode != OP_ONCE) /* If not at start, find it */ |
1084 | { | { |
1085 | while (*scode == OP_ALT) scode += GET(scode, 1); | while (*scode == OP_ALT) scode += GET(scode, 1); |
# | Line 1039 for (;;) | Line 1090 for (;;) |
1090 | RRETURN(rrc); | RRETURN(rrc); |
1091 | } | } |
1092 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
1093 | md->mark = save_mark; | |
1094 | if (*ecode != OP_ALT) break; | if (*ecode != OP_ALT) break; |
1095 | } | } |
1096 | ||
if (md->mark == NULL) md->mark = markptr; | ||
1097 | RRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1098 | ||
1099 | /* Handle possessive capturing brackets with an unlimited repeat. We come | /* Handle possessive capturing brackets with an unlimited repeat. We come |
# | Line 1071 for (;;) | Line 1122 for (;;) |
1122 | if (offset < md->offset_max) | if (offset < md->offset_max) |
1123 | { | { |
1124 | matched_once = FALSE; | matched_once = FALSE; |
1125 | code_offset = ecode - md->start_code; | code_offset = (int)(ecode - md->start_code); |
1126 | ||
1127 | save_offset1 = md->offset_vector[offset]; | save_offset1 = md->offset_vector[offset]; |
1128 | save_offset2 = md->offset_vector[offset+1]; | save_offset2 = md->offset_vector[offset+1]; |
# | Line 1094 for (;;) | Line 1145 for (;;) |
1145 | md->offset_vector[md->offset_end - number] = | md->offset_vector[md->offset_end - number] = |
1146 | (int)(eptr - md->start_subject); | (int)(eptr - md->start_subject); |
1147 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
1148 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
1149 | eptrb, RM63); | eptrb, RM63); |
1150 | if (rrc == MATCH_KETRPOS) | if (rrc == MATCH_KETRPOS) |
1151 | { | { |
# | Line 1130 for (;;) | Line 1181 for (;;) |
1181 | md->offset_vector[md->offset_end - number] = save_offset3; | md->offset_vector[md->offset_end - number] = save_offset3; |
1182 | } | } |
1183 | ||
if (md->mark == NULL) md->mark = markptr; | ||
1184 | if (allow_zero || matched_once) | if (allow_zero || matched_once) |
1185 | { | { |
1186 | ecode += 1 + LINK_SIZE; | ecode += 1 + LINK_SIZE; |
# | Line 1162 for (;;) | Line 1212 for (;;) |
1212 | ||
1213 | POSSESSIVE_NON_CAPTURE: | POSSESSIVE_NON_CAPTURE: |
1214 | matched_once = FALSE; | matched_once = FALSE; |
1215 | code_offset = ecode - md->start_code; | code_offset = (int)(ecode - md->start_code); |
1216 | ||
1217 | for (;;) | for (;;) |
1218 | { | { |
1219 | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
1220 | RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, | RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, |
1221 | eptrb, RM48); | eptrb, RM48); |
1222 | if (rrc == MATCH_KETRPOS) | if (rrc == MATCH_KETRPOS) |
1223 | { | { |
# | Line 1217 for (;;) | Line 1267 for (;;) |
1267 | ||
1268 | if (ecode[LINK_SIZE+1] == OP_CALLOUT) | if (ecode[LINK_SIZE+1] == OP_CALLOUT) |
1269 | { | { |
1270 | if (pcre_callout != NULL) | if (PUBL(callout) != NULL) |
1271 | { | { |
1272 | pcre_callout_block cb; | PUBL(callout_block) cb; |
1273 | cb.version = 2; /* Version 1 of the callout block */ | cb.version = 2; /* Version 1 of the callout block */ |
1274 | cb.callout_number = ecode[LINK_SIZE+2]; | cb.callout_number = ecode[LINK_SIZE+2]; |
1275 | cb.offset_vector = md->offset_vector; | cb.offset_vector = md->offset_vector; |
1276 | #if defined COMPILE_PCRE8 | |
1277 | cb.subject = (PCRE_SPTR)md->start_subject; | cb.subject = (PCRE_SPTR)md->start_subject; |
1278 | #elif defined COMPILE_PCRE16 | |
1279 | cb.subject = (PCRE_SPTR16)md->start_subject; | |
1280 | #elif defined COMPILE_PCRE32 | |
1281 | cb.subject = (PCRE_SPTR32)md->start_subject; | |
1282 | #endif | |
1283 | cb.subject_length = (int)(md->end_subject - md->start_subject); | cb.subject_length = (int)(md->end_subject - md->start_subject); |
1284 | cb.start_match = (int)(mstart - md->start_subject); | cb.start_match = (int)(mstart - md->start_subject); |
1285 | cb.current_position = (int)(eptr - md->start_subject); | cb.current_position = (int)(eptr - md->start_subject); |
# | Line 1232 for (;;) | Line 1288 for (;;) |
1288 | cb.capture_top = offset_top/2; | cb.capture_top = offset_top/2; |
1289 | cb.capture_last = md->capture_last; | cb.capture_last = md->capture_last; |
1290 | cb.callout_data = md->callout_data; | cb.callout_data = md->callout_data; |
1291 | cb.mark = markptr; | cb.mark = md->nomatch_mark; |
1292 | if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); | if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1293 | if (rrc < 0) RRETURN(rrc); | if (rrc < 0) RRETURN(rrc); |
1294 | } | } |
1295 | ecode += _pcre_OP_lengths[OP_CALLOUT]; | ecode += PRIV(OP_lengths)[OP_CALLOUT]; |
1296 | } | } |
1297 | ||
1298 | condcode = ecode[LINK_SIZE+1]; | condcode = ecode[LINK_SIZE+1]; |
# | Line 1262 for (;;) | Line 1318 for (;;) |
1318 | ||
1319 | if (!condition && condcode == OP_NRREF) | if (!condition && condcode == OP_NRREF) |
1320 | { | { |
1321 | uschar *slotA = md->name_table; | pcre_uchar *slotA = md->name_table; |
1322 | for (i = 0; i < md->name_count; i++) | for (i = 0; i < md->name_count; i++) |
1323 | { | { |
1324 | if (GET2(slotA, 0) == recno) break; | if (GET2(slotA, 0) == recno) break; |
# | Line 1275 for (;;) | Line 1331 for (;;) |
1331 | ||
1332 | if (i < md->name_count) | if (i < md->name_count) |
1333 | { | { |
1334 | uschar *slotB = slotA; | pcre_uchar *slotB = slotA; |
1335 | while (slotB > md->name_table) | while (slotB > md->name_table) |
1336 | { | { |
1337 | slotB -= md->name_entry_size; | slotB -= md->name_entry_size; |
1338 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1339 | { | { |
1340 | condition = GET2(slotB, 0) == md->recursive->group_num; | condition = GET2(slotB, 0) == md->recursive->group_num; |
1341 | if (condition) break; | if (condition) break; |
# | Line 1295 for (;;) | Line 1351 for (;;) |
1351 | for (i++; i < md->name_count; i++) | for (i++; i < md->name_count; i++) |
1352 | { | { |
1353 | slotB += md->name_entry_size; | slotB += md->name_entry_size; |
1354 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1355 | { | { |
1356 | condition = GET2(slotB, 0) == md->recursive->group_num; | condition = GET2(slotB, 0) == md->recursive->group_num; |
1357 | if (condition) break; | if (condition) break; |
# | Line 1308 for (;;) | Line 1364 for (;;) |
1364 | ||
1365 | /* Chose branch according to the condition */ | /* Chose branch according to the condition */ |
1366 | ||
1367 | ecode += condition? 3 : GET(ecode, 1); | ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); |
1368 | } | } |
1369 | } | } |
1370 | ||
# | Line 1325 for (;;) | Line 1381 for (;;) |
1381 | if (!condition && condcode == OP_NCREF) | if (!condition && condcode == OP_NCREF) |
1382 | { | { |
1383 | int refno = offset >> 1; | int refno = offset >> 1; |
1384 | uschar *slotA = md->name_table; | pcre_uchar *slotA = md->name_table; |
1385 | ||
1386 | for (i = 0; i < md->name_count; i++) | for (i = 0; i < md->name_count; i++) |
1387 | { | { |
# | Line 1339 for (;;) | Line 1395 for (;;) |
1395 | ||
1396 | if (i < md->name_count) | if (i < md->name_count) |
1397 | { | { |
1398 | uschar *slotB = slotA; | pcre_uchar *slotB = slotA; |
1399 | while (slotB > md->name_table) | while (slotB > md->name_table) |
1400 | { | { |
1401 | slotB -= md->name_entry_size; | slotB -= md->name_entry_size; |
1402 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1403 | { | { |
1404 | offset = GET2(slotB, 0) << 1; | offset = GET2(slotB, 0) << 1; |
1405 | condition = offset < offset_top && | condition = offset < offset_top && |
# | Line 1361 for (;;) | Line 1417 for (;;) |
1417 | for (i++; i < md->name_count; i++) | for (i++; i < md->name_count; i++) |
1418 | { | { |
1419 | slotB += md->name_entry_size; | slotB += md->name_entry_size; |
1420 | if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) | if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
1421 | { | { |
1422 | offset = GET2(slotB, 0) << 1; | offset = GET2(slotB, 0) << 1; |
1423 | condition = offset < offset_top && | condition = offset < offset_top && |
# | Line 1376 for (;;) | Line 1432 for (;;) |
1432 | ||
1433 | /* Chose branch according to the condition */ | /* Chose branch according to the condition */ |
1434 | ||
1435 | ecode += condition? 3 : GET(ecode, 1); | ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); |
1436 | } | } |
1437 | ||
1438 | else if (condcode == OP_DEF) /* DEFINE - always false */ | else if (condcode == OP_DEF) /* DEFINE - always false */ |
# | Line 1468 for (;;) | Line 1524 for (;;) |
1524 | md->offset_vector[offset+1] = (int)(eptr - md->start_subject); | md->offset_vector[offset+1] = (int)(eptr - md->start_subject); |
1525 | if (offset_top <= offset) offset_top = offset + 2; | if (offset_top <= offset) offset_top = offset + 2; |
1526 | } | } |
1527 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
1528 | break; | break; |
1529 | ||
1530 | ||
# | Line 1488 for (;;) | Line 1544 for (;;) |
1544 | (md->notempty || | (md->notempty || |
1545 | (md->notempty_atstart && | (md->notempty_atstart && |
1546 | mstart == md->start_subject + md->start_offset))) | mstart == md->start_subject + md->start_offset))) |
1547 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1548 | ||
1549 | /* Otherwise, we have a match. */ | /* Otherwise, we have a match. */ |
1550 | ||
# | Line 1497 for (;;) | Line 1553 for (;;) |
1553 | md->start_match_ptr = mstart; /* and the start (\K can modify) */ | md->start_match_ptr = mstart; /* and the start (\K can modify) */ |
1554 | ||
1555 | /* For some reason, the macros don't work properly if an expression is | /* For some reason, the macros don't work properly if an expression is |
1556 | given as the argument to MRRETURN when the heap is in use. */ | given as the argument to RRETURN when the heap is in use. */ |
1557 | ||
1558 | rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; | rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; |
1559 | MRRETURN(rrc); | RRETURN(rrc); |
1560 | ||
1561 | /* Assertion brackets. Check the alternative branches in turn - the | /* Assertion brackets. Check the alternative branches in turn - the |
1562 | matching won't pass the KET for an assertion. If any one branch matches, | matching won't pass the KET for an assertion. If any one branch matches, |
# | Line 1515 for (;;) | Line 1571 for (;;) |
1571 | ||
1572 | case OP_ASSERT: | case OP_ASSERT: |
1573 | case OP_ASSERTBACK: | case OP_ASSERTBACK: |
1574 | save_mark = md->mark; | |
1575 | if (md->match_function_type == MATCH_CONDASSERT) | if (md->match_function_type == MATCH_CONDASSERT) |
1576 | { | { |
1577 | condassert = TRUE; | condassert = TRUE; |
# | Line 1528 for (;;) | Line 1585 for (;;) |
1585 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
1586 | { | { |
1587 | mstart = md->start_match_ptr; /* In case \K reset it */ | mstart = md->start_match_ptr; /* In case \K reset it */ |
markptr = md->mark; | ||
1588 | break; | break; |
1589 | } | } |
1590 | md->mark = save_mark; | |
1591 | ||
1592 | /* PCRE does not allow THEN to escape beyond an assertion; it is treated | /* A COMMIT failure must fail the entire assertion, without trying any |
1593 | as NOMATCH. */ | subsequent branches. */ |
1594 | ||
1595 | if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH); | |
1596 | ||
1597 | /* PCRE does not allow THEN to escape beyond an assertion; it | |
1598 | is treated as NOMATCH. */ | |
1599 | ||
1600 | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); | if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
1601 | ecode += GET(ecode, 1); | ecode += GET(ecode, 1); |
1602 | } | } |
1603 | while (*ecode == OP_ALT); | while (*ecode == OP_ALT); |
1604 | ||
1605 | if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH); | if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); |
1606 | ||
1607 | /* If checking an assertion for a condition, return MATCH_MATCH. */ | /* If checking an assertion for a condition, return MATCH_MATCH. */ |
1608 | ||
# | Line 1560 for (;;) | Line 1622 for (;;) |
1622 | ||
1623 | case OP_ASSERT_NOT: | case OP_ASSERT_NOT: |
1624 | case OP_ASSERTBACK_NOT: | case OP_ASSERTBACK_NOT: |
1625 | save_mark = md->mark; | |
1626 | if (md->match_function_type == MATCH_CONDASSERT) | if (md->match_function_type == MATCH_CONDASSERT) |
1627 | { | { |
1628 | condassert = TRUE; | condassert = TRUE; |
# | Line 1570 for (;;) | Line 1633 for (;;) |
1633 | do | do |
1634 | { | { |
1635 | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); | RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); |
1636 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH); | md->mark = save_mark; |
1637 | if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH); | |
1638 | if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) | if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) |
1639 | { | { |
1640 | do ecode += GET(ecode,1); while (*ecode == OP_ALT); | do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
# | Line 1596 for (;;) | Line 1660 for (;;) |
1660 | back a number of characters, not bytes. */ | back a number of characters, not bytes. */ |
1661 | ||
1662 | case OP_REVERSE: | case OP_REVERSE: |
1663 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
1664 | if (utf8) | if (utf) |
1665 | { | { |
1666 | i = GET(ecode, 1); | i = GET(ecode, 1); |
1667 | while (i-- > 0) | while (i-- > 0) |
1668 | { | { |
1669 | eptr--; | eptr--; |
1670 | if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
1671 | BACKCHAR(eptr); | BACKCHAR(eptr); |
1672 | } | } |
1673 | } | } |
# | Line 1614 for (;;) | Line 1678 for (;;) |
1678 | ||
1679 | { | { |
1680 | eptr -= GET(ecode, 1); | eptr -= GET(ecode, 1); |
1681 | if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
1682 | } | } |
1683 | ||
1684 | /* Save the earliest consulted character, then skip to next op code */ | /* Save the earliest consulted character, then skip to next op code */ |
# | Line 1628 for (;;) | Line 1692 for (;;) |
1692 | function is able to force a failure. */ | function is able to force a failure. */ |
1693 | ||
1694 | case OP_CALLOUT: | case OP_CALLOUT: |
1695 | if (pcre_callout != NULL) | if (PUBL(callout) != NULL) |
1696 | { | { |
1697 | pcre_callout_block cb; | PUBL(callout_block) cb; |
1698 | cb.version = 2; /* Version 1 of the callout block */ | cb.version = 2; /* Version 1 of the callout block */ |
1699 | cb.callout_number = ecode[1]; | cb.callout_number = ecode[1]; |
1700 | cb.offset_vector = md->offset_vector; | cb.offset_vector = md->offset_vector; |
1701 | #if defined COMPILE_PCRE8 | |
1702 | cb.subject = (PCRE_SPTR)md->start_subject; | cb.subject = (PCRE_SPTR)md->start_subject; |
1703 | #elif defined COMPILE_PCRE16 | |
1704 | cb.subject = (PCRE_SPTR16)md->start_subject; | |
1705 | #elif defined COMPILE_PCRE32 | |
1706 | cb.subject = (PCRE_SPTR32)md->start_subject; | |
1707 | #endif | |
1708 | cb.subject_length = (int)(md->end_subject - md->start_subject); | cb.subject_length = (int)(md->end_subject - md->start_subject); |
1709 | cb.start_match = (int)(mstart - md->start_subject); | cb.start_match = (int)(mstart - md->start_subject); |
1710 | cb.current_position = (int)(eptr - md->start_subject); | cb.current_position = (int)(eptr - md->start_subject); |
# | Line 1643 for (;;) | Line 1713 for (;;) |
1713 | cb.capture_top = offset_top/2; | cb.capture_top = offset_top/2; |
1714 | cb.capture_last = md->capture_last; | cb.capture_last = md->capture_last; |
1715 | cb.callout_data = md->callout_data; | cb.callout_data = md->callout_data; |
1716 | cb.mark = markptr; | cb.mark = md->nomatch_mark; |
1717 | if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); | if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
1718 | if (rrc < 0) RRETURN(rrc); | if (rrc < 0) RRETURN(rrc); |
1719 | } | } |
1720 | ecode += 2 + 2*LINK_SIZE; | ecode += 2 + 2*LINK_SIZE; |
# | Line 1703 for (;;) | Line 1773 for (;;) |
1773 | else | else |
1774 | { | { |
1775 | new_recursive.offset_save = | new_recursive.offset_save = |
1776 | (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); | (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int)); |
1777 | if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); | if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
1778 | } | } |
1779 | memcpy(new_recursive.offset_save, md->offset_vector, | memcpy(new_recursive.offset_save, md->offset_vector, |
# | Line 1718 for (;;) | Line 1788 for (;;) |
1788 | do | do |
1789 | { | { |
1790 | if (cbegroup) md->match_function_type = MATCH_CBEGROUP; | if (cbegroup) md->match_function_type = MATCH_CBEGROUP; |
1791 | RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top, | RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, |
1792 | md, eptrb, RM6); | md, eptrb, RM6); |
1793 | memcpy(md->offset_vector, new_recursive.offset_save, | memcpy(md->offset_vector, new_recursive.offset_save, |
1794 | new_recursive.saved_max * sizeof(int)); | new_recursive.saved_max * sizeof(int)); |
# | Line 1727 for (;;) | Line 1797 for (;;) |
1797 | { | { |
1798 | DPRINTF(("Recursion matched\n")); | DPRINTF(("Recursion matched\n")); |
1799 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1800 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1801 | ||
1802 | /* Set where we got to in the subject, and reset the start in case | /* Set where we got to in the subject, and reset the start in case |
1803 | it was changed by \K. This *is* propagated back out of a recursion, | it was changed by \K. This *is* propagated back out of a recursion, |
# | Line 1738 for (;;) | Line 1808 for (;;) |
1808 | goto RECURSION_MATCHED; /* Exit loop; end processing */ | goto RECURSION_MATCHED; /* Exit loop; end processing */ |
1809 | } | } |
1810 | ||
1811 | /* PCRE does not allow THEN to escape beyond a recursion; it is treated | /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it |
1812 | as NOMATCH. */ | is treated as NOMATCH. */ |
1813 | ||
1814 | else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) | else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN && |
1815 | rrc != MATCH_COMMIT) | |
1816 | { | { |
1817 | DPRINTF(("Recursion gave error %d\n", rrc)); | DPRINTF(("Recursion gave error %d\n", rrc)); |
1818 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1819 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1820 | RRETURN(rrc); | RRETURN(rrc); |
1821 | } | } |
1822 | ||
# | Line 1757 for (;;) | Line 1828 for (;;) |
1828 | DPRINTF(("Recursion didn't match\n")); | DPRINTF(("Recursion didn't match\n")); |
1829 | md->recursive = new_recursive.prevrec; | md->recursive = new_recursive.prevrec; |
1830 | if (new_recursive.offset_save != stacksave) | if (new_recursive.offset_save != stacksave) |
1831 | (pcre_free)(new_recursive.offset_save); | (PUBL(free))(new_recursive.offset_save); |
1832 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
1833 | } | } |
1834 | ||
1835 | RECURSION_MATCHED: | RECURSION_MATCHED: |
# | Line 1838 for (;;) | Line 1909 for (;;) |
1909 | md->end_match_ptr = eptr; /* For ONCE_NC */ | md->end_match_ptr = eptr; /* For ONCE_NC */ |
1910 | md->end_offset_top = offset_top; | md->end_offset_top = offset_top; |
1911 | md->start_match_ptr = mstart; | md->start_match_ptr = mstart; |
1912 | MRRETURN(MATCH_MATCH); /* Sets md->mark */ | RRETURN(MATCH_MATCH); /* Sets md->mark */ |
1913 | } | } |
1914 | ||
1915 | /* For capturing groups we have to check the group number back at the start | /* For capturing groups we have to check the group number back at the start |
# | Line 1952 for (;;) | Line 2023 for (;;) |
2023 | } | } |
2024 | if (*prev >= OP_SBRA) /* Could match an empty string */ | if (*prev >= OP_SBRA) /* Could match an empty string */ |
2025 | { | { |
md->match_function_type = MATCH_CBEGROUP; | ||
2026 | RMATCH(eptr, prev, offset_top, md, eptrb, RM50); | RMATCH(eptr, prev, offset_top, md, eptrb, RM50); |
2027 | RRETURN(rrc); | RRETURN(rrc); |
2028 | } | } |
# | Line 1961 for (;;) | Line 2031 for (;;) |
2031 | } | } |
2032 | else /* OP_KETRMAX */ | else /* OP_KETRMAX */ |
2033 | { | { |
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; | ||
2034 | RMATCH(eptr, prev, offset_top, md, eptrb, RM13); | RMATCH(eptr, prev, offset_top, md, eptrb, RM13); |
2035 | if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; | if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; |
2036 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
# | Line 1980 for (;;) | Line 2049 for (;;) |
2049 | /* Not multiline mode: start of subject assertion, unless notbol. */ | /* Not multiline mode: start of subject assertion, unless notbol. */ |
2050 | ||
2051 | case OP_CIRC: | case OP_CIRC: |
2052 | if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH); | if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); |
2053 | ||
2054 | /* Start of subject assertion */ | /* Start of subject assertion */ |
2055 | ||
2056 | case OP_SOD: | case OP_SOD: |
2057 | if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH); | if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); |
2058 | ecode++; | ecode++; |
2059 | break; | break; |
2060 | ||
2061 | /* Multiline mode: start of subject unless notbol, or after any newline. */ | /* Multiline mode: start of subject unless notbol, or after any newline. */ |
2062 | ||
2063 | case OP_CIRCM: | case OP_CIRCM: |
2064 | if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH); | if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); |
2065 | if (eptr != md->start_subject && | if (eptr != md->start_subject && |
2066 | (eptr == md->end_subject || !WAS_NEWLINE(eptr))) | (eptr == md->end_subject || !WAS_NEWLINE(eptr))) |
2067 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2068 | ecode++; | ecode++; |
2069 | break; | break; |
2070 | ||
2071 | /* Start of match assertion */ | /* Start of match assertion */ |
2072 | ||
2073 | case OP_SOM: | case OP_SOM: |
2074 | if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH); | if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); |
2075 | ecode++; | ecode++; |
2076 | break; | break; |
2077 | ||
# | Line 2018 for (;;) | Line 2087 for (;;) |
2087 | ||
2088 | case OP_DOLLM: | case OP_DOLLM: |
2089 | if (eptr < md->end_subject) | if (eptr < md->end_subject) |
2090 | { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); } | { |
2091 | if (!IS_NEWLINE(eptr)) | |
2092 | { | |
2093 | if (md->partial != 0 && | |
2094 | eptr + 1 >= md->end_subject && | |
2095 | NLBLOCK->nltype == NLTYPE_FIXED && | |
2096 | NLBLOCK->nllen == 2 && | |
2097 | *eptr == NLBLOCK->nl[0]) | |
2098 | { | |
2099 | md->hitend = TRUE; | |
2100 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2101 | } | |
2102 | RRETURN(MATCH_NOMATCH); | |
2103 | } | |
2104 | } | |
2105 | else | else |
2106 | { | { |
2107 | if (md->noteol) MRRETURN(MATCH_NOMATCH); | if (md->noteol) RRETURN(MATCH_NOMATCH); |
2108 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2109 | } | } |
2110 | ecode++; | ecode++; |
# | Line 2031 for (;;) | Line 2114 for (;;) |
2114 | subject unless noteol is set. */ | subject unless noteol is set. */ |
2115 | ||
2116 | case OP_DOLL: | case OP_DOLL: |
2117 | if (md->noteol) MRRETURN(MATCH_NOMATCH); | if (md->noteol) RRETURN(MATCH_NOMATCH); |
2118 | if (!md->endonly) goto ASSERT_NL_OR_EOS; | if (!md->endonly) goto ASSERT_NL_OR_EOS; |
2119 | ||
2120 | /* ... else fall through for endonly */ | /* ... else fall through for endonly */ |
# | Line 2039 for (;;) | Line 2122 for (;;) |
2122 | /* End of subject assertion (\z) */ | /* End of subject assertion (\z) */ |
2123 | ||
2124 | case OP_EOD: | case OP_EOD: |
2125 | if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH); | if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); |
2126 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2127 | ecode++; | ecode++; |
2128 | break; | break; |
# | Line 2050 for (;;) | Line 2133 for (;;) |
2133 | ASSERT_NL_OR_EOS: | ASSERT_NL_OR_EOS: |
2134 | if (eptr < md->end_subject && | if (eptr < md->end_subject && |
2135 | (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) | (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) |
2136 | MRRETURN(MATCH_NOMATCH); | { |
2137 | if (md->partial != 0 && | |
2138 | eptr + 1 >= md->end_subject && | |
2139 | NLBLOCK->nltype == NLTYPE_FIXED && | |
2140 | NLBLOCK->nllen == 2 && | |
2141 | *eptr == NLBLOCK->nl[0]) | |
2142 | { | |
2143 | md->hitend = TRUE; | |
2144 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2145 | } | |
2146 | RRETURN(MATCH_NOMATCH); | |
2147 | } | |
2148 | ||
2149 | /* Either at end of string or \n before end. */ | /* Either at end of string or \n before end. */ |
2150 | ||
# | Line 2069 for (;;) | Line 2163 for (;;) |
2163 | be "non-word" characters. Remember the earliest consulted character for | be "non-word" characters. Remember the earliest consulted character for |
2164 | partial matching. */ | partial matching. */ |
2165 | ||
2166 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2167 | if (utf8) | if (utf) |
2168 | { | { |
2169 | /* Get status of previous character */ | /* Get status of previous character */ |
2170 | ||
2171 | if (eptr == md->start_subject) prev_is_word = FALSE; else | if (eptr == md->start_subject) prev_is_word = FALSE; else |
2172 | { | { |
2173 | USPTR lastptr = eptr - 1; | PCRE_PUCHAR lastptr = eptr - 1; |
2174 | while((*lastptr & 0xc0) == 0x80) lastptr--; | BACKCHAR(lastptr); |
2175 | if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; | if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; |
2176 | GETCHAR(c, lastptr); | GETCHAR(c, lastptr); |
2177 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
# | Line 2142 for (;;) | Line 2236 for (;;) |
2236 | } | } |
2237 | else | else |
2238 | #endif | #endif |
2239 | prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); | prev_is_word = MAX_255(eptr[-1]) |
2240 | && ((md->ctypes[eptr[-1]] & ctype_word) != 0); | |
2241 | } | } |
2242 | ||
2243 | /* Get status of next character */ | /* Get status of next character */ |
# | Line 2165 for (;;) | Line 2260 for (;;) |
2260 | } | } |
2261 | else | else |
2262 | #endif | #endif |
2263 | cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); | cur_is_word = MAX_255(*eptr) |
2264 | && ((md->ctypes[*eptr] & ctype_word) != 0); | |
2265 | } | } |
2266 | ||
2267 | /* Now see if the situation is what we want */ | /* Now see if the situation is what we want */ |
2268 | ||
2269 | if ((*ecode++ == OP_WORD_BOUNDARY)? | if ((*ecode++ == OP_WORD_BOUNDARY)? |
2270 | cur_is_word == prev_is_word : cur_is_word != prev_is_word) | cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
2271 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2272 | } | } |
2273 | break; | break; |
2274 | ||
2275 | /* Match a single character type; inline for speed */ | /* Match any single character type except newline; have to take care with |
2276 | CRLF newlines and partial matching. */ | |
2277 | ||
2278 | case OP_ANY: | case OP_ANY: |
2279 | if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
2280 | if (md->partial != 0 && | |
2281 | eptr + 1 >= md->end_subject && | |
2282 | NLBLOCK->nltype == NLTYPE_FIXED && | |
2283 | NLBLOCK->nllen == 2 && | |
2284 | *eptr == NLBLOCK->nl[0]) | |
2285 | { | |
2286 | md->hitend = TRUE; | |
2287 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2288 | } | |
2289 | ||
2290 | /* Fall through */ | /* Fall through */ |
2291 | ||
2292 | /* Match any single character whatsoever. */ | |
2293 | ||
2294 | case OP_ALLANY: | case OP_ALLANY: |
2295 | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
2296 | { /* not be updated before SCHECK_PARTIAL. */ | { /* not be updated before SCHECK_PARTIAL. */ |
2297 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2298 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2299 | } | } |
2300 | eptr++; | eptr++; |
2301 | if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | #ifdef SUPPORT_UTF |
2302 | if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
2303 | #endif | |
2304 | ecode++; | ecode++; |
2305 | break; | break; |
2306 | ||
# | Line 2200 for (;;) | Line 2311 for (;;) |
2311 | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ | if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
2312 | { /* not be updated before SCHECK_PARTIAL. */ | { /* not be updated before SCHECK_PARTIAL. */ |
2313 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2314 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2315 | } | } |
2316 | eptr++; | eptr++; |
2317 | ecode++; | ecode++; |
# | Line 2210 for (;;) | Line 2321 for (;;) |
2321 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2322 | { | { |
2323 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2324 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2325 | } | } |
2326 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2327 | if ( | if ( |
2328 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2329 | c < 256 && | c < 256 && |
2330 | #endif | #endif |
2331 | (md->ctypes[c] & ctype_digit) != 0 | (md->ctypes[c] & ctype_digit) != 0 |
2332 | ) | ) |
2333 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2334 | ecode++; | ecode++; |
2335 | break; | break; |
2336 | ||
# | Line 2227 for (;;) | Line 2338 for (;;) |
2338 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2339 | { | { |
2340 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2341 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2342 | } | } |
2343 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2344 | if ( | if ( |
2345 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2346 | c >= 256 || | c > 255 || |
2347 | #endif | #endif |
2348 | (md->ctypes[c] & ctype_digit) == 0 | (md->ctypes[c] & ctype_digit) == 0 |
2349 | ) | ) |
2350 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2351 | ecode++; | ecode++; |
2352 | break; | break; |
2353 | ||
# | Line 2244 for (;;) | Line 2355 for (;;) |
2355 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2356 | { | { |
2357 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2358 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2359 | } | } |
2360 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2361 | if ( | if ( |
2362 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2363 | c < 256 && | c < 256 && |
2364 | #endif | #endif |
2365 | (md->ctypes[c] & ctype_space) != 0 | (md->ctypes[c] & ctype_space) != 0 |
2366 | ) | ) |
2367 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2368 | ecode++; | ecode++; |
2369 | break; | break; |
2370 | ||
# | Line 2261 for (;;) | Line 2372 for (;;) |
2372 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2373 | { | { |
2374 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2375 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2376 | } | } |
2377 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2378 | if ( | if ( |
2379 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2380 | c >= 256 || | c > 255 || |
2381 | #endif | #endif |
2382 | (md->ctypes[c] & ctype_space) == 0 | (md->ctypes[c] & ctype_space) == 0 |
2383 | ) | ) |
2384 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2385 | ecode++; | ecode++; |
2386 | break; | break; |
2387 | ||
# | Line 2278 for (;;) | Line 2389 for (;;) |
2389 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2390 | { | { |
2391 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2392 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2393 | } | } |
2394 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2395 | if ( | if ( |
2396 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2397 | c < 256 && | c < 256 && |
2398 | #endif | #endif |
2399 | (md->ctypes[c] & ctype_word) != 0 | (md->ctypes[c] & ctype_word) != 0 |
2400 | ) | ) |
2401 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2402 | ecode++; | ecode++; |
2403 | break; | break; |
2404 | ||
# | Line 2295 for (;;) | Line 2406 for (;;) |
2406 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2407 | { | { |
2408 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2409 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2410 | } | } |
2411 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2412 | if ( | if ( |
2413 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2414 | c >= 256 || | c > 255 || |
2415 | #endif | #endif |
2416 | (md->ctypes[c] & ctype_word) == 0 | (md->ctypes[c] & ctype_word) == 0 |
2417 | ) | ) |
2418 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2419 | ecode++; | ecode++; |
2420 | break; | break; |
2421 | ||
# | Line 2312 for (;;) | Line 2423 for (;;) |
2423 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2424 | { | { |
2425 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2426 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2427 | } | } |
2428 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2429 | switch(c) | switch(c) |
2430 | { | { |
2431 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
2432 | ||
2433 | case 0x000d: | case CHAR_CR: |
2434 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr >= md->end_subject) |
2435 | { | |
2436 | SCHECK_PARTIAL(); | |
2437 | } | |
2438 | else if (*eptr == CHAR_LF) eptr++; | |
2439 | break; | break; |
2440 | ||
2441 | case 0x000a: | case CHAR_LF: |
2442 | break; | break; |
2443 | ||
2444 | case 0x000b: | case CHAR_VT: |
2445 | case 0x000c: | case CHAR_FF: |
2446 | case 0x0085: | case CHAR_NEL: |
2447 | #ifndef EBCDIC | |
2448 | case 0x2028: | case 0x2028: |
2449 | case 0x2029: | case 0x2029: |
2450 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | #endif /* Not EBCDIC */ |
2451 | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); | |
2452 | break; | break; |
2453 | } | } |
2454 | ecode++; | ecode++; |
# | Line 2341 for (;;) | Line 2458 for (;;) |
2458 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2459 | { | { |
2460 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2461 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2462 | } | } |
2463 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2464 | switch(c) | switch(c) |
2465 | { | { |
2466 | HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ | |
2467 | default: break; | default: break; |
case 0x09: /* HT */ | ||
case 0x20: /* SPACE */ | ||
case 0xa0: /* NBSP */ | ||
case 0x1680: /* OGHAM SPACE MARK */ | ||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | ||
case 0x2000: /* EN QUAD */ | ||
case 0x2001: /* EM QUAD */ | ||
case 0x2002: /* EN SPACE */ | ||
case 0x2003: /* EM SPACE */ | ||
case 0x2004: /* THREE-PER-EM SPACE */ | ||
case 0x2005: /* FOUR-PER-EM SPACE */ | ||
case 0x2006: /* SIX-PER-EM SPACE */ | ||
case 0x2007: /* FIGURE SPACE */ | ||
case 0x2008: /* PUNCTUATION SPACE */ | ||
case 0x2009: /* THIN SPACE */ | ||
case 0x200A: /* HAIR SPACE */ | ||
case 0x202f: /* NARROW NO-BREAK SPACE */ | ||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | ||
case 0x3000: /* IDEOGRAPHIC SPACE */ | ||
MRRETURN(MATCH_NOMATCH); | ||
2468 | } | } |
2469 | ecode++; | ecode++; |
2470 | break; | break; |
# | Line 2375 for (;;) | Line 2473 for (;;) |
2473 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2474 | { | { |
2475 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2476 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2477 | } | } |
2478 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2479 | switch(c) | switch(c) |
2480 | { | { |
2481 | default: MRRETURN(MATCH_NOMATCH); | HSPACE_CASES: break; /* Byte and multibyte cases */ |
2482 | case 0x09: /* HT */ | default: RRETURN(MATCH_NOMATCH); |
case 0x20: /* SPACE */ | ||
case 0xa0: /* NBSP */ | ||
case 0x1680: /* OGHAM SPACE MARK */ | ||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | ||
case 0x2000: /* EN QUAD */ | ||
case 0x2001: /* EM QUAD */ | ||
case 0x2002: /* EN SPACE */ | ||
case 0x2003: /* EM SPACE */ | ||
case 0x2004: /* THREE-PER-EM SPACE */ | ||
case 0x2005: /* FOUR-PER-EM SPACE */ | ||
case 0x2006: /* SIX-PER-EM SPACE */ | ||
case 0x2007: /* FIGURE SPACE */ | ||
case 0x2008: /* PUNCTUATION SPACE */ | ||
case 0x2009: /* THIN SPACE */ | ||
case 0x200A: /* HAIR SPACE */ | ||
case 0x202f: /* NARROW NO-BREAK SPACE */ | ||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | ||
case 0x3000: /* IDEOGRAPHIC SPACE */ | ||
break; | ||
2483 | } | } |
2484 | ecode++; | ecode++; |
2485 | break; | break; |
# | Line 2409 for (;;) | Line 2488 for (;;) |
2488 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2489 | { | { |
2490 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2491 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2492 | } | } |
2493 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2494 | switch(c) | switch(c) |
2495 | { | { |
2496 | VSPACE_CASES: RRETURN(MATCH_NOMATCH); | |
2497 | default: break; | default: break; |
case 0x0a: /* LF */ | ||
case 0x0b: /* VT */ | ||
case 0x0c: /* FF */ | ||
case 0x0d: /* CR */ | ||
case 0x85: /* NEL */ | ||
case 0x2028: /* LINE SEPARATOR */ | ||
case 0x2029: /* PARAGRAPH SEPARATOR */ | ||
MRRETURN(MATCH_NOMATCH); | ||
2498 | } | } |
2499 | ecode++; | ecode++; |
2500 | break; | break; |
# | Line 2431 for (;;) | Line 2503 for (;;) |
2503 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2504 | { | { |
2505 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2506 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2507 | } | } |
2508 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2509 | switch(c) | switch(c) |
2510 | { | { |
2511 | default: MRRETURN(MATCH_NOMATCH); | VSPACE_CASES: break; |
2512 | case 0x0a: /* LF */ | default: RRETURN(MATCH_NOMATCH); |
case 0x0b: /* VT */ | ||
case 0x0c: /* FF */ | ||
case 0x0d: /* CR */ | ||
case 0x85: /* NEL */ | ||
case 0x2028: /* LINE SEPARATOR */ | ||
case 0x2029: /* PARAGRAPH SEPARATOR */ | ||
break; | ||
2513 | } | } |
2514 | ecode++; | ecode++; |
2515 | break; | break; |
# | Line 2458 for (;;) | Line 2523 for (;;) |
2523 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2524 | { | { |
2525 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2526 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2527 | } | } |
2528 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
2529 | { | { |
2530 | const pcre_uint32 *cp; | |
2531 | const ucd_record *prop = GET_UCD(c); | const ucd_record *prop = GET_UCD(c); |
2532 | ||
2533 | switch(ecode[1]) | switch(ecode[1]) |
2534 | { | { |
2535 | case PT_ANY: | case PT_ANY: |
2536 | if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH); | if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH); |
2537 | break; | break; |
2538 | ||
2539 | case PT_LAMP: | case PT_LAMP: |
2540 | if ((prop->chartype == ucp_Lu || | if ((prop->chartype == ucp_Lu || |
2541 | prop->chartype == ucp_Ll || | prop->chartype == ucp_Ll || |
2542 | prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) | prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) |
2543 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2544 | break; | break; |
2545 | ||
2546 | case PT_GC: | case PT_GC: |
2547 | if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP)) | if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) |
2548 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2549 | break; | break; |
2550 | ||
2551 | case PT_PC: | case PT_PC: |
2552 | if ((ecode[2] != prop->chartype) == (op == OP_PROP)) | if ((ecode[2] != prop->chartype) == (op == OP_PROP)) |
2553 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2554 | break; | break; |
2555 | ||
2556 | case PT_SC: | case PT_SC: |
2557 | if ((ecode[2] != prop->script) == (op == OP_PROP)) | if ((ecode[2] != prop->script) == (op == OP_PROP)) |
2558 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2559 | break; | break; |
2560 | ||
2561 | /* These are specials */ | /* These are specials */ |
2562 | ||
2563 | case PT_ALNUM: | case PT_ALNUM: |
2564 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
2565 | _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) | PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) |
2566 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2567 | break; | break; |
2568 | ||
2569 | case PT_SPACE: /* Perl space */ | case PT_SPACE: /* Perl space */ |
2570 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
2571 | c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) | c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) |
2572 | == (op == OP_NOTPROP)) | == (op == OP_NOTPROP)) |
2573 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2574 | break; | break; |
2575 | ||
2576 | case PT_PXSPACE: /* POSIX space */ | case PT_PXSPACE: /* POSIX space */ |
2577 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || |
2578 | c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || | c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || |
2579 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
2580 | == (op == OP_NOTPROP)) | == (op == OP_NOTPROP)) |
2581 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2582 | break; | break; |
2583 | ||
2584 | case PT_WORD: | case PT_WORD: |
2585 | if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || | if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
2586 | _pcre_ucp_gentype[prop->chartype] == ucp_N || | PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
2587 | c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) | c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) |
2588 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2589 | break; | |
2590 | ||
2591 | case PT_CLIST: | |
2592 | cp = PRIV(ucd_caseless_sets) + prop->caseset; | |
2593 | for (;;) | |
2594 | { | |
2595 | if (c < *cp) | |
2596 | { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; } | |
2597 | if (c == *cp++) | |
2598 | { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } } | |
2599 | } | |
2600 | break; | break; |
2601 | ||
2602 | /* This should never occur */ | /* This should never occur */ |
# | Line 2539 for (;;) | Line 2616 for (;;) |
2616 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2617 | { | { |
2618 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2619 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2620 | } | } |
2621 | GETCHARINCTEST(c, eptr); | else |
if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); | ||
while (eptr < md->end_subject) | ||
2622 | { | { |
2623 | int len = 1; | int lgb, rgb; |
2624 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | GETCHARINCTEST(c, eptr); |
2625 | if (UCD_CATEGORY(c) != ucp_M) break; | lgb = UCD_GRAPHBREAK(c); |
2626 | eptr += len; | while (eptr < md->end_subject) |
2627 | { | |
2628 | int len = 1; | |
2629 | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } | |
2630 | rgb = UCD_GRAPHBREAK(c); | |
2631 | if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; | |
2632 | lgb = rgb; | |
2633 | eptr += len; | |
2634 | } | |
2635 | } | } |
2636 | CHECK_PARTIAL(); | |
2637 | ecode++; | ecode++; |
2638 | break; | break; |
2639 | #endif | #endif /* SUPPORT_UCP */ |
2640 | ||
2641 | ||
2642 | /* Match a back reference, possibly repeatedly. Look past the end of the | /* Match a back reference, possibly repeatedly. Look past the end of the |
# | Line 2567 for (;;) | Line 2651 for (;;) |
2651 | case OP_REFI: | case OP_REFI: |
2652 | caseless = op == OP_REFI; | caseless = op == OP_REFI; |
2653 | offset = GET2(ecode, 1) << 1; /* Doubled ref number */ | offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
2654 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
2655 | ||
2656 | /* If the reference is unset, there are two possibilities: | /* If the reference is unset, there are two possibilities: |
2657 | ||
# | Line 2607 for (;;) | Line 2691 for (;;) |
2691 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
2692 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
2693 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
2694 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
2695 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
2696 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
2697 | break; | break; |
2698 | ||
2699 | default: /* No repeat follows */ | default: /* No repeat follows */ |
2700 | if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) |
2701 | { | { |
2702 | if (length == -2) eptr = md->end_subject; /* Partial match */ | |
2703 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2704 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2705 | } | } |
2706 | eptr += length; | eptr += length; |
2707 | continue; /* With the main loop */ | continue; /* With the main loop */ |
2708 | } | } |
2709 | ||
2710 | /* Handle repeated back references. If the length of the reference is | /* Handle repeated back references. If the length of the reference is |
2711 | zero, just continue with the main loop. */ | zero, just continue with the main loop. If the length is negative, it |
2712 | means the reference is unset in non-Java-compatible mode. If the minimum is | |
2713 | zero, we can continue at the same level without recursion. For any other | |
2714 | minimum, carrying on will result in NOMATCH. */ | |
2715 | ||
2716 | if (length == 0) continue; | if (length == 0) continue; |
2717 | if (length < 0 && min == 0) continue; | |
2718 | ||
2719 | /* First, ensure the minimum number of matches are present. We get back | /* First, ensure the minimum number of matches are present. We get back |
2720 | the length of the reference string explicitly rather than passing the | the length of the reference string explicitly rather than passing the |
# | Line 2636 for (;;) | Line 2725 for (;;) |
2725 | int slength; | int slength; |
2726 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2727 | { | { |
2728 | if (slength == -2) eptr = md->end_subject; /* Partial match */ | |
2729 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2730 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2731 | } | } |
2732 | eptr += slength; | eptr += slength; |
2733 | } | } |
# | Line 2656 for (;;) | Line 2746 for (;;) |
2746 | int slength; | int slength; |
2747 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); |
2748 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2749 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2750 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2751 | { | { |
2752 | if (slength == -2) eptr = md->end_subject; /* Partial match */ | |
2753 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
2754 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2755 | } | } |
2756 | eptr += slength; | eptr += slength; |
2757 | } | } |
# | Line 2677 for (;;) | Line 2768 for (;;) |
2768 | int slength; | int slength; |
2769 | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) | if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) |
2770 | { | { |
2771 | CHECK_PARTIAL(); | /* Can't use CHECK_PARTIAL because we don't want to update eptr in |
2772 | the soft partial matching case. */ | |
2773 | ||
2774 | if (slength == -2 && md->partial != 0 && | |
2775 | md->end_subject > md->start_used_ptr) | |
2776 | { | |
2777 | md->hitend = TRUE; | |
2778 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
2779 | } | |
2780 | break; | break; |
2781 | } | } |
2782 | eptr += slength; | eptr += slength; |
2783 | } | } |
2784 | ||
2785 | while (eptr >= pp) | while (eptr >= pp) |
2786 | { | { |
2787 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM15); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM15); |
2788 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2789 | eptr -= length; | eptr -= length; |
2790 | } | } |
2791 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2792 | } | } |
2793 | /* Control never gets here */ | /* Control never gets here */ |
2794 | ||
# | Line 2706 for (;;) | Line 2806 for (;;) |
2806 | case OP_NCLASS: | case OP_NCLASS: |
2807 | case OP_CLASS: | case OP_CLASS: |
2808 | { | { |
2809 | /* The data variable is saved across frames, so the byte map needs to | |
2810 | be stored there. */ | |
2811 | #define BYTE_MAP ((pcre_uint8 *)data) | |
2812 | data = ecode + 1; /* Save for matching */ | data = ecode + 1; /* Save for matching */ |
2813 | ecode += 33; /* Advance past the item */ | ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */ |
2814 | ||
2815 | switch (*ecode) | switch (*ecode) |
2816 | { | { |
# | Line 2728 for (;;) | Line 2831 for (;;) |
2831 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
2832 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
2833 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
2834 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
2835 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
2836 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
2837 | break; | break; |
2838 | ||
2839 | default: /* No repeat follows */ | default: /* No repeat follows */ |
# | Line 2740 for (;;) | Line 2843 for (;;) |
2843 | ||
2844 | /* First, ensure the minimum number of matches are present. */ | /* First, ensure the minimum number of matches are present. */ |
2845 | ||
2846 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2847 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2848 | { | { |
2849 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
2850 | { | { |
2851 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2852 | { | { |
2853 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2854 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2855 | } | } |
2856 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
2857 | if (c > 255) | if (c > 255) |
2858 | { | { |
2859 | if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH); | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
2860 | } | } |
2861 | else | else |
2862 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | ||
} | ||
2863 | } | } |
2864 | } | } |
2865 | else | else |
2866 | #endif | #endif |
2867 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2868 | { | { |
2869 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
2870 | { | { |
2871 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2872 | { | { |
2873 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2874 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2875 | } | } |
2876 | c = *eptr++; | c = *eptr++; |
2877 | if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | #ifndef COMPILE_PCRE8 |
2878 | if (c > 255) | |
2879 | { | |
2880 | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); | |
2881 | } | |
2882 | else | |
2883 | #endif | |
2884 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); | |
2885 | } | } |
2886 | } | } |
2887 | ||
# | Line 2788 for (;;) | Line 2895 for (;;) |
2895 | ||
2896 | if (minimize) | if (minimize) |
2897 | { | { |
2898 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2899 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2900 | { | { |
2901 | for (fi = min;; fi++) | for (fi = min;; fi++) |
2902 | { | { |
2903 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); |
2904 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2905 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2906 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2907 | { | { |
2908 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2909 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2910 | } | } |
2911 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
2912 | if (c > 255) | if (c > 255) |
2913 | { | { |
2914 | if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH); | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
2915 | } | } |
2916 | else | else |
2917 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | ||
} | ||
2918 | } | } |
2919 | } | } |
2920 | else | else |
2921 | #endif | #endif |
2922 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2923 | { | { |
2924 | for (fi = min;; fi++) | for (fi = min;; fi++) |
2925 | { | { |
2926 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); |
2927 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
2928 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
2929 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
2930 | { | { |
2931 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
2932 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
2933 | } | } |
2934 | c = *eptr++; | c = *eptr++; |
2935 | if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH); | #ifndef COMPILE_PCRE8 |
2936 | if (c > 255) | |
2937 | { | |
2938 | if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); | |
2939 | } | |
2940 | else | |
2941 | #endif | |
2942 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); | |
2943 | } | } |
2944 | } | } |
2945 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 2840 for (;;) | Line 2951 for (;;) |
2951 | { | { |
2952 | pp = eptr; | pp = eptr; |
2953 | ||
2954 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
2955 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
2956 | { | { |
2957 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
2958 | { | { |
# | Line 2858 for (;;) | Line 2968 for (;;) |
2968 | if (op == OP_CLASS) break; | if (op == OP_CLASS) break; |
2969 | } | } |
2970 | else | else |
2971 | { | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; |
if ((data[c/8] & (1 << (c&7))) == 0) break; | ||
} | ||
2972 | eptr += len; | eptr += len; |
2973 | } | } |
2974 | for (;;) | for (;;) |
# | Line 2873 for (;;) | Line 2981 for (;;) |
2981 | } | } |
2982 | else | else |
2983 | #endif | #endif |
2984 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
2985 | { | { |
2986 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
2987 | { | { |
# | Line 2883 for (;;) | Line 2991 for (;;) |
2991 | break; | break; |
2992 | } | } |
2993 | c = *eptr; | c = *eptr; |
2994 | if ((data[c/8] & (1 << (c&7))) == 0) break; | #ifndef COMPILE_PCRE8 |
2995 | if (c > 255) | |
2996 | { | |
2997 | if (op == OP_CLASS) break; | |
2998 | } | |
2999 | else | |
3000 | #endif | |
3001 | if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; | |
3002 | eptr++; | eptr++; |
3003 | } | } |
3004 | while (eptr >= pp) | while (eptr >= pp) |
# | Line 2894 for (;;) | Line 3009 for (;;) |
3009 | } | } |
3010 | } | } |
3011 | ||
3012 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3013 | } | } |
3014 | #undef BYTE_MAP | |
3015 | } | } |
3016 | /* Control never gets here */ | /* Control never gets here */ |
3017 | ||
# | Line 2904 for (;;) | Line 3020 for (;;) |
3020 | when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8 | when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8 |
3021 | mode, because Unicode properties are supported in non-UTF-8 mode. */ | mode, because Unicode properties are supported in non-UTF-8 mode. */ |
3022 | ||
3023 | #ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
3024 | case OP_XCLASS: | case OP_XCLASS: |
3025 | { | { |
3026 | data = ecode + 1 + LINK_SIZE; /* Save for matching */ | data = ecode + 1 + LINK_SIZE; /* Save for matching */ |
# | Line 2929 for (;;) | Line 3045 for (;;) |
3045 | case OP_CRMINRANGE: | case OP_CRMINRANGE: |
3046 | minimize = (*ecode == OP_CRMINRANGE); | minimize = (*ecode == OP_CRMINRANGE); |
3047 | min = GET2(ecode, 1); | min = GET2(ecode, 1); |
3048 | max = GET2(ecode, 3); | max = GET2(ecode, 1 + IMM2_SIZE); |
3049 | if (max == 0) max = INT_MAX; | if (max == 0) max = INT_MAX; |
3050 | ecode += 5; | ecode += 1 + 2 * IMM2_SIZE; |
3051 | break; | break; |
3052 | ||
3053 | default: /* No repeat follows */ | default: /* No repeat follows */ |
# | Line 2946 for (;;) | Line 3062 for (;;) |
3062 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3063 | { | { |
3064 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3065 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3066 | } | } |
3067 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
3068 | if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH); | if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); |
3069 | } | } |
3070 | ||
3071 | /* If max == min we can continue with the main loop without the | /* If max == min we can continue with the main loop without the |
# | Line 2966 for (;;) | Line 3082 for (;;) |
3082 | { | { |
3083 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); |
3084 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3085 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3086 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3087 | { | { |
3088 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3089 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3090 | } | } |
3091 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
3092 | if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH); | if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); |
3093 | } | } |
3094 | /* Control never gets here */ | /* Control never gets here */ |
3095 | } | } |
# | Line 2991 for (;;) | Line 3107 for (;;) |
3107 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3108 | break; | break; |
3109 | } | } |
3110 | #ifdef SUPPORT_UTF | |
3111 | GETCHARLENTEST(c, eptr, len); | GETCHARLENTEST(c, eptr, len); |
3112 | if (!_pcre_xclass(c, data)) break; | #else |
3113 | c = *eptr; | |
3114 | #endif | |
3115 | if (!PRIV(xclass)(c, data, utf)) break; | |
3116 | eptr += len; | eptr += len; |
3117 | } | } |
3118 | for(;;) | for(;;) |
# | Line 3000 for (;;) | Line 3120 for (;;) |
3120 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); |
3121 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3122 | if (eptr-- == pp) break; /* Stop if tried at original pos */ | if (eptr-- == pp) break; /* Stop if tried at original pos */ |
3123 | if (utf8) BACKCHAR(eptr); | #ifdef SUPPORT_UTF |
3124 | if (utf) BACKCHAR(eptr); | |
3125 | #endif | |
3126 | } | } |
3127 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3128 | } | } |
3129 | ||
3130 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3012 for (;;) | Line 3134 for (;;) |
3134 | /* Match a single character, casefully */ | /* Match a single character, casefully */ |
3135 | ||
3136 | case OP_CHAR: | case OP_CHAR: |
3137 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3138 | if (utf8) | if (utf) |
3139 | { | { |
3140 | length = 1; | length = 1; |
3141 | ecode++; | ecode++; |
# | Line 3021 for (;;) | Line 3143 for (;;) |
3143 | if (length > md->end_subject - eptr) | if (length > md->end_subject - eptr) |
3144 | { | { |
3145 | CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ | CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
3146 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3147 | } | } |
3148 | while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH); | while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); |
3149 | } | } |
3150 | else | else |
3151 | #endif | #endif |
3152 | /* Not UTF mode */ | |
/* Non-UTF-8 mode */ | ||
3153 | { | { |
3154 | if (md->end_subject - eptr < 1) | if (md->end_subject - eptr < 1) |
3155 | { | { |
3156 | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
3157 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3158 | } | } |
3159 | if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH); | if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); |
3160 | ecode += 2; | ecode += 2; |
3161 | } | } |
3162 | break; | break; |
3163 | ||
3164 | /* Match a single character, caselessly */ | /* Match a single character, caselessly. If we are at the end of the |
3165 | subject, give up immediately. */ | |
3166 | ||
3167 | case OP_CHARI: | case OP_CHARI: |
3168 | #ifdef SUPPORT_UTF8 | if (eptr >= md->end_subject) |
3169 | if (utf8) | { |
3170 | SCHECK_PARTIAL(); | |
3171 | RRETURN(MATCH_NOMATCH); | |
3172 | } | |
3173 | ||
3174 | #ifdef SUPPORT_UTF | |
3175 | if (utf) | |
3176 | { | { |
3177 | length = 1; | length = 1; |
3178 | ecode++; | ecode++; |
3179 | GETCHARLEN(fc, ecode, length); | GETCHARLEN(fc, ecode, length); |
3180 | ||
if (length > md->end_subject - eptr) | ||
{ | ||
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ | ||
MRRETURN(MATCH_NOMATCH); | ||
} | ||
3181 | /* If the pattern character's value is < 128, we have only one byte, and | /* If the pattern character's value is < 128, we have only one byte, and |
3182 | can use the fast lookup table. */ | we know that its other case must also be one byte long, so we can use the |
3183 | fast lookup table. We know that there is at least one byte left in the | |
3184 | subject. */ | |
3185 | ||
3186 | if (fc < 128) | if (fc < 128) |
3187 | { | { |
3188 | if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (md->lcc[fc] |
3189 | != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); | |
3190 | ecode++; | |
3191 | eptr++; | |
3192 | } | } |
3193 | ||
3194 | /* Otherwise we must pick up the subject character */ | /* Otherwise we must pick up the subject character. Note that we cannot |
3195 | use the value of "length" to check for sufficient bytes left, because the | |
3196 | other case of the character may have more or fewer bytes. */ | |
3197 | ||
3198 | else | else |
3199 | { | { |
3200 | unsigned int dc; | pcre_uint32 dc; |
3201 | GETCHARINC(dc, eptr); | GETCHARINC(dc, eptr); |
3202 | ecode += length; | ecode += length; |
3203 | ||
# | Line 3080 for (;;) | Line 3209 for (;;) |
3209 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3210 | if (dc != UCD_OTHERCASE(fc)) | if (dc != UCD_OTHERCASE(fc)) |
3211 | #endif | #endif |
3212 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3213 | } | } |
3214 | } | } |
3215 | } | } |
3216 | else | else |
3217 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
3218 | ||
3219 | /* Non-UTF-8 mode */ | /* Not UTF mode */ |
3220 | { | { |
3221 | if (md->end_subject - eptr < 1) | if (TABLE_GET(ecode[1], md->lcc, ecode[1]) |
3222 | { | != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); |
3223 | SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ | eptr++; |
MRRETURN(MATCH_NOMATCH); | ||
} | ||
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | ||
3224 | ecode += 2; | ecode += 2; |
3225 | } | } |
3226 | break; | break; |
# | Line 3104 for (;;) | Line 3230 for (;;) |
3230 | case OP_EXACT: | case OP_EXACT: |
3231 | case OP_EXACTI: | case OP_EXACTI: |
3232 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3233 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3234 | goto REPEATCHAR; | goto REPEATCHAR; |
3235 | ||
3236 | case OP_POSUPTO: | case OP_POSUPTO: |
# | Line 3119 for (;;) | Line 3245 for (;;) |
3245 | min = 0; | min = 0; |
3246 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3247 | minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; | minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; |
3248 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3249 | goto REPEATCHAR; | goto REPEATCHAR; |
3250 | ||
3251 | case OP_POSSTAR: | case OP_POSSTAR: |
# | Line 3167 for (;;) | Line 3293 for (;;) |
3293 | /* Common code for all repeated single-character matches. */ | /* Common code for all repeated single-character matches. */ |
3294 | ||
3295 | REPEATCHAR: | REPEATCHAR: |
3296 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3297 | if (utf8) | if (utf) |
3298 | { | { |
3299 | length = 1; | length = 1; |
3300 | charptr = ecode; | charptr = ecode; |
# | Line 3184 for (;;) | Line 3310 for (;;) |
3310 | unsigned int othercase; | unsigned int othercase; |
3311 | if (op >= OP_STARI && /* Caseless */ | if (op >= OP_STARI && /* Caseless */ |
3312 | (othercase = UCD_OTHERCASE(fc)) != fc) | (othercase = UCD_OTHERCASE(fc)) != fc) |
3313 | oclength = _pcre_ord2utf8(othercase, occhars); | oclength = PRIV(ord2utf)(othercase, occhars); |
3314 | else oclength = 0; | else oclength = 0; |
3315 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3316 | ||
3317 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3318 | { | { |
3319 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3320 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3321 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3322 | else if (oclength > 0 && | else if (oclength > 0 && |
3323 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3324 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3325 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3326 | else | else |
3327 | { | { |
3328 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
3329 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3330 | } | } |
3331 | } | } |
3332 | ||
# | Line 3212 for (;;) | Line 3338 for (;;) |
3338 | { | { |
3339 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); |
3340 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3341 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3342 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3343 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3344 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3345 | else if (oclength > 0 && | else if (oclength > 0 && |
3346 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3347 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3348 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3349 | else | else |
3350 | { | { |
3351 | CHECK_PARTIAL(); | CHECK_PARTIAL(); |
3352 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3353 | } | } |
3354 | } | } |
3355 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3235 for (;;) | Line 3361 for (;;) |
3361 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3362 | { | { |
3363 | if (eptr <= md->end_subject - length && | if (eptr <= md->end_subject - length && |
3364 | memcmp(eptr, charptr, length) == 0) eptr += length; | memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3365 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3366 | else if (oclength > 0 && | else if (oclength > 0 && |
3367 | eptr <= md->end_subject - oclength && | eptr <= md->end_subject - oclength && |
3368 | memcmp(eptr, occhars, oclength) == 0) eptr += oclength; | memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; |
3369 | #endif /* SUPPORT_UCP */ | #endif /* SUPPORT_UCP */ |
3370 | else | else |
3371 | { | { |
# | Line 3254 for (;;) | Line 3380 for (;;) |
3380 | { | { |
3381 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); |
3382 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3383 | if (eptr == pp) { MRRETURN(MATCH_NOMATCH); } | if (eptr == pp) { RRETURN(MATCH_NOMATCH); } |
3384 | #ifdef SUPPORT_UCP | #ifdef SUPPORT_UCP |
3385 | eptr--; | eptr--; |
3386 | BACKCHAR(eptr); | BACKCHAR(eptr); |
# | Line 3271 for (;;) | Line 3397 for (;;) |
3397 | value of fc will always be < 128. */ | value of fc will always be < 128. */ |
3398 | } | } |
3399 | else | else |
3400 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
3401 | /* When not in UTF-8 mode, load a single-byte character. */ | |
3402 | fc = *ecode++; | |
3403 | ||
3404 | /* When not in UTF-8 mode, load a single-byte character. */ | /* The value of fc at this point is always one character, though we may |
3405 | or may not be in UTF mode. The code is duplicated for the caseless and | |
fc = *ecode++; | ||
/* The value of fc at this point is always less than 256, though we may or | ||
may not be in UTF-8 mode. The code is duplicated for the caseless and | ||
3406 | caseful cases, for speed, since matching characters is likely to be quite | caseful cases, for speed, since matching characters is likely to be quite |
3407 | common. First, ensure the minimum number of matches are present. If min = | common. First, ensure the minimum number of matches are present. If min = |
3408 | max, continue at the same level without recursing. Otherwise, if | max, continue at the same level without recursing. Otherwise, if |
# | Line 3287 for (;;) | Line 3411 for (;;) |
3411 | maximizing, find the maximum number of characters and work backwards. */ | maximizing, find the maximum number of characters and work backwards. */ |
3412 | ||
3413 | DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, | DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, |
3414 | max, eptr)); | max, (char *)eptr)); |
3415 | ||
3416 | if (op >= OP_STARI) /* Caseless */ | if (op >= OP_STARI) /* Caseless */ |
3417 | { | { |
3418 | fc = md->lcc[fc]; | #ifdef COMPILE_PCRE8 |
3419 | /* fc must be < 128 if UTF is enabled. */ | |
3420 | foc = md->fcc[fc]; | |
3421 | #else | |
3422 | #ifdef SUPPORT_UTF | |
3423 | #ifdef SUPPORT_UCP | |
3424 | if (utf && fc > 127) | |
3425 | foc = UCD_OTHERCASE(fc); | |
3426 | #else | |
3427 | if (utf && fc > 127) | |
3428 | foc = fc; | |
3429 | #endif /* SUPPORT_UCP */ | |
3430 | else | |
3431 | #endif /* SUPPORT_UTF */ | |
3432 | foc = TABLE_GET(fc, md->fcc, fc); | |
3433 | #endif /* COMPILE_PCRE8 */ | |
3434 | ||
3435 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3436 | { | { |
3437 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3438 | { | { |
3439 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3440 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3441 | } | } |
3442 | if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
3443 | eptr++; | |
3444 | } | } |
3445 | if (min == max) continue; | if (min == max) continue; |
3446 | if (minimize) | if (minimize) |
# | Line 3308 for (;;) | Line 3449 for (;;) |
3449 | { | { |
3450 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); |
3451 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3452 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3453 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3454 | { | { |
3455 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3456 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3457 | } | } |
3458 | if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); |
3459 | eptr++; | |
3460 | } | } |
3461 | /* Control never gets here */ | /* Control never gets here */ |
3462 | } | } |
# | Line 3328 for (;;) | Line 3470 for (;;) |
3470 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3471 | break; | break; |
3472 | } | } |
3473 | if (fc != md->lcc[*eptr]) break; | if (fc != *eptr && foc != *eptr) break; |
3474 | eptr++; | eptr++; |
3475 | } | } |
3476 | ||
# | Line 3340 for (;;) | Line 3482 for (;;) |
3482 | eptr--; | eptr--; |
3483 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3484 | } | } |
3485 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3486 | } | } |
3487 | /* Control never gets here */ | /* Control never gets here */ |
3488 | } | } |
# | Line 3354 for (;;) | Line 3496 for (;;) |
3496 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3497 | { | { |
3498 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3499 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3500 | } | } |
3501 | if (fc != *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
3502 | } | } |
3503 | ||
3504 | if (min == max) continue; | if (min == max) continue; |
# | Line 3367 for (;;) | Line 3509 for (;;) |
3509 | { | { |
3510 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); |
3511 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3512 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3513 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3514 | { | { |
3515 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3516 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3517 | } | } |
3518 | if (fc != *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
3519 | } | } |
3520 | /* Control never gets here */ | /* Control never gets here */ |
3521 | } | } |
# | Line 3398 for (;;) | Line 3540 for (;;) |
3540 | eptr--; | eptr--; |
3541 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3542 | } | } |
3543 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3544 | } | } |
3545 | } | } |
3546 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3411 for (;;) | Line 3553 for (;;) |
3553 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3554 | { | { |
3555 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3556 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3557 | } | } |
3558 | ecode++; | #ifdef SUPPORT_UTF |
3559 | GETCHARINCTEST(c, eptr); | if (utf) |
if (op == OP_NOTI) /* The caseless case */ | ||
3560 | { | { |
3561 | #ifdef SUPPORT_UTF8 | register pcre_uint32 ch, och; |
3562 | if (c < 256) | |
3563 | #endif | ecode++; |
3564 | c = md->lcc[c]; | GETCHARINC(ch, ecode); |
3565 | if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH); | GETCHARINC(c, eptr); |
3566 | ||
3567 | if (op == OP_NOT) | |
3568 | { | |
3569 | if (ch == c) RRETURN(MATCH_NOMATCH); | |
3570 | } | |
3571 | else | |
3572 | { | |
3573 | #ifdef SUPPORT_UCP | |
3574 | if (ch > 127) | |
3575 | och = UCD_OTHERCASE(ch); | |
3576 | #else | |
3577 | if (ch > 127) | |
3578 | och = ch; | |
3579 | #endif /* SUPPORT_UCP */ | |
3580 | else | |
3581 | och = TABLE_GET(ch, md->fcc, ch); | |
3582 | if (ch == c || och == c) RRETURN(MATCH_NOMATCH); | |
3583 | } | |
3584 | } | } |
3585 | else /* Caseful */ | else |
3586 | #endif | |
3587 | { | { |
3588 | if (*ecode++ == c) MRRETURN(MATCH_NOMATCH); | register pcre_uint32 ch = ecode[1]; |
3589 | c = *eptr++; | |
3590 | if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c)) | |
3591 | RRETURN(MATCH_NOMATCH); | |
3592 | ecode += 2; | |
3593 | } | } |
3594 | break; | break; |
3595 | ||
# | Line 3439 for (;;) | Line 3603 for (;;) |
3603 | case OP_NOTEXACT: | case OP_NOTEXACT: |
3604 | case OP_NOTEXACTI: | case OP_NOTEXACTI: |
3605 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3606 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3607 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3608 | ||
3609 | case OP_NOTUPTO: | case OP_NOTUPTO: |
# | Line 3449 for (;;) | Line 3613 for (;;) |
3613 | min = 0; | min = 0; |
3614 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3615 | minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; | minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; |
3616 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3617 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3618 | ||
3619 | case OP_NOTPOSSTAR: | case OP_NOTPOSSTAR: |
# | Line 3481 for (;;) | Line 3645 for (;;) |
3645 | possessive = TRUE; | possessive = TRUE; |
3646 | min = 0; | min = 0; |
3647 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3648 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3649 | goto REPEATNOTCHAR; | goto REPEATNOTCHAR; |
3650 | ||
3651 | case OP_NOTSTAR: | case OP_NOTSTAR: |
# | Line 3505 for (;;) | Line 3669 for (;;) |
3669 | /* Common code for all repeated single-byte matches. */ | /* Common code for all repeated single-byte matches. */ |
3670 | ||
3671 | REPEATNOTCHAR: | REPEATNOTCHAR: |
3672 | fc = *ecode++; | GETCHARINCTEST(fc, ecode); |
3673 | ||
3674 | /* The code is duplicated for the caseless and caseful cases, for speed, | /* The code is duplicated for the caseless and caseful cases, for speed, |
3675 | since matching characters is likely to be quite common. First, ensure the | since matching characters is likely to be quite common. First, ensure the |
# | Line 3516 for (;;) | Line 3680 for (;;) |
3680 | characters and work backwards. */ | characters and work backwards. */ |
3681 | ||
3682 | DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, | DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, |
3683 | max, eptr)); | max, (char *)eptr)); |
3684 | ||
3685 | if (op >= OP_NOTSTARI) /* Caseless */ | if (op >= OP_NOTSTARI) /* Caseless */ |
3686 | { | { |
3687 | fc = md->lcc[fc]; | #ifdef SUPPORT_UTF |
3688 | #ifdef SUPPORT_UCP | |
3689 | if (utf && fc > 127) | |
3690 | foc = UCD_OTHERCASE(fc); | |
3691 | #else | |
3692 | if (utf && fc > 127) | |
3693 | foc = fc; | |
3694 | #endif /* SUPPORT_UCP */ | |
3695 | else | |
3696 | #endif /* SUPPORT_UTF */ | |
3697 | foc = TABLE_GET(fc, md->fcc, fc); | |
3698 | ||
3699 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3700 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3701 | { | { |
3702 | register unsigned int d; | register unsigned int d; |
3703 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 3532 for (;;) | Line 3705 for (;;) |
3705 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3706 | { | { |
3707 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3708 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3709 | } | } |
3710 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3711 | if (d < 256) d = md->lcc[d]; | if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); |
if (fc == d) MRRETURN(MATCH_NOMATCH); | ||
3712 | } | } |
3713 | } | } |
3714 | else | else |
3715 | #endif | #endif |
3716 | /* Not UTF mode */ | |
/* Not UTF-8 mode */ | ||
3717 | { | { |
3718 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3719 | { | { |
3720 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3721 | { | { |
3722 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3723 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3724 | } | } |
3725 | if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); |
3726 | eptr++; | |
3727 | } | } |
3728 | } | } |
3729 | ||
# | Line 3559 for (;;) | Line 3731 for (;;) |
3731 | ||
3732 | if (minimize) | if (minimize) |
3733 | { | { |
3734 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3735 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3736 | { | { |
3737 | register unsigned int d; | register unsigned int d; |
3738 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3739 | { | { |
3740 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); |
3741 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3742 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3743 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3744 | { | { |
3745 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3746 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3747 | } | } |
3748 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3749 | if (d < 256) d = md->lcc[d]; | if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); |
if (fc == d) MRRETURN(MATCH_NOMATCH); | ||
3750 | } | } |
3751 | } | } |
3752 | else | else |
3753 | #endif | #endif |
3754 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3755 | { | { |
3756 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3757 | { | { |
3758 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); |
3759 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3760 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3761 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3762 | { | { |
3763 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3764 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3765 | } | } |
3766 | if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); |
3767 | eptr++; | |
3768 | } | } |
3769 | } | } |
3770 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3605 for (;;) | Line 3776 for (;;) |
3776 | { | { |
3777 | pp = eptr; | pp = eptr; |
3778 | ||
3779 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3780 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3781 | { | { |
3782 | register unsigned int d; | register unsigned int d; |
3783 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
# | Line 3619 for (;;) | Line 3789 for (;;) |
3789 | break; | break; |
3790 | } | } |
3791 | GETCHARLEN(d, eptr, len); | GETCHARLEN(d, eptr, len); |
3792 | if (d < 256) d = md->lcc[d]; | if (fc == d || (unsigned int)foc == d) break; |
if (fc == d) break; | ||
3793 | eptr += len; | eptr += len; |
3794 | } | } |
3795 | if (possessive) continue; | if (possessive) continue; |
3796 | for(;;) | for(;;) |
3797 | { | { |
3798 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); |
3799 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
# | Line 3634 for (;;) | Line 3803 for (;;) |
3803 | } | } |
3804 | else | else |
3805 | #endif | #endif |
3806 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3807 | { | { |
3808 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3809 | { | { |
# | Line 3643 for (;;) | Line 3812 for (;;) |
3812 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3813 | break; | break; |
3814 | } | } |
3815 | if (fc == md->lcc[*eptr]) break; | if (fc == *eptr || foc == *eptr) break; |
3816 | eptr++; | eptr++; |
3817 | } | } |
3818 | if (possessive) continue; | if (possessive) continue; |
# | Line 3655 for (;;) | Line 3824 for (;;) |
3824 | } | } |
3825 | } | } |
3826 | ||
3827 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3828 | } | } |
3829 | /* Control never gets here */ | /* Control never gets here */ |
3830 | } | } |
# | Line 3664 for (;;) | Line 3833 for (;;) |
3833 | ||
3834 | else | else |
3835 | { | { |
3836 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3837 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3838 | { | { |
3839 | register unsigned int d; | register unsigned int d; |
3840 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 3674 for (;;) | Line 3842 for (;;) |
3842 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3843 | { | { |
3844 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3845 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3846 | } | } |
3847 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3848 | if (fc == d) MRRETURN(MATCH_NOMATCH); | if (fc == d) RRETURN(MATCH_NOMATCH); |
3849 | } | } |
3850 | } | } |
3851 | else | else |
3852 | #endif | #endif |
3853 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3854 | { | { |
3855 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
3856 | { | { |
3857 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3858 | { | { |
3859 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3860 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3861 | } | } |
3862 | if (fc == *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr++) RRETURN(MATCH_NOMATCH); |
3863 | } | } |
3864 | } | } |
3865 | ||
# | Line 3699 for (;;) | Line 3867 for (;;) |
3867 | ||
3868 | if (minimize) | if (minimize) |
3869 | { | { |
3870 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3871 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3872 | { | { |
3873 | register unsigned int d; | register unsigned int d; |
3874 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3875 | { | { |
3876 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); |
3877 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3878 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3879 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3880 | { | { |
3881 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3882 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3883 | } | } |
3884 | GETCHARINC(d, eptr); | GETCHARINC(d, eptr); |
3885 | if (fc == d) MRRETURN(MATCH_NOMATCH); | if (fc == d) RRETURN(MATCH_NOMATCH); |
3886 | } | } |
3887 | } | } |
3888 | else | else |
3889 | #endif | #endif |
3890 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3891 | { | { |
3892 | for (fi = min;; fi++) | for (fi = min;; fi++) |
3893 | { | { |
3894 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); |
3895 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3896 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
3897 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
3898 | { | { |
3899 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
3900 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3901 | } | } |
3902 | if (fc == *eptr++) MRRETURN(MATCH_NOMATCH); | if (fc == *eptr++) RRETURN(MATCH_NOMATCH); |
3903 | } | } |
3904 | } | } |
3905 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3744 for (;;) | Line 3911 for (;;) |
3911 | { | { |
3912 | pp = eptr; | pp = eptr; |
3913 | ||
3914 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
3915 | /* UTF-8 mode */ | if (utf) |
if (utf8) | ||
3916 | { | { |
3917 | register unsigned int d; | register unsigned int d; |
3918 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
# | Line 3772 for (;;) | Line 3938 for (;;) |
3938 | } | } |
3939 | else | else |
3940 | #endif | #endif |
3941 | /* Not UTF-8 mode */ | /* Not UTF mode */ |
3942 | { | { |
3943 | for (i = min; i < max; i++) | for (i = min; i < max; i++) |
3944 | { | { |
# | Line 3793 for (;;) | Line 3959 for (;;) |
3959 | } | } |
3960 | } | } |
3961 | ||
3962 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
3963 | } | } |
3964 | } | } |
3965 | /* Control never gets here */ | /* Control never gets here */ |
# | Line 3805 for (;;) | Line 3971 for (;;) |
3971 | case OP_TYPEEXACT: | case OP_TYPEEXACT: |
3972 | min = max = GET2(ecode, 1); | min = max = GET2(ecode, 1); |
3973 | minimize = TRUE; | minimize = TRUE; |
3974 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3975 | goto REPEATTYPE; | goto REPEATTYPE; |
3976 | ||
3977 | case OP_TYPEUPTO: | case OP_TYPEUPTO: |
# | Line 3813 for (;;) | Line 3979 for (;;) |
3979 | min = 0; | min = 0; |
3980 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
3981 | minimize = *ecode == OP_TYPEMINUPTO; | minimize = *ecode == OP_TYPEMINUPTO; |
3982 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
3983 | goto REPEATTYPE; | goto REPEATTYPE; |
3984 | ||
3985 | case OP_TYPEPOSSTAR: | case OP_TYPEPOSSTAR: |
# | Line 3841 for (;;) | Line 4007 for (;;) |
4007 | possessive = TRUE; | possessive = TRUE; |
4008 | min = 0; | min = 0; |
4009 | max = GET2(ecode, 1); | max = GET2(ecode, 1); |
4010 | ecode += 3; | ecode += 1 + IMM2_SIZE; |
4011 | goto REPEATTYPE; | goto REPEATTYPE; |
4012 | ||
4013 | case OP_TYPESTAR: | case OP_TYPESTAR: |
# | Line 3887 for (;;) | Line 4053 for (;;) |
4053 | switch(prop_type) | switch(prop_type) |
4054 | { | { |
4055 | case PT_ANY: | case PT_ANY: |
4056 | if (prop_fail_result) MRRETURN(MATCH_NOMATCH); | if (prop_fail_result) RRETURN(MATCH_NOMATCH); |
4057 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
4058 | { | { |
4059 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4060 | { | { |
4061 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4062 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4063 | } | } |
4064 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4065 | } | } |
# | Line 3906 for (;;) | Line 4072 for (;;) |
4072 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4073 | { | { |
4074 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4075 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4076 | } | } |
4077 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4078 | chartype = UCD_CHARTYPE(c); | chartype = UCD_CHARTYPE(c); |
4079 | if ((chartype == ucp_Lu || | if ((chartype == ucp_Lu || |
4080 | chartype == ucp_Ll || | chartype == ucp_Ll || |
4081 | chartype == ucp_Lt) == prop_fail_result) | chartype == ucp_Lt) == prop_fail_result) |
4082 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4083 | } | } |
4084 | break; | break; |
4085 | ||
# | Line 3923 for (;;) | Line 4089 for (;;) |
4089 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4090 | { | { |
4091 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4092 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4093 | } | } |
4094 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4095 | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
4096 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4097 | } | } |
4098 | break; | break; |
4099 | ||
# | Line 3937 for (;;) | Line 4103 for (;;) |
4103 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4104 | { | { |
4105 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4106 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4107 | } | } |
4108 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4109 | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
4110 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4111 | } | } |
4112 | break; | break; |
4113 | ||
# | Line 3951 for (;;) | Line 4117 for (;;) |
4117 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4118 | { | { |
4119 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4120 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4121 | } | } |
4122 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4123 | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
4124 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4125 | } | } |
4126 | break; | break; |
4127 | ||
# | Line 3966 for (;;) | Line 4132 for (;;) |
4132 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4133 | { | { |
4134 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4135 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4136 | } | } |
4137 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4138 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4139 | if ((category == ucp_L || category == ucp_N) == prop_fail_result) | if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
4140 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4141 | } | } |
4142 | break; | break; |
4143 | ||
# | Line 3981 for (;;) | Line 4147 for (;;) |
4147 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4148 | { | { |
4149 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4150 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4151 | } | } |
4152 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4153 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4154 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
4155 | == prop_fail_result) | == prop_fail_result) |
4156 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4157 | } | } |
4158 | break; | break; |
4159 | ||
# | Line 3997 for (;;) | Line 4163 for (;;) |
4163 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4164 | { | { |
4165 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4166 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4167 | } | } |
4168 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4169 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4170 | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
4171 | == prop_fail_result) | == prop_fail_result) |
4172 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4173 | } | } |
4174 | break; | break; |
4175 | ||
# | Line 4014 for (;;) | Line 4180 for (;;) |
4180 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4181 | { | { |
4182 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4183 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4184 | } | } |
4185 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4186 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4187 | if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) | if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) |
4188 | == prop_fail_result) | == prop_fail_result) |
4189 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4190 | } | } |
4191 | break; | break; |
4192 | ||
4193 | case PT_CLIST: | |
4194 | for (i = 1; i <= min; i++) | |
4195 | { | |
4196 | const pcre_uint32 *cp; | |
4197 | if (eptr >= md->end_subject) | |
4198 | { | |
4199 | SCHECK_PARTIAL(); | |
4200 | RRETURN(MATCH_NOMATCH); | |
4201 | } | |
4202 | GETCHARINCTEST(c, eptr); | |
4203 | cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c); | |
4204 | for (;;) | |
4205 | { | |
4206 | if (c < *cp) | |
4207 | { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } } | |
4208 | if (c == *cp++) | |
4209 | { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; } | |
4210 | } | |
4211 | } | |
4212 | break; | |
4213 | ||
4214 | /* This should not occur */ | /* This should not occur */ |
4215 | ||
4216 | default: | default: |
# | Line 4041 for (;;) | Line 4228 for (;;) |
4228 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4229 | { | { |
4230 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4231 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4232 | } | } |
4233 | GETCHARINCTEST(c, eptr); | else |
if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); | ||
while (eptr < md->end_subject) | ||
4234 | { | { |
4235 | int len = 1; | int lgb, rgb; |
4236 | if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } | GETCHARINCTEST(c, eptr); |
4237 | if (UCD_CATEGORY(c) != ucp_M) break; | lgb = UCD_GRAPHBREAK(c); |
4238 | eptr += len; | while (eptr < md->end_subject) |
4239 | { | |
4240 | int len = 1; | |
4241 | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } | |
4242 | rgb = UCD_GRAPHBREAK(c); | |
4243 | if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; | |
4244 | lgb = rgb; | |
4245 | eptr += len; | |
4246 | } | |
4247 | } | } |
4248 | CHECK_PARTIAL(); | |
4249 | } | } |
4250 | } | } |
4251 | ||
# | Line 4060 for (;;) | Line 4254 for (;;) |
4254 | ||
4255 | /* Handle all other cases when the coding is UTF-8 */ | /* Handle all other cases when the coding is UTF-8 */ |
4256 | ||
4257 | #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
4258 | if (utf8) switch(ctype) | if (utf) switch(ctype) |
4259 | { | { |
4260 | case OP_ANY: | case OP_ANY: |
4261 | for (i = 1; i <= min; i++) | for (i = 1; i <= min; i++) |
# | Line 4069 for (;;) | Line 4263 for (;;) |
4263 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4264 | { | { |
4265 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4266 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4267 | } | |
4268 | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); | |
4269 | if (md->partial != 0 && | |
4270 | eptr + 1 >= md->end_subject && | |
4271 | NLBLOCK->nltype == NLTYPE_FIXED && | |
4272 | NLBLOCK->nllen == 2 && | |
4273 | *eptr == NLBLOCK->nl[0]) | |
4274 | { | |
4275 | md->hitend = TRUE; | |
4276 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
4277 | } | } |
if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | ||
4278 | eptr++; | eptr++; |
4279 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4280 | } | } |
4281 | break; | break; |
4282 | ||
# | Line 4083 for (;;) | Line 4286 for (;;) |
4286 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4287 | { | { |
4288 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4289 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4290 | } | } |
4291 | eptr++; | eptr++; |
4292 | while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); |
4293 | } | } |
4294 | break; | break; |
4295 | ||
4296 | case OP_ANYBYTE: | case OP_ANYBYTE: |
4297 | if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH); | if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); |
4298 | eptr += min; | eptr += min; |
4299 | break; | break; |
4300 | ||
# | Line 4101 for (;;) | Line 4304 for (;;) |
4304 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4305 | { | { |
4306 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4307 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4308 | } | } |
4309 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4310 | switch(c) | switch(c) |
4311 | { | { |
4312 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4313 | ||
4314 | case 0x000d: | case CHAR_CR: |
4315 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; |
4316 | break; | break; |
4317 | ||
4318 | case 0x000a: | case CHAR_LF: |
4319 | break; | break; |
4320 | ||
4321 | case 0x000b: | case CHAR_VT: |
4322 | case 0x000c: | case CHAR_FF: |
4323 | case 0x0085: | case CHAR_NEL: |
4324 | #ifndef EBCDIC | |
4325 | case 0x2028: | case 0x2028: |
4326 | case 0x2029: | case 0x2029: |
4327 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | #endif /* Not EBCDIC */ |
4328 | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); | |
4329 | break; | break; |
4330 | } | } |
4331 | } | } |
# | Line 4132 for (;;) | Line 4337 for (;;) |
4337 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4338 | { | { |
4339 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4340 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4341 | } | } |
4342 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4343 | switch(c) | switch(c) |
4344 | { | { |
4345 | HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ | |
4346 | default: break; | default: break; |
case 0x09: /* HT */ | ||
case 0x20: /* SPACE */ | ||
case 0xa0: /* NBSP */ | ||
case 0x1680: /* OGHAM SPACE MARK */ | ||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | ||
case 0x2000: /* EN QUAD */ | ||
case 0x2001: /* EM QUAD */ | ||
case 0x2002: /* EN SPACE */ | ||
case 0x2003: /* EM SPACE */ | ||
case 0x2004: /* THREE-PER-EM SPACE */ | ||
case 0x2005: /* FOUR-PER-EM SPACE */ | ||
case 0x2006: /* SIX-PER-EM SPACE */ | ||
case 0x2007: /* FIGURE SPACE */ | ||
case 0x2008: /* PUNCTUATION SPACE */ | ||
case 0x2009: /* THIN SPACE */ | ||
case 0x200A: /* HAIR SPACE */ | ||
case 0x202f: /* NARROW NO-BREAK SPACE */ | ||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | ||
case 0x3000: /* IDEOGRAPHIC SPACE */ | ||
MRRETURN(MATCH_NOMATCH); | ||
4347 | } | } |
4348 | } | } |
4349 | break; | break; |
# | Line 4168 for (;;) | Line 4354 for (;;) |
4354 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4355 | { | { |
4356 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4357 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4358 | } | } |
4359 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4360 | switch(c) | switch(c) |
4361 | { | { |
4362 | default: MRRETURN(MATCH_NOMATCH); | HSPACE_CASES: break; /* Byte and multibyte cases */ |
4363 | case 0x09: /* HT */ | default: RRETURN(MATCH_NOMATCH); |
case 0x20: /* SPACE */ | ||
case 0xa0: /* NBSP */ | ||
case 0x1680: /* OGHAM SPACE MARK */ | ||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ | ||
case 0x2000: /* EN QUAD */ | ||
case 0x2001: /* EM QUAD */ | ||
case 0x2002: /* EN SPACE */ | ||
case 0x2003: /* EM SPACE */ | ||
case 0x2004: /* THREE-PER-EM SPACE */ | ||
case 0x2005: /* FOUR-PER-EM SPACE */ | ||
case 0x2006: /* SIX-PER-EM SPACE */ | ||
case 0x2007: /* FIGURE SPACE */ | ||
case 0x2008: /* PUNCTUATION SPACE */ | ||
case 0x2009: /* THIN SPACE */ | ||
case 0x200A: /* HAIR SPACE */ | ||
case 0x202f: /* NARROW NO-BREAK SPACE */ | ||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ | ||
case 0x3000: /* IDEOGRAPHIC SPACE */ | ||
break; | ||
4364 | } | } |
4365 | } | } |
4366 | break; | break; |
# | Line 4204 for (;;) | Line 4371 for (;;) |
4371 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4372 | { | { |
4373 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4374 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4375 | } | } |
4376 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4377 | switch(c) | switch(c) |
4378 | { | { |
4379 | VSPACE_CASES: RRETURN(MATCH_NOMATCH); | |
4380 | default: break; | default: break; |
case 0x0a: /* LF */ | ||
case 0x0b: /* VT */ | ||
case 0x0c: /* FF */ | ||
case 0x0d: /* CR */ | ||
case 0x85: /* NEL */ | ||
case 0x2028: /* LINE SEPARATOR */ | ||
case 0x2029: /* PARAGRAPH SEPARATOR */ | ||
MRRETURN(MATCH_NOMATCH); | ||
4381 | } | } |
4382 | } | } |
4383 | break; | break; |
# | Line 4228 for (;;) | Line 4388 for (;;) |
4388 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4389 | { | { |
4390 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4391 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4392 | } | } |
4393 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4394 | switch(c) | switch(c) |
4395 | { | { |
4396 | default: MRRETURN(MATCH_NOMATCH); | VSPACE_CASES: break; |
4397 | case 0x0a: /* LF */ | default: RRETURN(MATCH_NOMATCH); |
case 0x0b: /* VT */ | ||
case 0x0c: /* FF */ | ||
case 0x0d: /* CR */ | ||
case 0x85: /* NEL */ | ||
case 0x2028: /* LINE SEPARATOR */ | ||
case 0x2029: /* PARAGRAPH SEPARATOR */ | ||
break; | ||
4398 | } | } |
4399 | } | } |
4400 | break; | break; |
# | Line 4252 for (;;) | Line 4405 for (;;) |
4405 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4406 | { | { |
4407 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4408 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4409 | } | } |
4410 | GETCHARINC(c, eptr); | GETCHARINC(c, eptr); |
4411 | if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) | if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) |
4412 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4413 | } | } |
4414 | break; | break; |
4415 | ||
# | Line 4266 for (;;) | Line 4419 for (;;) |
4419 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4420 | { | { |
4421 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4422 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4423 | } | } |
4424 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0) |
4425 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4426 | eptr++; | |
4427 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4428 | } | } |
4429 | break; | break; |
# | Line 4280 for (;;) | Line 4434 for (;;) |
4434 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4435 | { | { |
4436 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4437 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4438 | } | } |
4439 | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) |
4440 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4441 | while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); | eptr++; |
4442 | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
4443 | } | } |
4444 | break; | break; |
4445 | ||
# | Line 4294 for (;;) | Line 4449 for (;;) |
4449 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4450 | { | { |
4451 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4452 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4453 | } | } |
4454 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0) |
4455 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4456 | eptr++; | |
4457 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4458 | } | } |
4459 | break; | break; |
# | Line 4308 for (;;) | Line 4464 for (;;) |
4464 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4465 | { | { |
4466 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4467 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4468 | } | } |
4469 | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) | if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) |
4470 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4471 | while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); | eptr++; |
4472 | ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); | |
4473 | } | } |
4474 | break; | break; |
4475 | ||
# | Line 4322 for (;;) | Line 4479 for (;;) |
4479 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4480 | { | { |
4481 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4482 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4483 | } | } |
4484 | if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) | if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0) |
4485 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4486 | eptr++; | |
4487 | /* No need to skip more bytes - we know it's a 1-byte character */ | /* No need to skip more bytes - we know it's a 1-byte character */ |
4488 | } | } |
4489 | break; | break; |
# | Line 4335 for (;;) | Line 4493 for (;;) |
4493 | } /* End switch(ctype) */ | } /* End switch(ctype) */ |
4494 | ||
4495 | else | else |
4496 | #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
4497 | ||
4498 | /* Code for the non-UTF-8 case for minimum matching of operators other | /* Code for the non-UTF-8 case for minimum matching of operators other |
4499 | than OP_PROP and OP_NOTPROP. */ | than OP_PROP and OP_NOTPROP. */ |
# | Line 4348 for (;;) | Line 4506 for (;;) |
4506 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4507 | { | { |
4508 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4509 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4510 | } | |
4511 | if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); | |
4512 | if (md->partial != 0 && | |
4513 | eptr + 1 >= md->end_subject && | |
4514 | NLBLOCK->nltype == NLTYPE_FIXED && | |
4515 | NLBLOCK->nllen == 2 && | |
4516 | *eptr == NLBLOCK->nl[0]) | |
4517 | { | |
4518 | md->hitend = TRUE; | |
4519 | if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); | |
4520 | } | } |
if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); | ||
4521 | eptr++; | eptr++; |
4522 | } | } |
4523 | break; | break; |
# | Line 4359 for (;;) | Line 4526 for (;;) |
4526 | if (eptr > md->end_subject - min) | if (eptr > md->end_subject - min) |
4527 | { | { |
4528 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4529 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4530 | } | } |
4531 | eptr += min; | eptr += min; |
4532 | break; | break; |
# | Line 4368 for (;;) | Line 4535 for (;;) |
4535 | if (eptr > md->end_subject - min) | if (eptr > md->end_subject - min) |
4536 | { | { |
4537 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4538 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4539 | } | } |
4540 | eptr += min; | eptr += min; |
4541 | break; | break; |
# | Line 4379 for (;;) | Line 4546 for (;;) |
4546 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4547 | { | { |
4548 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4549 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4550 | } | } |
4551 | switch(*eptr++) | switch(*eptr++) |
4552 | { | { |
4553 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4554 | ||
4555 | case 0x000d: | case CHAR_CR: |
4556 | if (eptr < md->end_subject && *eptr == 0x0a) eptr++; | if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; |
4557 | break; | break; |
4558 | ||
4559 | case 0x000a: | case CHAR_LF: |
4560 | break; | break; |
4561 | ||
4562 | case 0x000b: | case CHAR_VT: |
4563 | case 0x000c: | case CHAR_FF: |
4564 | case 0x0085: | case CHAR_NEL: |
4565 | if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4566 | case 0x2028: | |
4567 | case 0x2029: | |
4568 | #endif | |
4569 | if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); | |
4570 | break; | break; |
4571 | } | } |
4572 | } | } |
# | Line 4407 for (;;) | Line 4578 for (;;) |
4578 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4579 | { | { |
4580 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4581 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4582 | } | } |
4583 | switch(*eptr++) | switch(*eptr++) |
4584 | { | { |
4585 | default: break; | default: break; |
4586 | case 0x09: /* HT */ | HSPACE_BYTE_CASES: |
4587 | case 0x20: /* SPACE */ | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4588 | case 0xa0: /* NBSP */ | HSPACE_MULTIBYTE_CASES: |
4589 | MRRETURN(MATCH_NOMATCH); | #endif |
4590 | RRETURN(MATCH_NOMATCH); | |
4591 | } | } |
4592 | } | } |
4593 | break; | break; |
# | Line 4426 for (;;) | Line 4598 for (;;) |
4598 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4599 | { | { |
4600 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4601 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4602 | } | } |
4603 | switch(*eptr++) | switch(*eptr++) |
4604 | { | { |
4605 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4606 | case 0x09: /* HT */ | HSPACE_BYTE_CASES: |
4607 | case 0x20: /* SPACE */ | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4608 | case 0xa0: /* NBSP */ | HSPACE_MULTIBYTE_CASES: |
4609 | #endif | |
4610 | break; | break; |
4611 | } | } |
4612 | } | } |
# | Line 4445 for (;;) | Line 4618 for (;;) |
4618 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4619 | { | { |
4620 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4621 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4622 | } | } |
4623 | switch(*eptr++) | switch(*eptr++) |
4624 | { | { |
4625 | VSPACE_BYTE_CASES: | |
4626 | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 | |
4627 | VSPACE_MULTIBYTE_CASES: | |
4628 | #endif | |
4629 | RRETURN(MATCH_NOMATCH); | |
4630 | default: break; | default: break; |
case 0x0a: /* LF */ | ||
case 0x0b: /* VT */ | ||
case 0x0c: /* FF */ | ||
case 0x0d: /* CR */ | ||
case 0x85: /* NEL */ | ||
MRRETURN(MATCH_NOMATCH); | ||
4631 | } | } |
4632 | } | } |
4633 | break; | break; |
# | Line 4466 for (;;) | Line 4638 for (;;) |
4638 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4639 | { | { |
4640 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4641 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4642 | } | } |
4643 | switch(*eptr++) | switch(*eptr++) |
4644 | { | { |
4645 | default: MRRETURN(MATCH_NOMATCH); | default: RRETURN(MATCH_NOMATCH); |
4646 | case 0x0a: /* LF */ | VSPACE_BYTE_CASES: |
4647 | case 0x0b: /* VT */ | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4648 | case 0x0c: /* FF */ | VSPACE_MULTIBYTE_CASES: |
4649 | case 0x0d: /* CR */ | #endif |
case 0x85: /* NEL */ | ||
4650 | break; | break; |
4651 | } | } |
4652 | } | } |
# | Line 4487 for (;;) | Line 4658 for (;;) |
4658 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4659 | { | { |
4660 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4661 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4662 | } | } |
4663 | if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) |
4664 | RRETURN(MATCH_NOMATCH); | |
4665 | eptr++; | |
4666 | } | } |
4667 | break; | break; |
4668 | ||
# | Line 4499 for (;;) | Line 4672 for (;;) |
4672 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4673 | { | { |
4674 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4675 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4676 | } | } |
4677 | if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) |
4678 | RRETURN(MATCH_NOMATCH); | |
4679 | eptr++; | |
4680 | } | } |
4681 | break; | break; |
4682 | ||
# | Line 4511 for (;;) | Line 4686 for (;;) |
4686 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4687 | { | { |
4688 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4689 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4690 | } | } |
4691 | if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH); | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) |
4692 | RRETURN(MATCH_NOMATCH); | |
4693 | eptr++; | |
4694 | } | } |
4695 | break; | break; |
4696 | ||
# | Line 4523 for (;;) | Line 4700 for (;;) |
4700 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4701 | { | { |
4702 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4703 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4704 | } | } |
4705 | if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH); | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) |
4706 | RRETURN(MATCH_NOMATCH); | |
4707 | eptr++; | |
4708 | } | } |
4709 | break; | break; |
4710 | ||
# | Line 4535 for (;;) | Line 4714 for (;;) |
4714 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4715 | { | { |
4716 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4717 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4718 | } | } |
4719 | if ((md->ctypes[*eptr++] & ctype_word) != 0) | if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) |
4720 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4721 | eptr++; | |
4722 | } | } |
4723 | break; | break; |
4724 | ||
# | Line 4548 for (;;) | Line 4728 for (;;) |
4728 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4729 | { | { |
4730 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4731 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4732 | } | } |
4733 | if ((md->ctypes[*eptr++] & ctype_word) == 0) | if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) |
4734 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4735 | eptr++; | |
4736 | } | } |
4737 | break; | break; |
4738 | ||
# | Line 4580 for (;;) | Line 4761 for (;;) |
4761 | { | { |
4762 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); |
4763 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4764 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4765 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4766 | { | { |
4767 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4768 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4769 | } | } |
4770 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4771 | if (prop_fail_result) MRRETURN(MATCH_NOMATCH); | if (prop_fail_result) RRETURN(MATCH_NOMATCH); |
4772 | } | } |
4773 | /* Control never gets here */ | /* Control never gets here */ |
4774 | ||
# | Line 4597 for (;;) | Line 4778 for (;;) |
4778 | int chartype; | int chartype; |
4779 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); |
4780 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4781 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4782 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4783 | { | { |
4784 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4785 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4786 | } | } |
4787 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4788 | chartype = UCD_CHARTYPE(c); | chartype = UCD_CHARTYPE(c); |
4789 | if ((chartype == ucp_Lu || | if ((chartype == ucp_Lu || |
4790 | chartype == ucp_Ll || | chartype == ucp_Ll || |
4791 | chartype == ucp_Lt) == prop_fail_result) | chartype == ucp_Lt) == prop_fail_result) |
4792 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4793 | } | } |
4794 | /* Control never gets here */ | /* Control never gets here */ |
4795 | ||
# | Line 4617 for (;;) | Line 4798 for (;;) |
4798 | { | { |
4799 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); |
4800 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4801 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4802 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4803 | { | { |
4804 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4805 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4806 | } | } |
4807 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4808 | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) | if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
4809 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4810 | } | } |
4811 | /* Control never gets here */ | /* Control never gets here */ |
4812 | ||
# | Line 4634 for (;;) | Line 4815 for (;;) |
4815 | { | { |
4816 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); |
4817 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4818 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4819 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4820 | { | { |
4821 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4822 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4823 | } | } |
4824 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4825 | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) | if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
4826 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4827 | } | } |
4828 | /* Control never gets here */ | /* Control never gets here */ |
4829 | ||
# | Line 4651 for (;;) | Line 4832 for (;;) |
4832 | { | { |
4833 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); |
4834 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4835 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4836 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4837 | { | { |
4838 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4839 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4840 | } | } |
4841 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4842 | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) | if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
4843 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4844 | } | } |
4845 | /* Control never gets here */ | /* Control never gets here */ |
4846 | ||
# | Line 4669 for (;;) | Line 4850 for (;;) |
4850 | int category; | int category; |
4851 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); |
4852 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4853 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4854 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4855 | { | { |
4856 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4857 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4858 | } | } |
4859 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4860 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
4861 | if ((category == ucp_L || category == ucp_N) == prop_fail_result) | if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
4862 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4863 | } | } |
4864 | /* Control never gets here */ | /* Control never gets here */ |
4865 | ||
# | Line 4687 for (;;) | Line 4868 for (;;) |
4868 | { | { |
4869 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); |
4870 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4871 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4872 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4873 | { | { |
4874 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4875 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4876 | } | } |
4877 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4878 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4879 | c == CHAR_FF || c == CHAR_CR) | c == CHAR_FF || c == CHAR_CR) |
4880 | == prop_fail_result) | == prop_fail_result) |
4881 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4882 | } | } |
4883 | /* Control never gets here */ | /* Control never gets here */ |
4884 | ||
# | Line 4706 for (;;) | Line 4887 for (;;) |
4887 | { | { |
4888 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM61); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM61); |
4889 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4890 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4891 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4892 | { | { |
4893 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4894 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4895 | } | } |
4896 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4897 | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || | if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
4898 | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) | c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
4899 | == prop_fail_result) | == prop_fail_result) |
4900 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4901 | } | } |
4902 | /* Control never gets here */ | /* Control never gets here */ |
4903 | ||
# | Line 4726 for (;;) | Line 4907 for (;;) |
4907 | int category; | int category; |
4908 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); | RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); |
4909 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
4910 | if (fi >= max) MRRETURN(MATCH_NOMATCH); | if (fi >= max) RRETURN(MATCH_NOMATCH); |
4911 | if (eptr >= md->end_subject) | if (eptr >= md->end_subject) |
4912 | { | { |
4913 | SCHECK_PARTIAL(); | SCHECK_PARTIAL(); |
4914 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4915 | } | } |
4916 | GETCHARINCTEST(c, eptr); | GETCHARINCTEST(c, eptr); |
4917 | category = UCD_CATEGORY(c); | category = UCD_CATEGORY(c); |
# | Line 4738 for (;;) | Line 4919 for (;;) |
4919 | category == ucp_N || | category == ucp_N || |
4920 | c == CHAR_UNDERSCORE) | c == CHAR_UNDERSCORE) |
4921 | == prop_fail_result) | == prop_fail_result) |
4922 | MRRETURN(MATCH_NOMATCH); | RRETURN(MATCH_NOMATCH); |
4923 | } | } |
4924 | /* Control never gets here */ | /* Control never gets here */ |
4925 | ||
4926 | /* This should never occur */ | case PT_CLIST: |
4927 | for (fi = min;; fi++) | |
4928 | { | |
4929 | const pcre_uint32 *cp; | |
4930 | RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); | |
4931 | if (rrc != MATCH_NOMATCH) RRETURN(rrc); | |
4932 | if (fi >= max) RRETURN(MATCH_NOMATCH); | |
4933 | if (eptr >= md->end_subject) | |
4934 | { | |
4935 | SCHECK_PARTIAL(); | |
4936 | RRETURN(MATCH_NOMATCH); | |
4937 | } | |
4938 | GETCHARINCTEST(c, eptr); | |
4939 | cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c); | |
4940 | for (;;) | |
4941 | { | |
4942 | if (c < *cp) | |
4943 | { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } } | |
4944 | if (c == *cp++) | |
4945 |