/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 976 - (show annotations)
Sat Jun 16 17:53:17 2012 UTC (7 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 218757 byte(s)
Fix capture problem with repeated, empty-string-matching groups.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 /* This module contains pcre_exec(), the externally visible function that does
41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42 possible. There are also some static supporting functions. */
43
44 #ifdef HAVE_CONFIG_H
45 #include "config.h"
46 #endif
47
48 #define NLBLOCK md /* Block containing newline information */
49 #define PSSTART start_subject /* Field containing processed string start */
50 #define PSEND end_subject /* Field containing processed string end */
51
52 #include "pcre_internal.h"
53
54 /* Undefine some potentially clashing cpp symbols */
55
56 #undef min
57 #undef max
58
59 /* Values for setting in md->match_function_type to indicate two special types
60 of call to match(). We do it this way to save on using another stack variable,
61 as stack usage is to be discouraged. */
62
63 #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
64 #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
65
66 /* Non-error returns from the match() function. Error returns are externally
67 defined PCRE_ERROR_xxx codes, which are all negative. */
68
69 #define MATCH_MATCH 1
70 #define MATCH_NOMATCH 0
71
72 /* Special internal returns from the match() function. Make them sufficiently
73 negative to avoid the external error codes. */
74
75 #define MATCH_ACCEPT (-999)
76 #define MATCH_COMMIT (-998)
77 #define MATCH_KETRPOS (-997)
78 #define MATCH_ONCE (-996)
79 #define MATCH_PRUNE (-995)
80 #define MATCH_SKIP (-994)
81 #define MATCH_SKIP_ARG (-993)
82 #define MATCH_THEN (-992)
83
84 /* Maximum number of ints of offset to save on the stack for recursive calls.
85 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
86 because the offset vector is always a multiple of 3 long. */
87
88 #define REC_STACK_SAVE_MAX 30
89
90 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
91
92 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; /* Minimum count for each common repeat; read in parallel with rep_max. NOTE(review): the index is presumably the repeat opcode's offset from the first such opcode (*, *?, +, +?, ?, ??) - confirm at the use site. */
93 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; /* Maximum count for each common repeat; a value of 0 here stands for "no upper limit". */
94
95
96
97 #ifdef PCRE_DEBUG
98 /*************************************************
99 * Debugging function to print chars *
100 *************************************************/
101
102 /* Print a sequence of chars in printable format, stopping at the end of the
103 subject if requested (that is, when is_subject is TRUE).
104
105 Arguments:
106 p points to characters
107 length number to print
108 is_subject TRUE if printing from within md->start_subject
109 md pointer to matching data block, if is_subject is TRUE
110
111 Returns: nothing
112 */
113
114 static void
115 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
116 {
117 unsigned int c;
118 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
119 while (length-- > 0)
120 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
121 }
122 #endif
123
124
125
126 /*************************************************
127 * Match a back-reference *
128 *************************************************/
129
130 /* Normally, if a back reference hasn't been set, the length that is passed is
131 negative, so the match always fails. However, in JavaScript compatibility mode,
132 the length passed is zero. Note that in caseless UTF-8 mode, the number of
133 subject bytes matched may be different to the number of reference bytes.
134
135 Arguments:
136 offset index into the offset vector
137 eptr pointer into the subject
138 length length of reference to be matched (number of bytes)
139 md points to match data block
140 caseless TRUE if caseless
141
142 Returns: >= 0 the number of subject bytes matched
143 -1 no match
144 -2 partial match; always given if at end of subject
145 */
146
147 static int
148 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
149 BOOL caseless)
150 {
151 PCRE_PUCHAR eptr_start = eptr;
152 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
153
154 #ifdef PCRE_DEBUG
155 if (eptr >= md->end_subject)
156 printf("matching subject <null>");
157 else
158 {
159 printf("matching subject ");
160 pchars(eptr, length, TRUE, md);
161 }
162 printf(" against backref ");
163 pchars(p, length, FALSE, md);
164 printf("\n");
165 #endif
166
167 /* Always fail if reference not set (and not JavaScript compatible - in that
168 case the length is passed as zero). */
169
170 if (length < 0) return -1;
171
172 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
173 properly if Unicode properties are supported. Otherwise, we can check only
174 ASCII characters. */
175
176 if (caseless)
177 {
178 #ifdef SUPPORT_UTF
179 #ifdef SUPPORT_UCP
180 if (md->utf)
181 {
182 /* Match characters up to the end of the reference. NOTE: the number of
183 bytes matched may differ, because there are some characters whose upper and
184 lower case versions code as different numbers of bytes. For example, U+023A
185 (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
186 a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
187 the latter. It is important, therefore, to check the length along the
188 reference, not along the subject (earlier code did this wrong). */
189
190 PCRE_PUCHAR endptr = p + length;
191 while (p < endptr)
192 {
193 int c, d;
194 if (eptr >= md->end_subject) return -2; /* Partial match */
195 GETCHARINC(c, eptr);
196 GETCHARINC(d, p);
197 if (c != d && c != UCD_OTHERCASE(d)) return -1;
198 }
199 }
200 else
201 #endif
202 #endif
203
204 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
205 is no UCP support. */
206 {
207 while (length-- > 0)
208 {
209 if (eptr >= md->end_subject) return -2; /* Partial match */
210 if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
211 p++;
212 eptr++;
213 }
214 }
215 }
216
217 /* In the caseful case, we can just compare the bytes, whether or not we
218 are in UTF-8 mode. */
219
220 else
221 {
222 while (length-- > 0)
223 {
224 if (eptr >= md->end_subject) return -2; /* Partial match */
225 if (*p++ != *eptr++) return -1;
226 }
227 }
228
229 return (int)(eptr - eptr_start);
230 }
231
232
233
234 /***************************************************************************
235 ****************************************************************************
236 RECURSION IN THE match() FUNCTION
237
238 The match() function is highly recursive, though not every recursive call
239 increases the recursive depth. Nevertheless, some regular expressions can cause
240 it to recurse to a great depth. I was writing for Unix, so I just let it call
241 itself recursively. This uses the stack for saving everything that has to be
242 saved for a recursive call. On Unix, the stack can be large, and this works
243 fine.
244
245 It turns out that on some non-Unix-like systems there are problems with
246 programs that use a lot of stack. (This despite the fact that every last chip
247 has oodles of memory these days, and techniques for extending the stack have
248 been known for decades.) So....
249
250 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
251 calls by keeping local variables that need to be preserved in blocks of memory
252 obtained from malloc() instead of on the stack. Macros are used to
253 achieve this so that the actual code doesn't look very different to what it
254 always used to.
255
256 The original heap-recursive code used longjmp(). However, it seems that this
257 can be very slow on some operating systems. Following a suggestion from Stan
258 Switzer, the use of longjmp() has been abolished, at the cost of having to
259 provide a unique number for each call to RMATCH. There is no way of generating
260 a sequence of numbers at compile time in C. I have given them names, to make
261 them stand out more clearly.
262
263 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
264 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
265 tests. Furthermore, not using longjmp() means that local dynamic variables
266 don't have indeterminate values; this has meant that the frame size can be
267 reduced because the result can be "passed back" by straight setting of the
268 variable instead of being passed in the frame.
269 ****************************************************************************
270 ***************************************************************************/
271
272 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
273 below must be updated in sync. */
274
275 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
276 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
277 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
278 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
279 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
280 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
281 RM61, RM62, RM63, RM64, RM65, RM66 };
282
283 /* These versions of the macros use the stack, as normal. There are debugging
284 versions and production versions. Note that the "rw" argument of RMATCH isn't
285 actually used in this definition. */
286
287 #ifndef NO_RECURSE
288 #define REGISTER register
289
290 #ifdef PCRE_DEBUG /* Tracing variants: same call/return, bracketed by printf() output */
291 #define RMATCH(ra,rb,rc,rd,re,rw) \
292 { \
293 printf("match() called in line %d\n", __LINE__); \
294 rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); /* true recursion; mstart and rdepth come from the calling scope, result goes to its rrc */ \
295 printf("to line %d\n", __LINE__); \
296 }
297 #define RRETURN(ra) \
298 { \
299 printf("match() returned %d from line %d ", ra, __LINE__); /* no newline is written here */ \
300 return ra; \
301 }
302 #else /* Production variants: a bare recursive call and a bare return */
303 #define RMATCH(ra,rb,rc,rd,re,rw) \
304 rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
305 #define RRETURN(ra) return ra
306 #endif
307
308 #else
309
310
311 /* These versions of the macros manage a private stack on the heap. Note that
312 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
313 argument of match(), which never changes. */
314
315 #define REGISTER
316
317 #define RMATCH(ra,rb,rc,rd,re,rw)\
318 {\
319 heapframe *newframe = frame->Xnextframe; /* reuse a frame already chained after this one, if any */\
320 if (newframe == NULL)\
321 {\
322 newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
323 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
324 newframe->Xnextframe = NULL;\
325 frame->Xnextframe = newframe; /* keep the new frame linked so a later RMATCH from this frame can reuse it */\
326 }\
327 frame->Xwhere = rw; /* remember which RMATCH site to resume at */\
328 newframe->Xeptr = ra; /* from here on: fill in the "arguments" of the simulated call */\
329 newframe->Xecode = rb;\
330 newframe->Xmstart = mstart; /* taken from the calling scope, not from a macro argument */\
331 newframe->Xoffset_top = rc;\
332 newframe->Xeptrb = re;\
333 newframe->Xrdepth = frame->Xrdepth + 1;\
334 newframe->Xprevframe = frame;\
335 frame = newframe; /* switch to the callee's frame before jumping */\
336 DPRINTF(("restarting from line %d\n", __LINE__));\
337 goto HEAP_RECURSE;\
338 L_##rw: /* resume label for this site, named after the rw argument */\
339 DPRINTF(("jumped back to line %d\n", __LINE__));\
340 }
341
342 #define RRETURN(ra)\
343 {\
344 heapframe *oldframe = frame;\
345 frame = oldframe->Xprevframe; /* step back to the caller's frame */\
346 if (frame != NULL)\
347 {\
348 rrc = ra; /* hand the result to the simulated caller */\
349 goto HEAP_RETURN; /* NOTE(review): HEAP_RETURN is outside this view; presumably it dispatches on Xwhere to the matching L_RMnn label - confirm */\
350 }\
351 return ra; /* no previous frame: this is the outermost level, so really return */\
352 }
353
354
355 /* Structure for remembering the local variables in a private frame */
356
357 typedef struct heapframe { /* One simulated activation record of match(), used when NO_RECURSE is defined */
358 struct heapframe *Xprevframe; /* caller's frame; RRETURN does a real return when this is NULL */
359 struct heapframe *Xnextframe; /* next frame in the chain, reused by RMATCH; NULL means one must be allocated */
360
361 /* Function arguments that may change */
362
363 PCRE_PUCHAR Xeptr; /* the Xeptr..Xrdepth fields are filled in by RMATCH for each simulated call */
364 const pcre_uchar *Xecode;
365 PCRE_PUCHAR Xmstart;
366 int Xoffset_top;
367 eptrblock *Xeptrb;
368 unsigned int Xrdepth; /* RMATCH sets this to the calling frame's depth plus one */
369
370 /* Function local variables */
371
372 PCRE_PUCHAR Xcallpat; /* each X-field below is reached in match() through a same-named macro without the X */
373 #ifdef SUPPORT_UTF
374 PCRE_PUCHAR Xcharptr;
375 #endif
376 PCRE_PUCHAR Xdata;
377 PCRE_PUCHAR Xnext;
378 PCRE_PUCHAR Xpp;
379 PCRE_PUCHAR Xprev;
380 PCRE_PUCHAR Xsaved_eptr;
381
382 recursion_info Xnew_recursive;
383
384 BOOL Xcur_is_word;
385 BOOL Xcondition;
386 BOOL Xprev_is_word;
387
388 #ifdef SUPPORT_UCP
389 int Xprop_type;
390 int Xprop_value;
391 int Xprop_fail_result;
392 int Xoclength;
393 pcre_uchar Xocchars[6];
394 #endif
395
396 int Xcodelink;
397 int Xctype;
398 unsigned int Xfc;
399 int Xfi;
400 int Xlength;
401 int Xmax;
402 int Xmin;
403 int Xnumber;
404 int Xoffset;
405 int Xop;
406 int Xsave_capture_last;
407 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
408 int Xstacksave[REC_STACK_SAVE_MAX]; /* sized by REC_STACK_SAVE_MAX, the limit on offsets saved without malloc */
409
410 eptrblock Xnewptrb;
411
412 /* Where to jump back to */
413
414 int Xwhere; /* the RMnn value stored by RMATCH just before it switches to the next frame */
415
416 } heapframe;
417
418 #endif
419
420
421 /***************************************************************************
422 ***************************************************************************/
423
424
425
426 /*************************************************
427 * Match from current position *
428 *************************************************/
429
430 /* This function is called recursively in many circumstances. Whenever it
431 returns a negative (error) response, the outer incarnation must also return the
432 same response. */
433
434 /* These macros pack up tests that are used for partial matching, and which
435 appear several times in the code. We set the "hit end" flag if the pointer is
436 at the end of the subject and also past the start of the subject (i.e.
437 something has been matched). For hard partial matching, we then return
438 immediately. The second one is used when we already know we are past the end of
439 the subject. */
440
441 #define CHECK_PARTIAL()\
442 if (md->partial != 0 && eptr >= md->end_subject && /* partial matching enabled and the subject is exhausted */ \
443 eptr > md->start_used_ptr) /* and eptr is beyond md->start_used_ptr */ \
444 { \
445 md->hitend = TRUE; \
446 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* a value above 1 is the hard-partial case: give up at once */ \
447 }
448
449 #define SCHECK_PARTIAL()\
450 if (md->partial != 0 && eptr > md->start_used_ptr) /* as CHECK_PARTIAL, but with no end-of-subject test */ \
451 { \
452 md->hitend = TRUE; \
453 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
454 } /* NOTE(review): both macros expand to a bare "if" statement, so neither is safe as the unbraced body of an if that has an else */
455
456
457 /* Performance note: It might be tempting to extract commonly used fields from
458 the md structure (e.g. utf, end_subject) into individual variables to improve
459 performance. Tests using gcc on a SPARC disproved this; in the first case, it
460 made performance worse.
461
462 Arguments:
463 eptr pointer to current character in subject
464 ecode pointer to current position in compiled code
465 mstart pointer to the current match start position (can be modified
466 by encountering \K)
467 offset_top current top pointer
468 md pointer to "static" info for the match
469 eptrb pointer to chain of blocks containing eptr at start of
470 brackets - for testing for empty matches
471 rdepth the recursion depth
472
473 Returns: MATCH_MATCH if matched ) these values are >= 0
474 MATCH_NOMATCH if failed to match )
475 a negative MATCH_xxx value for PRUNE, SKIP, etc
476 a negative PCRE_ERROR_xxx value if aborted by an error condition
477 (e.g. stopped by repeated call or recursion limit)
478 */
479
480 static int
481 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
482 PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
483 unsigned int rdepth)
484 {
485 /* These variables do not need to be preserved over recursion in this function,
486 so they can be ordinary variables in all cases. Mark some of them with
487 "register" because they are used a lot in loops. */
488
489 register int rrc; /* Returns from recursive calls */
490 register int i; /* Used for loops not involving calls to RMATCH() */
491 register unsigned int c; /* Character values not kept over RMATCH() calls */
492 register BOOL utf; /* Local copy of UTF flag for speed */
493
494 BOOL minimize, possessive; /* Quantifier options */
495 BOOL caseless;
496 int condcode;
497
498 /* When recursion is not being used, all "local" variables that have to be
499 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
500 frame on the stack here; subsequent instantiations are obtained from the heap
501 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
502 the top-level on the stack rather than malloc-ing them all gives a performance
503 boost in many cases where there is not much "recursion". */
504
505 #ifdef NO_RECURSE
506 heapframe *frame = (heapframe *)md->match_frames_base;
507
508 /* Copy in the original argument variables */
509
510 frame->Xeptr = eptr;
511 frame->Xecode = ecode;
512 frame->Xmstart = mstart;
513 frame->Xoffset_top = offset_top;
514 frame->Xeptrb = eptrb;
515 frame->Xrdepth = rdepth;
516
517 /* This is where control jumps back to in order to effect "recursion" */
518
519 HEAP_RECURSE:
520
521 /* Macros make the argument variables come from the current frame */
522
523 #define eptr frame->Xeptr
524 #define ecode frame->Xecode
525 #define mstart frame->Xmstart
526 #define offset_top frame->Xoffset_top
527 #define eptrb frame->Xeptrb
528 #define rdepth frame->Xrdepth
529
530 /* Ditto for the local variables */
531
532 #ifdef SUPPORT_UTF
533 #define charptr frame->Xcharptr
534 #endif
535 #define callpat frame->Xcallpat
536 #define codelink frame->Xcodelink
537 #define data frame->Xdata
538 #define next frame->Xnext
539 #define pp frame->Xpp
540 #define prev frame->Xprev
541 #define saved_eptr frame->Xsaved_eptr
542
543 #define new_recursive frame->Xnew_recursive
544
545 #define cur_is_word frame->Xcur_is_word
546 #define condition frame->Xcondition
547 #define prev_is_word frame->Xprev_is_word
548
549 #ifdef SUPPORT_UCP
550 #define prop_type frame->Xprop_type
551 #define prop_value frame->Xprop_value
552 #define prop_fail_result frame->Xprop_fail_result
553 #define oclength frame->Xoclength
554 #define occhars frame->Xocchars
555 #endif
556
557 #define ctype frame->Xctype
558 #define fc frame->Xfc
559 #define fi frame->Xfi
560 #define length frame->Xlength
561 #define max frame->Xmax
562 #define min frame->Xmin
563 #define number frame->Xnumber
564 #define offset frame->Xoffset
565 #define op frame->Xop
566 #define save_capture_last frame->Xsave_capture_last
567 #define save_offset1 frame->Xsave_offset1
568 #define save_offset2 frame->Xsave_offset2
569 #define save_offset3 frame->Xsave_offset3
570 #define stacksave frame->Xstacksave
571
572 #define newptrb frame->Xnewptrb
573
574 /* When recursion is being used, local variables are allocated on the stack and
575 get preserved during recursion in the normal way. In this environment, fi and
576 i, and fc and c, can be the same variables. */
577
578 #else /* NO_RECURSE not defined */
579 #define fi i
580 #define fc c
581
582 /* Many of the following variables are used only in small blocks of the code.
583 My normal style of coding would have declared them within each of those blocks.
584 However, in order to accommodate the version of this code that uses an external
585 "stack" implemented on the heap, it is easier to declare them all here, so the
586 declarations can be cut out in a block. The only declarations within blocks
587 below are for variables that do not have to be preserved over a recursive call
588 to RMATCH(). */
589
590 #ifdef SUPPORT_UTF
591 const pcre_uchar *charptr;
592 #endif
593 const pcre_uchar *callpat;
594 const pcre_uchar *data;
595 const pcre_uchar *next;
596 PCRE_PUCHAR pp;
597 const pcre_uchar *prev;
598 PCRE_PUCHAR saved_eptr;
599
600 recursion_info new_recursive;
601
602 BOOL cur_is_word;
603 BOOL condition;
604 BOOL prev_is_word;
605
606 #ifdef SUPPORT_UCP
607 int prop_type;
608 int prop_value;
609 int prop_fail_result;
610 int oclength;
611 pcre_uchar occhars[6];
612 #endif
613
614 int codelink;
615 int ctype;
616 int length;
617 int max;
618 int min;
619 int number;
620 int offset;
621 int op;
622 int save_capture_last;
623 int save_offset1, save_offset2, save_offset3;
624 int stacksave[REC_STACK_SAVE_MAX];
625
626 eptrblock newptrb;
627
628 /* There is a special fudge for calling match() in a way that causes it to
629 measure the size of its basic stack frame when the stack is being used for
630 recursion. The second argument (ecode) being NULL triggers this behaviour. It
631 cannot normally ever be NULL. The return is the negated value of the frame
632 size. */
633
634 if (ecode == NULL)
635 {
636 if (rdepth == 0)
637 return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
638 else
639 {
640 int len = (char *)&rdepth - (char *)eptr;
641 return (len > 0)? -len : len;
642 }
643 }
644 #endif /* NO_RECURSE */
645
646 /* To save space on the stack and in the heap frame, I have doubled up on some
647 of the local variables that are used only in localised parts of the code, but
648 still need to be preserved over recursive calls of match(). These macros define
649 the alternative names that are used. */
650
651 #define allow_zero cur_is_word
652 #define cbegroup condition
653 #define code_offset codelink
654 #define condassert condition
655 #define matched_once prev_is_word
656 #define foc number
657 #define save_mark data
658
659 /* These statements are here to stop the compiler complaining about uninitialized
660 variables. */
661
662 #ifdef SUPPORT_UCP
663 prop_value = 0;
664 prop_fail_result = 0;
665 #endif
666
667
668 /* This label is used for tail recursion, which is used in a few cases even
669 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
670 used. Thanks to Ian Taylor for noticing this possibility and sending the
671 original patch. */
672
673 TAIL_RECURSE:
674
675 /* OK, now we can get on with the real code of the function. Recursive calls
676 are specified by the macro RMATCH and RRETURN is used to return. When
677 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
678 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
679 defined). However, RMATCH isn't like a function call because it's quite a
680 complicated macro. It has to be used in one particular way. This shouldn't,
681 however, impact performance when true recursion is being used. */
682
683 #ifdef SUPPORT_UTF
684 utf = md->utf; /* Local copy of the flag */
685 #else
686 utf = FALSE;
687 #endif
688
689 /* First check that we haven't called match() too many times, or that we
690 haven't exceeded the recursive call limit. */
691
692 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
693 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
694
695 /* At the start of a group with an unlimited repeat that may match an empty
696 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
697 done this way to save having to use another function argument, which would take
698 up space on the stack. See also MATCH_CONDASSERT below.
699
700 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
701 such remembered pointers, to be checked when we hit the closing ket, in order
702 to break infinite loops that match no characters. When match() is called in
703 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
704 NOT be used with tail recursion, because the memory block that is used is on
705 the stack, so a new one may be required for each match(). */
706
707 if (md->match_function_type == MATCH_CBEGROUP)
708 {
709 newptrb.epb_saved_eptr = eptr;
710 newptrb.epb_prev = eptrb;
711 eptrb = &newptrb;
712 md->match_function_type = 0;
713 }
714
715 /* Now start processing the opcodes. */
716
717 for (;;)
718 {
719 minimize = possessive = FALSE;
720 op = *ecode;
721
722 switch(op)
723 {
724 case OP_MARK:
725 md->nomatch_mark = ecode + 2;
726 md->mark = NULL; /* In case previously set by assertion */
727 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
728 eptrb, RM55);
729 if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
730 md->mark == NULL) md->mark = ecode + 2;
731
732 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
733 argument, and we must check whether that argument matches this MARK's
734 argument. It is passed back in md->start_match_ptr (an overloading of that
735 variable). If it does match, we reset that variable to the current subject
736 position and return MATCH_SKIP. Otherwise, pass back the return code
737 unaltered. */
738
739 else if (rrc == MATCH_SKIP_ARG &&
740 STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
741 {
742 md->start_match_ptr = eptr;
743 RRETURN(MATCH_SKIP);
744 }
745 RRETURN(rrc);
746
747 case OP_FAIL:
748 RRETURN(MATCH_NOMATCH);
749
750 /* COMMIT overrides PRUNE, SKIP, and THEN */
751
752 case OP_COMMIT:
753 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
754 eptrb, RM52);
755 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
756 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
757 rrc != MATCH_THEN)
758 RRETURN(rrc);
759 RRETURN(MATCH_COMMIT);
760
761 /* PRUNE overrides THEN */
762
763 case OP_PRUNE:
764 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
765 eptrb, RM51);
766 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
767 RRETURN(MATCH_PRUNE);
768
769 case OP_PRUNE_ARG:
770 md->nomatch_mark = ecode + 2;
771 md->mark = NULL; /* In case previously set by assertion */
772 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
773 eptrb, RM56);
774 if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
775 md->mark == NULL) md->mark = ecode + 2;
776 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
777 RRETURN(MATCH_PRUNE);
778
779 /* SKIP overrides PRUNE and THEN */
780
781 case OP_SKIP:
782 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
783 eptrb, RM53);
784 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
785 RRETURN(rrc);
786 md->start_match_ptr = eptr; /* Pass back current position */
787 RRETURN(MATCH_SKIP);
788
789 /* Note that, for Perl compatibility, SKIP with an argument does NOT set
790 nomatch_mark. There is a flag that disables this opcode when re-matching a
791 pattern that ended with a SKIP for which there was not a matching MARK. */
792
793 case OP_SKIP_ARG:
794 if (md->ignore_skip_arg)
795 {
796 ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
797 break;
798 }
799 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
800 eptrb, RM57);
801 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
802 RRETURN(rrc);
803
804 /* Pass back the current skip name by overloading md->start_match_ptr and
805 returning the special MATCH_SKIP_ARG return code. This will either be
806 caught by a matching MARK, or get to the top, where it causes a rematch
807 with the md->ignore_skip_arg flag set. */
808
809 md->start_match_ptr = ecode + 2;
810 RRETURN(MATCH_SKIP_ARG);
811
812 /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
813 the branch in which it occurs can be determined. Overload the start of
814 match pointer to do this. */
815
816 case OP_THEN:
817 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
818 eptrb, RM54);
819 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
820 md->start_match_ptr = ecode;
821 RRETURN(MATCH_THEN);
822
823 case OP_THEN_ARG:
824 md->nomatch_mark = ecode + 2;
825 md->mark = NULL; /* In case previously set by assertion */
826 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
827 md, eptrb, RM58);
828 if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
829 md->mark == NULL) md->mark = ecode + 2;
830 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
831 md->start_match_ptr = ecode;
832 RRETURN(MATCH_THEN);
833
834 /* Handle an atomic group that does not contain any capturing parentheses.
835 This can be handled like an assertion. Prior to 8.13, all atomic groups
836 were handled this way. In 8.13, the code was changed as below for ONCE, so
837 that backups pass through the group and thereby reset captured values.
838 However, this uses a lot more stack, so in 8.20, atomic groups that do not
839 contain any captures generate OP_ONCE_NC, which can be handled in the old,
840 less stack intensive way.
841
842 Check the alternative branches in turn - the matching won't pass the KET
843 for this kind of subpattern. If any one branch matches, we carry on as at
844 the end of a normal bracket, leaving the subject pointer, but resetting
845 the start-of-match value in case it was changed by \K. */
846
847 case OP_ONCE_NC:
848 prev = ecode;
849 saved_eptr = eptr;
850 save_mark = md->mark;
851 do
852 {
853 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
854 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
855 {
856 mstart = md->start_match_ptr;
857 break;
858 }
859 if (rrc == MATCH_THEN)
860 {
861 next = ecode + GET(ecode,1);
862 if (md->start_match_ptr < next &&
863 (*ecode == OP_ALT || *next == OP_ALT))
864 rrc = MATCH_NOMATCH;
865 }
866
867 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
868 ecode += GET(ecode,1);
869 md->mark = save_mark;
870 }
871 while (*ecode == OP_ALT);
872
873 /* If hit the end of the group (which could be repeated), fail */
874
875 if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
876
877 /* Continue as from after the group, updating the offsets high water
878 mark, since extracts may have been taken. */
879
880 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
881
882 offset_top = md->end_offset_top;
883 eptr = md->end_match_ptr;
884
885 /* For a non-repeating ket, just continue at this level. This also
886 happens for a repeating ket if no characters were matched in the group.
887 This is the forcible breaking of infinite loops as implemented in Perl
888 5.005. */
889
890 if (*ecode == OP_KET || eptr == saved_eptr)
891 {
892 ecode += 1+LINK_SIZE;
893 break;
894 }
895
896 /* The repeating kets try the rest of the pattern or restart from the
897 preceding bracket, in the appropriate order. The second "call" of match()
898 uses tail recursion, to avoid using another stack frame. */
899
900 if (*ecode == OP_KETRMIN)
901 {
902 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
903 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
904 ecode = prev;
905 goto TAIL_RECURSE;
906 }
907 else /* OP_KETRMAX */
908 {
909 RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
910 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
911 ecode += 1 + LINK_SIZE;
912 goto TAIL_RECURSE;
913 }
914 /* Control never gets here */
915
916 /* Handle a capturing bracket, other than those that are possessive with an
917 unlimited repeat. If there is space in the offset vector, save the current
918 subject position in the working slot at the top of the vector. We mustn't
919 change the current values of the data slot, because they may be set from a
920 previous iteration of this group, and be referred to by a reference inside
921 the group. A failure to match might occur after the group has succeeded,
922 if something later on doesn't match. For this reason, we need to restore
923 the working value and also the values of the final offsets, in case they
924 were set by a previous iteration of the same bracket.
925
926 If there isn't enough space in the offset vector, treat this as if it were
927 a non-capturing bracket. Don't worry about setting the flag for the error
928 case here; that is handled in the code for KET. */
929
930 case OP_CBRA:
931 case OP_SCBRA:
932 number = GET2(ecode, 1+LINK_SIZE);
933 offset = number << 1;
934
935 #ifdef PCRE_DEBUG
936 printf("start bracket %d\n", number);
937 printf("subject=");
938 pchars(eptr, 16, TRUE, md);
939 printf("\n");
940 #endif
941
942 if (offset < md->offset_max)
943 {
944 save_offset1 = md->offset_vector[offset];
945 save_offset2 = md->offset_vector[offset+1];
946 save_offset3 = md->offset_vector[md->offset_end - number];
947 save_capture_last = md->capture_last;
948 save_mark = md->mark;
949
950 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
951 md->offset_vector[md->offset_end - number] =
952 (int)(eptr - md->start_subject);
953
954 for (;;)
955 {
956 if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
957 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
958 eptrb, RM1);
959 if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
960
961 /* If we backed up to a THEN, check whether it is within the current
962 branch by comparing the address of the THEN that is passed back with
963 the end of the branch. If it is within the current branch, and the
964 branch is one of two or more alternatives (it either starts or ends
965 with OP_ALT), we have reached the limit of THEN's action, so convert
966 the return code to NOMATCH, which will cause normal backtracking to
967 happen from now on. Otherwise, THEN is passed back to an outer
968 alternative. This implements Perl's treatment of parenthesized groups,
969 where a group not containing | does not affect the current alternative,
970 that is, (X) is NOT the same as (X|(*F)). */
971
972 if (rrc == MATCH_THEN)
973 {
974 next = ecode + GET(ecode,1);
975 if (md->start_match_ptr < next &&
976 (*ecode == OP_ALT || *next == OP_ALT))
977 rrc = MATCH_NOMATCH;
978 }
979
980 /* Anything other than NOMATCH is passed back. */
981
982 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
983 md->capture_last = save_capture_last;
984 ecode += GET(ecode, 1);
985 md->mark = save_mark;
986 if (*ecode != OP_ALT) break;
987 }
988
989 DPRINTF(("bracket %d failed\n", number));
990 md->offset_vector[offset] = save_offset1;
991 md->offset_vector[offset+1] = save_offset2;
992 md->offset_vector[md->offset_end - number] = save_offset3;
993
994 /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
995
996 RRETURN(rrc);
997 }
998
999 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1000 as a non-capturing bracket. */
1001
1002 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1003 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1004
1005 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1006
1007 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1008 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1009
1010 /* Non-capturing or atomic group, except for possessive with unlimited
1011 repeat and ONCE group with no captures. Loop for all the alternatives.
1012
1013 When we get to the final alternative within the brackets, we used to return
1014 the result of a recursive call to match() whatever happened so it was
1015 possible to reduce stack usage by turning this into a tail recursion,
1016 except in the case of a possibly empty group. However, now that there is
1017 the possibility of (*THEN) occurring in the final alternative, this
1018 optimization is no longer always possible.
1019
1020 We can optimize if we know there are no (*THEN)s in the pattern; at present
1021 this is the best that can be done.
1022
1023 MATCH_ONCE is returned when the end of an atomic group is successfully
1024 reached, but subsequent matching fails. It passes back up the tree (causing
1025 captured values to be reset) until the original atomic group level is
1026 reached. This is tested by comparing md->once_target with the start of the
1027 group. At this point, the return is converted into MATCH_NOMATCH so that
1028 previous backup points can be taken. */
1029
1030 case OP_ONCE:
1031 case OP_BRA:
1032 case OP_SBRA:
1033 DPRINTF(("start non-capturing bracket\n"));
1034
1035 for (;;)
1036 {
1037 if (op >= OP_SBRA || op == OP_ONCE)
1038 md->match_function_type = MATCH_CBEGROUP;
1039
1040 /* If this is not a possibly empty group, and there are no (*THEN)s in
1041 the pattern, and this is the final alternative, optimize as described
1042 above. */
1043
1044 else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1045 {
1046 ecode += PRIV(OP_lengths)[*ecode];
1047 goto TAIL_RECURSE;
1048 }
1049
1050 /* In all other cases, we have to make another call to match(). */
1051
1052 save_mark = md->mark;
1053 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1054 RM2);
1055
1056 /* See comment in the code for capturing groups above about handling
1057 THEN. */
1058
1059 if (rrc == MATCH_THEN)
1060 {
1061 next = ecode + GET(ecode,1);
1062 if (md->start_match_ptr < next &&
1063 (*ecode == OP_ALT || *next == OP_ALT))
1064 rrc = MATCH_NOMATCH;
1065 }
1066
1067 if (rrc != MATCH_NOMATCH)
1068 {
1069 if (rrc == MATCH_ONCE)
1070 {
1071 const pcre_uchar *scode = ecode;
1072 if (*scode != OP_ONCE) /* If not at start, find it */
1073 {
1074 while (*scode == OP_ALT) scode += GET(scode, 1);
1075 scode -= GET(scode, 1);
1076 }
1077 if (md->once_target == scode) rrc = MATCH_NOMATCH;
1078 }
1079 RRETURN(rrc);
1080 }
1081 ecode += GET(ecode, 1);
1082 md->mark = save_mark;
1083 if (*ecode != OP_ALT) break;
1084 }
1085
1086 RRETURN(MATCH_NOMATCH);
1087
1088 /* Handle possessive capturing brackets with an unlimited repeat. We come
1089 here from BRAPOSZERO with allow_zero set TRUE. The offset_vector values are
1090 handled similarly to the normal case above. However, the matching is
1091 different. The end of these brackets will always be OP_KETRPOS, which
1092 returns MATCH_KETRPOS without going further in the pattern. By this means
1093 we can handle the group by iteration rather than recursion, thereby
1094 reducing the amount of stack needed. */
1095
1096 case OP_CBRAPOS:
1097 case OP_SCBRAPOS:
1098 allow_zero = FALSE;
1099
1100 POSSESSIVE_CAPTURE:
1101 number = GET2(ecode, 1+LINK_SIZE);
1102 offset = number << 1;
1103
1104 #ifdef PCRE_DEBUG
1105 printf("start possessive bracket %d\n", number);
1106 printf("subject=");
1107 pchars(eptr, 16, TRUE, md);
1108 printf("\n");
1109 #endif
1110
1111 if (offset < md->offset_max)
1112 {
1113 matched_once = FALSE;
1114 code_offset = (int)(ecode - md->start_code);
1115
1116 save_offset1 = md->offset_vector[offset];
1117 save_offset2 = md->offset_vector[offset+1];
1118 save_offset3 = md->offset_vector[md->offset_end - number];
1119 save_capture_last = md->capture_last;
1120
1121 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1122
1123 /* Each time round the loop, save the current subject position for use
1124 when the group matches. For MATCH_MATCH, the group has matched, so we
1125 restart it with a new subject starting position, remembering that we had
1126 at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1127 usual. If we haven't matched any alternatives in any iteration, check to
1128 see if a previous iteration matched. If so, the group has matched;
1129 continue from afterwards. Otherwise it has failed; restore the previous
1130 capture values before returning NOMATCH. */
1131
1132 for (;;)
1133 {
1134 md->offset_vector[md->offset_end - number] =
1135 (int)(eptr - md->start_subject);
1136 if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1137 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1138 eptrb, RM63);
1139 if (rrc == MATCH_KETRPOS)
1140 {
1141 offset_top = md->end_offset_top;
1142 eptr = md->end_match_ptr;
1143 ecode = md->start_code + code_offset;
1144 save_capture_last = md->capture_last;
1145 matched_once = TRUE;
1146 continue;
1147 }
1148
1149 /* See comment in the code for capturing groups above about handling
1150 THEN. */
1151
1152 if (rrc == MATCH_THEN)
1153 {
1154 next = ecode + GET(ecode,1);
1155 if (md->start_match_ptr < next &&
1156 (*ecode == OP_ALT || *next == OP_ALT))
1157 rrc = MATCH_NOMATCH;
1158 }
1159
1160 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1161 md->capture_last = save_capture_last;
1162 ecode += GET(ecode, 1);
1163 if (*ecode != OP_ALT) break;
1164 }
1165
1166 if (!matched_once)
1167 {
1168 md->offset_vector[offset] = save_offset1;
1169 md->offset_vector[offset+1] = save_offset2;
1170 md->offset_vector[md->offset_end - number] = save_offset3;
1171 }
1172
1173 if (allow_zero || matched_once)
1174 {
1175 ecode += 1 + LINK_SIZE;
1176 break;
1177 }
1178
1179 RRETURN(MATCH_NOMATCH);
1180 }
1181
1182 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1183 as a non-capturing bracket. */
1184
1185 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1186 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1187
1188 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1189
1190 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1191 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1192
1193 /* Non-capturing possessive bracket with unlimited repeat. We come here
1194 from BRAPOSZERO with allow_zero = TRUE. The code is similar to the above,
1195 without the capturing complication. It is written out separately for speed
1196 and cleanliness. */
1197
1198 case OP_BRAPOS:
1199 case OP_SBRAPOS:
1200 allow_zero = FALSE;
1201
1202 POSSESSIVE_NON_CAPTURE:
1203 matched_once = FALSE;
1204 code_offset = (int)(ecode - md->start_code);
1205
1206 for (;;)
1207 {
1208 if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1209 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1210 eptrb, RM48);
1211 if (rrc == MATCH_KETRPOS)
1212 {
1213 offset_top = md->end_offset_top;
1214 eptr = md->end_match_ptr;
1215 ecode = md->start_code + code_offset;
1216 matched_once = TRUE;
1217 continue;
1218 }
1219
1220 /* See comment in the code for capturing groups above about handling
1221 THEN. */
1222
1223 if (rrc == MATCH_THEN)
1224 {
1225 next = ecode + GET(ecode,1);
1226 if (md->start_match_ptr < next &&
1227 (*ecode == OP_ALT || *next == OP_ALT))
1228 rrc = MATCH_NOMATCH;
1229 }
1230
1231 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1232 ecode += GET(ecode, 1);
1233 if (*ecode != OP_ALT) break;
1234 }
1235
1236 if (matched_once || allow_zero)
1237 {
1238 ecode += 1 + LINK_SIZE;
1239 break;
1240 }
1241 RRETURN(MATCH_NOMATCH);
1242
1243 /* Control never reaches here. */
1244
1245 /* Conditional group: compilation checked that there are no more than
1246 two branches. If the condition is false, skipping the first branch takes us
1247 past the end if there is only one branch, but that's OK because that is
1248 exactly what going to the ket would do. */
1249
1250 case OP_COND:
1251 case OP_SCOND:
1252 codelink = GET(ecode, 1);
1253
1254 /* Because of the way auto-callout works during compile, a callout item is
1255 inserted between OP_COND and an assertion condition. */
1256
1257 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1258 {
1259 if (PUBL(callout) != NULL)
1260 {
1261 PUBL(callout_block) cb;
1262 cb.version = 2; /* Version 1 of the callout block */
1263 cb.callout_number = ecode[LINK_SIZE+2];
1264 cb.offset_vector = md->offset_vector;
1265 #ifdef COMPILE_PCRE8
1266 cb.subject = (PCRE_SPTR)md->start_subject;
1267 #else
1268 cb.subject = (PCRE_SPTR16)md->start_subject;
1269 #endif
1270 cb.subject_length = (int)(md->end_subject - md->start_subject);
1271 cb.start_match = (int)(mstart - md->start_subject);
1272 cb.current_position = (int)(eptr - md->start_subject);
1273 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1274 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1275 cb.capture_top = offset_top/2;
1276 cb.capture_last = md->capture_last;
1277 cb.callout_data = md->callout_data;
1278 cb.mark = md->nomatch_mark;
1279 if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1280 if (rrc < 0) RRETURN(rrc);
1281 }
1282 ecode += PRIV(OP_lengths)[OP_CALLOUT];
1283 }
1284
1285 condcode = ecode[LINK_SIZE+1];
1286
1287 /* Now see what the actual condition is */
1288
1289 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
1290 {
1291 if (md->recursive == NULL) /* Not recursing => FALSE */
1292 {
1293 condition = FALSE;
1294 ecode += GET(ecode, 1);
1295 }
1296 else
1297 {
1298 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
1299 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1300
1301 /* If the test is for recursion into a specific subpattern, and it is
1302 false, but the test was set up by name, scan the table to see if the
1303 name refers to any other numbers, and test them. The condition is true
1304 if any one is set. */
1305
1306 if (!condition && condcode == OP_NRREF)
1307 {
1308 pcre_uchar *slotA = md->name_table;
1309 for (i = 0; i < md->name_count; i++)
1310 {
1311 if (GET2(slotA, 0) == recno) break;
1312 slotA += md->name_entry_size;
1313 }
1314
1315 /* Found a name for the number - there can be only one; duplicate
1316 names for different numbers are allowed, but not vice versa. First
1317 scan down for duplicates. */
1318
1319 if (i < md->name_count)
1320 {
1321 pcre_uchar *slotB = slotA;
1322 while (slotB > md->name_table)
1323 {
1324 slotB -= md->name_entry_size;
1325 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1326 {
1327 condition = GET2(slotB, 0) == md->recursive->group_num;
1328 if (condition) break;
1329 }
1330 else break;
1331 }
1332
1333 /* Scan up for duplicates */
1334
1335 if (!condition)
1336 {
1337 slotB = slotA;
1338 for (i++; i < md->name_count; i++)
1339 {
1340 slotB += md->name_entry_size;
1341 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1342 {
1343 condition = GET2(slotB, 0) == md->recursive->group_num;
1344 if (condition) break;
1345 }
1346 else break;
1347 }
1348 }
1349 }
1350 }
1351
1352 /* Choose branch according to the condition */
1353
1354 ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1355 }
1356 }
1357
1358 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1359 {
1360 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1361 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1362
1363 /* If the numbered capture is unset, but the reference was by name,
1364 scan the table to see if the name refers to any other numbers, and test
1365 them. The condition is true if any one is set. This is tediously similar
1366 to the code above, but not close enough to try to amalgamate. */
1367
1368 if (!condition && condcode == OP_NCREF)
1369 {
1370 int refno = offset >> 1;
1371 pcre_uchar *slotA = md->name_table;
1372
1373 for (i = 0; i < md->name_count; i++)
1374 {
1375 if (GET2(slotA, 0) == refno) break;
1376 slotA += md->name_entry_size;
1377 }
1378
1379 /* Found a name for the number - there can be only one; duplicate names
1380 for different numbers are allowed, but not vice versa. First scan down
1381 for duplicates. */
1382
1383 if (i < md->name_count)
1384 {
1385 pcre_uchar *slotB = slotA;
1386 while (slotB > md->name_table)
1387 {
1388 slotB -= md->name_entry_size;
1389 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1390 {
1391 offset = GET2(slotB, 0) << 1;
1392 condition = offset < offset_top &&
1393 md->offset_vector[offset] >= 0;
1394 if (condition) break;
1395 }
1396 else break;
1397 }
1398
1399 /* Scan up for duplicates */
1400
1401 if (!condition)
1402 {
1403 slotB = slotA;
1404 for (i++; i < md->name_count; i++)
1405 {
1406 slotB += md->name_entry_size;
1407 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1408 {
1409 offset = GET2(slotB, 0) << 1;
1410 condition = offset < offset_top &&
1411 md->offset_vector[offset] >= 0;
1412 if (condition) break;
1413 }
1414 else break;
1415 }
1416 }
1417 }
1418 }
1419
1420 /* Choose branch according to the condition */
1421
1422 ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1423 }
1424
1425 else if (condcode == OP_DEF) /* DEFINE - always false */
1426 {
1427 condition = FALSE;
1428 ecode += GET(ecode, 1);
1429 }
1430
1431 /* The condition is an assertion. Call match() to evaluate it - setting
1432 md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1433 an assertion. */
1434
1435 else
1436 {
1437 md->match_function_type = MATCH_CONDASSERT;
1438 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1439 if (rrc == MATCH_MATCH)
1440 {
1441 if (md->end_offset_top > offset_top)
1442 offset_top = md->end_offset_top; /* Captures may have happened */
1443 condition = TRUE;
1444 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1445 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1446 }
1447
1448 /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1449 assertion; it is therefore treated as NOMATCH. */
1450
1451 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1452 {
1453 RRETURN(rrc); /* Need braces because of following else */
1454 }
1455 else
1456 {
1457 condition = FALSE;
1458 ecode += codelink;
1459 }
1460 }
1461
1462 /* We are now at the branch that is to be obeyed. As there is only one, can
1463 use tail recursion to avoid using another stack frame, except when there is
1464 unlimited repeat of a possibly empty group. In the latter case, a recursive
1465 call to match() is always required, unless the second alternative doesn't
1466 exist, in which case we can just plough on. Note that, for compatibility
1467 with Perl, the | in a conditional group is NOT treated as creating two
1468 alternatives. If a THEN is encountered in the branch, it propagates out to
1469 the enclosing alternative (unless nested in a deeper set of alternatives,
1470 of course). */
1471
1472 if (condition || *ecode == OP_ALT)
1473 {
1474 if (op != OP_SCOND)
1475 {
1476 ecode += 1 + LINK_SIZE;
1477 goto TAIL_RECURSE;
1478 }
1479
1480 md->match_function_type = MATCH_CBEGROUP;
1481 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1482 RRETURN(rrc);
1483 }
1484
1485 /* Condition false & no alternative; continue after the group. */
1486
1487 else
1488 {
1489 ecode += 1 + LINK_SIZE;
1490 }
1491 break;
1492
1493
1494 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1495 to close any currently open capturing brackets. */
1496
1497 case OP_CLOSE:
1498 number = GET2(ecode, 1);
1499 offset = number << 1;
1500
1501 #ifdef PCRE_DEBUG
1502 printf("end bracket %d at *ACCEPT", number);
1503 printf("\n");
1504 #endif
1505
1506 md->capture_last = number;
1507 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1508 {
1509 md->offset_vector[offset] =
1510 md->offset_vector[md->offset_end - number];
1511 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1512 if (offset_top <= offset) offset_top = offset + 2;
1513 }
1514 ecode += 1 + IMM2_SIZE;
1515 break;
1516
1517
1518 /* End of the pattern, either real or forced. */
1519
1520 case OP_END:
1521 case OP_ACCEPT:
1522 case OP_ASSERT_ACCEPT:
1523
1524 /* If we have matched an empty string, fail if not in an assertion and not
1525 in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1526 is set and we have matched at the start of the subject. In both cases,
1527 backtracking will then try other alternatives, if any. */
1528
1529 if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1530 md->recursive == NULL &&
1531 (md->notempty ||
1532 (md->notempty_atstart &&
1533 mstart == md->start_subject + md->start_offset)))
1534 RRETURN(MATCH_NOMATCH);
1535
1536 /* Otherwise, we have a match. */
1537
1538 md->end_match_ptr = eptr; /* Record where we ended */
1539 md->end_offset_top = offset_top; /* and how many extracts were taken */
1540 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1541
1542 /* For some reason, the macros don't work properly if an expression is
1543 given as the argument to RRETURN when the heap is in use. */
1544
1545 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1546 RRETURN(rrc);
1547
1548 /* Assertion brackets. Check the alternative branches in turn - the
1549 matching won't pass the KET for an assertion. If any one branch matches,
1550 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1551 start of each branch to move the current point backwards, so the code at
1552 this level is identical to the lookahead case. When the assertion is part
1553 of a condition, we want to return immediately afterwards. The caller of
1554 this incarnation of the match() function will have set MATCH_CONDASSERT in
1555 md->match_function_type, and one of these opcodes will be the first opcode
1556 that is processed. We use a local variable that is preserved over calls to
1557 match() to remember this case. */
1558
1559 case OP_ASSERT:
1560 case OP_ASSERTBACK:
1561 save_mark = md->mark;
1562 if (md->match_function_type == MATCH_CONDASSERT)
1563 {
1564 condassert = TRUE;
1565 md->match_function_type = 0;
1566 }
1567 else condassert = FALSE;
1568
1569 do
1570 {
1571 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1572 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1573 {
1574 mstart = md->start_match_ptr; /* In case \K reset it */
1575 break;
1576 }
1577 md->mark = save_mark;
1578
1579 /* A COMMIT failure must fail the entire assertion, without trying any
1580 subsequent branches. */
1581
1582 if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1583
1584 /* PCRE does not allow THEN to escape beyond an assertion; it
1585 is treated as NOMATCH. */
1586
1587 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1588 ecode += GET(ecode, 1);
1589 }
1590 while (*ecode == OP_ALT);
1591
1592 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1593
1594 /* If checking an assertion for a condition, return MATCH_MATCH. */
1595
1596 if (condassert) RRETURN(MATCH_MATCH);
1597
1598 /* Continue from after the assertion, updating the offsets high water
1599 mark, since extracts may have been taken during the assertion. */
1600
1601 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1602 ecode += 1 + LINK_SIZE;
1603 offset_top = md->end_offset_top;
1604 continue;
1605
1606 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1607 PRUNE, or COMMIT means we must assume failure without checking subsequent
1608 branches. */
1609
1610 case OP_ASSERT_NOT:
1611 case OP_ASSERTBACK_NOT:
1612 save_mark = md->mark;
1613 if (md->match_function_type == MATCH_CONDASSERT)
1614 {
1615 condassert = TRUE;
1616 md->match_function_type = 0;
1617 }
1618 else condassert = FALSE;
1619
1620 do
1621 {
1622 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1623 md->mark = save_mark;
1624 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1625 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1626 {
1627 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1628 break;
1629 }
1630
1631 /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1632 as NOMATCH. */
1633
1634 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1635 ecode += GET(ecode,1);
1636 }
1637 while (*ecode == OP_ALT);
1638
1639 if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
1640
1641 ecode += 1 + LINK_SIZE;
1642 continue;
1643
1644 /* Move the subject pointer back. This occurs only at the start of
1645 each branch of a lookbehind assertion. If we are too close to the start to
1646 move back, this match function fails. When working with UTF-8 we move
1647 back a number of characters, not bytes. */
1648
1649 case OP_REVERSE:
1650 #ifdef SUPPORT_UTF
1651 if (utf)
1652 {
1653 i = GET(ecode, 1);
1654 while (i-- > 0)
1655 {
1656 eptr--;
1657 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1658 BACKCHAR(eptr);
1659 }
1660 }
1661 else
1662 #endif
1663
1664 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1665
1666 {
1667 eptr -= GET(ecode, 1);
1668 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1669 }
1670
1671 /* Save the earliest consulted character, then skip to next op code */
1672
1673 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1674 ecode += 1 + LINK_SIZE;
1675 break;
1676
1677 /* The callout item calls an external function, if one is provided, passing
1678 details of the match so far. This is mainly for debugging, though the
1679 function is able to force a failure. */
1680
1681 case OP_CALLOUT:
1682 if (PUBL(callout) != NULL)
1683 {
1684 PUBL(callout_block) cb;
1685 cb.version = 2; /* Version 1 of the callout block */
1686 cb.callout_number = ecode[1];
1687 cb.offset_vector = md->offset_vector;
1688 #ifdef COMPILE_PCRE8
1689 cb.subject = (PCRE_SPTR)md->start_subject;
1690 #else
1691 cb.subject = (PCRE_SPTR16)md->start_subject;
1692 #endif
1693 cb.subject_length = (int)(md->end_subject - md->start_subject);
1694 cb.start_match = (int)(mstart - md->start_subject);
1695 cb.current_position = (int)(eptr - md->start_subject);
1696 cb.pattern_position = GET(ecode, 2);
1697 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1698 cb.capture_top = offset_top/2;
1699 cb.capture_last = md->capture_last;
1700 cb.callout_data = md->callout_data;
1701 cb.mark = md->nomatch_mark;
1702 if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1703 if (rrc < 0) RRETURN(rrc);
1704 }
1705 ecode += 2 + 2*LINK_SIZE;
1706 break;
1707
1708 /* Recursion either matches the current regex, or some subexpression. The
1709 offset data is the offset to the starting bracket from the start of the
1710 whole pattern. (This is so that it works from duplicated subpatterns.)
1711
1712 The state of the capturing groups is preserved over recursion, and
1713 re-instated afterwards. We don't know how many are started and not yet
1714 finished (offset_top records the completed total) so we just have to save
1715 all the potential data. There may be up to 65535 such values, which is too
1716 large to put on the stack, but using malloc for small numbers seems
1717 expensive. As a compromise, the stack is used when there are no more than
1718 REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1719
1720 There are also other values that have to be saved. We use a chained
1721 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1722 for the original version of this logic. It has, however, been hacked around
1723 a lot, so he is not to blame for the current way it works. */
1724
1725 case OP_RECURSE:
1726 {
1727 recursion_info *ri;
1728 int recno;
1729
1730 callpat = md->start_code + GET(ecode, 1);
1731 recno = (callpat == md->start_code)? 0 :
1732 GET2(callpat, 1 + LINK_SIZE);
1733
1734 /* Check for repeating a recursion without advancing the subject pointer.
1735 This should catch convoluted mutual recursions. (Some simple cases are
1736 caught at compile time.) */
1737
1738 for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1739 if (recno == ri->group_num && eptr == ri->subject_position)
1740 RRETURN(PCRE_ERROR_RECURSELOOP);
1741
1742 /* Add to "recursing stack" */
1743
1744 new_recursive.group_num = recno;
1745 new_recursive.subject_position = eptr;
1746 new_recursive.prevrec = md->recursive;
1747 md->recursive = &new_recursive;
1748
1749 /* Where to continue from afterwards */
1750
1751 ecode += 1 + LINK_SIZE;
1752
1753 /* Now save the offset data */
1754
1755 new_recursive.saved_max = md->offset_end;
1756 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1757 new_recursive.offset_save = stacksave;
1758 else
1759 {
1760 new_recursive.offset_save =
1761 (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1762 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1763 }
1764 memcpy(new_recursive.offset_save, md->offset_vector,
1765 new_recursive.saved_max * sizeof(int));
1766
1767 /* OK, now we can do the recursion. After processing each alternative,
1768 restore the offset data. If there were nested recursions, md->recursive
1769 might be changed, so reset it before looping. */
1770
1771 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1772 cbegroup = (*callpat >= OP_SBRA);
1773 do
1774 {
1775 if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1776 RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1777 md, eptrb, RM6);
1778 memcpy(md->offset_vector, new_recursive.offset_save,
1779 new_recursive.saved_max * sizeof(int));
1780 md->recursive = new_recursive.prevrec;
1781 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1782 {
1783 DPRINTF(("Recursion matched\n"));
1784 if (new_recursive.offset_save != stacksave)
1785 (PUBL(free))(new_recursive.offset_save);
1786
1787 /* Set where we got to in the subject, and reset the start in case
1788 it was changed by \K. This *is* propagated back out of a recursion,
1789 for Perl compatibility. */
1790
1791 eptr = md->end_match_ptr;
1792 mstart = md->start_match_ptr;
1793 goto RECURSION_MATCHED; /* Exit loop; end processing */
1794 }
1795
1796 /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1797 is treated as NOMATCH. */
1798
1799 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1800 rrc != MATCH_COMMIT)
1801 {
1802 DPRINTF(("Recursion gave error %d\n", rrc));
1803 if (new_recursive.offset_save != stacksave)
1804 (PUBL(free))(new_recursive.offset_save);
1805 RRETURN(rrc);
1806 }
1807
1808 md->recursive = &new_recursive;
1809 callpat += GET(callpat, 1);
1810 }
1811 while (*callpat == OP_ALT);
1812
1813 DPRINTF(("Recursion didn't match\n"));
1814 md->recursive = new_recursive.prevrec;
1815 if (new_recursive.offset_save != stacksave)
1816 (PUBL(free))(new_recursive.offset_save);
1817 RRETURN(MATCH_NOMATCH);
1818 }
1819
1820 RECURSION_MATCHED:
1821 break;
1822
1823 /* An alternation is the end of a branch; scan along to find the end of the
1824 bracketed group and go to there. */
1825
1826 case OP_ALT:
1827 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1828 break;
1829
1830 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1831 indicating that it may occur zero times. It may repeat infinitely, or not
1832 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1833 with fixed upper repeat limits are compiled as a number of copies, with the
1834 optional ones preceded by BRAZERO or BRAMINZERO. */
1835
1836 case OP_BRAZERO:
1837 next = ecode + 1;
1838 RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1839 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1840 do next += GET(next, 1); while (*next == OP_ALT);
1841 ecode = next + 1 + LINK_SIZE;
1842 break;
1843
1844 case OP_BRAMINZERO:
1845 next = ecode + 1;
1846 do next += GET(next, 1); while (*next == OP_ALT);
1847 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1848 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1849 ecode++;
1850 break;
1851
1852 case OP_SKIPZERO:
1853 next = ecode+1;
1854 do next += GET(next,1); while (*next == OP_ALT);
1855 ecode = next + 1 + LINK_SIZE;
1856 break;
1857
1858 /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1859 here; just jump to the group, with allow_zero set TRUE. */
1860
1861 case OP_BRAPOSZERO:
1862 op = *(++ecode);
1863 allow_zero = TRUE;
1864 if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1865 goto POSSESSIVE_NON_CAPTURE;
1866
1867 /* End of a group, repeated or non-repeating. */
1868
1869 case OP_KET:
1870 case OP_KETRMIN:
1871 case OP_KETRMAX:
1872 case OP_KETRPOS:
1873 prev = ecode - GET(ecode, 1);
1874
1875 /* If this was a group that remembered the subject start, in order to break
1876 infinite repeats of empty string matches, retrieve the subject start from
1877 the chain. Otherwise, set it NULL. */
1878
1879 if (*prev >= OP_SBRA || *prev == OP_ONCE)
1880 {
1881 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1882 eptrb = eptrb->epb_prev; /* Backup to previous group */
1883 }
1884 else saved_eptr = NULL;
1885
1886 /* If we are at the end of an assertion group or a non-capturing atomic
1887 group, stop matching and return MATCH_MATCH, but record the current high
1888 water mark for use by positive assertions. We also need to record the match
1889 start in case it was changed by \K. */
1890
1891 if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1892 *prev == OP_ONCE_NC)
1893 {
1894 md->end_match_ptr = eptr; /* For ONCE_NC */
1895 md->end_offset_top = offset_top;
1896 md->start_match_ptr = mstart;
1897 RRETURN(MATCH_MATCH); /* Sets md->mark */
1898 }
1899
1900 /* For capturing groups we have to check the group number back at the start
1901 and if necessary complete handling an extraction by setting the offsets and
1902 bumping the high water mark. Whole-pattern recursion is coded as a recurse
1903 into group 0, so it won't be picked up here. Instead, we catch it when the
1904 OP_END is reached. Other recursion is handled here. We just have to record
1905 the current subject position and start match pointer and give a MATCH
1906 return. */
1907
1908 if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1909 *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1910 {
1911 number = GET2(prev, 1+LINK_SIZE);
1912 offset = number << 1;
1913
1914 #ifdef PCRE_DEBUG
1915 printf("end bracket %d", number);
1916 printf("\n");
1917 #endif
1918
1919 /* Handle a recursively called group. */
1920
1921 if (md->recursive != NULL && md->recursive->group_num == number)
1922 {
1923 md->end_match_ptr = eptr;
1924 md->start_match_ptr = mstart;
1925 RRETURN(MATCH_MATCH);
1926 }
1927
1928 /* Deal with capturing */
1929
1930 md->capture_last = number;
1931 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1932 {
1933 /* If offset is greater than offset_top, it means that we are
1934 "skipping" a capturing group, and that group's offsets must be marked
1935 unset. In earlier versions of PCRE, all the offsets were unset at the
1936 start of matching, but this doesn't work because atomic groups and
1937 assertions can cause a value to be set that should later be unset.
1938 Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1939 part of the atomic group, but this is not on the final matching path,
1940 so must be unset when 2 is set. (If there is no group 2, there is no
1941 problem, because offset_top will then be 2, indicating no capture.) */
1942
1943 if (offset > offset_top)
1944 {
1945 register int *iptr = md->offset_vector + offset_top;
1946 register int *iend = md->offset_vector + offset;
1947 while (iptr < iend) *iptr++ = -1;
1948 }
1949
1950 /* Now make the extraction */
1951
1952 md->offset_vector[offset] =
1953 md->offset_vector[md->offset_end - number];
1954 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1955 if (offset_top <= offset) offset_top = offset + 2;
1956 }
1957 }
1958
1959 /* For an ordinary non-repeating ket, just continue at this level. This
1960 also happens for a repeating ket if no characters were matched in the
1961 group. This is the forcible breaking of infinite loops as implemented in
1962 Perl 5.005. For a non-repeating atomic group that includes captures,
1963 establish a backup point by processing the rest of the pattern at a lower
1964 level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1965 original OP_ONCE level, thereby bypassing intermediate backup points, but
1966 resetting any captures that happened along the way. */
1967
1968 if (*ecode == OP_KET || eptr == saved_eptr)
1969 {
1970 if (*prev == OP_ONCE)
1971 {
1972 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1973 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1974 md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
1975 RRETURN(MATCH_ONCE);
1976 }
1977 ecode += 1 + LINK_SIZE; /* Carry on at this level */
1978 break;
1979 }
1980
1981 /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1982 and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1983 at a time from the outer level, thus saving stack. */
1984
1985 if (*ecode == OP_KETRPOS)
1986 {
1987 md->end_match_ptr = eptr;
1988 md->end_offset_top = offset_top;
1989 RRETURN(MATCH_KETRPOS);
1990 }
1991
1992 /* The normal repeating kets try the rest of the pattern or restart from
1993 the preceding bracket, in the appropriate order. In the second case, we can
1994 use tail recursion to avoid using another stack frame, unless we have an
1995 atomic group or an unlimited repeat of a group that can match an empty
1996 string. */
1997
1998 if (*ecode == OP_KETRMIN)
1999 {
2000 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2001 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2002 if (*prev == OP_ONCE)
2003 {
2004 RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2005 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2006 md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
2007 RRETURN(MATCH_ONCE);
2008 }
2009 if (*prev >= OP_SBRA) /* Could match an empty string */
2010 {
2011 RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2012 RRETURN(rrc);
2013 }
2014 ecode = prev;
2015 goto TAIL_RECURSE;
2016 }
2017 else /* OP_KETRMAX */
2018 {
2019 RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2020 if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2021 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2022 if (*prev == OP_ONCE)
2023 {
2024 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2025 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2026 md->once_target = prev;
2027 RRETURN(MATCH_ONCE);
2028 }
2029 ecode += 1 + LINK_SIZE;
2030 goto TAIL_RECURSE;
2031 }
2032 /* Control never gets here */
2033
2034 /* Not multiline mode: start of subject assertion, unless notbol. */
2035
2036 case OP_CIRC:
2037 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2038
2039 /* Start of subject assertion */
2040
2041 case OP_SOD:
2042 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2043 ecode++;
2044 break;
2045
2046 /* Multiline mode: start of subject unless notbol, or after any newline. */
2047
2048 case OP_CIRCM:
2049 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2050 if (eptr != md->start_subject &&
2051 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2052 RRETURN(MATCH_NOMATCH);
2053 ecode++;
2054 break;
2055
2056 /* Start of match assertion */
2057
2058 case OP_SOM:
2059 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2060 ecode++;
2061 break;
2062
2063 /* Reset the start of match point */
2064
2065 case OP_SET_SOM:
2066 mstart = eptr;
2067 ecode++;
2068 break;
2069
2070 /* Multiline mode: assert before any newline, or before end of subject
2071 unless noteol is set. */
2072
2073 case OP_DOLLM:
2074 if (eptr < md->end_subject)
2075 {
2076 if (!IS_NEWLINE(eptr))
2077 {
2078 if (md->partial != 0 &&
2079 eptr + 1 >= md->end_subject &&
2080 NLBLOCK->nltype == NLTYPE_FIXED &&
2081 NLBLOCK->nllen == 2 &&
2082 *eptr == NLBLOCK->nl[0])
2083 {
2084 md->hitend = TRUE;
2085 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2086 }
2087 RRETURN(MATCH_NOMATCH);
2088 }
2089 }
2090 else
2091 {
2092 if (md->noteol) RRETURN(MATCH_NOMATCH);
2093 SCHECK_PARTIAL();
2094 }
2095 ecode++;
2096 break;
2097
2098 /* Not multiline mode: assert before a terminating newline or before end of
2099 subject unless noteol is set. */
2100
2101 case OP_DOLL:
2102 if (md->noteol) RRETURN(MATCH_NOMATCH);
2103 if (!md->endonly) goto ASSERT_NL_OR_EOS;
2104
2105 /* ... else fall through for endonly */
2106
2107 /* End of subject assertion (\z) */
2108
2109 case OP_EOD:
2110 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2111 SCHECK_PARTIAL();
2112 ecode++;
2113 break;
2114
2115 /* End of subject or ending \n assertion (\Z) */
2116
2117 case OP_EODN:
2118 ASSERT_NL_OR_EOS:
2119 if (eptr < md->end_subject &&
2120 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2121 {
2122 if (md->partial != 0 &&
2123 eptr + 1 >= md->end_subject &&
2124 NLBLOCK->nltype == NLTYPE_FIXED &&
2125 NLBLOCK->nllen == 2 &&
2126 *eptr == NLBLOCK->nl[0])
2127 {
2128 md->hitend = TRUE;
2129 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2130 }
2131 RRETURN(MATCH_NOMATCH);
2132 }
2133
2134 /* Either at end of string or \n before end. */
2135
2136 SCHECK_PARTIAL();
2137 ecode++;
2138 break;
2139
2140 /* Word boundary assertions */
2141
2142 case OP_NOT_WORD_BOUNDARY:
2143 case OP_WORD_BOUNDARY:
2144 {
2145
2146 /* Find out if the previous and current characters are "word" characters.
2147 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2148 be "non-word" characters. Remember the earliest consulted character for
2149 partial matching. */
2150
2151 #ifdef SUPPORT_UTF
2152 if (utf)
2153 {
2154 /* Get status of previous character */
2155
2156 if (eptr == md->start_subject) prev_is_word = FALSE; else
2157 {
2158 PCRE_PUCHAR lastptr = eptr - 1;
2159 BACKCHAR(lastptr);
2160 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2161 GETCHAR(c, lastptr);
2162 #ifdef SUPPORT_UCP
2163 if (md->use_ucp)
2164 {
2165 if (c == '_') prev_is_word = TRUE; else
2166 {
2167 int cat = UCD_CATEGORY(c);
2168 prev_is_word = (cat == ucp_L || cat == ucp_N);
2169 }
2170 }
2171 else
2172 #endif
2173 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2174 }
2175
2176 /* Get status of next character */
2177
2178 if (eptr >= md->end_subject)
2179 {
2180 SCHECK_PARTIAL();
2181 cur_is_word = FALSE;
2182 }
2183 else
2184 {
2185 GETCHAR(c, eptr);
2186 #ifdef SUPPORT_UCP
2187 if (md->use_ucp)
2188 {
2189 if (c == '_') cur_is_word = TRUE; else
2190 {
2191 int cat = UCD_CATEGORY(c);
2192 cur_is_word = (cat == ucp_L || cat == ucp_N);
2193 }
2194 }
2195 else
2196 #endif
2197 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2198 }
2199 }
2200 else
2201 #endif
2202
2203 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2204 consistency with the behaviour of \w we do use it in this case. */
2205
2206 {
2207 /* Get status of previous character */
2208
2209 if (eptr == md->start_subject) prev_is_word = FALSE; else
2210 {
2211 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2212 #ifdef SUPPORT_UCP
2213 if (md->use_ucp)
2214 {
2215 c = eptr[-1];
2216 if (c == '_') prev_is_word = TRUE; else
2217 {
2218 int cat = UCD_CATEGORY(c);
2219 prev_is_word = (cat == ucp_L || cat == ucp_N);
2220 }
2221 }
2222 else
2223 #endif
2224 prev_is_word = MAX_255(eptr[-1])
2225 && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2226 }
2227
2228 /* Get status of next character */
2229
2230 if (eptr >= md->end_subject)
2231 {
2232 SCHECK_PARTIAL();
2233 cur_is_word = FALSE;
2234 }
2235 else
2236 #ifdef SUPPORT_UCP
2237 if (md->use_ucp)
2238 {
2239 c = *eptr;
2240 if (c == '_') cur_is_word = TRUE; else
2241 {
2242 int cat = UCD_CATEGORY(c);
2243 cur_is_word = (cat == ucp_L || cat == ucp_N);
2244 }
2245 }
2246 else
2247 #endif
2248 cur_is_word = MAX_255(*eptr)
2249 && ((md->ctypes[*eptr] & ctype_word) != 0);
2250 }
2251
2252 /* Now see if the situation is what we want */
2253
2254 if ((*ecode++ == OP_WORD_BOUNDARY)?
2255 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2256 RRETURN(MATCH_NOMATCH);
2257 }
2258 break;
2259
2260 /* Match any single character type except newline; have to take care with
2261 CRLF newlines and partial matching. */
2262
2263 case OP_ANY:
2264 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2265 if (md->partial != 0 &&
2266 eptr + 1 >= md->end_subject &&
2267 NLBLOCK->nltype == NLTYPE_FIXED &&
2268 NLBLOCK->nllen == 2 &&
2269 *eptr == NLBLOCK->nl[0])
2270 {
2271 md->hitend = TRUE;
2272 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2273 }
2274
2275 /* Fall through */
2276
2277 /* Match any single character whatsoever. */
2278
2279 case OP_ALLANY:
2280 if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2281 { /* not be updated before SCHECK_PARTIAL. */
2282 SCHECK_PARTIAL();
2283 RRETURN(MATCH_NOMATCH);
2284 }
2285 eptr++;
2286 #ifdef SUPPORT_UTF
2287 if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2288 #endif
2289 ecode++;
2290 break;
2291
2292 /* Match a single byte, even in UTF-8 mode. This opcode really does match
2293 any byte, even newline, independent of the setting of PCRE_DOTALL. */
2294
2295 case OP_ANYBYTE:
2296 if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2297 { /* not be updated before SCHECK_PARTIAL. */
2298 SCHECK_PARTIAL();
2299 RRETURN(MATCH_NOMATCH);
2300 }
2301 eptr++;
2302 ecode++;
2303 break;
2304
2305 case OP_NOT_DIGIT:
2306 if (eptr >= md->end_subject)
2307 {
2308 SCHECK_PARTIAL();
2309 RRETURN(MATCH_NOMATCH);
2310 }
2311 GETCHARINCTEST(c, eptr);
2312 if (
2313 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2314 c < 256 &&
2315 #endif
2316 (md->ctypes[c] & ctype_digit) != 0
2317 )
2318 RRETURN(MATCH_NOMATCH);
2319 ecode++;
2320 break;
2321
2322 case OP_DIGIT:
2323 if (eptr >= md->end_subject)
2324 {
2325 SCHECK_PARTIAL();
2326 RRETURN(MATCH_NOMATCH);
2327 }
2328 GETCHARINCTEST(c, eptr);
2329 if (
2330 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2331 c > 255 ||
2332 #endif
2333 (md->ctypes[c] & ctype_digit) == 0
2334 )
2335 RRETURN(MATCH_NOMATCH);
2336 ecode++;
2337 break;
2338
2339 case OP_NOT_WHITESPACE:
2340 if (eptr >= md->end_subject)
2341 {
2342 SCHECK_PARTIAL();
2343 RRETURN(MATCH_NOMATCH);
2344 }
2345 GETCHARINCTEST(c, eptr);
2346 if (
2347 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2348 c < 256 &&
2349 #endif
2350 (md->ctypes[c] & ctype_space) != 0
2351 )
2352 RRETURN(MATCH_NOMATCH);
2353 ecode++;
2354 break;
2355
2356 case OP_WHITESPACE:
2357 if (eptr >= md->end_subject)
2358 {
2359 SCHECK_PARTIAL();
2360 RRETURN(MATCH_NOMATCH);
2361 }
2362 GETCHARINCTEST(c, eptr);
2363 if (
2364 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2365 c > 255 ||
2366 #endif
2367 (md->ctypes[c] & ctype_space) == 0
2368 )
2369 RRETURN(MATCH_NOMATCH);
2370 ecode++;
2371 break;
2372
2373 case OP_NOT_WORDCHAR:
2374 if (eptr >= md->end_subject)
2375 {
2376 SCHECK_PARTIAL();
2377 RRETURN(MATCH_NOMATCH);
2378 }
2379 GETCHARINCTEST(c, eptr);
2380 if (
2381 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2382 c < 256 &&
2383 #endif
2384 (md->ctypes[c] & ctype_word) != 0
2385 )
2386 RRETURN(MATCH_NOMATCH);
2387 ecode++;
2388 break;
2389
2390 case OP_WORDCHAR:
2391 if (eptr >= md->end_subject)
2392 {
2393 SCHECK_PARTIAL();
2394 RRETURN(MATCH_NOMATCH);
2395 }
2396 GETCHARINCTEST(c, eptr);
2397 if (
2398 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2399 c > 255 ||
2400 #endif
2401 (md->ctypes[c] & ctype_word) == 0
2402 )
2403 RRETURN(MATCH_NOMATCH);
2404 ecode++;
2405 break;
2406
2407 case OP_ANYNL:
2408 if (eptr >= md->end_subject)
2409 {
2410 SCHECK_PARTIAL();
2411 RRETURN(MATCH_NOMATCH);
2412 }
2413 GETCHARINCTEST(c, eptr);
2414 switch(c)
2415 {
2416 default: RRETURN(MATCH_NOMATCH);
2417
2418 case 0x000d:
2419 if (eptr >= md->end_subject)
2420 {
2421 SCHECK_PARTIAL();
2422 }
2423 else if (*eptr == 0x0a) eptr++;
2424 break;
2425
2426 case 0x000a:
2427 break;
2428
2429 case 0x000b:
2430 case 0x000c:
2431 case 0x0085:
2432 case 0x2028:
2433 case 0x2029:
2434 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2435 break;
2436 }
2437 ecode++;
2438 break;
2439
2440 case OP_NOT_HSPACE:
2441 if (eptr >= md->end_subject)
2442 {
2443 SCHECK_PARTIAL();
2444 RRETURN(MATCH_NOMATCH);
2445 }
2446 GETCHARINCTEST(c, eptr);
2447 switch(c)
2448 {
2449 default: break;
2450 case 0x09: /* HT */
2451 case 0x20: /* SPACE */
2452 case 0xa0: /* NBSP */
2453 case 0x1680: /* OGHAM SPACE MARK */
2454 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2455 case 0x2000: /* EN QUAD */
2456 case 0x2001: /* EM QUAD */
2457 case 0x2002: /* EN SPACE */
2458 case 0x2003: /* EM SPACE */
2459 case 0x2004: /* THREE-PER-EM SPACE */
2460 case 0x2005: /* FOUR-PER-EM SPACE */
2461 case 0x2006: /* SIX-PER-EM SPACE */
2462 case 0x2007: /* FIGURE SPACE */
2463 case 0x2008: /* PUNCTUATION SPACE */
2464 case 0x2009: /* THIN SPACE */
2465 case 0x200A: /* HAIR SPACE */
2466 case 0x202f: /* NARROW NO-BREAK SPACE */
2467 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2468 case 0x3000: /* IDEOGRAPHIC SPACE */
2469 RRETURN(MATCH_NOMATCH);
2470 }
2471 ecode++;
2472 break;
2473
2474 case OP_HSPACE:
2475 if (eptr >= md->end_subject)
2476 {
2477 SCHECK_PARTIAL();
2478 RRETURN(MATCH_NOMATCH);
2479 }
2480 GETCHARINCTEST(c, eptr);
2481 switch(c)
2482 {
2483 default: RRETURN(MATCH_NOMATCH);
2484 case 0x09: /* HT */
2485 case 0x20: /* SPACE */
2486 case 0xa0: /* NBSP */
2487 case 0x1680: /* OGHAM SPACE MARK */
2488 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2489 case 0x2000: /* EN QUAD */
2490 case 0x2001: /* EM QUAD */
2491 case 0x2002: /* EN SPACE */
2492 case 0x2003: /* EM SPACE */
2493 case 0x2004: /* THREE-PER-EM SPACE */
2494 case 0x2005: /* FOUR-PER-EM SPACE */
2495 case 0x2006: /* SIX-PER-EM SPACE */
2496 case 0x2007: /* FIGURE SPACE */
2497 case 0x2008: /* PUNCTUATION SPACE */
2498 case 0x2009: /* THIN SPACE */
2499 case 0x200A: /* HAIR SPACE */
2500 case 0x202f: /* NARROW NO-BREAK SPACE */
2501 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2502 case 0x3000: /* IDEOGRAPHIC SPACE */
2503 break;
2504 }
2505 ecode++;
2506 break;
2507
2508 case OP_NOT_VSPACE:
2509 if (eptr >= md->end_subject)
2510 {
2511 SCHECK_PARTIAL();
2512 RRETURN(MATCH_NOMATCH);
2513 }
2514 GETCHARINCTEST(c, eptr);
2515 switch(c)
2516 {
2517 default: break;
2518 case 0x0a: /* LF */
2519 case 0x0b: /* VT */
2520 case 0x0c: /* FF */
2521 case 0x0d: /* CR */
2522 case 0x85: /* NEL */
2523 case 0x2028: /* LINE SEPARATOR */
2524 case 0x2029: /* PARAGRAPH SEPARATOR */
2525 RRETURN(MATCH_NOMATCH);
2526 }
2527 ecode++;
2528 break;
2529
2530 case OP_VSPACE:
2531 if (eptr >= md->end_subject)
2532 {
2533 SCHECK_PARTIAL();
2534 RRETURN(MATCH_NOMATCH);
2535 }
2536 GETCHARINCTEST(c, eptr);
2537 switch(c)
2538 {
2539 default: RRETURN(MATCH_NOMATCH);
2540 case 0x0a: /* LF */
2541 case 0x0b: /* VT */
2542 case 0x0c: /* FF */
2543 case 0x0d: /* CR */
2544 case 0x85: /* NEL */
2545 case 0x2028: /* LINE SEPARATOR */
2546 case 0x2029: /* PARAGRAPH SEPARATOR */
2547 break;
2548 }
2549 ecode++;
2550 break;
2551
2552 #ifdef SUPPORT_UCP
2553 /* Check the next character by Unicode property. We will get here only
2554 if the support is in the binary; otherwise a compile-time error occurs. */
2555
2556 case OP_PROP:
2557 case OP_NOTPROP:
2558 if (eptr >= md->end_subject)
2559 {
2560 SCHECK_PARTIAL();
2561 RRETURN(MATCH_NOMATCH);
2562 }
2563 GETCHARINCTEST(c, eptr);
2564 {
2565 const ucd_record *prop = GET_UCD(c);
2566
2567 switch(ecode[1])
2568 {
2569 case PT_ANY:
2570 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2571 break;
2572
2573 case PT_LAMP:
2574 if ((prop->chartype == ucp_Lu ||
2575 prop->chartype == ucp_Ll ||
2576 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2577 RRETURN(MATCH_NOMATCH);
2578 break;
2579
2580 case PT_GC:
2581 if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2582 RRETURN(MATCH_NOMATCH);
2583 break;
2584
2585 case PT_PC:
2586 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2587 RRETURN(MATCH_NOMATCH);
2588 break;
2589
2590 case PT_SC:
2591 if ((ecode[2] != prop->script) == (op == OP_PROP))
2592 RRETURN(MATCH_NOMATCH);
2593 break;
2594
2595 /* These are specials */
2596
2597 case PT_ALNUM:
2598 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2599 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2600 RRETURN(MATCH_NOMATCH);
2601 break;
2602
2603 case PT_SPACE: /* Perl space */
2604 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2605 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2606 == (op == OP_NOTPROP))
2607 RRETURN(MATCH_NOMATCH);
2608 break;
2609
2610 case PT_PXSPACE: /* POSIX space */
2611 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2612 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2613 c == CHAR_FF || c == CHAR_CR)
2614 == (op == OP_NOTPROP))
2615 RRETURN(MATCH_NOMATCH);
2616 break;
2617
2618 case PT_WORD:
2619 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2620 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2621 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2622 RRETURN(MATCH_NOMATCH);
2623 break;
2624
2625 /* This should never occur */
2626
2627 default:
2628 RRETURN(PCRE_ERROR_INTERNAL);
2629 }
2630
2631 ecode += 3;
2632 }
2633 break;
2634
2635 /* Match an extended Unicode sequence. We will get here only if the support
2636 is in the binary; otherwise a compile-time error occurs. */
2637
2638 case OP_EXTUNI:
2639 if (eptr >= md->end_subject)
2640 {
2641 SCHECK_PARTIAL();
2642 RRETURN(MATCH_NOMATCH);
2643 }
2644 GETCHARINCTEST(c, eptr);
2645 if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2646 while (eptr < md->end_subject)
2647 {
2648 int len = 1;
2649 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2650 if (UCD_CATEGORY(c) != ucp_M) break;
2651 eptr += len;
2652 }
2653 CHECK_PARTIAL();
2654 ecode++;
2655 break;
2656 #endif
2657
2658
2659 /* Match a back reference, possibly repeatedly. Look past the end of the
2660 item to see if there is repeat information following. The code is similar
2661 to that for character classes, but repeated for efficiency. Then obey
2662 similar code to character type repeats - written out again for speed.
2663 However, if the referenced string is the empty string, always treat
2664 it as matched, any number of times (otherwise there could be infinite
2665 loops). */
2666
2667 case OP_REF:
2668 case OP_REFI:
2669 caseless = op == OP_REFI;
2670 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2671 ecode += 1 + IMM2_SIZE;
2672
2673 /* If the reference is unset, there are two possibilities:
2674
2675 (a) In the default, Perl-compatible state, set the length negative;
2676 this ensures that every attempt at a match fails. We can't just fail
2677 here, because of the possibility of quantifiers with zero minima.
2678
2679 (b) If the JavaScript compatibility flag is set, set the length to zero
2680 so that the back reference matches an empty string.
2681
2682 Otherwise, set the length to the length of what was matched by the
2683 referenced subpattern. */
2684
2685 if (offset >= offset_top || md->offset_vector[offset] < 0)
2686 length = (md->jscript_compat)? 0 : -1;
2687 else
2688 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2689
2690 /* Set up for repetition, or handle the non-repeated case */
2691
2692 switch (*ecode)
2693 {
2694 case OP_CRSTAR:
2695 case OP_CRMINSTAR:
2696 case OP_CRPLUS:
2697 case OP_CRMINPLUS:
2698 case OP_CRQUERY:
2699 case OP_CRMINQUERY:
2700 c = *ecode++ - OP_CRSTAR;
2701 minimize = (c & 1) != 0;
2702 min = rep_min[c]; /* Pick up values from tables; */
2703 max = rep_max[c]; /* zero for max => infinity */
2704 if (max == 0) max = INT_MAX;
2705 break;
2706
2707 case OP_CRRANGE:
2708 case OP_CRMINRANGE:
2709 minimize = (*ecode == OP_CRMINRANGE);
2710 min = GET2(ecode, 1);
2711 max = GET2(ecode, 1 + IMM2_SIZE);
2712 if (max == 0) max = INT_MAX;
2713 ecode += 1 + 2 * IMM2_SIZE;
2714 break;
2715
2716 default: /* No repeat follows */
2717 if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2718 {
2719 if (length == -2) eptr = md->end_subject; /* Partial match */
2720 CHECK_PARTIAL();
2721 RRETURN(MATCH_NOMATCH);
2722 }
2723 eptr += length;
2724 continue; /* With the main loop */
2725 }
2726
2727 /* Handle repeated back references. If the length of the reference is
2728 zero, just continue with the main loop. If the length is negative, it
2729 means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2730 zero, we can continue at the same level without recursion. For any other
2731 minimum, carrying on will result in NOMATCH. */
2732
2733 if (length == 0) continue;
2734 if (length < 0 && min == 0) continue;
2735
2736 /* First, ensure the minimum number of matches are present. We get back
2737 the length of the reference string explicitly rather than passing the
2738 address of eptr, so that eptr can be a register variable. */
2739
2740 for (i = 1; i <= min; i++)
2741 {
2742 int slength;
2743 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2744 {
2745 if (slength == -2) eptr = md->end_subject; /* Partial match */
2746 CHECK_PARTIAL();
2747 RRETURN(MATCH_NOMATCH);
2748 }
2749 eptr += slength;
2750 }
2751
2752 /* If min = max, continue at the same level without recursion.
2753 They are not both allowed to be zero. */
2754
2755 if (min == max) continue;
2756
2757 /* If minimizing, keep trying and advancing the pointer */
2758
2759 if (minimize)
2760 {
2761 for (fi = min;; fi++)
2762 {
2763 int slength;
2764 RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2765 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2766 if (fi >= max) RRETURN(MATCH_NOMATCH);
2767 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2768 {
2769 if (slength == -2) eptr = md->end_subject; /* Partial match */
2770 CHECK_PARTIAL();
2771 RRETURN(MATCH_NOMATCH);
2772 }
2773 eptr += slength;
2774 }
2775 /* Control never gets here */
2776 }
2777
2778 /* If maximizing, find the longest string and work backwards */
2779
2780 else
2781 {
2782 pp = eptr;
2783 for (i = min; i < max; i++)
2784 {
2785 int slength;
2786 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2787 {
2788 /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2789 the soft partial matching case. */
2790
2791 if (slength == -2 && md->partial != 0 &&
2792 md->end_subject > md->start_used_ptr)
2793 {
2794 md->hitend = TRUE;
2795 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2796 }
2797 break;
2798 }
2799 eptr += slength;
2800 }
2801
2802 while (eptr >= pp)
2803 {
2804 RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2805 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2806 eptr -= length;
2807 }
2808 RRETURN(MATCH_NOMATCH);
2809 }
2810 /* Control never gets here */
2811
2812 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2813 used when all the characters in the class have values in the range 0-255,
2814 and either the matching is caseful, or the characters are in the range
2815 0-127 when UTF-8 processing is enabled. The only difference between
2816 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2817 encountered.
2818
2819 First, look past the end of the item to see if there is repeat information
2820 following. Then obey similar code to character type repeats - written out
2821 again for speed. */
2822
2823 case OP_NCLASS:
2824 case OP_CLASS:
2825 {
2826 /* The data variable is saved across frames, so the byte map needs to
2827 be stored there. */
2828 #define BYTE_MAP ((pcre_uint8 *)data)
2829 data = ecode + 1; /* Save for matching */
2830 ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2831
2832 switch (*ecode)
2833 {
2834 case OP_CRSTAR:
2835 case OP_CRMINSTAR:
2836 case OP_CRPLUS:
2837 case OP_CRMINPLUS:
2838 case OP_CRQUERY:
2839 case OP_CRMINQUERY:
2840 c = *ecode++ - OP_CRSTAR;
2841 minimize = (c & 1) != 0;
2842 min = rep_min[c]; /* Pick up values from tables; */
2843 max = rep_max[c]; /* zero for max => infinity */
2844 if (max == 0) max = INT_MAX;
2845 break;
2846
2847 case OP_CRRANGE:
2848 case OP_CRMINRANGE:
2849 minimize = (*ecode == OP_CRMINRANGE);
2850 min = GET2(ecode, 1);
2851 max = GET2(ecode, 1 + IMM2_SIZE);
2852 if (max == 0) max = INT_MAX;
2853 ecode += 1 + 2 * IMM2_SIZE;
2854 break;
2855
2856 default: /* No repeat follows */
2857 min = max = 1;
2858 break;
2859 }
2860
2861 /* First, ensure the minimum number of matches are present. */
2862
2863 #ifdef SUPPORT_UTF
2864 if (utf)
2865 {
2866 for (i = 1; i <= min; i++)
2867 {
2868 if (eptr >= md->end_subject)
2869 {
2870 SCHECK_PARTIAL();
2871 RRETURN(MATCH_NOMATCH);
2872 }
2873 GETCHARINC(c, eptr);
2874 if (c > 255)
2875 {
2876 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2877 }
2878 else
2879 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2880 }
2881 }
2882 else
2883 #endif
2884 /* Not UTF mode */
2885 {
2886 for (i = 1; i <= min; i++)
2887 {
2888 if (eptr >= md->end_subject)
2889 {
2890 SCHECK_PARTIAL();
2891 RRETURN(MATCH_NOMATCH);
2892 }
2893 c = *eptr++;
2894 #ifndef COMPILE_PCRE8
2895 if (c > 255)
2896 {
2897 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2898 }
2899 else
2900 #endif
2901 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2902 }
2903 }
2904
2905 /* If max == min we can continue with the main loop without the
2906 need to recurse. */
2907
2908 if (min == max) continue;
2909
2910 /* If minimizing, keep testing the rest of the expression and advancing
2911 the pointer while it matches the class. */
2912
2913 if (minimize)
2914 {
2915 #ifdef SUPPORT_UTF
2916 if (utf)
2917 {
2918 for (fi = min;; fi++)
2919 {
2920 RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2921 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2922 if (fi >= max) RRETURN(MATCH_NOMATCH);
2923 if (eptr >= md->end_subject)
2924 {
2925 SCHECK_PARTIAL();
2926 RRETURN(MATCH_NOMATCH);
2927 }
2928 GETCHARINC(c, eptr);
2929 if (c > 255)
2930 {
2931 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2932 }
2933 else
2934 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2935 }
2936 }
2937 else
2938 #endif
2939 /* Not UTF mode */
2940 {
2941 for (fi = min;; fi++)
2942 {
2943 RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2944 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2945 if (fi >= max) RRETURN(MATCH_NOMATCH);
2946 if (eptr >= md->end_subject)
2947 {
2948 SCHECK_PARTIAL();
2949 RRETURN(MATCH_NOMATCH);
2950 }
2951 c = *eptr++;
2952 #ifndef COMPILE_PCRE8
2953 if (c > 255)
2954 {
2955 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2956 }
2957 else
2958 #endif
2959 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2960 }
2961 }
2962 /* Control never gets here */
2963 }
2964
2965 /* If maximizing, find the longest possible run, then work backwards. */
2966
2967 else
2968 {
2969 pp = eptr;
2970
2971 #ifdef SUPPORT_UTF
2972 if (utf)
2973 {
2974 for (i = min; i < max; i++)
2975 {
2976 int len = 1;
2977 if (eptr >= md->end_subject)
2978 {
2979 SCHECK_PARTIAL();
2980 break;
2981 }
2982 GETCHARLEN(c, eptr, len);
2983 if (c > 255)
2984 {
2985 if (op == OP_CLASS) break;
2986 }
2987 else
2988 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2989 eptr += len;
2990 }
2991 for (;;)
2992 {
2993 RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2994 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2995 if (eptr-- == pp) break; /* Stop if tried at original pos */
2996 BACKCHAR(eptr);
2997 }
2998 }
2999 else
3000 #endif
3001 /* Not UTF mode */
3002 {
3003 for (i = min; i < max; i++)
3004 {
3005 if (eptr >= md->end_subject)
3006 {
3007 SCHECK_PARTIAL();
3008 break;
3009 }
3010 c = *eptr;
3011 #ifndef COMPILE_PCRE8
3012 if (c > 255)
3013 {
3014 if (op == OP_CLASS) break;
3015 }
3016 else
3017 #endif
3018 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3019 eptr++;
3020 }
3021 while (eptr >= pp)
3022 {
3023 RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3024 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3025 eptr--;
3026 }
3027 }
3028
3029 RRETURN(MATCH_NOMATCH);
3030 }
3031 #undef BYTE_MAP
3032 }
3033 /* Control never gets here */
3034
3035
3036 /* Match an extended character class. This opcode is encountered only
3037 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3038 mode, because Unicode properties are supported in non-UTF-8 mode. */
3039
3040 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3041 case OP_XCLASS:
3042 {
3043 data = ecode + 1 + LINK_SIZE; /* Save for matching */
3044 ecode += GET(ecode, 1); /* Advance past the item */
3045
3046 switch (*ecode)
3047 {
3048 case OP_CRSTAR:
3049 case OP_CRMINSTAR:
3050 case OP_CRPLUS:
3051 case OP_CRMINPLUS:
3052 case OP_CRQUERY:
3053 case OP_CRMINQUERY:
3054 c = *ecode++ - OP_CRSTAR;
3055 minimize = (c & 1) != 0;
3056 min = rep_min[c]; /* Pick up values from tables; */
3057 max = rep_max[c]; /* zero for max => infinity */
3058 if (max == 0) max = INT_MAX;
3059 break;
3060
3061 case OP_CRRANGE:
3062 case OP_CRMINRANGE:
3063 minimize = (*ecode == OP_CRMINRANGE);
3064 min = GET2(ecode, 1);
3065 max = GET2(ecode, 1 + IMM2_SIZE);
3066 if (max == 0) max = INT_MAX;
3067 ecode += 1 + 2 * IMM2_SIZE;
3068 break;
3069
3070 default: /* No repeat follows */
3071 min = max = 1;
3072 break;
3073 }
3074
3075 /* First, ensure the minimum number of matches are present. */
3076
3077 for (i = 1; i <= min; i++)
3078 {
3079 if (eptr >= md->end_subject)
3080 {
3081 SCHECK_PARTIAL();
3082 RRETURN(MATCH_NOMATCH);
3083 }
3084 GETCHARINCTEST(c, eptr);
3085 if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3086 }
3087
3088 /* If max == min we can continue with the main loop without the
3089 need to recurse. */
3090
3091 if (min == max) continue;
3092
3093 /* If minimizing, keep testing the rest of the expression and advancing
3094 the pointer while it matches the class. */
3095
3096 if (minimize)
3097 {
3098 for (fi = min;; fi++)
3099 {
3100 RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3101 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3102 if (fi >= max) RRETURN(MATCH_NOMATCH);
3103 if (eptr >= md->end_subject)
3104 {
3105 SCHECK_PARTIAL();
3106 RRETURN(MATCH_NOMATCH);
3107 }
3108 GETCHARINCTEST(c, eptr);
3109 if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3110 }
3111 /* Control never gets here */
3112 }
3113
3114 /* If maximizing, find the longest possible run, then work backwards. */
3115
3116 else
3117 {
3118 pp = eptr;
3119 for (i = min; i < max; i++)
3120 {
3121 int len = 1;
3122 if (eptr >= md->end_subject)
3123 {
3124 SCHECK_PARTIAL();
3125 break;
3126 }
3127 #ifdef SUPPORT_UTF
3128 GETCHARLENTEST(c, eptr, len);
3129 #else
3130 c = *eptr;
3131 #endif
3132 if (!PRIV(xclass)(c, data, utf)) break;
3133 eptr += len;
3134 }
3135 for(;;)
3136 {
3137 RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3138 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3139 if (eptr-- == pp) break; /* Stop if tried at original pos */
3140 #ifdef SUPPORT_UTF
3141 if (utf) BACKCHAR(eptr);
3142 #endif
3143 }
3144 RRETURN(MATCH_NOMATCH);
3145 }
3146
3147 /* Control never gets here */
3148 }
3149 #endif /* End of XCLASS */
3150
3151 /* Match a single character, casefully */
3152
3153 case OP_CHAR:
3154 #ifdef SUPPORT_UTF
3155 if (utf)
3156 {
3157 length = 1;
3158 ecode++;
3159 GETCHARLEN(fc, ecode, length);
3160 if (length > md->end_subject - eptr)
3161 {
3162 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
3163 RRETURN(MATCH_NOMATCH);
3164 }
3165 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3166 }
3167 else
3168 #endif
3169 /* Not UTF mode */
3170 {
3171 if (md->end_subject - eptr < 1)
3172 {
3173 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
3174 RRETURN(MATCH_NOMATCH);
3175 }
3176 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3177 ecode += 2;
3178 }
3179 break;
3180
3181 /* Match a single character, caselessly. If we are at the end of the
3182 subject, give up immediately. */
3183
3184 case OP_CHARI:
3185 if (eptr >= md->end_subject)
3186 {
3187 SCHECK_PARTIAL();
3188 RRETURN(MATCH_NOMATCH);
3189 }
3190
3191 #ifdef SUPPORT_UTF
3192 if (utf)
3193 {
3194 length = 1;
3195 ecode++;
3196 GETCHARLEN(fc, ecode, length);
3197
3198 /* If the pattern character's value is < 128, we have only one byte, and
3199 we know that its other case must also be one byte long, so we can use the
3200 fast lookup table. We know that there is at least one byte left in the
3201 subject. */
3202
3203 if (fc < 128)
3204 {
3205 if (md->lcc[fc]
3206 != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3207 ecode++;
3208 eptr++;
3209 }
3210
3211 /* Otherwise we must pick up the subject character. Note that we cannot
3212 use the value of "length" to check for sufficient bytes left, because the
3213 other case of the character may have more or fewer bytes. */
3214
3215 else
3216 {
3217 unsigned int dc;
3218 GETCHARINC(dc, eptr);
3219 ecode += length;
3220
3221 /* If we have Unicode property support, we can use it to test the other
3222 case of the character, if there is one. */
3223
3224 if (fc != dc)
3225 {
3226 #ifdef SUPPORT_UCP
3227 if (dc != UCD_OTHERCASE(fc))
3228 #endif
3229 RRETURN(MATCH_NOMATCH);
3230 }
3231 }
3232 }
3233 else
3234 #endif /* SUPPORT_UTF */
3235
3236 /* Not UTF mode */
3237 {
3238 if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3239 != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3240 eptr++;
3241 ecode += 2;
3242 }
3243 break;
3244
3245 /* Match a single character repeatedly. */
3246
3247 case OP_EXACT:
3248 case OP_EXACTI:
3249 min = max = GET2(ecode, 1);
3250 ecode += 1 + IMM2_SIZE;
3251 goto REPEATCHAR;
3252
3253 case OP_POSUPTO:
3254 case OP_POSUPTOI:
3255 possessive = TRUE;
3256 /* Fall through */
3257
3258 case OP_UPTO:
3259 case OP_UPTOI:
3260 case OP_MINUPTO:
3261 case OP_MINUPTOI:
3262 min = 0;
3263 max = GET2(ecode, 1);
3264 minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3265 ecode += 1 + IMM2_SIZE;
3266 goto REPEATCHAR;
3267
3268 case OP_POSSTAR:
3269 case OP_POSSTARI:
3270 possessive = TRUE;
3271 min = 0;
3272 max = INT_MAX;
3273 ecode++;
3274 goto REPEATCHAR;
3275
3276 case OP_POSPLUS:
3277 case OP_POSPLUSI:
3278 possessive = TRUE;
3279 min = 1;
3280 max = INT_MAX;
3281 ecode++;
3282 goto REPEATCHAR;
3283
3284 case OP_POSQUERY:
3285 case OP_POSQUERYI:
3286 possessive = TRUE;
3287 min = 0;
3288 max = 1;
3289 ecode++;
3290 goto REPEATCHAR;
3291
3292 case OP_STAR:
3293 case OP_STARI:
3294 case OP_MINSTAR:
3295 case OP_MINSTARI:
3296 case OP_PLUS:
3297 case OP_PLUSI:
3298 case OP_MINPLUS:
3299 case OP_MINPLUSI:
3300 case OP_QUERY:
3301 case OP_QUERYI:
3302 case OP_MINQUERY:
3303 case OP_MINQUERYI:
3304 c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3305 minimize = (c & 1) != 0;
3306 min = rep_min[c]; /* Pick up values from tables; */
3307 max = rep_max[c]; /* zero for max => infinity */
3308 if (max == 0) max = INT_MAX;
3309
3310 /* Common code for all repeated single-character matches. */
3311
3312 REPEATCHAR:
3313 #ifdef SUPPORT_UTF
3314 if (utf)
3315 {
3316 length = 1;
3317 charptr = ecode;
3318 GETCHARLEN(fc, ecode, length);
3319 ecode += length;
3320
3321 /* Handle multibyte character matching specially here. There is
3322 support for caseless matching if UCP support is present. */
3323
3324 if (length > 1)
3325 {
3326 #ifdef SUPPORT_UCP
3327 unsigned int othercase;
3328 if (op >= OP_STARI && /* Caseless */
3329 (othercase = UCD_OTHERCASE(fc)) != fc)
3330 oclength = PRIV(ord2utf)(othercase, occhars);
3331 else oclength = 0;
3332 #endif /* SUPPORT_UCP */
3333
3334 for (i = 1; i <= min; i++)
3335 {
3336 if (eptr <= md->end_subject - length &&
3337 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3338 #ifdef SUPPORT_UCP
3339 else if (oclength > 0 &&
3340 eptr <= md->end_subject - oclength &&
3341 memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3342 #endif /* SUPPORT_UCP */
3343 else
3344 {
3345 CHECK_PARTIAL();
3346 RRETURN(MATCH_NOMATCH);
3347 }
3348 }
3349
3350 if (min == max) continue;
3351
3352 if (minimize)
3353 {
3354 for (fi = min;; fi++)
3355 {
3356 RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3357 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3358 if (fi >= max) RRETURN(MATCH_NOMATCH);
3359 if (eptr <= md->end_subject - length &&
3360 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3361 #ifdef SUPPORT_UCP
3362 else if (oclength > 0 &&
3363 eptr <= md->end_subject - oclength &&
3364 memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3365 #endif /* SUPPORT_UCP */
3366 else
3367 {
3368 CHECK_PARTIAL();
3369 RRETURN(MATCH_NOMATCH);
3370 }
3371 }
3372 /* Control never gets here */
3373 }
3374
3375 else /* Maximize */
3376 {
3377 pp = eptr;
3378 for (i = min; i < max; i++)
3379 {
3380 if (eptr <= md->end_subject - length &&
3381 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3382 #ifdef SUPPORT_UCP
3383 else if (oclength > 0 &&
3384 eptr <= md->end_subject - oclength &&
3385 memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3386 #endif /* SUPPORT_UCP */
3387 else
3388 {
3389 CHECK_PARTIAL();
3390 break;
3391 }
3392 }
3393
3394 if (possessive) continue;
3395
3396 for(;;)
3397 {
3398 RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3399 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3400 if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3401 #ifdef SUPPORT_UCP
3402 eptr--;
3403 BACKCHAR(eptr);
3404 #else /* without SUPPORT_UCP */
3405 eptr -= length;
3406 #endif /* SUPPORT_UCP */
3407 }
3408 }
3409 /* Control never gets here */
3410 }
3411
3412 /* If the length of a UTF-8 character is 1, we fall through here, and
3413 obey the code as for non-UTF-8 characters below, though in this case the
3414 value of fc will always be < 128. */
3415 }
3416 else
3417 #endif /* SUPPORT_UTF */
3418 /* When not in UTF-8 mode, load a single-byte character. */
3419 fc = *ecode++;
3420
3421 /* The value of fc at this point is always one character, though we may
3422 or may not be in UTF mode. The code is duplicated for the caseless and
3423 caseful cases, for speed, since matching characters is likely to be quite
3424 common. First, ensure the minimum number of matches are present. If min =
3425 max, continue at the same level without recursing. Otherwise, if
3426 minimizing, keep trying the rest of the expression and advancing one
3427 matching character if failing, up to the maximum. Alternatively, if
3428 maximizing, find the maximum number of characters and work backwards. */
3429
3430 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3431 max, (char *)eptr));
3432
3433 if (op >= OP_STARI) /* Caseless */
3434 {
3435 #ifdef COMPILE_PCRE8
3436 /* fc must be < 128 if UTF is enabled. */
3437 foc = md->fcc[fc];
3438 #else
3439 #ifdef SUPPORT_UTF
3440 #ifdef SUPPORT_UCP
3441 if (utf && fc > 127)
3442 foc = UCD_OTHERCASE(fc);
3443 #else
3444 if (utf && fc > 127)
3445 foc = fc;
3446 #endif /* SUPPORT_UCP */
3447 else
3448 #endif /* SUPPORT_UTF */
3449 foc = TABLE_GET(fc, md->fcc, fc);
3450 #endif /* COMPILE_PCRE8 */
3451
3452 for (i = 1; i <= min; i++)
3453 {
3454 if (eptr >= md->end_subject)
3455 {
3456 SCHECK_PARTIAL();
3457 RRETURN(MATCH_NOMATCH);
3458 }
3459 if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3460 eptr++;
3461 }
3462 if (min == max) continue;
3463 if (minimize)
3464 {
3465 for (fi = min;; fi++)
3466 {
3467 RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3468 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3469 if (fi >= max) RRETURN(MATCH_NOMATCH);
3470 if (eptr >= md->end_subject)
3471 {
3472 SCHECK_PARTIAL();
3473 RRETURN(MATCH_NOMATCH);
3474 }
3475 if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3476 eptr++;
3477 }
3478 /* Control never gets here */
3479 }
3480 else /* Maximize */
3481 {
3482 pp = eptr;
3483 for (i = min; i < max; i++)
3484 {
3485 if (eptr >= md->end_subject)
3486 {
3487 SCHECK_PARTIAL();
3488 break;
3489 }
3490 if (fc != *eptr && foc != *eptr) break;
3491 eptr++;
3492 }
3493
3494 if (possessive) continue;
3495
3496 while (eptr >= pp)
3497 {
3498 RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3499 eptr--;
3500 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3501 }
3502 RRETURN(MATCH_NOMATCH);
3503 }
3504 /* Control never gets here */
3505 }
3506
3507 /* Caseful comparisons (includes all multi-byte characters) */
3508
3509 else
3510 {
3511 for (i = 1; i <= min; i++)
3512 {
3513 if (eptr >= md->end_subject)
3514 {
3515 SCHECK_PARTIAL();
3516 RRETURN(MATCH_NOMATCH);
3517 }
3518 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3519 }
3520
3521 if (min == max) continue;
3522
3523 if (minimize)
3524 {
3525 for (fi = min;; fi++)
3526 {
3527 RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3528 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3529 if (fi >= max) RRETURN(MATCH_NOMATCH);
3530 if (eptr >= md->end_subject)
3531 {
3532 SCHECK_PARTIAL();
3533 RRETURN(MATCH_NOMATCH);
3534 }
3535 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3536 }
3537 /* Control never gets here */
3538 }
3539 else /* Maximize */
3540 {
3541 pp = eptr;
3542 for (i = min; i < max; i++)
3543 {
3544 if (eptr >= md->end_subject)
3545 {
3546 SCHECK_PARTIAL();
3547 break;
3548 }
3549 if (fc != *eptr) break;
3550 eptr++;
3551 }
3552 if (possessive) continue;
3553
3554 while (eptr >= pp)
3555 {
3556 RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3557 eptr--;
3558 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3559 }
3560 RRETURN(MATCH_NOMATCH);
3561 }
3562 }
3563 /* Control never gets here */
3564
3565 /* Match a negated single character. In UTF mode both the pattern character
3566 and the subject character can be multibyte. */
3567
3568 case OP_NOT:
3569 case OP_NOTI:
3570 if (eptr >= md->end_subject)
3571 {
3572 SCHECK_PARTIAL();
3573 RRETURN(MATCH_NOMATCH);
3574 }
3575 #ifdef SUPPORT_UTF
3576 if (utf)
3577 {
3578 register unsigned int ch, och;
3579
3580 ecode++;
3581 GETCHARINC(ch, ecode);
3582 GETCHARINC(c, eptr);
3583
3584 if (op == OP_NOT)
3585 {
3586 if (ch == c) RRETURN(MATCH_NOMATCH);
3587 }
3588 else
3589 {
3590 #ifdef SUPPORT_UCP
3591 if (ch > 127)
3592 och = UCD_OTHERCASE(ch);
3593 #else
3594 if (ch > 127)
3595 och = ch;
3596 #endif /* SUPPORT_UCP */
3597 else
3598 och = TABLE_GET(ch, md->fcc, ch);
3599 if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3600 }
3601 }
3602 else
3603 #endif
3604 {
3605 register unsigned int ch = ecode[1];
3606 c = *eptr++;
3607 if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3608 RRETURN(MATCH_NOMATCH);
3609 ecode += 2;
3610 }
3611 break;
3612
3613 /* Match a negated single one-byte character repeatedly. This is almost a
3614 repeat of the code for a repeated single character, but I haven't found a
3615 nice way of commoning these up that doesn't require a test of the
3616 positive/negative option for each character match. Maybe that wouldn't add
3617 very much to the time taken, but character matching *is* what this is all
3618 about... */
3619
3620 case OP_NOTEXACT:
3621 case OP_NOTEXACTI:
3622 min = max = GET2(ecode, 1);
3623 ecode += 1 + IMM2_SIZE;
3624 goto REPEATNOTCHAR;
3625
3626 case OP_NOTUPTO:
3627 case OP_NOTUPTOI:
3628 case OP_NOTMINUPTO:
3629 case OP_NOTMINUPTOI:
3630 min = 0;
3631 max = GET2(ecode, 1);
3632 minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3633 ecode += 1 + IMM2_SIZE;
3634 goto REPEATNOTCHAR;
3635
3636 case OP_NOTPOSSTAR:
3637 case OP_NOTPOSSTARI:
3638 possessive = TRUE;
3639 min = 0;
3640 max = INT_MAX;
3641 ecode++;
3642 goto REPEATNOTCHAR;
3643
3644 case OP_NOTPOSPLUS:
3645 case OP_NOTPOSPLUSI:
3646 possessive = TRUE;
3647 min = 1;
3648 max = INT_MAX;
3649 ecode++;
3650 goto REPEATNOTCHAR;
3651
3652 case OP_NOTPOSQUERY:
3653 case OP_NOTPOSQUERYI:
3654 possessive = TRUE;
3655 min = 0;
3656 max = 1;
3657 ecode++;
3658 goto REPEATNOTCHAR;
3659
3660 case OP_NOTPOSUPTO:
3661 case OP_NOTPOSUPTOI:
3662 possessive = TRUE;
3663 min = 0;
3664 max = GET2(ecode, 1);
3665 ecode += 1 + IMM2_SIZE;
3666 goto REPEATNOTCHAR;
3667
3668 case OP_NOTSTAR:
3669 case OP_NOTSTARI:
3670 case OP_NOTMINSTAR:
3671 case OP_NOTMINSTARI:
3672 case OP_NOTPLUS:
3673 case OP_NOTPLUSI:
3674 case OP_NOTMINPLUS:
3675 case OP_NOTMINPLUSI:
3676 case OP_NOTQUERY:
3677 case OP_NOTQUERYI:
3678 case OP_NOTMINQUERY:
3679 case OP_NOTMINQUERYI:
3680 c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3681 minimize = (c & 1) != 0;
3682 min = rep_min[c]; /* Pick up values from tables; */
3683 max = rep_max[c]; /* zero for max => infinity */
3684 if (max == 0) max = INT_MAX;
3685
3686 /* Common code for all repeated single-byte matches. */
3687
3688 REPEATNOTCHAR:
3689 GETCHARINCTEST(fc, ecode);
3690
3691 /* The code is duplicated for the caseless and caseful cases, for speed,
3692 since matching characters is likely to be quite common. First, ensure the
3693 minimum number of matches are present. If min = max, continue at the same
3694 level without recursing. Otherwise, if minimizing, keep trying the rest of
3695 the expression and advancing one matching character if failing, up to the
3696 maximum. Alternatively, if maximizing, find the maximum number of
3697 characters and work backwards. */
3698
3699 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3700 max, (char *)eptr));
3701
3702 if (op >= OP_NOTSTARI) /* Caseless */
3703 {
3704 #ifdef SUPPORT_UTF
3705 #ifdef SUPPORT_UCP
3706 if (utf && fc > 127)
3707 foc = UCD_OTHERCASE(fc);
3708 #else
3709 if (utf && fc > 127)
3710 foc = fc;
3711 #endif /* SUPPORT_UCP */
3712 else
3713 #endif /* SUPPORT_UTF */
3714 foc = TABLE_GET(fc, md->fcc, fc);
3715
3716 #ifdef SUPPORT_UTF
3717 if (utf)
3718 {
3719 register unsigned int d;
3720 for (i = 1; i <= min; i++)
3721 {
3722 if (eptr >= md->end_subject)
3723 {
3724 SCHECK_PARTIAL();
3725 RRETURN(MATCH_NOMATCH);
3726 }
3727 GETCHARINC(d, eptr);
3728 if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3729 }
3730 }
3731 else
3732 #endif
3733 /* Not UTF mode */
3734 {
3735 for (i = 1; i <= min; i++)
3736 {
3737 if (eptr >= md->end_subject)
3738 {
3739 SCHECK_PARTIAL();
3740 RRETURN(MATCH_NOMATCH);
3741 }
3742 if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3743 eptr++;
3744 }
3745 }
3746
3747 if (min == max) continue;
3748
3749 if (minimize)
3750 {
3751 #ifdef SUPPORT_UTF
3752 if (utf)
3753 {
3754 register unsigned int d;
3755 for (fi = min;; fi++)
3756 {
3757 RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3758 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3759 if (fi >= max) RRETURN(MATCH_NOMATCH);
3760 if (eptr >= md->end_subject)
3761 {
3762 SCHECK_PARTIAL();
3763 RRETURN(MATCH_NOMATCH);
3764 }
3765 GETCHARINC(d, eptr);
3766 if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3767 }
3768 }
3769 else
3770 #endif
3771 /* Not UTF mode */
3772 {
3773 for (fi = min;; fi++)
3774 {
3775 RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3776 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3777 if (fi >= max) RRETURN(MATCH_NOMATCH);
3778 if (eptr >= md->end_subject)
3779 {
3780 SCHECK_PARTIAL();
3781 RRETURN(MATCH_NOMATCH);
3782 }
3783 if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3784 eptr++;
3785 }
3786 }
3787 /* Control never gets here */
3788 }
3789
3790 /* Maximize case */
3791
3792 else
3793 {
3794 pp = eptr;
3795
3796 #ifdef SUPPORT_UTF
3797 if (utf)
3798 {
3799 register unsigned int d;
3800 for (i = min; i < max; i++)
3801 {
3802 int len = 1;
3803 if (eptr >= md->end_subject)
3804 {
3805 SCHECK_PARTIAL();
3806 break;
3807 }
3808 GETCHARLEN(d, eptr, len);
3809 if (fc == d || (unsigned int)foc == d) break;
3810 eptr += len;
3811 }
3812 if (possessive) continue;
3813 for(;;)
3814 {
3815 RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3816 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3817 if (eptr-- == pp) break; /* Stop if tried at original pos */
3818 BACKCHAR(eptr);
3819 }
3820 }
3821 else
3822 #endif
3823 /* Not UTF mode */
3824 {
3825 for (i = min; i < max; i++)
3826 {
3827 if (eptr >= md->end_subject)
3828 {
3829 SCHECK_PARTIAL();
3830 break;
3831 }
3832 if (fc == *eptr || foc == *eptr) break;
3833 eptr++;
3834 }
3835 if (possessive) continue;
3836 while (eptr >= pp)
3837 {
3838 RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3839 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3840 eptr--;
3841 }
3842 }
3843
3844 RRETURN(MATCH_NOMATCH);
3845 }
3846 /* Control never gets here */
3847 }
3848
3849 /* Caseful comparisons */
3850
3851 else
3852 {
3853 #ifdef SUPPORT_UTF
3854 if (utf)
3855 {
3856 register unsigned int d;
3857 for (i = 1; i <= min; i++)
3858 {
3859 if (eptr >= md->end_subject)
3860 {
3861 SCHECK_PARTIAL();
3862 RRETURN(MATCH_NOMATCH);
3863 }
3864 GETCHARINC(d, eptr);
3865 if (fc == d) RRETURN(MATCH_NOMATCH);
3866 }
3867 }
3868 else
3869 #endif
3870 /* Not UTF mode */
3871 {
3872 for (i = 1; i <= min; i++)
3873 {
3874 if (eptr >= md->end_subject)
3875 {
3876 SCHECK_PARTIAL();
3877 RRETURN(MATCH_NOMATCH);
3878 }
3879 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3880 }
3881 }
3882
3883 if (min == max) continue;
3884
3885 if (minimize)
3886 {
3887 #ifdef SUPPORT_UTF
3888 if (utf)
3889 {
3890 register unsigned int d;
3891 for (fi = min;; fi++)
3892 {
3893 RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3894 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3895 if (fi >= max) RRETURN(MATCH_NOMATCH);
3896 if (eptr >= md->end_subject)
3897 {
3898 SCHECK_PARTIAL();
3899 RRETURN(MATCH_NOMATCH);
3900 }
3901 GETCHARINC(d, eptr);
3902 if (fc == d) RRETURN(MATCH_NOMATCH);
3903 }
3904 }
3905 else
3906 #endif
3907 /* Not UTF mode */
3908 {
3909 for (fi = min;; fi++)
3910 {
3911 RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3912 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3913 if (fi >= max) RRETURN(MATCH_NOMATCH);
3914 if (eptr >= md->end_subject)
3915 {
3916 SCHECK_PARTIAL();
3917 RRETURN(MATCH_NOMATCH);
3918 }
3919 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3920 }
3921 }
3922 /* Control never gets here */
3923 }
3924
3925 /* Maximize case */
3926
3927 else
3928 {
3929 pp = eptr;
3930
3931 #ifdef SUPPORT_UTF
3932 if (utf)
3933 {
3934 register unsigned int d;
3935 for (i = min; i < max; i++)
3936 {
3937 int len = 1;
3938 if (eptr >= md->end_subject)
3939 {
3940 SCHECK_PARTIAL();
3941 break;
3942 }
3943 GETCHARLEN(d, eptr, len);
3944 if (fc == d) break;
3945 eptr += len;
3946 }
3947 if (possessive) continue;
3948 for(;;)
3949 {
3950 RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3951 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3952 if (eptr-- == pp) break; /* Stop if tried at original pos */
3953 BACKCHAR(eptr);
3954 }
3955 }
3956 else
3957 #endif
3958 /* Not UTF mode */
3959 {
3960 for (i = min; i < max; i++)
3961 {
3962 if (eptr >= md->end_subject)
3963 {
3964 SCHECK_PARTIAL();
3965 break;
3966 }
3967 if (fc == *eptr) break;
3968 eptr++;
3969 }
3970 if (possessive) continue;
3971 while (eptr >= pp)
3972 {
3973 RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3974 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3975 eptr--;
3976 }
3977 }
3978
3979 RRETURN(MATCH_NOMATCH);
3980 }
3981 }
3982 /* Control never gets here */
3983
3984 /* Match a single character type repeatedly; several different opcodes
3985 share code. This is very similar to the code for single characters, but we
3986 repeat it in the interests of efficiency. */
3987
3988 case OP_TYPEEXACT:
3989 min = max = GET2(ecode, 1);
3990 minimize = TRUE;
3991 ecode += 1 + IMM2_SIZE;
3992 goto REPEATTYPE;
3993
3994 case OP_TYPEUPTO:
3995 case OP_TYPEMINUPTO:
3996 min = 0;
3997 max = GET2(ecode, 1);
3998 minimize = *ecode == OP_TYPEMINUPTO;
3999 ecode += 1 + IMM2_SIZE;
4000 goto REPEATTYPE;
4001
4002 case OP_TYPEPOSSTAR:
4003 possessive = TRUE;
4004 min = 0;
4005 max = INT_MAX;
4006 ecode++;
4007 goto REPEATTYPE;
4008
4009 case OP_TYPEPOSPLUS:
4010 possessive = TRUE;
4011 min = 1;
4012 max = INT_MAX;
4013 ecode++;
4014 goto REPEATTYPE;
4015
4016 case OP_TYPEPOSQUERY:
4017 possessive = TRUE;
4018 min = 0;
4019 max = 1;
4020 ecode++;
4021 goto REPEATTYPE;
4022
4023 case OP_TYPEPOSUPTO:
4024 possessive = TRUE;
4025 min = 0;
4026 max = GET2(ecode, 1);
4027 ecode += 1 + IMM2_SIZE;
4028 goto REPEATTYPE;
4029
4030 case OP_TYPESTAR:
4031 case OP_TYPEMINSTAR:
4032 case OP_TYPEPLUS:
4033 case OP_TYPEMINPLUS:
4034 case OP_TYPEQUERY:
4035 case OP_TYPEMINQUERY:
4036 c = *ecode++ - OP_TYPESTAR;
4037 minimize = (c & 1) != 0;
4038 min = rep_min[c]; /* Pick up values from tables; */
4039 max = rep_max[c]; /* zero for max => infinity */
4040 if (max == 0) max = INT_MAX;
4041
4042 /* Common code for all repeated single character type matches. Note that
4043 in UTF-8 mode, '.' matches a character of any length, but for the other
4044 character types, the valid characters are all one-byte long. */
4045
4046 REPEATTYPE:
4047 ctype = *ecode++; /* Code for the character type */
4048
4049 #ifdef SUPPORT_UCP
4050 if (ctype == OP_PROP || ctype == OP_NOTPROP)
4051 {
4052 prop_fail_result = ctype == OP_NOTPROP;
4053 prop_type = *ecode++;
4054 prop_value = *ecode++;
4055 }
4056 else prop_type = -1;
4057 #endif
4058
4059 /* First, ensure the minimum number of matches are present. Use inline
4060 code for maximizing the speed, and do the type test once at the start
4061 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4062 is tidier. Also separate the UCP code, which can be the same for both UTF-8
4063 and single-bytes. */
4064
4065 if (min > 0)
4066 {
4067 #ifdef SUPPORT_UCP
4068 if (prop_type >= 0)
4069 {
4070 switch(prop_type)
4071 {
4072 case PT_ANY:
4073 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4074 for (i = 1; i <= min; i++)
4075 {
4076 if (eptr >= md->end_subject)
4077 {
4078 SCHECK_PARTIAL();
4079 RRETURN(MATCH_NOMATCH);
4080 }
4081 GETCHARINCTEST(c, eptr);
4082 }
4083 break;
4084
4085 case PT_LAMP:
4086 for (i = 1; i <= min; i++)
4087 {
4088 int chartype;
4089 if (eptr >= md->end_subject)
4090 {
4091 SCHECK_PARTIAL();
4092 RRETURN(MATCH_NOMATCH);
4093 }
4094 GETCHARINCTEST(c, eptr);
4095 chartype = UCD_CHARTYPE(c);
4096 if ((chartype == ucp_Lu ||
4097 chartype == ucp_Ll ||
4098 chartype == ucp_Lt) == prop_fail_result)
4099 RRETURN(MATCH_NOMATCH);
4100 }
4101 break;
4102
4103 case PT_GC:
4104 for (i = 1; i <= min; i++)
4105 {
4106 if (eptr >= md->end_subject)
4107 {
4108 SCHECK_PARTIAL();
4109 RRETURN(MATCH_NOMATCH);
4110 }
4111 GETCHARINCTEST(c, eptr);
4112 if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4113 RRETURN(MATCH_NOMATCH);
4114 }
4115 break;
4116
4117 case PT_PC:
4118 for (i = 1; i <= min; i++)
4119 {
4120 if (eptr >= md->end_subject)
4121 {
4122 SCHECK_PARTIAL();
4123 RRETURN(MATCH_NOMATCH);
4124 }
4125 GETCHARINCTEST(c, eptr);
4126 if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4127 RRETURN(MATCH_NOMATCH);
4128 }
4129 break;
4130
4131 case PT_SC:
4132 for (i = 1; i <= min; i++)
4133 {
4134 if (eptr >= md->end_subject)
4135 {
4136 SCHECK_PARTIAL();
4137 RRETURN(MATCH_NOMATCH);
4138 }
4139 GETCHARINCTEST(c, eptr);
4140 if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4141 RRETURN(MATCH_NOMATCH);
4142 }
4143 break;
4144
4145 case PT_ALNUM:
4146 for (i = 1; i <= min; i++)
4147 {
4148 int category;
4149 if (eptr >= md->end_subject)
4150 {
4151 SCHECK_PARTIAL();
4152 RRETURN(MATCH_NOMATCH);
4153 }
4154 GETCHARINCTEST(c, eptr);
4155 category = UCD_CATEGORY(c);
4156 if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4157 RRETURN(MATCH_NOMATCH);
4158 }
4159 break;
4160
4161 case PT_SPACE: /* Perl space */
4162 for (i = 1; i <= min; i++)
4163 {
4164 if (eptr >= md->end_subject)
4165 {
4166 SCHECK_PARTIAL();
4167 RRETURN(MATCH_NOMATCH);
4168 }
4169 GETCHARINCTEST(c, eptr);
4170 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4171 c == CHAR_FF || c == CHAR_CR)
4172 == prop_fail_result)
4173 RRETURN(MATCH_NOMATCH);
4174 }
4175 break;
4176
4177 case PT_PXSPACE: /* POSIX space */
4178 for (i = 1; i <= min; i++)
4179 {
4180 if (eptr >= md->end_subject)
4181 {
4182 SCHECK_PARTIAL();
4183 RRETURN(MATCH_NOMATCH);
4184 }
4185 GETCHARINCTEST(c, eptr);
4186 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4187 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4188 == prop_fail_result)
4189 RRETURN(MATCH_NOMATCH);
4190 }
4191 break;
4192
4193 case PT_WORD:
4194 for (i = 1; i <= min; i++)
4195 {
4196 int category;
4197 if (eptr >= md->end_subject)
4198 {
4199 SCHECK_PARTIAL();
4200 RRETURN(MATCH_NOMATCH);
4201 }
4202 GETCHARINCTEST(c, eptr);
4203 category = UCD_CATEGORY(c);
4204 if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4205 == prop_fail_result)
4206 RRETURN(MATCH_NOMATCH);
4207 }
4208 break;
4209
4210 /* This should not occur */
4211
4212 default:
4213 RRETURN(PCRE_ERROR_INTERNAL);
4214 }
4215 }
4216
4217 /* Match extended Unicode sequences. We will get here only if the
4218 support is in the binary; otherwise a compile-time error occurs. */
4219
4220 else if (ctype == OP_EXTUNI)
4221 {
4222 for (i = 1; i <= min; i++)
4223 {
4224 if (eptr >= md->end_subject)
4225 {
4226 SCHECK_PARTIAL();
4227 RRETURN(MATCH_NOMATCH);
4228 }
4229 GETCHARINCTEST(c, eptr);
4230 if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4231 while (eptr < md->end_subject)
4232 {
4233 int len = 1;
4234 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4235 if (UCD_CATEGORY(c) != ucp_M) break;
4236 eptr += len;
4237 }
4238 CHECK_PARTIAL();
4239 }
4240 }
4241
4242 else
4243 #endif /* SUPPORT_UCP */
4244
4245 /* Handle all other cases when the coding is UTF-8 */
4246
4247 #ifdef SUPPORT_UTF
4248 if (utf) switch(ctype)
4249 {
4250 case OP_ANY:
4251 for (i = 1; i <= min; i++)
4252 {
4253 if (eptr >= md->end_subject)
4254 {
4255 SCHECK_PARTIAL();
4256 RRETURN(MATCH_NOMATCH);
4257 }
4258 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4259 if (md->partial != 0 &&
4260 eptr + 1 >= md->end_subject &&
4261 NLBLOCK->nltype == NLTYPE_FIXED &&
4262 NLBLOCK->nllen == 2 &&
4263 *eptr == NLBLOCK->nl[0])
4264 {
4265 md->hitend = TRUE;
4266 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4267 }
4268 eptr++;
4269 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4270 }
4271 break;
4272
4273 case OP_ALLANY:
4274 for (i = 1; i <= min; i++)
4275 {
4276 if (eptr >= md->end_subject)
4277 {
4278 SCHECK_PARTIAL();
4279 RRETURN(MATCH_NOMATCH);
4280 }
4281 eptr++;
4282 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4283 }
4284 break;
4285
4286 case OP_ANYBYTE:
4287 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4288 eptr += min;
4289 break;
4290
4291 case OP_ANYNL:
4292 for (i = 1; i <= min; i++)
4293 {
4294 if (eptr >= md->end_subject)
4295 {
4296 SCHECK_PARTIAL();
4297 RRETURN(MATCH_NOMATCH);
4298 }
4299 GETCHARINC(c, eptr);
4300 switch(c)
4301 {
4302 default: RRETURN(MATCH_NOMATCH);
4303
4304 case 0x000d:
4305 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4306 break;
4307
4308 case 0x000a:
4309 break;
4310
4311 case 0x000b:
4312 case 0x000c:
4313 case 0x0085:
4314 case 0x2028:
4315 case 0x2029:
4316 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4317 break;
4318 }
4319 }
4320 break;
4321
4322 case OP_NOT_HSPACE:
4323 for (i = 1; i <= min; i++)
4324 {
4325 if (eptr >= md->end_subject)
4326 {
4327 SCHECK_PARTIAL();
4328 RRETURN(MATCH_NOMATCH);
4329 }
4330 GETCHARINC(c, eptr);
4331 switch(c)
4332 {
4333 default: break;
4334 case 0x09: /* HT */
4335 case 0x20: /* SPACE */
4336 case 0xa0: /* NBSP */
4337 case 0x1680: /* OGHAM SPACE MARK */
4338 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4339 case 0x2000: /* EN QUAD */
4340 case 0x2001: /* EM QUAD */
4341 case 0x2002: /* EN SPACE */
4342 case 0x2003: /* EM SPACE */
4343 case 0x2004: /* THREE-PER-EM SPACE */
4344 case 0x2005: /* FOUR-PER-EM SPACE */
4345 case 0x2006: /* SIX-PER-EM SPACE */
4346 case 0x2007: /* FIGURE SPACE */
4347 case 0x2008: /* PUNCTUATION SPACE */
4348 case 0x2009: /* THIN SPACE */
4349 case 0x200A: /* HAIR SPACE */
4350 case 0x202f: /* NARROW NO-BREAK SPACE */
4351 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4352 case 0x3000: /* IDEOGRAPHIC SPACE */
4353 RRETURN(MATCH_NOMATCH);
4354 }
4355 }
4356 break;
4357
4358 case OP_HSPACE:
4359 for (i = 1; i <= min; i++)
4360 {
4361 if (eptr >= md->end_subject)
4362 {
4363 SCHECK_PARTIAL();
4364 RRETURN(MATCH_NOMATCH);
4365 }
4366 GETCHARINC(c, eptr);
4367 switch(c)
4368 {
4369 default: RRETURN(MATCH_NOMATCH);
4370 case 0x09: /* HT */
4371 case 0x20: /* SPACE */
4372 case 0xa0: /* NBSP */
4373 case 0x1680: /* OGHAM SPACE MARK */
4374 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4375 case 0x2000: /* EN QUAD */
4376 case 0x2001: /* EM QUAD */
4377 case 0x2002: /* EN SPACE */
4378 case 0x2003: /* EM SPACE */
4379 case 0x2004: /* THREE-PER-EM SPACE */
4380 case 0x2005: /* FOUR-PER-EM SPACE */
4381 case 0x2006: /* SIX-PER-EM SPACE */
4382 case 0x2007: /* FIGURE SPACE */
4383 case 0x2008: /* PUNCTUATION SPACE */
4384 case 0x2009: /* THIN SPACE */
4385 case 0x200A: /* HAIR SPACE */
4386 case 0x202f: /* NARROW NO-BREAK SPACE */
4387 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4388 case 0x3000: /* IDEOGRAPHIC SPACE */
4389 break;
4390 }
4391 }
4392 break;
4393
4394 case OP_NOT_VSPACE:
4395 for (i = 1; i <= min; i++)
4396 {
4397 if (eptr >= md->end_subject)
4398 {
4399 SCHECK_PARTIAL();
4400 RRETURN(MATCH_NOMATCH);
4401 }
4402 GETCHARINC(c, eptr);
4403 switch(c)
4404 {
4405 default: break;
4406 case 0x0a: /* LF */
4407 case 0x0b: /* VT */
4408 case 0x0c: /* FF */
4409 case 0x0d: /* CR */
4410 case 0x85: /* NEL */
4411 case 0x2028: /* LINE SEPARATOR */
4412 case 0x2029: /* PARAGRAPH SEPARATOR */
4413 RRETURN(MATCH_NOMATCH);
4414 }
4415 }
4416 break;
4417
4418 case OP_VSPACE:
4419 for (i = 1; i <= min; i++)
4420 {
4421 if (eptr >= md->end_subject)
4422 {
4423 SCHECK_PARTIAL();
4424 RRETURN(MATCH_NOMATCH);
4425 }
4426 GETCHARINC(c, eptr);
4427 switch(c)
4428 {
4429 default: RRETURN(MATCH_NOMATCH);
4430 case 0x0a: /* LF */
4431 case 0x0b: /* VT */
4432 case 0x0c: /* FF */
4433 case 0x0d: /* CR */
4434 case 0x85: /* NEL */
4435 case 0x2028: /* LINE SEPARATOR */
4436 case 0x2029: /* PARAGRAPH SEPARATOR */
4437 break;
4438 }
4439 }
4440 break;
4441
4442 case OP_NOT_DIGIT:
4443 for (i = 1; i <= min; i++)
4444 {
4445 if (eptr >= md->end_subject)
4446 {
4447 SCHECK_PARTIAL();
4448 RRETURN(MATCH_NOMATCH);
4449 }
4450 GETCHARINC(c, eptr);
4451 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4452 RRETURN(MATCH_NOMATCH);
4453 }
4454 break;
4455
4456 case OP_DIGIT:
4457 for (i = 1; i <= min; i++)
4458 {
4459 if (eptr >= md->end_subject)
4460 {
4461 SCHECK_PARTIAL();
4462 RRETURN(MATCH_NOMATCH);
4463 }
4464 if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4465 RRETURN(MATCH_NOMATCH);
4466 eptr++;
4467 /* No need to skip more bytes - we know it's a 1-byte character */
4468 }
4469 break;
4470
4471 case OP_NOT_WHITESPACE:
4472 for (i = 1; i <= min; i++)
4473 {
4474 if (eptr >= md->end_subject)
4475 {
4476 SCHECK_PARTIAL();
4477 RRETURN(MATCH_NOMATCH);
4478 }
4479 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4480 RRETURN(MATCH_NOMATCH);
4481 eptr++;
4482 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4483 }
4484 break;
4485
4486 case OP_WHITESPACE:
4487 for (i = 1; i <= min; i++)
4488 {
4489 if (eptr >= md->end_subject)
4490 {
4491 SCHECK_PARTIAL();
4492 RRETURN(MATCH_NOMATCH);
4493 }
4494 if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4495 RRETURN(MATCH_NOMATCH);
4496 eptr++;
4497 /* No need to skip more bytes - we know it's a 1-byte character */
4498 }
4499 break;
4500
4501 case OP_NOT_WORDCHAR:
4502 for (i = 1; i <= min; i++)
4503 {
4504 if (eptr >= md->end_subject)
4505 {
4506 SCHECK_PARTIAL();
4507 RRETURN(MATCH_NOMATCH);
4508 }
4509 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4510 RRETURN(MATCH_NOMATCH);
4511 eptr++;
4512 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4513 }
4514 break;
4515
4516 case OP_WORDCHAR:
4517 for (i = 1; i <= min; i++)
4518 {
4519 if (eptr >= md->end_subject)
4520 {
4521 SCHECK_PARTIAL();
4522 RRETURN(MATCH_NOMATCH);
4523 }
4524 if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4525 RRETURN(MATCH_NOMATCH);
4526 eptr++;
4527 /* No need to skip more bytes - we know it's a 1-byte character */
4528 }
4529 break;
4530
4531 default:
4532 RRETURN(PCRE_ERROR_INTERNAL);
4533 } /* End switch(ctype) */
4534
4535 else
4536 #endif /* SUPPORT_UTF */
4537
4538 /* Code for the non-UTF-8 case for minimum matching of operators other
4539 than OP_PROP and OP_NOTPROP. */
4540
4541 switch(ctype)
4542 {
4543 case OP_ANY:
4544 for (i = 1; i <= min; i++)
4545 {
4546 if (eptr >= md->end_subject)
4547 {
4548 SCHECK_PARTIAL();
4549 RRETURN(MATCH_NOMATCH);
4550 }
4551 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4552 if (md->partial != 0 &&
4553 eptr + 1 >= md->end_subject &&
4554 NLBLOCK->nltype == NLTYPE_FIXED &&
4555 NLBLOCK->nllen == 2 &&
4556 *eptr == NLBLOCK->nl[0])
4557 {
4558 md->hitend = TRUE;
4559 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4560 }
4561 eptr++;
4562 }
4563 break;
4564
4565 case OP_ALLANY:
4566 if (eptr > md->end_subject - min)
4567 {
4568 SCHECK_PARTIAL();
4569 RRETURN(MATCH_NOMATCH);
4570 }
4571 eptr += min;
4572 break;
4573
4574 case OP_ANYBYTE:
4575 if (eptr > md->end_subject - min)
4576 {
4577 SCHECK_PARTIAL();
4578 RRETURN(MATCH_NOMATCH);
4579 }
4580 eptr += min;
4581 break;
4582
4583 case OP_ANYNL:
4584 for (i = 1; i <= min; i++)
4585 {
4586 if (eptr >= md->end_subject)
4587 {
4588 SCHECK_PARTIAL();
4589 RRETURN(MATCH_NOMATCH);
4590 }
4591 switch(*eptr++)
4592 {
4593 default: RRETURN(MATCH_NOMATCH);
4594
4595 case 0x000d:
4596 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4597 break;
4598
4599 case 0x000a:
4600 break;
4601
4602 case 0x000b:
4603 case 0x000c:
4604 case 0x0085:
4605 #ifdef COMPILE_PCRE16
4606 case 0x2028:
4607 case 0x2029:
4608 #endif
4609 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4610 break;
4611 }
4612 }
4613 break;
4614
4615 case OP_NOT_HSPACE:
4616 for (i = 1; i <= min; i++)
4617 {
4618 if (eptr >= md->end_subject)
4619 {
4620 SCHECK_PARTIAL();
4621 RRETURN(MATCH_NOMATCH);
4622 }
4623 switch(*eptr++)
4624 {
4625 default: break;
4626 case 0x09: /* HT */
4627 case 0x20: /* SPACE */
4628 case 0xa0: /* NBSP */
4629 #ifdef COMPILE_PCRE16
4630 case 0x1680: /* OGHAM SPACE MARK */
4631 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4632 case 0x2000: /* EN QUAD */
4633 case 0x2001: /* EM QUAD */
4634 case 0x2002: /* EN SPACE */
4635 case 0x2003: /* EM SPACE */
4636 case 0x2004: /* THREE-PER-EM SPACE */
4637 case 0x2005: /* FOUR-PER-EM SPACE */
4638 case 0x2006: /* SIX-PER-EM SPACE */
4639 case 0x2007: /* FIGURE SPACE */
4640 case 0x2008: /* PUNCTUATION SPACE */
4641 case 0x2009: /* THIN SPACE */
4642 case 0x200A: /* HAIR SPACE */
4643 case 0x202f: /* NARROW NO-BREAK SPACE */
4644 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4645 case 0x3000: /* IDEOGRAPHIC SPACE */
4646 #endif
4647 RRETURN(MATCH_NOMATCH);
4648 }
4649 }
4650 break;
4651
4652 case OP_HSPACE:
4653 for (i = 1; i <= min; i++)
4654 {
4655 if (eptr >= md->end_subject)
4656 {
4657 SCHECK_PARTIAL();
4658 RRETURN(MATCH_NOMATCH);
4659 }
4660 switch(*eptr++)
4661 {
4662 default: RRETURN(MATCH_NOMATCH);
4663 case 0x09: /* HT */
4664 case 0x20: /* SPACE */
4665 case 0xa0: /* NBSP */
4666 #ifdef COMPILE_PCRE16
4667 case 0x1680: /* OGHAM SPACE MARK */
4668 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4669 case 0x2000: /* EN QUAD */
4670 case 0x2001: /* EM QUAD */
4671 case 0x2002: /* EN SPACE */
4672 case 0x2003: /* EM SPACE */
4673 case 0x2004: /* THREE-PER-EM SPACE */
4674 case 0x2005: /* FOUR-PER-EM SPACE */
4675 case 0x2006: /* SIX-PER-EM SPACE */
4676 case 0x2007: /* FIGURE SPACE */
4677 case 0x2008: /* PUNCTUATION SPACE */
4678 case 0x2009: /* THIN SPACE */
4679 case 0x200A: /* HAIR SPACE */
4680 case 0x202f: /* NARROW NO-BREAK SPACE */
4681 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4682 case 0x3000: /* IDEOGRAPHIC SPACE */
4683 #endif
4684 break;
4685 }
4686 }
4687 break;
4688
4689 case OP_NOT_VSPACE:
4690 for (i = 1; i <= min; i++)
4691 {
4692 if (eptr >= md->end_subject)
4693 {
4694 SCHECK_PARTIAL();
4695 RRETURN(MATCH_NOMATCH);
4696 }
4697 switch(*eptr++)
4698 {
4699 default: break;
4700 case 0x0a: /* LF */
4701 case 0x0b: /* VT */
4702 case 0x0c: /* FF */
4703 case 0x0d: /* CR */
4704 case 0x85: /* NEL */
4705 #ifdef COMPILE_PCRE16
4706 case 0x2028: /* LINE SEPARATOR */
4707 case 0x2029: /* PARAGRAPH SEPARATOR */
4708 #endif
4709 RRETURN(MATCH_NOMATCH);
4710 }
4711 }
4712 break;
4713
4714 case OP_VSPACE:
4715 for (i = 1; i <= min; i++)
4716 {
4717 if (eptr >= md->end_subject)
4718 {
4719 SCHECK_PARTIAL();
4720 RRETURN(MATCH_NOMATCH);
4721 }
4722 switch(*eptr++)
4723 {
4724 default: RRETURN(MATCH_NOMATCH);
4725 case 0x0a: /* LF */
4726 case 0x0b: /* VT */
4727 case 0x0c: /* FF */
4728 case 0x0d: /* CR */
4729 case 0x85: /* NEL */
4730 #ifdef COMPILE_PCRE16
4731 case 0x2028: /* LINE SEPARATOR */
4732 case 0x2029: /* PARAGRAPH SEPARATOR */
4733 #endif
4734 break;
4735 }
4736 }
4737 break;
4738
4739 case OP_NOT_DIGIT:
4740 for (i = 1; i <= min; i++)
4741 {
4742 if (eptr >= md->end_subject)
4743 {
4744 SCHECK_PARTIAL();
4745 RRETURN(MATCH_NOMATCH);
4746 }
4747 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4748 RRETURN(MATCH_NOMATCH);
4749 eptr++;
4750 }
4751 break;
4752
4753 case OP_DIGIT:
4754 for (i = 1; i <= min; i++)
4755 {
4756 if (eptr >= md->end_subject)
4757 {
4758 SCHECK_PARTIAL();
4759 RRETURN(MATCH_NOMATCH);
4760 }
4761 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4762 RRETURN(MATCH_NOMATCH);
4763 eptr++;
4764 }
4765 break;
4766
4767 case OP_NOT_WHITESPACE:
4768 for (i = 1; i <= min; i++)
4769 {
4770 if (eptr >= md->end_subject)
4771 {
4772 SCHECK_PARTIAL();
4773 RRETURN(MATCH_NOMATCH);
4774 }
4775 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4776 RRETURN(MATCH_NOMATCH);
4777 eptr++;
4778 }
4779 break;
4780
4781 case OP_WHITESPACE:
4782 for (i = 1; i <= min; i++)
4783 {
4784 if (eptr >= md->end_subject)
4785 {
4786 SCHECK_PARTIAL();
4787 RRETURN(MATCH_NOMATCH);
4788 }
4789 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4790 RRETURN(MATCH_NOMATCH);
4791 eptr++;
4792 }
4793 break;
4794
4795 case OP_NOT_WORDCHAR:
4796 for (i = 1; i <= min; i++)
4797 {
4798 if (eptr >= md->end_subject)
4799 {
4800 SCHECK_PARTIAL();
4801 RRETURN(MATCH_NOMATCH);
4802 }
4803 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4804 RRETURN(MATCH_NOMATCH);
4805 eptr++;
4806 }
4807 break;
4808
4809 case OP_WORDCHAR:
4810 for (i = 1; i <= min; i++)
4811 {
4812 if (eptr >= md->end_subject)
4813 {
4814 SCHECK_PARTIAL();
4815 RRETURN(MATCH_NOMATCH);
4816 }
4817 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4818 RRETURN(MATCH_NOMATCH);
4819 eptr++;
4820 }
4821 break;
4822
4823 default:
4824 RRETURN(PCRE_ERROR_INTERNAL);
4825 }
4826 }
4827
4828 /* If min = max, continue at the same level without recursing */
4829
4830 if (min == max) continue;
4831
4832 /* If minimizing, we have to test the rest of the pattern before each
4833 subsequent match. Again, separate the UTF-8 case for speed, and also
4834 separate the UCP cases. */
4835
4836 if (minimize)
4837 {
4838 #ifdef SUPPORT_UCP
4839 if (prop_type >= 0)
4840 {
4841 switch(prop_type)
4842 {
4843 case PT_ANY:
4844 for (fi = min;; fi++)
4845 {
4846 RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4847 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4848 if (fi >= max) RRETURN(MATCH_NOMATCH);
4849 if (eptr >= md->end_subject)
4850 {
4851 SCHECK_PARTIAL();
4852 RRETURN(MATCH_NOMATCH);
4853 }
4854 GETCHARINCTEST(c, eptr);
4855 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4856 }
4857 /* Control never gets here */
4858
4859 case PT_LAMP:
4860 for (fi = min;; fi++)
4861 {
4862 int chartype;
4863 RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4864 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4865 if (fi >= max) RRETURN(MATCH_NOMATCH);
4866 if (eptr >= md->end_subject)
4867 {
4868 SCHECK_PARTIAL();
4869 RRETURN(MATCH_NOMATCH);
4870 }
4871 GETCHARINCTEST(c, eptr);
4872 chartype = UCD_CHARTYPE(c);
4873 if ((chartype == ucp_Lu ||
4874 chartype == ucp_Ll ||
4875 chartype == ucp_Lt) == prop_fail_result)
4876 RRETURN(MATCH_NOMATCH);
4877 }
4878 /* Control never gets here */
4879
4880 case PT_GC:
4881 for (fi = min;; fi++)
4882 {
4883 RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4884 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4885 if (fi >= max) RRETURN(MATCH_NOMATCH);
4886 if (eptr >= md->end_subject)
4887 {
4888 SCHECK_PARTIAL();
4889 RRETURN(MATCH_NOMATCH);
4890 }
4891 GETCHARINCTEST(c, eptr);
4892 if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4893 RRETURN(MATCH_NOMATCH);
4894 }
4895 /* Control never gets here */
4896
4897 case PT_PC:
4898 for (fi = min;; fi++)
4899 {
4900 RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4901 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4902 if (fi >= max) RRETURN(MATCH_NOMATCH);
4903 if (eptr >= md->end_subject)
4904 {
4905 SCHECK_PARTIAL();
4906 RRETURN(MATCH_NOMATCH);
4907 }
4908 GETCHARINCTEST(c, eptr);
4909 if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4910 RRETURN(MATCH_NOMATCH);
4911 }
4912 /* Control never gets here */
4913
4914 case PT_SC:
4915 for (fi = min;; fi++)
4916 {
4917 RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4918 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4919 if (fi >= max) RRETURN(MATCH_NOMATCH);
4920 if (eptr >= md->end_subject)
4921 {
4922 SCHECK_PARTIAL();
4923 RRETURN(MATCH_NOMATCH);
4924 }
4925 GETCHARINCTEST(c, eptr);
4926 if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4927 RRETURN(MATCH_NOMATCH);
4928 }
4929 /* Control never gets here */
4930
4931 case PT_ALNUM:
4932 for (fi = min;; fi++)
4933 {
4934 int category;
4935 RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4936 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4937 if (fi >= max) RRETURN(MATCH_NOMATCH);
4938 if (eptr >= md->end_subject)
4939 {
4940 SCHECK_PARTIAL();
4941 RRETURN(MATCH_NOMATCH);
4942 }
4943 GETCHARINCTEST(c, eptr);
4944 category = UCD_CATEGORY(c);
4945 if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4946 RRETURN(MATCH_NOMATCH);
4947 }
4948 /* Control never gets here */
4949
4950 case PT_SPACE: /* Perl space */
4951 for (fi = min;; fi++)
4952 {
4953 RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4954 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4955 if (fi >= max) RRETURN(MATCH_NOMATCH);
4956 if (eptr >= md->end_subject)
4957 {
4958 SCHECK_PARTIAL();
4959 RRETURN(MATCH_NOMATCH);
4960 }
4961 GETCHARINCTEST(c, eptr);
4962 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4963 c == CHAR_FF || c == CHAR_CR)
4964 == prop_fail_result)
4965 RRETURN(MATCH_NOMATCH);
4966 }
4967 /* Control never gets here */
4968
4969 case PT_PXSPACE: /* POSIX space */
4970 for (fi = min;; fi++)
4971 {
4972 RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4973 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4974 if (fi >= max) RRETURN(MATCH_NOMATCH);
4975 if (eptr >= md->end_subject)
4976 {
4977 SCHECK_PARTIAL();
4978 RRETURN(MATCH_NOMATCH);
4979 }
4980 GETCHARINCTEST(c, eptr);
4981 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4982 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4983 == prop_fail_result)
4984 RRETURN(MATCH_NOMATCH);
4985 }
4986 /* Control never gets here */
4987
4988 case PT_WORD:
4989 for (fi = min;; fi++)
4990 {
4991 int category;
4992 RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4993 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4994 if (fi >= max) RRETURN(MATCH_NOMATCH);
4995 if (eptr >= md->end_subject)
4996 {
4997 SCHECK_PARTIAL();
4998 RRETURN(MATCH_NOMATCH);
4999 }
5000 GETCHARINCTEST(c, eptr);
5001 category = UCD_CATEGORY(c);
5002 if ((category == ucp_L ||
5003 category == ucp_N ||
5004 c == CHAR_UNDERSCORE)
5005 == prop_fail_result)
5006 RRETURN(MATCH_NOMATCH);
5007 }
5008 /* Control never gets here */
5009
5010 /* This should never occur */
5011
5012 default:
5013 RRETURN(PCRE_ERROR_INTERNAL);
5014 }
5015 }
5016
5017 /* Match extended Unicode sequences. We will get here only if the
5018 support is in the binary; otherwise a compile-time error occurs. */
5019
5020 else if (ctype == OP_EXTUNI)
5021 {
5022 for (fi = min;; fi++)
5023 {
5024 RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5025 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5026 if (fi >= max) RRETURN(MATCH_NOMATCH);
5027 if (eptr >= md->end_subject)
5028 {
5029 SCHECK_PARTIAL();
5030 RRETURN(MATCH_NOMATCH);
5031 }
5032 GETCHARINCTEST(c, eptr);
5033 if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
5034 while (eptr < md->end_subject)
5035 {
5036 int len = 1;
5037 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5038 if (UCD_CATEGORY(c) != ucp_M) break;
5039 eptr += len;
5040 }
5041 CHECK_PARTIAL();
5042 }
5043 }
5044 else
5045 #endif /* SUPPORT_UCP */
5046
5047 #ifdef SUPPORT_UTF
5048 if (utf)
5049 {
5050 for (fi = min;; fi++)
5051 {
5052 RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5053 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5054 if (fi >= max) RRETURN(MATCH_NOMATCH);
5055 if (eptr >= md->end_subject)
5056 {
5057 SCHECK_PARTIAL();
5058 RRETURN(MATCH_NOMATCH);
5059 }
5060 if (ctype == OP_ANY && IS_NEWLINE(eptr))
5061 RRETURN(MATCH_NOMATCH);
5062 GETCHARINC(c, eptr);
5063 switch(ctype)
5064 {
5065 case OP_ANY: /* This is the non-NL case */
5066 if (md->partial != 0 && /* Take care with CRLF partial */
5067 eptr >= md->end_subject &&
5068 NLBLOCK->nltype == NLTYPE_FIXED &&
5069 NLBLOCK->nllen == 2 &&
5070 c == NLBLOCK->nl[0])
5071 {
5072 md->hitend = TRUE;
5073 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5074 }
5075 break;
5076
5077 case OP_ALLANY:
5078 case OP_ANYBYTE:
5079 break;
5080
5081 case OP_ANYNL:
5082 switch(c)
5083 {
5084 default: RRETURN(MATCH_NOMATCH);
5085 case 0x000d:
5086 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5087 break;
5088 case 0x000a:
5089 break;
5090
5091 case 0x000b:
5092 case 0x000c:
5093 case 0x0085:
5094 case 0x2028:
5095 case 0x2029:
5096 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5097 break;
5098 }
5099 break;
5100
5101 case OP_NOT_HSPACE:
5102 switch(c)
5103 {
5104 default: break;
5105 case 0x09: /* HT */
5106 case 0x20: /* SPACE */
5107 case 0xa0: /* NBSP */
5108 case 0x1680: /* OGHAM SPACE MARK */
5109 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5110 case 0x2000: /* EN QUAD */
5111 case 0x2001: /* EM QUAD */
5112 case 0x2002: /* EN SPACE */
5113 case 0x2003: /* EM SPACE */
5114 case 0x2004: /* THREE-PER-EM SPACE */
5115 case 0x2005: /* FOUR-PER-EM SPACE */
5116 case 0x2006: /* SIX-PER-EM SPACE */
5117 case 0x2007: /* FIGURE SPACE */
5118 case 0x2008: /* PUNCTUATION SPACE */
5119 case 0x2009: /* THIN SPACE */
5120 case 0x200A: /* HAIR SPACE */
5121 case 0x202f: /* NARROW NO-BREAK SPACE */
5122 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5123 case 0x3000: /* IDEOGRAPHIC SPACE */
5124 RRETURN(MATCH_NOMATCH);
5125 }
5126 break;
5127
5128 case OP_HSPACE:
5129 switch(c)
5130 {
5131 default: RRETURN(MATCH_NOMATCH);
5132 case 0x09: /* HT */
5133 case 0x20: /* SPACE */
5134 case 0xa0: /* NBSP */
5135 case 0x1680: /* OGHAM SPACE MARK */
5136 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5137 case 0x2000: /* EN QUAD */
5138 case 0x2001: /* EM QUAD */
5139 case 0x2002: /* EN SPACE */
5140 case 0x2003: /* EM SPACE */
5141 case 0x2004: /* THREE-PER-EM SPACE */
5142 case 0x2005: /* FOUR-PER-EM SPACE */
5143 case 0x2006: /* SIX-PER-EM SPACE */
5144 case 0x2007: /* FIGURE SPACE */
5145 case 0x2008: /* PUNCTUATION SPACE */
5146 case 0x2009: /* THIN SPACE */
5147 case 0x200A: /* HAIR SPACE */
5148 case 0x202f: /* NARROW NO-BREAK SPACE */
5149 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5150 case 0x3000: /* IDEOGRAPHIC SPACE */
5151 break;
5152 }
5153 break;
5154
5155 case OP_NOT_VSPACE:
5156 switch(c)
5157 {
5158 default: break;
5159 case 0x0a: /* LF */
5160 case 0x0b: /* VT */
5161 case 0x0c: /* FF */
5162 case 0x0d: /* CR */
5163 case 0x85: /* NEL */
5164 case 0x2028: /* LINE SEPARATOR */
5165 case 0x2029: /* PARAGRAPH SEPARATOR */
5166 RRETURN(MATCH_NOMATCH);
5167 }
5168 break;
5169
5170 case OP_VSPACE:
5171 switch(c)
5172 {
5173 default: RRETURN(MATCH_NOMATCH);
5174 case 0x0a: /* LF */
5175 case 0x0b: /* VT */
5176 case 0x0c: /* FF */
5177 case 0x0d: /* CR */
5178 case 0x85: /* NEL */
5179 case 0x2028: /* LINE SEPARATOR */
5180 case 0x2029: /* PARAGRAPH SEPARATOR */
5181 break;
5182 }
5183 break;
5184
5185 case OP_NOT_DIGIT:
5186 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5187 RRETURN(MATCH_NOMATCH);
5188 break;
5189
5190 case OP_DIGIT:
5191 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5192 RRETURN(MATCH_NOMATCH);
5193 break;
5194
5195 case OP_NOT_WHITESPACE:
5196 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5197 RRETURN(MATCH_NOMATCH);
5198 break;
5199
5200 case OP_WHITESPACE:
5201 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5202 RRETURN(MATCH_NOMATCH);
5203 break;
5204
5205 case OP_NOT_WORDCHAR:
5206 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5207 RRETURN(MATCH_NOMATCH);
5208 break;
5209
5210 case OP_WORDCHAR:
5211 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5212 RRETURN(MATCH_NOMATCH);
5213 break;
5214
5215 default:
5216 RRETURN(PCRE_ERROR_INTERNAL);
5217 }
5218 }
5219 }
5220 else
5221 #endif
5222 /* Not UTF mode */
5223 {
5224 for (fi = min;; fi++)
5225 {
5226 RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5227 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5228 if (fi >= max) RRETURN(MATCH_NOMATCH);
5229 if (eptr >= md->end_subject)
5230 {
5231 SCHECK_PARTIAL();
5232 RRETURN(MATCH_NOMATCH);
5233 }
5234 if (ctype == OP_ANY && IS_NEWLINE(eptr))
5235 RRETURN(MATCH_NOMATCH);
5236 c = *eptr++;
5237 switch(ctype)
5238 {
5239 case OP_ANY: /* This is the non-NL case */
5240 if (md->partial != 0 && /* Take care with CRLF partial */
5241 eptr >= md->end_subject &&
5242 NLBLOCK->nltype == NLTYPE_FIXED &&
5243 NLBLOCK->nllen == 2 &&
5244 c == NLBLOCK->nl[0])
5245 {
5246 md->hitend = TRUE;
5247 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5248 }
5249 break;
5250
5251 case OP_ALLANY:
5252 case OP_ANYBYTE:
5253 break;
5254
5255 case OP_ANYNL:
5256 switch(c)
5257 {
5258 default: RRETURN(MATCH_NOMATCH);
5259 case 0x000d:
5260 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5261 break;
5262
5263 case 0x000a:
5264 break;
5265
5266 case 0x000b:
5267 case 0x000c:
5268 case 0x0085:
5269 #ifdef COMPILE_PCRE16
5270 case 0x2028:
5271 case 0x2029:
5272 #endif
5273 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5274 break;
5275 }
5276 break;
5277
5278 case OP_NOT_HSPACE:
5279 switch(c)
5280 {
5281 default: break;
5282 case 0x09: /* HT */
5283 case 0x20: /* SPACE */
5284 case 0xa0: /* NBSP */
5285 #ifdef COMPILE_PCRE16
5286 case 0x1680: /* OGHAM SPACE MARK */
5287 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5288 case 0x2000: /* EN QUAD */
5289 case 0x2001: /* EM QUAD */
5290 case 0x2002: /* EN SPACE */
5291 case 0x2003: /* EM SPACE */
5292 case 0x2004: /* THREE-PER-EM SPACE */
5293 case 0x2005: /* FOUR-PER-EM SPACE */
5294 case 0x2006: /* SIX-PER-EM SPACE */
5295 case 0x2007: /* FIGURE SPACE */
5296 case 0x2008: /* PUNCTUATION SPACE */
5297 case 0x2009: /* THIN SPACE */
5298 case 0x200A: /* HAIR SPACE */
5299 case 0x202f: /* NARROW NO-BREAK SPACE */
5300 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5301 case 0x3000: /* IDEOGRAPHIC SPACE */
5302 #endif
5303 RRETURN(MATCH_NOMATCH);
5304 }
5305 break;
5306
5307 case OP_HSPACE:
5308 switch(c)
5309 {
5310 default: RRETURN(MATCH_NOMATCH);
5311 case 0x09: /* HT */
5312 case 0x20: /* SPACE */
5313 case 0xa0: /* NBSP */
5314 #ifdef COMPILE_PCRE16
5315 case 0x1680: /* OGHAM SPACE MARK */
5316 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5317 case 0x2000: /* EN QUAD */
5318 case 0x2001: /* EM QUAD */
5319 case 0x2002: /* EN SPACE */
5320 case 0x2003: /* EM SPACE */
5321 case 0x2004: /* THREE-PER-EM SPACE */
5322 case 0x2005: /* FOUR-PER-EM SPACE */
5323 case 0x2006: /* SIX-PER-EM SPACE */
5324 case 0x2007: /* FIGURE SPACE */
5325 case 0x2008: /* PUNCTUATION SPACE */
5326 case 0x2009: /* THIN SPACE */
5327 case 0x200A: /* HAIR SPACE */
5328 case 0x202f: /* NARROW NO-BREAK SPACE */
5329 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5330 case 0x3000: /* IDEOGRAPHIC SPACE */
5331 #endif
5332 break;
5333 }
5334 break;
5335
5336 case OP_NOT_VSPACE:
5337 switch(c)
5338 {
5339 default: break;
5340 case 0x0a: /* LF */
5341 case 0x0b: /* VT */
5342 case 0x0c: /* FF */
5343 case 0x0d: /* CR */
5344 case 0x85: /* NEL */
5345 #ifdef COMPILE_PCRE16
5346 case 0x2028: /* LINE SEPARATOR */
5347 case 0x2029: /* PARAGRAPH SEPARATOR */
5348 #endif
5349 RRETURN(MATCH_NOMATCH);
5350 }
5351 break;
5352
5353 case OP_VSPACE:
5354 switch(c)
5355 {
5356 default: RRETURN(MATCH_NOMATCH);
5357 case 0x0a: /* LF */
5358 case 0x0b: /* VT */
5359 case 0x0c: /* FF */
5360 case 0x0d: /* CR */
5361 case 0x85: /* NEL */
5362 #ifdef COMPILE_PCRE16
5363 case 0x2028: /* LINE SEPARATOR */
5364 case 0x2029: /* PARAGRAPH SEPARATOR */
5365 #endif
5366 break;
5367 }
5368 break;
5369
5370 case OP_NOT_DIGIT:
5371 if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5372 break;
5373
5374 case OP_DIGIT:
5375 if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5376 break;
5377
5378 case OP_NOT_WHITESPACE:
5379 if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5380 break;
5381
5382 case OP_WHITESPACE:
5383 if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5384 break;
5385
5386 case OP_NOT_WORDCHAR:
5387 if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5388 break;
5389
5390 case OP_WORDCHAR:
5391 if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5392 break;
5393
5394 default:
5395 RRETURN(PCRE_ERROR_INTERNAL);
5396 }
5397 }
5398 }
5399 /* Control never gets here */
5400 }
5401
5402 /* If maximizing, it is worth using inline code for speed, doing the type
5403 test once at the start (i.e. keep it out of the loop). Again, keep the
5404 UTF-8 and UCP stuff separate. */
5405
5406 else
5407 {
5408 pp = eptr; /* Remember where we started */
5409
5410 #ifdef SUPPORT_UCP
5411 if (prop_type >= 0)
5412 {
5413 switch(prop_type)
5414 {
5415 case PT_ANY:
5416 for (i = min; i < max; i++)
5417 {
5418 int len = 1;
5419 if (eptr >= md->end_subject)
5420 {
5421 SCHECK_PARTIAL();
5422 break;
5423 }
5424 GETCHARLENTEST(c, eptr, len);
5425 if (prop_fail_result) break;
5426 eptr+= len;
5427 }
5428 break;
5429
5430 case PT_LAMP:
5431 for (i = min; i < max; i++)
5432 {
5433 int chartype;
5434 int len = 1;
5435 if (eptr >= md->end_subject)
5436 {
5437 SCHECK_PARTIAL();
5438 break;
5439 }
5440 GETCHARLENTEST(c, eptr, len);
5441 chartype = UCD_CHARTYPE(c);
5442 if ((chartype == ucp_Lu ||
5443 chartype == ucp_Ll ||
5444 chartype == ucp_Lt) == prop_fail_result)
5445 break;
5446 eptr+= len;
5447 }
5448 break;
5449
5450 case PT_GC:
5451 for (i = min; i < max; i++)
5452 {
5453 int len = 1;
5454 if (eptr >= md->end_subject)
5455 {
5456 SCHECK_PARTIAL();
5457 break;
5458 }
5459 GETCHARLENTEST(c, eptr, len);
5460 if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5461 eptr+= len;
5462 }
5463 break;
5464
5465 case PT_PC:
5466 for (i = min; i < max; i++)
5467 {
5468 int len = 1;
5469 if (eptr >= md->end_subject)
5470 {
5471 SCHECK_PARTIAL();
5472 break;
5473 }
5474 GETCHARLENTEST(c, eptr, len);
5475 if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5476 eptr+= len;
5477 }
5478 break;
5479
5480 case PT_SC:
5481 for (i = min; i < max; i++)
5482 {
5483 int len = 1;
5484 if (eptr >= md->end_subject)
5485 {
5486 SCHECK_PARTIAL();
5487 break;
5488 }
5489 GETCHARLENTEST(c, eptr, len);
5490 if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5491 eptr+= len;
5492 }
5493 break;
5494
5495 case PT_ALNUM:
5496 for (i = min; i < max; i++)
5497 {
5498 int category;
5499 int len = 1;
5500 if (eptr >= md->end_subject)
5501 {
5502 SCHECK_PARTIAL();
5503 break;
5504 }
5505 GETCHARLENTEST(c, eptr, len);
5506 category = UCD_CATEGORY(c);
5507 if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5508 break;
5509 eptr+= len;
5510 }
5511 break;
5512
5513 case PT_SPACE: /* Perl space */
5514 for (i = min; i < max; i++)
5515 {
5516 int len = 1;
5517 if (eptr >= md->end_subject)
5518 {
5519 SCHECK_PARTIAL();
5520 break;
5521 }
5522 GETCHARLENTEST(c, eptr, len);
5523 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5524 c == CHAR_FF || c == CHAR_CR)
5525 == prop_fail_result)
5526 break;
5527 eptr+= len;
5528 }
5529 break;
5530
5531 case PT_PXSPACE: /* POSIX space */
5532 for (i = min; i < max; i++)
5533 {
5534 int len = 1;
5535 if (eptr >= md->end_subject)
5536 {
5537 SCHECK_PARTIAL();
5538 break;
5539 }
5540 GETCHARLENTEST(c, eptr, len);
5541 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5542 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5543 == prop_fail_result)
5544 break;
5545 eptr+= len;
5546 }
5547 break;
5548
5549 case PT_WORD:
5550 for (i = min; i < max; i++)
5551 {
5552 int category;
5553 int len = 1;
5554 if (eptr >= md->end_subject)
5555 {
5556 SCHECK_PARTIAL();
5557 break;
5558 }
5559 GETCHARLENTEST(c, eptr, len);
5560 category = UCD_CATEGORY(c);
5561 if ((category == ucp_L || category == ucp_N ||
5562 c == CHAR_UNDERSCORE) == prop_fail_result)
5563 break;
5564 eptr+= len;
5565 }
5566 break;
5567
5568 default:
5569 RRETURN(PCRE_ERROR_INTERNAL);
5570 }
5571
5572 /* eptr is now past the end of the maximum run */
5573
5574 if (possessive) continue;
5575 for(;;)
5576 {
5577 RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5578 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5579 if (eptr-- == pp) break; /* Stop if tried at original pos */
5580 if (utf) BACKCHAR(eptr);
5581 }
5582 }
5583
5584 /* Match extended Unicode sequences. We will get here only if the
5585 support is in the binary; otherwise a compile-time error occurs. */
5586
5587 else if (ctype == OP_EXTUNI)
5588 {
5589 for (i = min; i < max; i++)
5590 {
5591 int len = 1;
5592 if (eptr >= md->end_subject)
5593 {
5594 SCHECK_PARTIAL();
5595 break;
5596 }
5597 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5598 if (UCD_CATEGORY(c) == ucp_M) break;
5599 eptr += len;
5600 while (eptr < md->end_subject)
5601 {
5602 len = 1;
5603 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5604 if (UCD_CATEGORY(c) != ucp_M) break;
5605 eptr += len;
5606 }
5607 CHECK_PARTIAL();
5608 }
5609
5610 /* eptr is now past the end of the maximum run */
5611
5612 if (possessive) continue;
5613
5614 for(;;)
5615 {
5616 RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5617 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5618 if (eptr-- == pp) break; /* Stop if tried at original pos */
5619 for (;;) /* Move back over one extended */
5620 {
5621 if (!utf) c = *eptr; else
5622 {
5623 BACKCHAR(eptr);
5624 GETCHAR(c, eptr);
5625 }
5626 if (UCD_CATEGORY(c) != ucp_M) break;
5627 eptr--;
5628 }
5629 }
5630 }
5631
5632 else
5633 #endif /* SUPPORT_UCP */
5634
5635 #ifdef SUPPORT_UTF
5636 if (utf)
5637 {
5638 switch(ctype)
5639 {
5640 case OP_ANY:
5641 if (max < INT_MAX)
5642 {
5643 for (i = min; i < max; i++)
5644 {
5645 if (eptr >= md->end_subject)
5646 {
5647 SCHECK_PARTIAL();
5648 break;
5649 }
5650 if (IS_NEWLINE(eptr)) break;
5651 if (md->partial != 0 && /* Take care with CRLF partial */
5652 eptr + 1 >= md->end_subject &&
5653 NLBLOCK->nltype == NLTYPE_FIXED &&
5654 NLBLOCK->nllen == 2 &&
5655 *eptr == NLBLOCK->nl[0])
5656 {
5657 md->hitend = TRUE;
5658 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5659 }
5660 eptr++;
5661 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5662 }
5663 }
5664
5665 /* Handle unlimited UTF-8 repeat */
5666
5667 else
5668 {
5669 for (i = min; i < max; i++)
5670 {
5671 if (eptr >= md->end_subject)
5672 {
5673 SCHECK_PARTIAL();
5674 break;
5675 }
5676 if (IS_NEWLINE(eptr)) break;
5677 if (md->partial != 0 && /* Take care with CRLF partial */
5678 eptr + 1 >= md->end_subject &&
5679 NLBLOCK->nltype == NLTYPE_FIXED &&
5680 NLBLOCK->nllen == 2 &&
5681 *eptr == NLBLOCK->nl[0])
5682 {
5683 md->hitend = TRUE;
5684 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5685 }
5686 eptr++;
5687 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5688 }
5689 }
5690 break;
5691
5692 case OP_ALLANY:
5693 if (max < INT_MAX)
5694 {
5695 for (i = min; i < max; i++)
5696 {
5697 if (eptr >= md->end_subject)
5698 {
5699 SCHECK_PARTIAL();
5700 break;
5701 }
5702 eptr++;
5703 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5704 }
5705 }
5706 else
5707 {
5708 eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5709 SCHECK_PARTIAL();
5710 }
5711 break;
5712
5713 /* The byte case is the same as non-UTF8 */
5714
5715 case OP_ANYBYTE:
5716 c = max - min;
5717 if (c > (unsigned int)(md->end_subject - eptr))
5718 {
5719 eptr = md->end_subject;
5720 SCHECK_PARTIAL();
5721 }
5722 else eptr += c;
5723 break;
5724
5725 case OP_ANYNL:
5726 for (i = min; i < max; i++)
5727 {
5728 int len = 1;
5729 if (eptr >= md->end_subject)
5730 {
5731 SCHECK_PARTIAL();
5732 break;
5733 }
5734 GETCHARLEN(c, eptr, len);
5735 if (c == 0x000d)
5736 {
5737 if (++eptr >= md->end_subject) break;
5738 if (*eptr == 0x000a) eptr++;
5739 }
5740 else
5741 {
5742 if (c != 0x000a &&
5743 (md->bsr_anycrlf ||
5744 (c != 0x000b && c != 0x000c &&
5745 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5746 break;
5747 eptr += len;
5748 }
5749 }
5750 break;
5751
5752 case OP_NOT_HSPACE:
5753 case OP_HSPACE:
5754 for (i = min; i < max; i++)
5755 {
5756 BOOL gotspace;
5757 int len = 1;
5758 if (eptr >= md->end_subject)
5759 {
5760 SCHECK_PARTIAL();
5761 break;
5762 }
5763 GETCHARLEN(c, eptr, len);
5764 switch(c)
5765 {
5766 default: gotspace = FALSE; break;
5767 case 0x09: /* HT */
5768 case 0x20: /* SPACE */
5769 case 0xa0: /* NBSP */
5770 case 0x1680: /* OGHAM SPACE MARK */
5771 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5772 case 0x2000: /* EN QUAD */
5773 case 0x2001: /* EM QUAD */
5774 case 0x2002: /* EN SPACE */
5775 case 0x2003: /* EM SPACE */
5776 case 0x2004: /* THREE-PER-EM SPACE */
5777 case 0x2005: /* FOUR-PER-EM SPACE */
5778 case 0x2006: /* SIX-PER-EM SPACE */
5779 case 0x2007: /* FIGURE SPACE */
5780 case 0x2008: /* PUNCTUATION SPACE */
5781 case 0x2009: /* THIN SPACE */
5782 case 0x200A: /* HAIR SPACE */
5783 case 0x202f: /* NARROW NO-BREAK SPACE */
5784 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5785 case 0x3000: /* IDEOGRAPHIC SPACE */
5786 gotspace = TRUE;
5787 break;
5788 }
5789 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5790 eptr += len;
5791 }
5792 break;
5793
5794 case OP_NOT_VSPACE:
5795 case OP_VSPACE:
5796 for (i = min; i < max; i++)
5797 {
5798 BOOL gotspace;
5799 int len = 1;
5800 if (eptr >= md->end_subject)
5801 {
5802 SCHECK_PARTIAL();
5803 break;
5804 }
5805 GETCHARLEN(c, eptr, len);
5806 switch(c)
5807 {
5808 default: gotspace = FALSE; break;
5809 case 0x0a: /* LF */
5810 case 0x0b: /* VT */
5811 case 0x0c: /* FF */
5812 case 0x0d: /* CR */
5813 case 0x85: /* NEL */
5814 case 0x2028: /* LINE SEPARATOR */
5815 case 0x2029: /* PARAGRAPH SEPARATOR */
5816 gotspace = TRUE;
5817 break;
5818 }
5819 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5820 eptr += len;
5821 }
5822 break;
5823
5824 case OP_NOT_DIGIT:
5825 for (i = min; i < max; i++)
5826 {
5827 int len = 1;
5828 if (eptr >= md->end_subject)
5829 {
5830 SCHECK_PARTIAL();
5831 break;
5832 }
5833 GETCHARLEN(c, eptr, len);
5834 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5835 eptr+= len;
5836 }
5837 break;
5838
5839 case OP_DIGIT:
5840 for (i = min; i < max; i++)
5841 {
5842 int len = 1;
5843 if (eptr >= md->end_subject)
5844 {
5845 SCHECK_PARTIAL();
5846 break;
5847 }
5848 GETCHARLEN(c, eptr, len);
5849 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5850 eptr+= len;
5851 }
5852 break;
5853
5854 case OP_NOT_WHITESPACE:
5855 for (i = min; i < max; i++)
5856 {
5857 int len = 1;
5858 if (eptr >= md->end_subject)
5859 {
5860 SCHECK_PARTIAL();
5861 break;
5862 }
5863 GETCHARLEN(c, eptr, len);
5864 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5865 eptr+= len;
5866 }
5867 break;
5868
5869 case OP_WHITESPACE:
5870 for (i = min; i < max; i++)
5871 {
5872 int len = 1;
5873 if (eptr >= md->end_subject)
5874 {
5875 SCHECK_PARTIAL();
5876 break;
5877 }
5878 GETCHARLEN(c, eptr, len);
5879 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5880 eptr+= len;
5881 }
5882 break;
5883
5884 case OP_NOT_WORDCHAR:
5885 for (i = min; i < max; i++)
5886 {
5887 int len = 1;
5888 if (eptr >= md->end_subject)
5889 {
5890 SCHECK_PARTIAL();
5891 break;
5892 }
5893 GETCHARLEN(c, eptr, len);
5894 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5895 eptr+= len;
5896 }
5897 break;
5898
5899 case OP_WORDCHAR:
5900 for (i = min; i < max; i++)
5901 {
5902 int len = 1;
5903 if (eptr >= md->end_subject)
5904 {
5905 SCHECK_PARTIAL();
5906 break;
5907 }
5908 GETCHARLEN(c, eptr, len);
5909 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5910 eptr+= len;
5911 }
5912 break;
5913
5914 default:
5915 RRETURN(PCRE_ERROR_INTERNAL);
5916 }
5917
5918 /* eptr is now past the end of the maximum run. If possessive, we are
5919 done (no backing up). Otherwise, match at this position; anything other
5920 than no match is immediately returned. For nomatch, back up one
5921 character, unless we are matching \R and the last thing matched was
5922 \r\n, in which case, back up two bytes. */
5923
5924 if (possessive) continue;
5925 for(;;)
5926 {
5927 RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5928 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5929 if (eptr-- == pp) break; /* Stop if tried at original pos */
5930 BACKCHAR(eptr);
5931 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
5932 eptr[-1] == '\r') eptr--;
5933 }
5934 }
5935 else
5936 #endif /* SUPPORT_UTF */
5937 /* Not UTF mode */
5938 {
5939 switch(ctype)
5940 {
5941 case OP_ANY:
5942 for (i = min; i < max; i++)
5943 {
5944 if (eptr >= md->end_subject)
5945 {
5946 SCHECK_PARTIAL();
5947 break;
5948 }
5949 if (IS_NEWLINE(eptr)) break;
5950 if (md->partial != 0 && /* Take care with CRLF partial */
5951 eptr + 1 >= md->end_subject &&
5952 NLBLOCK->nltype == NLTYPE_FIXED &&
5953 NLBLOCK->nllen == 2 &&
5954 *eptr == NLBLOCK->nl[0])
5955 {
5956 md->hitend = TRUE;
5957 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5958 }
5959 eptr++;
5960 }
5961 break;
5962
5963 case OP_ALLANY:
5964 case OP_ANYBYTE:
5965 c = max - min;
5966 if (c > (unsigned int)(md->end_subject - eptr))
5967 {
5968 eptr = md->end_subject;
5969 SCHECK_PARTIAL();
5970 }
5971 else eptr += c;
5972 break;
5973
5974 case OP_ANYNL:
5975 for (i = min; i < max; i++)
5976 {
5977 if (eptr >= md->end_subject)
5978 {
5979 SCHECK_PARTIAL();
5980 break;
5981 }
5982 c = *eptr;
5983 if (c == 0x000d)
5984 {
5985 if (++eptr >= md->end_subject) break;
5986 if (*eptr == 0x000a) eptr++;
5987 }
5988 else
5989 {
5990 if (c != 0x000a && (md->bsr_anycrlf ||
5991 (c != 0x000b && c != 0x000c && c != 0x0085
5992 #ifdef COMPILE_PCRE16
5993 && c != 0x2028 && c != 0x2029
5994 #endif
5995 ))) break;
5996 eptr++;
5997 }
5998 }
5999 break;
6000
6001 case OP_NOT_HSPACE:
6002 for (i = min; i < max; i++)
6003 {
6004 if (eptr >= md->end_subject)
6005 {
6006 SCHECK_PARTIAL();
6007 break;
6008 }
6009 c = *eptr;
6010 if (c == 0x09 || c == 0x20 || c == 0xa0
6011 #ifdef COMPILE_PCRE16
6012 || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
6013 || c == 0x202f || c == 0x205f || c == 0x3000
6014 #endif
6015 ) break;
6016 eptr++;
6017 }
6018 break;
6019
6020 case OP_HSPACE:
6021 for (i = min; i < max; i++)
6022 {
6023 if (eptr >= md->end_subject)
6024 {
6025 SCHECK_PARTIAL();
6026 break;
6027 }
6028 c = *eptr;
6029 if (c != 0x09 && c != 0x20 && c != 0xa0
6030 #ifdef COMPILE_PCRE16
6031 && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
6032 && c != 0x202f && c != 0x205f && c != 0x3000
6033 #endif
6034 ) break;
6035 eptr++;
6036 }
6037 break;
6038
6039 case OP_NOT_VSPACE:
6040 for (i = min; i < max; i++)
6041 {
6042 if (eptr >= md->end_subject)
6043 {
6044 SCHECK_PARTIAL();
6045 break;
6046 }
6047 c = *eptr;
6048 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
6049 #ifdef COMPILE_PCRE16
6050 || c == 0x2028 || c == 0x2029
6051 #endif
6052 ) break;
6053 eptr++;
6054 }
6055 break;
6056
6057 case OP_VSPACE:
6058 for (i = min; i < max; i++)
6059 {
6060 if (eptr >= md->end_subject)
6061 {
6062 SCHECK_PARTIAL();
6063 break;
6064 }
6065 c = *eptr;
6066 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
6067 #ifdef COMPILE_PCRE16
6068 && c != 0x2028 && c != 0x2029
6069 #endif
6070 ) break;
6071 eptr++;
6072 }
6073 break;
6074
6075 case OP_NOT_DIGIT:
6076 for (i = min; i < max; i++)
6077 {
6078 if (eptr >= md->end_subject)
6079 {
6080 SCHECK_PARTIAL();
6081 break;
6082 }
6083 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6084 eptr++;
6085 }
6086 break;
6087
6088 case OP_DIGIT:
6089 for (i = min; i < max; i++)
6090 {
6091 if (eptr >= md->end_subject)
6092 {
6093 SCHECK_PARTIAL();
6094 break;
6095 }
6096 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6097 eptr++;
6098 }
6099 break;
6100
6101 case OP_NOT_WHITESPACE:
6102 for (i = min; i < max; i++)
6103 {
6104 if (eptr >= md->end_subject)
6105 {
6106 SCHECK_PARTIAL();
6107 break;
6108 }
6109 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6110 eptr++;
6111 }
6112 break;
6113
6114 case OP_WHITESPACE:
6115 for (i = min; i < max; i++)
6116 {
6117 if (eptr >= md->end_subject)
6118 {
6119 SCHECK_PARTIAL();
6120 break;
6121 }
6122 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6123 eptr++;
6124 }
6125 break;
6126
6127 case OP_NOT_WORDCHAR:
6128 for (i = min; i < max; i++)
6129 {
6130 if (eptr >= md->end_subject)
6131 {
6132 SCHECK_PARTIAL();
6133 break;
6134 }
6135 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6136 eptr++;
6137 }
6138 break;
6139
6140 case OP_WORDCHAR:
6141 for (i = min; i < max; i++)
6142 {
6143 if (eptr >= md->end_subject)
6144 {
6145 SCHECK_PARTIAL();
6146 break;
6147 }
6148 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6149 eptr++;
6150 }
6151 break;
6152
6153 default:
6154 RRETURN(PCRE_ERROR_INTERNAL);
6155 }
6156
6157 /* eptr is now past the end of the maximum run. If possessive, we are
6158 done (no backing up). Otherwise, match at this position; anything other
6159 than no match is immediately returned. For nomatch, back up one
6160 character (byte), unless we are matching \R and the last thing matched
6161 was \r\n, in which case, back up two bytes. */
6162
6163 if (possessive) continue;
6164 while (eptr >= pp)
6165 {
6166 RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6167 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6168 eptr--;
6169 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
6170 eptr[-1] == '\r') eptr--;
6171 }
6172 }
6173
6174 /* Get here if we can't make it match with any permitted repetitions */
6175
6176 RRETURN(MATCH_NOMATCH);
6177 }
6178 /* Control never gets here */
6179
6180 /* There's been some horrible disaster. Arrival here can only mean there is
6181 something seriously wrong in the code above or the OP_xxx definitions. */
6182
6183 default:
6184 DPRINTF(("Unknown opcode %d\n", *ecode));
6185 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6186 }
6187
6188 /* Do not stick any code in here without much thought; it is assumed
6189 that "continue" in the code above comes out to here to repeat the main
6190 loop. */
6191
6192 } /* End of main loop */
6193 /* Control never reaches here */
6194
6195
6196 /* When compiling to use the heap rather than the stack for recursive calls to
6197 match(), the RRETURN() macro jumps here. The number that is saved in
6198 frame->Xwhere indicates which label we actually want to return to. */
6199
6200 #ifdef NO_RECURSE
6201 #define LBL(val) case val: goto L_RM##val;
6202 HEAP_RETURN:
6203 switch (frame->Xwhere)
6204 {
6205 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6206 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6207 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6208 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6209 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6210 LBL(65) LBL(66)
6211 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6212 LBL(21)
6213 #endif
6214 #ifdef SUPPORT_UTF
6215 LBL(16) LBL(18) LBL(20)
6216 LBL(22) LBL(23) LBL(28) LBL(30)
6217 LBL(32) LBL(34) LBL(42) LBL(46)
6218 #ifdef SUPPORT_UCP
6219 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6220 LBL(59) LBL(60) LBL(61) LBL(62)
6221 #endif /* SUPPORT_UCP */
6222 #endif /* SUPPORT_UTF */
6223 default:
6224 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6225
6226 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6227
6228 return PCRE_ERROR_INTERNAL;
6229 }
6230 #undef LBL
6231 #endif /* NO_RECURSE */
6232 }
6233
6234
6235 /***************************************************************************
6236 ****************************************************************************
6237 RECURSION IN THE match() FUNCTION
6238
6239 Undefine all the macros that were defined above to handle this. */
6240
6241 #ifdef NO_RECURSE
6242 #undef eptr
6243 #undef ecode
6244 #undef mstart
6245 #undef offset_top
6246 #undef eptrb
6247 #undef flags
6248
6249 #undef callpat
6250 #undef charptr
6251 #undef data
6252 #undef next
6253 #undef pp
6254 #undef prev
6255 #undef saved_eptr
6256
6257 #undef new_recursive
6258
6259 #undef cur_is_word
6260 #undef condition
6261 #undef prev_is_word
6262
6263 #undef ctype
6264 #undef length
6265 #undef max
6266 #undef min
6267 #undef number
6268 #undef offset
6269 #undef op
6270 #undef save_capture_last
6271 #undef save_offset1
6272 #undef save_offset2
6273 #undef save_offset3
6274 #undef stacksave
6275
6276 #undef newptrb
6277
6278 #endif
6279
6280 /* These two are defined as macros in both cases */
6281
6282 #undef fc
6283 #undef fi
6284
6285 /***************************************************************************
6286 ***************************************************************************/
6287
6288
6289 #ifdef NO_RECURSE
6290 /*************************************************
6291 * Release allocated heap frames *
6292 *************************************************/
6293
6294 /* This function releases all the allocated frames. The base frame is on the
6295 machine stack, and so must not be freed.
6296
6297 Argument: the address of the base frame
6298 Returns: nothing
6299 */
6300
6301 static void
6302 release_match_heapframes (heapframe *frame_base)
6303 {
6304 heapframe *nextframe = frame_base->Xnextframe;
6305 while (nextframe != NULL)
6306 {
6307 heapframe *oldframe = nextframe;
6308 nextframe = nextframe->Xnextframe;
6309 (PUBL(stack_free))(oldframe);
6310 }
6311 }
6312 #endif
6313
6314
6315 /*************************************************
6316 * Execute a Regular Expression *
6317 *************************************************/
6318
6319 /* This function applies a compiled re to a subject string and picks out
6320 portions of the string if it matches. Two elements in the vector are set for
6321 each substring: the offsets to the start and end of the substring.
6322
6323 Arguments:
6324 argument_re points to the compiled expression
6325 extra_data points to extra data or is NULL
6326 subject points to the subject string
6327 length length of subject string (may contain binary zeros)
6328 start_offset where to start in the subject string
6329 options option bits
6330 offsets points to a vector of ints to be filled in with offsets
6331 offsetcount the number of elements in the vector
6332
6333 Returns: > 0 => success; value is the number of elements filled in
6334 = 0 => success, but offsets is not big enough
6335 -1 => failed to match
6336 < -1 => some kind of unexpected problem
6337 */
6338
6339 #ifdef COMPILE_PCRE8
6340 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6341 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6342 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6343 int offsetcount)
6344 #else
6345 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6346 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6347 PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6348 int offsetcount)
6349 #endif
6350 {
6351 int rc, ocount, arg_offset_max;
6352 int newline;
6353 BOOL using_temporary_offsets = FALSE;
6354 BOOL anchored;
6355 BOOL startline;
6356 BOOL firstline;
6357 BOOL utf;
6358 BOOL has_first_char = FALSE;
6359 BOOL has_req_char = FALSE;
6360 pcre_uchar first_char = 0;
6361 pcre_uchar first_char2 = 0;
6362 pcre_uchar req_char = 0;
6363 pcre_uchar req_char2 = 0;
6364 match_data match_block;
6365 match_data *md = &match_block;
6366 const pcre_uint8 *tables;
6367 const pcre_uint8 *start_bits = NULL;
6368 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6369 PCRE_PUCHAR end_subject;
6370 PCRE_PUCHAR start_partial = NULL;
6371 PCRE_PUCHAR req_char_ptr = start_match - 1;
6372
6373 const pcre_study_data *study;
6374 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6375
6376 #ifdef NO_RECURSE
6377 heapframe frame_zero;
6378 frame_zero.Xprevframe = NULL; /* Marks the top level */
6379 frame_zero.Xnextframe = NULL; /* None are allocated yet */
6380 md->match_frames_base = &frame_zero;
6381 #endif
6382
6383 /* Check for the special magic call that measures the size of the stack used
6384 per recursive call of match(). Without the funny casting for sizeof, a Windows
6385 compiler gave this error: "unary minus operator applied to unsigned type,
6386 result still unsigned". Hopefully the cast fixes that. */
6387
6388 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6389 start_offset == -999)
6390 #ifdef NO_RECURSE
6391 return -((int)sizeof(heapframe));
6392 #else
6393 return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6394 #endif
6395
6396 /* Plausibility checks */
6397
6398 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6399 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6400 return PCRE_ERROR_NULL;
6401 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6402 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6403
6404 /* Check that the first field in the block is the magic number. If it is not,
6405 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6406 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6407 means that the pattern is likely compiled with different endianness. */
6408
6409 if (re->magic_number != MAGIC_NUMBER)
6410 return re->magic_number == REVERSED_MAGIC_NUMBER?
6411 PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6412 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6413
6414 /* These two settings are used in the code for checking a UTF-8 string that
6415 follows immediately afterwards. Other values in the md block are used only
6416 during "normal" pcre_exec() processing, not when the JIT support is in use,
6417 so they are set up later. */
6418
6419 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6420 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6421 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6422 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6423
6424 /* Check a UTF-8 string if required. Pass back the character offset and error
6425 code for an invalid string if a results vector is available. */
6426
6427 #ifdef SUPPORT_UTF
6428 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6429 {
6430 int erroroffset;
6431 int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6432 if (errorcode != 0)
6433 {
6434 if (offsetcount >= 2)
6435 {
6436 offsets[0] = erroroffset;
6437 offsets[1] = errorcode;
6438 }
6439 #ifdef COMPILE_PCRE16
6440 return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6441 PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6442 #else
6443 return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6444 PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6445 #endif
6446 }
6447
6448 /* Check that a start_offset points to the start of a UTF character. */
6449 if (start_offset > 0 && start_offset < length &&
6450 NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6451 return PCRE_ERROR_BADUTF8_OFFSET;
6452 }
6453 #endif
6454
6455 /* If the pattern was successfully studied with JIT support, run the JIT
6456 executable instead of the rest of this function. Most options must be set at
6457 compile time for the JIT code to be usable. Fallback to the normal code path if
6458 an unsupported flag is set. */
6459
6460 #ifdef SUPPORT_JIT
6461 if (extra_data != NULL
6462 && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6463 PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6464 && extra_data->executable_jit != NULL
6465 && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6466 PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6467 PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6468 {
6469 rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
6470 start_offset, options, offsets, offsetcount);
6471
6472 /* PCRE_ERROR_NULL means that the selected normal or partial matching
6473 mode is not compiled. In this case we simply fallback to interpreter. */
6474
6475 if (rc != PCRE_ERROR_NULL) return rc;
6476 }
6477 #endif
6478
6479 /* Carry on with non-JIT matching. This information is for finding all the
6480 numbers associated with a given name, for condition testing. */
6481
6482 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6483 md->name_count = re->name_count;
6484 md->name_entry_size = re->name_entry_size;
6485
6486 /* Fish out the optional data from the extra_data structure, first setting
6487 the default values. */
6488
6489 study = NULL;
6490 md->match_limit = MATCH_LIMIT;
6491 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6492 md->callout_data = NULL;
6493
6494 /* The table pointer is always in native byte order. */
6495
6496 tables = re->tables;
6497
6498 if (extra_data != NULL)
6499 {
6500 register unsigned int flags = extra_data->flags;
6501 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6502 study = (const pcre_study_data *)extra_data->study_data;
6503 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6504 md->match_limit = extra_data->match_limit;
6505 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6506 md->match_limit_recursion = extra_data->match_limit_recursion;
6507 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6508 md->callout_data = extra_data->callout_data;
6509 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6510 }
6511
6512 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6513 is a feature that makes it possible to save compiled regex and re-use them
6514 in other programs later. */
6515
6516 if (tables == NULL) tables = PRIV(default_tables);
6517
6518 /* Set up other data */
6519
6520 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6521 startline = (re->flags & PCRE_STARTLINE) != 0;
6522 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6523
6524 /* The code starts after the real_pcre block and the capture name table. */
6525
6526 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6527 re->name_count * re->name_entry_size;
6528
6529 md->start_subject = (PCRE_PUCHAR)subject;
6530 md->start_offset = start_offset;
6531 md->end_subject = md->start_subject + length;
6532 end_subject = md->end_subject;
6533
6534 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6535 md->use_ucp = (re->options & PCRE_UCP) != 0;
6536 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6537 md->ignore_skip_arg = FALSE;
6538
6539 /* Some options are unpacked into BOOL variables in the hope that testing
6540 them will be faster than individual option bits. */
6541
6542 md->notbol = (options & PCRE_NOTBOL) != 0;
6543 md->noteol = (options & PCRE_NOTEOL) != 0;
6544 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6545 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6546
6547 md->hitend = FALSE;
6548 md->mark = md->nomatch_mark = NULL; /* In case never set */
6549
6550 md->recursive = NULL; /* No recursion at top level */
6551 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6552
6553 md->lcc = tables + lcc_offset;
6554 md->fcc = tables + fcc_offset;
6555 md->ctypes = tables + ctypes_offset;
6556
6557 /* Handle different \R options. */
6558
6559 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6560 {
6561 case 0:
6562 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6563 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6564 else
6565 #ifdef BSR_ANYCRLF
6566 md->bsr_anycrlf = TRUE;
6567 #else
6568 md->bsr_anycrlf = FALSE;
6569 #endif
6570 break;
6571
6572 case PCRE_BSR_ANYCRLF:
6573 md->bsr_anycrlf = TRUE;
6574 break;
6575
6576 case PCRE_BSR_UNICODE:
6577 md->bsr_anycrlf = FALSE;
6578 break;
6579
6580 default: return PCRE_ERROR_BADNEWLINE;
6581 }
6582
6583 /* Handle different types of newline. The three bits give eight cases. If
6584 nothing is set at run time, whatever was used at compile time applies. */
6585
6586 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6587 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6588 {
6589 case 0: newline = NEWLINE; break; /* Compile-time default */
6590 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6591 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6592 case PCRE_NEWLINE_CR+
6593 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6594 case PCRE_NEWLINE_ANY: newline = -1; break;
6595 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6596 default: return PCRE_ERROR_BADNEWLINE;
6597 }
6598
6599 if (newline == -2)
6600 {
6601 md->nltype = NLTYPE_ANYCRLF;
6602 }
6603 else if (newline < 0)
6604 {
6605 md->nltype = NLTYPE_ANY;
6606 }
6607 else
6608 {
6609 md->nltype = NLTYPE_FIXED;
6610 if (newline > 255)
6611 {
6612 md->nllen = 2;
6613 md->nl[0] = (newline >> 8) & 255;
6614 md->nl[1] = newline & 255;
6615 }
6616 else
6617 {
6618 md->nllen = 1;
6619 md->nl[0] = newline;
6620 }
6621 }
6622
6623 /* Partial matching was originally supported only for a restricted set of
6624 regexes; from release 8.00 there are no restrictions, but the bits are still
6625 defined (though never set). So there's no harm in leaving this code. */
6626
6627 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6628 return PCRE_ERROR_BADPARTIAL;
6629
6630 /* If the expression has got more back references than the offsets supplied can
6631 hold, we get a temporary chunk of working store to use during the matching.
6632 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6633 of 3. */
6634
6635 ocount = offsetcount - (offsetcount % 3);
6636 arg_offset_max = (2*ocount)/3;
6637
6638 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6639 {
6640 ocount = re->top_backref * 3 + 3;
6641 md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6642 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6643 using_temporary_offsets = TRUE;
6644 DPRINTF(("Got memory to hold back references\n"));
6645 }
6646 else md->offset_vector = offsets;
6647
6648 md->offset_end = ocount;
6649 md->offset_max = (2*ocount)/3;
6650 md->offset_overflow = FALSE;
6651 md->capture_last = -1;
6652
6653 /* Reset the working variable associated with each extraction. These should
6654 never be used unless previously set, but they get saved and restored, and so we
6655 initialize them to avoid reading uninitialized locations. Also, unset the
6656 offsets for the matched string. This is really just for tidiness with callouts,
6657 in case they inspect these fields. */
6658
6659 if (md->offset_vector != NULL)
6660 {
6661 register int *iptr = md->offset_vector + ocount;
6662 register int *iend = iptr - re->top_bracket;
6663 if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6664 while (--iptr >= iend) *iptr = -1;
6665 md->offset_vector[0] = md->offset_vector[1] = -1;
6666 }
6667
6668 /* Set up the first character to match, if available. The first_char value is
6669 never set for an anchored regular expression, but the anchoring may be forced
6670 at run time, so we have to test for anchoring. The first char may be unset for
6671 an unanchored pattern, of course. If there's no first char and the pattern was
6672 studied, there may be a bitmap of possible first characters. */
6673
6674 if (!anchored)
6675 {
6676 if ((re->flags & PCRE_FIRSTSET) != 0)
6677 {
6678 has_first_char = TRUE;
6679 first_char = first_char2 = (pcre_uchar)(re->first_char);
6680 if ((re->flags & PCRE_FCH_CASELESS) != 0)
6681 {
6682 first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6683 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6684 if (utf && first_char > 127)
6685 first_char2 = UCD_OTHERCASE(first_char);
6686 #endif
6687 }
6688 }
6689 else
6690 if (!startline && study != NULL &&
6691 (study->flags & PCRE_STUDY_MAPPED) != 0)
6692 start_bits = study->start_bits;
6693 }
6694
6695 /* For anchored or unanchored matches, there may be a "last known required
6696 character" set. */
6697
6698 if ((re->flags & PCRE_REQCHSET) != 0)
6699 {
6700 has_req_char = TRUE;
6701 req_char = req_char2 = (pcre_uchar)(re->req_char);
6702 if ((re->flags & PCRE_RCH_CASELESS) != 0)
6703 {
6704 req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6705 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6706 if (utf && req_char > 127)
6707 req_char2 = UCD_OTHERCASE(req_char);
6708 #endif
6709 }
6710 }
6711
6712
6713 /* ==========================================================================*/
6714
6715 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6716 the loop runs just once. */
6717
6718 for(;;)
6719 {
6720 PCRE_PUCHAR save_end_subject = end_subject;
6721 PCRE_PUCHAR new_start_match;
6722
6723 /* If firstline is TRUE, the start of the match is constrained to the first
6724 line of a multiline string. That is, the match must be before or at the first
6725 newline. Implement this by temporarily adjusting end_subject so that we stop
6726 scanning at a newline. If the match fails at the newline, later code breaks
6727 this loop. */
6728
6729 if (firstline)
6730 {
6731 PCRE_PUCHAR t = start_match;
6732 #ifdef SUPPORT_UTF
6733 if (utf)
6734 {
6735 while (t < md->end_subject && !IS_NEWLINE(t))
6736 {
6737 t++;
6738 ACROSSCHAR(t < end_subject, *t, t++);
6739 }
6740 }
6741 else
6742 #endif
6743 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6744 end_subject = t;
6745 }
6746