/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 933 - (show annotations)
Sat Feb 25 12:18:23 2012 UTC (7 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 218873 byte(s)
Error occurred while calculating annotation data.
Applied Graycode's patch to use heap stack frames more efficiently.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Values for setting in md->match_function_type to indicate two special types
61 of call to match(). We do it this way to save on using another stack variable,
62 as stack usage is to be discouraged. */
63
64 #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
65 #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
66
67 /* Non-error returns from the match() function. Error returns are externally
68 defined PCRE_ERROR_xxx codes, which are all negative. */
69
70 #define MATCH_MATCH 1
71 #define MATCH_NOMATCH 0
72
73 /* Special internal returns from the match() function. Make them sufficiently
74 negative to avoid the external error codes. */
75
76 #define MATCH_ACCEPT (-999)
77 #define MATCH_COMMIT (-998)
78 #define MATCH_KETRPOS (-997)
79 #define MATCH_ONCE (-996)
80 #define MATCH_PRUNE (-995)
81 #define MATCH_SKIP (-994)
82 #define MATCH_SKIP_ARG (-993)
83 #define MATCH_THEN (-992)
84
85 /* Maximum number of ints of offset to save on the stack for recursive calls.
86 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
87 because the offset vector is always a multiple of 3 long. */
88
89 #define REC_STACK_SAVE_MAX 30
90
91 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
92
/* Lookup tables giving the minimum and maximum repeat counts for the six
"common repeat" forms, indexed 0..5. A maximum of 0 stands for "no upper
limit". NOTE(review): the index is presumably an opcode offset computed in
match(); confirm against the code that reads these tables. */

/*                  index:     0  1  2  3  4  5 */
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
95
96
97
98 #ifdef PCRE_DEBUG
99 /*************************************************
100 * Debugging function to print chars *
101 *************************************************/
102
103 /* Print a sequence of chars in printable format, stopping at the end of the
104 subject if requested.
105
106 Arguments:
107 p points to characters
108 length number to print
109 is_subject TRUE if printing from within md->start_subject
110 md pointer to matching data block, if is_subject is TRUE
111
112 Returns: nothing
113 */
114
115 static void
116 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117 {
118 unsigned int c;
119 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
120 while (length-- > 0)
121 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
122 }
123 #endif
124
125
126
127 /*************************************************
128 * Match a back-reference *
129 *************************************************/
130
131 /* Normally, if a back reference hasn't been set, the length that is passed is
132 negative, so the match always fails. However, in JavaScript compatibility mode,
133 the length passed is zero. Note that in caseless UTF-8 mode, the number of
134 subject bytes matched may be different to the number of reference bytes.
135
136 Arguments:
137 offset index into the offset vector
138 eptr pointer into the subject
139 length length of reference to be matched (number of bytes)
140 md points to match data block
141 caseless TRUE if caseless
142
143 Returns: >= 0 the number of subject bytes matched
144 -1 no match
145 -2 partial match; always given if at end subject
146 */
147
148 static int
149 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
150 BOOL caseless)
151 {
152 PCRE_PUCHAR eptr_start = eptr;
153 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
154
155 #ifdef PCRE_DEBUG
156 if (eptr >= md->end_subject)
157 printf("matching subject <null>");
158 else
159 {
160 printf("matching subject ");
161 pchars(eptr, length, TRUE, md);
162 }
163 printf(" against backref ");
164 pchars(p, length, FALSE, md);
165 printf("\n");
166 #endif
167
168 /* Always fail if reference not set (and not JavaScript compatible - in that
169 case the length is passed as zero). */
170
171 if (length < 0) return -1;
172
173 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
174 properly if Unicode properties are supported. Otherwise, we can check only
175 ASCII characters. */
176
177 if (caseless)
178 {
179 #ifdef SUPPORT_UTF
180 #ifdef SUPPORT_UCP
181 if (md->utf)
182 {
183 /* Match characters up to the end of the reference. NOTE: the number of
184 bytes matched may differ, because there are some characters whose upper and
185 lower case versions code as different numbers of bytes. For example, U+023A
186 (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
187 a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
188 the latter. It is important, therefore, to check the length along the
189 reference, not along the subject (earlier code did this wrong). */
190
191 PCRE_PUCHAR endptr = p + length;
192 while (p < endptr)
193 {
194 int c, d;
195 if (eptr >= md->end_subject) return -2; /* Partial match */
196 GETCHARINC(c, eptr);
197 GETCHARINC(d, p);
198 if (c != d && c != UCD_OTHERCASE(d)) return -1;
199 }
200 }
201 else
202 #endif
203 #endif
204
205 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206 is no UCP support. */
207 {
208 while (length-- > 0)
209 {
210 if (eptr >= md->end_subject) return -2; /* Partial match */
211 if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
212 p++;
213 eptr++;
214 }
215 }
216 }
217
218 /* In the caseful case, we can just compare the bytes, whether or not we
219 are in UTF-8 mode. */
220
221 else
222 {
223 while (length-- > 0)
224 {
225 if (eptr >= md->end_subject) return -2; /* Partial match */
226 if (*p++ != *eptr++) return -1;
227 }
228 }
229
230 return (int)(eptr - eptr_start);
231 }
232
233
234
235 /***************************************************************************
236 ****************************************************************************
237 RECURSION IN THE match() FUNCTION
238
239 The match() function is highly recursive, though not every recursive call
240 increases the recursive depth. Nevertheless, some regular expressions can cause
241 it to recurse to a great depth. I was writing for Unix, so I just let it call
242 itself recursively. This uses the stack for saving everything that has to be
243 saved for a recursive call. On Unix, the stack can be large, and this works
244 fine.
245
246 It turns out that on some non-Unix-like systems there are problems with
247 programs that use a lot of stack. (This despite the fact that every last chip
248 has oodles of memory these days, and techniques for extending the stack have
249 been known for decades.) So....
250
251 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
252 calls by keeping local variables that need to be preserved in blocks of memory
253 obtained from malloc() instead of on the stack. Macros are used to
254 achieve this so that the actual code doesn't look very different to what it
255 always used to.
256
257 The original heap-recursive code used longjmp(). However, it seems that this
258 can be very slow on some operating systems. Following a suggestion from Stan
259 Switzer, the use of longjmp() has been abolished, at the cost of having to
260 provide a unique number for each call to RMATCH. There is no way of generating
261 a sequence of numbers at compile time in C. I have given them names, to make
262 them stand out more clearly.
263
264 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
265 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
266 tests. Furthermore, not using longjmp() means that local dynamic variables
267 don't have indeterminate values; this has meant that the frame size can be
268 reduced because the result can be "passed back" by straight setting of the
269 variable instead of being passed in the frame.
270 ****************************************************************************
271 ***************************************************************************/
272
273 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
274 below must be updated in sync. */
275
/* One identifier per RMATCH call site, numbered consecutively from 1. */

enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10, RM11,
  RM12,    RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM21, RM22,
  RM23,    RM24, RM25, RM26, RM27, RM28, RM29, RM30, RM31, RM32, RM33,
  RM34,    RM35, RM36, RM37, RM38, RM39, RM40, RM41, RM42, RM43, RM44,
  RM45,    RM46, RM47, RM48, RM49, RM50, RM51, RM52, RM53, RM54, RM55,
  RM56,    RM57, RM58, RM59, RM60, RM61, RM62, RM63, RM64, RM65, RM66
};
283
284 /* These versions of the macros use the stack, as normal. There are debugging
285 versions and production versions. Note that the "rw" argument of RMATCH isn't
286 actually used in this definition. */
287
288 #ifndef NO_RECURSE
289 #define REGISTER register
290
#ifndef PCRE_DEBUG

/* Production versions: a true recursive call of match() one level deeper, and
a plain return. The "rw" argument is accepted but ignored. */

#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
#define RRETURN(ra) return ra

#else  /* PCRE_DEBUG */

/* Debugging versions: the same call and return, with a trace line printed
around each one. */

#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }

#endif  /* PCRE_DEBUG */
308
309 #else
310
311
312 /* These versions of the macros manage a private stack on the heap. Note that
313 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
314 argument of match(), which never changes. */
315
316 #define REGISTER
317
/* RMATCH (heap version): simulate a recursive call of match().

The frame for the "callee" is taken from frame->Xnextframe if one is already
chained there; otherwise a new one is obtained with PUBL(stack_malloc) and
linked in, and failure to get memory returns PCRE_ERROR_NOMEMORY via RRETURN.
The current frame records "rw" in Xwhere, the new frame is loaded with the
argument values (ra, rb, mstart, rc, re) and a depth one greater than the
current frame's, and it is linked back to the current frame through
Xprevframe. Control then jumps to HEAP_RECURSE with "frame" pointing at the
new frame. The label L_<rw> marks the point just after the "call"; it exists
so that control can be transferred back here (see HEAP_RETURN in match()).

The argument expressions are evaluated while "frame" still points at the
current frame, so "frame = newframe" must remain the last assignment. */

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xeptrb = re;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN (heap version): simulate a return from match().

Steps back to the previous frame. If there is one, the result is handed over
in rrc and control jumps to HEAP_RETURN; if there is none, this was the
outermost level and the function really returns. The frame being left is not
freed here; it stays reachable through the previous frame's Xnextframe, which
is where RMATCH looks for a frame to reuse. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
354
355
356 /* Structure for remembering the local variables in a private frame */
357
358 typedef struct heapframe {
359 struct heapframe *Xprevframe;
360 struct heapframe *Xnextframe;
361
362 /* Function arguments that may change */
363
364 PCRE_PUCHAR Xeptr;
365 const pcre_uchar *Xecode;
366 PCRE_PUCHAR Xmstart;
367 int Xoffset_top;
368 eptrblock *Xeptrb;
369 unsigned int Xrdepth;
370
371 /* Function local variables */
372
373 PCRE_PUCHAR Xcallpat;
374 #ifdef SUPPORT_UTF
375 PCRE_PUCHAR Xcharptr;
376 #endif
377 PCRE_PUCHAR Xdata;
378 PCRE_PUCHAR Xnext;
379 PCRE_PUCHAR Xpp;
380 PCRE_PUCHAR Xprev;
381 PCRE_PUCHAR Xsaved_eptr;
382
383 recursion_info Xnew_recursive;
384
385 BOOL Xcur_is_word;
386 BOOL Xcondition;
387 BOOL Xprev_is_word;
388
389 #ifdef SUPPORT_UCP
390 int Xprop_type;
391 int Xprop_value;
392 int Xprop_fail_result;
393 int Xoclength;
394 pcre_uchar Xocchars[6];
395 #endif
396
397 int Xcodelink;
398 int Xctype;
399 unsigned int Xfc;
400 int Xfi;
401 int Xlength;
402 int Xmax;
403 int Xmin;
404 int Xnumber;
405 int Xoffset;
406 int Xop;
407 int Xsave_capture_last;
408 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
409 int Xstacksave[REC_STACK_SAVE_MAX];
410
411 eptrblock Xnewptrb;
412
413 /* Where to jump back to */
414
415 int Xwhere;
416
417 } heapframe;
418
419 #endif
420
421
422 /***************************************************************************
423 ***************************************************************************/
424
425
426
427 /*************************************************
428 * Match from current position *
429 *************************************************/
430
431 /* This function is called recursively in many circumstances. Whenever it
432 returns a negative (error) response, the outer incarnation must also return the
433 same response. */
434
435 /* These macros pack up tests that are used for partial matching, and which
436 appear several times in the code. We set the "hit end" flag if the pointer is
437 at the end of the subject and also past the start of the subject (i.e.
438 something has been matched). For hard partial matching, we then return
439 immediately. The second one is used when we already know we are past the end of
440 the subject. */
441
/* CHECK_PARTIAL: when partial matching is enabled (md->partial != 0) and eptr
has reached or passed md->end_subject and is beyond md->start_used_ptr, set
md->hitend. If md->partial is greater than 1, return PCRE_ERROR_PARTIAL at
once via RRETURN. The expansion is a bare "if" statement, so it relies on the
names eptr and md being in scope at the point of use. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

/* SCHECK_PARTIAL: the same as CHECK_PARTIAL but without the test against
md->end_subject, for call sites that have already established that the end of
the subject has been reached. */

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
456
457
458 /* Performance note: It might be tempting to extract commonly used fields from
459 the md structure (e.g. utf, end_subject) into individual variables to improve
460 performance. Tests using gcc on a SPARC disproved this; in the first case, it
461 made performance worse.
462
463 Arguments:
464 eptr pointer to current character in subject
465 ecode pointer to current position in compiled code
466 mstart pointer to the current match start position (can be modified
467 by encountering \K)
468 offset_top current top pointer
469 md pointer to "static" info for the match
470 eptrb pointer to chain of blocks containing eptr at start of
471 brackets - for testing for empty matches
472 rdepth the recursion depth
473
474 Returns: MATCH_MATCH if matched ) these values are >= 0
475 MATCH_NOMATCH if failed to match )
476 a negative MATCH_xxx value for PRUNE, SKIP, etc
477 a negative PCRE_ERROR_xxx value if aborted by an error condition
478 (e.g. stopped by repeated call or recursion limit)
479 */
480
481 static int
482 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
483 PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
484 unsigned int rdepth)
485 {
486 /* These variables do not need to be preserved over recursion in this function,
487 so they can be ordinary variables in all cases. Mark some of them with
488 "register" because they are used a lot in loops. */
489
490 register int rrc; /* Returns from recursive calls */
491 register int i; /* Used for loops not involving calls to RMATCH() */
492 register unsigned int c; /* Character values not kept over RMATCH() calls */
493 register BOOL utf; /* Local copy of UTF flag for speed */
494
495 BOOL minimize, possessive; /* Quantifier options */
496 BOOL caseless;
497 int condcode;
498
499 /* When recursion is not being used, all "local" variables that have to be
500 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
501 frame on the stack here; subsequent instantiations are obtained from the heap
502 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
503 the top-level on the stack rather than malloc-ing them all gives a performance
504 boost in many cases where there is not much "recursion". */
505
506 #ifdef NO_RECURSE
507 heapframe *frame = (heapframe *)md->match_frames_base;
508
509 /* Copy in the original argument variables */
510
511 frame->Xeptr = eptr;
512 frame->Xecode = ecode;
513 frame->Xmstart = mstart;
514 frame->Xoffset_top = offset_top;
515 frame->Xeptrb = eptrb;
516 frame->Xrdepth = rdepth;
517
518 /* This is where control jumps back to to effect "recursion" */
519
520 HEAP_RECURSE:
521
522 /* Macros make the argument variables come from the current frame */
523
524 #define eptr frame->Xeptr
525 #define ecode frame->Xecode
526 #define mstart frame->Xmstart
527 #define offset_top frame->Xoffset_top
528 #define eptrb frame->Xeptrb
529 #define rdepth frame->Xrdepth
530
531 /* Ditto for the local variables */
532
533 #ifdef SUPPORT_UTF
534 #define charptr frame->Xcharptr
535 #endif
536 #define callpat frame->Xcallpat
537 #define codelink frame->Xcodelink
538 #define data frame->Xdata
539 #define next frame->Xnext
540 #define pp frame->Xpp
541 #define prev frame->Xprev
542 #define saved_eptr frame->Xsaved_eptr
543
544 #define new_recursive frame->Xnew_recursive
545
546 #define cur_is_word frame->Xcur_is_word
547 #define condition frame->Xcondition
548 #define prev_is_word frame->Xprev_is_word
549
550 #ifdef SUPPORT_UCP
551 #define prop_type frame->Xprop_type
552 #define prop_value frame->Xprop_value
553 #define prop_fail_result frame->Xprop_fail_result
554 #define oclength frame->Xoclength
555 #define occhars frame->Xocchars
556 #endif
557
558 #define ctype frame->Xctype
559 #define fc frame->Xfc
560 #define fi frame->Xfi
561 #define length frame->Xlength
562 #define max frame->Xmax
563 #define min frame->Xmin
564 #define number frame->Xnumber
565 #define offset frame->Xoffset
566 #define op frame->Xop
567 #define save_capture_last frame->Xsave_capture_last
568 #define save_offset1 frame->Xsave_offset1
569 #define save_offset2 frame->Xsave_offset2
570 #define save_offset3 frame->Xsave_offset3
571 #define stacksave frame->Xstacksave
572
573 #define newptrb frame->Xnewptrb
574
575 /* When recursion is being used, local variables are allocated on the stack and
576 get preserved during recursion in the normal way. In this environment, fi and
577 i, and fc and c, can be the same variables. */
578
579 #else /* NO_RECURSE not defined */
580 #define fi i
581 #define fc c
582
583 /* Many of the following variables are used only in small blocks of the code.
584 My normal style of coding would have declared them within each of those blocks.
585 However, in order to accommodate the version of this code that uses an external
586 "stack" implemented on the heap, it is easier to declare them all here, so the
587 declarations can be cut out in a block. The only declarations within blocks
588 below are for variables that do not have to be preserved over a recursive call
589 to RMATCH(). */
590
591 #ifdef SUPPORT_UTF
592 const pcre_uchar *charptr;
593 #endif
594 const pcre_uchar *callpat;
595 const pcre_uchar *data;
596 const pcre_uchar *next;
597 PCRE_PUCHAR pp;
598 const pcre_uchar *prev;
599 PCRE_PUCHAR saved_eptr;
600
601 recursion_info new_recursive;
602
603 BOOL cur_is_word;
604 BOOL condition;
605 BOOL prev_is_word;
606
607 #ifdef SUPPORT_UCP
608 int prop_type;
609 int prop_value;
610 int prop_fail_result;
611 int oclength;
612 pcre_uchar occhars[6];
613 #endif
614
615 int codelink;
616 int ctype;
617 int length;
618 int max;
619 int min;
620 int number;
621 int offset;
622 int op;
623 int save_capture_last;
624 int save_offset1, save_offset2, save_offset3;
625 int stacksave[REC_STACK_SAVE_MAX];
626
627 eptrblock newptrb;
628
629 /* There is a special fudge for calling match() in a way that causes it to
630 measure the size of its basic stack frame when the stack is being used for
631 recursion. The second argument (ecode) being NULL triggers this behaviour. It
632 cannot normally ever be NULL. The return is the negated value of the frame
633 size. */
634
635 if (ecode == NULL)
636 {
637 if (rdepth == 0)
638 return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
639 else
640 {
641 int len = (char *)&rdepth - (char *)eptr;
642 return (len > 0)? -len : len;
643 }
644 }
645 #endif /* NO_RECURSE */
646
647 /* To save space on the stack and in the heap frame, I have doubled up on some
648 of the local variables that are used only in localised parts of the code, but
649 still need to be preserved over recursive calls of match(). These macros define
650 the alternative names that are used. */
651
652 #define allow_zero cur_is_word
653 #define cbegroup condition
654 #define code_offset codelink
655 #define condassert condition
656 #define matched_once prev_is_word
657 #define foc number
658 #define save_mark data
659
660 /* These statements are here to stop the compiler complaining about uninitialized
661 variables. */
662
663 #ifdef SUPPORT_UCP
664 prop_value = 0;
665 prop_fail_result = 0;
666 #endif
667
668
669 /* This label is used for tail recursion, which is used in a few cases even
670 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
671 used. Thanks to Ian Taylor for noticing this possibility and sending the
672 original patch. */
673
674 TAIL_RECURSE:
675
676 /* OK, now we can get on with the real code of the function. Recursive calls
677 are specified by the macro RMATCH and RRETURN is used to return. When
678 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
679 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
680 defined). However, RMATCH isn't like a function call because it's quite a
681 complicated macro. It has to be used in one particular way. This shouldn't,
682 however, impact performance when true recursion is being used. */
683
684 #ifdef SUPPORT_UTF
685 utf = md->utf; /* Local copy of the flag */
686 #else
687 utf = FALSE;
688 #endif
689
690 /* First check that we haven't called match() too many times, or that we
691 haven't exceeded the recursive call limit. */
692
693 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
694 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
695
696 /* At the start of a group with an unlimited repeat that may match an empty
697 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
698 done this way to save having to use another function argument, which would take
699 up space on the stack. See also MATCH_CONDASSERT below.
700
701 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
702 such remembered pointers, to be checked when we hit the closing ket, in order
703 to break infinite loops that match no characters. When match() is called in
704 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
705 NOT be used with tail recursion, because the memory block that is used is on
706 the stack, so a new one may be required for each match(). */
707
708 if (md->match_function_type == MATCH_CBEGROUP)
709 {
710 newptrb.epb_saved_eptr = eptr;
711 newptrb.epb_prev = eptrb;
712 eptrb = &newptrb;
713 md->match_function_type = 0;
714 }
715
716 /* Now start processing the opcodes. */
717
718 for (;;)
719 {
720 minimize = possessive = FALSE;
721 op = *ecode;
722
723 switch(op)
724 {
725 case OP_MARK:
726 md->nomatch_mark = ecode + 2;
727 md->mark = NULL; /* In case previously set by assertion */
728 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
729 eptrb, RM55);
730 if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
731 md->mark == NULL) md->mark = ecode + 2;
732
733 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
734 argument, and we must check whether that argument matches this MARK's
735 argument. It is passed back in md->start_match_ptr (an overloading of that
736 variable). If it does match, we reset that variable to the current subject
737 position and return MATCH_SKIP. Otherwise, pass back the return code
738 unaltered. */
739
740 else if (rrc == MATCH_SKIP_ARG &&
741 STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
742 {
743 md->start_match_ptr = eptr;
744 RRETURN(MATCH_SKIP);
745 }
746 RRETURN(rrc);
747
748 case OP_FAIL:
749 RRETURN(MATCH_NOMATCH);
750
751 /* COMMIT overrides PRUNE, SKIP, and THEN */
752
753 case OP_COMMIT:
754 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
755 eptrb, RM52);
756 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
757 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
758 rrc != MATCH_THEN)
759 RRETURN(rrc);
760 RRETURN(MATCH_COMMIT);
761
762 /* PRUNE overrides THEN */
763
764 case OP_PRUNE:
765 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
766 eptrb, RM51);
767 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
768 RRETURN(MATCH_PRUNE);
769
770 case OP_PRUNE_ARG:
771 md->nomatch_mark = ecode + 2;
772 md->mark = NULL; /* In case previously set by assertion */
773 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
774 eptrb, RM56);
775 if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
776 md->mark == NULL) md->mark = ecode + 2;
777 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
778 RRETURN(MATCH_PRUNE);
779
780 /* SKIP overrides PRUNE and THEN */
781
782 case OP_SKIP:
783 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
784 eptrb, RM53);
785 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
786 RRETURN(rrc);
787 md->start_match_ptr = eptr; /* Pass back current position */
788 RRETURN(MATCH_SKIP);
789
790 /* Note that, for Perl compatibility, SKIP with an argument does NOT set
791 nomatch_mark. There is a flag that disables this opcode when re-matching a
792 pattern that ended with a SKIP for which there was not a matching MARK. */
793
794 case OP_SKIP_ARG:
795 if (md->ignore_skip_arg)
796 {
797 ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
798 break;
799 }
800 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
801 eptrb, RM57);
802 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
803 RRETURN(rrc);
804
805 /* Pass back the current skip name by overloading md->start_match_ptr and
806 returning the special MATCH_SKIP_ARG return code. This will either be
807 caught by a matching MARK, or get to the top, where it causes a rematch
808 with the md->ignore_skip_arg flag set. */
809
810 md->start_match_ptr = ecode + 2;
811 RRETURN(MATCH_SKIP_ARG);
812
813 /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
814 the branch in which it occurs can be determined. Overload the start of
815 match pointer to do this. */
816
817 case OP_THEN:
818 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
819 eptrb, RM54);
820 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
821 md->start_match_ptr = ecode;
822 RRETURN(MATCH_THEN);
823
824 case OP_THEN_ARG:
825 md->nomatch_mark = ecode + 2;
826 md->mark = NULL; /* In case previously set by assertion */
827 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
828 md, eptrb, RM58);
829 if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
830 md->mark == NULL) md->mark = ecode + 2;
831 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
832 md->start_match_ptr = ecode;
833 RRETURN(MATCH_THEN);
834
835 /* Handle an atomic group that does not contain any capturing parentheses.
836 This can be handled like an assertion. Prior to 8.13, all atomic groups
837 were handled this way. In 8.13, the code was changed as below for ONCE, so
838 that backups pass through the group and thereby reset captured values.
839 However, this uses a lot more stack, so in 8.20, atomic groups that do not
840 contain any captures generate OP_ONCE_NC, which can be handled in the old,
841 less stack intensive way.
842
843 Check the alternative branches in turn - the matching won't pass the KET
844 for this kind of subpattern. If any one branch matches, we carry on as at
845 the end of a normal bracket, leaving the subject pointer, but resetting
846 the start-of-match value in case it was changed by \K. */
847
848 case OP_ONCE_NC:
849 prev = ecode;
850 saved_eptr = eptr;
851 save_mark = md->mark;
852 do
853 {
854 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
855 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
856 {
857 mstart = md->start_match_ptr;
858 break;
859 }
860 if (rrc == MATCH_THEN)
861 {
862 next = ecode + GET(ecode,1);
863 if (md->start_match_ptr < next &&
864 (*ecode == OP_ALT || *next == OP_ALT))
865 rrc = MATCH_NOMATCH;
866 }
867
868 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
869 ecode += GET(ecode,1);
870 md->mark = save_mark;
871 }
872 while (*ecode == OP_ALT);
873
874 /* If hit the end of the group (which could be repeated), fail */
875
876 if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
877
878 /* Continue as from after the group, updating the offsets high water
879 mark, since extracts may have been taken. */
880
881 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
882
883 offset_top = md->end_offset_top;
884 eptr = md->end_match_ptr;
885
886 /* For a non-repeating ket, just continue at this level. This also
887 happens for a repeating ket if no characters were matched in the group.
888 This is the forcible breaking of infinite loops as implemented in Perl
889 5.005. */
890
891 if (*ecode == OP_KET || eptr == saved_eptr)
892 {
893 ecode += 1+LINK_SIZE;
894 break;
895 }
896
897 /* The repeating kets try the rest of the pattern or restart from the
898 preceding bracket, in the appropriate order. The second "call" of match()
899 uses tail recursion, to avoid using another stack frame. */
900
901 if (*ecode == OP_KETRMIN)
902 {
903 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
904 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
905 ecode = prev;
906 goto TAIL_RECURSE;
907 }
908 else /* OP_KETRMAX */
909 {
910 md->match_function_type = MATCH_CBEGROUP;
911 RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
912 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
913 ecode += 1 + LINK_SIZE;
914 goto TAIL_RECURSE;
915 }
916 /* Control never gets here */
917
918 /* Handle a capturing bracket, other than those that are possessive with an
919 unlimited repeat. If there is space in the offset vector, save the current
920 subject position in the working slot at the top of the vector. We mustn't
921 change the current values of the data slot, because they may be set from a
922 previous iteration of this group, and be referred to by a reference inside
923 the group. A failure to match might occur after the group has succeeded,
924 if something later on doesn't match. For this reason, we need to restore
925 the working value and also the values of the final offsets, in case they
926 were set by a previous iteration of the same bracket.
927
928 If there isn't enough space in the offset vector, treat this as if it were
929 a non-capturing bracket. Don't worry about setting the flag for the error
930 case here; that is handled in the code for KET. */
931
932 case OP_CBRA:
933 case OP_SCBRA:
934 number = GET2(ecode, 1+LINK_SIZE);
935 offset = number << 1;
936
937 #ifdef PCRE_DEBUG
938 printf("start bracket %d\n", number);
939 printf("subject=");
940 pchars(eptr, 16, TRUE, md);
941 printf("\n");
942 #endif
943
944 if (offset < md->offset_max)
945 {
946 save_offset1 = md->offset_vector[offset];
947 save_offset2 = md->offset_vector[offset+1];
948 save_offset3 = md->offset_vector[md->offset_end - number];
949 save_capture_last = md->capture_last;
950 save_mark = md->mark;
951
952 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
953 md->offset_vector[md->offset_end - number] =
954 (int)(eptr - md->start_subject);
955
956 for (;;)
957 {
958 if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
959 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
960 eptrb, RM1);
961 if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
962
963 /* If we backed up to a THEN, check whether it is within the current
964 branch by comparing the address of the THEN that is passed back with
965 the end of the branch. If it is within the current branch, and the
966 branch is one of two or more alternatives (it either starts or ends
967 with OP_ALT), we have reached the limit of THEN's action, so convert
968 the return code to NOMATCH, which will cause normal backtracking to
969 happen from now on. Otherwise, THEN is passed back to an outer
970 alternative. This implements Perl's treatment of parenthesized groups,
971 where a group not containing | does not affect the current alternative,
972 that is, (X) is NOT the same as (X|(*F)). */
973
974 if (rrc == MATCH_THEN)
975 {
976 next = ecode + GET(ecode,1);
977 if (md->start_match_ptr < next &&
978 (*ecode == OP_ALT || *next == OP_ALT))
979 rrc = MATCH_NOMATCH;
980 }
981
982 /* Anything other than NOMATCH is passed back. */
983
984 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
985 md->capture_last = save_capture_last;
986 ecode += GET(ecode, 1);
987 md->mark = save_mark;
988 if (*ecode != OP_ALT) break;
989 }
990
991 DPRINTF(("bracket %d failed\n", number));
992 md->offset_vector[offset] = save_offset1;
993 md->offset_vector[offset+1] = save_offset2;
994 md->offset_vector[md->offset_end - number] = save_offset3;
995
996 /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
997
998 RRETURN(rrc);
999 }
1000
1001 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1002 as a non-capturing bracket. */
1003
1004 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1005 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1006
1007 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1008
1009 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1010 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1011
1012 /* Non-capturing or atomic group, except for possessive with unlimited
1013 repeat and ONCE group with no captures. Loop for all the alternatives.
1014
1015 When we get to the final alternative within the brackets, we used to return
1016 the result of a recursive call to match() whatever happened so it was
1017 possible to reduce stack usage by turning this into a tail recursion,
1018 except in the case of a possibly empty group. However, now that there is
1019 the possibility of (*THEN) occurring in the final alternative, this
1020 optimization is no longer always possible.
1021
1022 We can optimize if we know there are no (*THEN)s in the pattern; at present
1023 this is the best that can be done.
1024
1025 MATCH_ONCE is returned when the end of an atomic group is successfully
1026 reached, but subsequent matching fails. It passes back up the tree (causing
1027 captured values to be reset) until the original atomic group level is
1028 reached. This is tested by comparing md->once_target with the start of the
1029 group. At this point, the return is converted into MATCH_NOMATCH so that
1030 previous backup points can be taken. */
1031
1032 case OP_ONCE:
1033 case OP_BRA:
1034 case OP_SBRA:
1035 DPRINTF(("start non-capturing bracket\n"));
1036
1037 for (;;)
1038 {
1039 if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
1040
1041 /* If this is not a possibly empty group, and there are no (*THEN)s in
1042 the pattern, and this is the final alternative, optimize as described
1043 above. */
1044
1045 else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1046 {
1047 ecode += PRIV(OP_lengths)[*ecode];
1048 goto TAIL_RECURSE;
1049 }
1050
1051 /* In all other cases, we have to make another call to match(). */
1052
1053 save_mark = md->mark;
1054 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1055 RM2);
1056
1057 /* See comment in the code for capturing groups above about handling
1058 THEN. */
1059
1060 if (rrc == MATCH_THEN)
1061 {
1062 next = ecode + GET(ecode,1);
1063 if (md->start_match_ptr < next &&
1064 (*ecode == OP_ALT || *next == OP_ALT))
1065 rrc = MATCH_NOMATCH;
1066 }
1067
1068 if (rrc != MATCH_NOMATCH)
1069 {
1070 if (rrc == MATCH_ONCE)
1071 {
1072 const pcre_uchar *scode = ecode;
1073 if (*scode != OP_ONCE) /* If not at start, find it */
1074 {
1075 while (*scode == OP_ALT) scode += GET(scode, 1);
1076 scode -= GET(scode, 1);
1077 }
1078 if (md->once_target == scode) rrc = MATCH_NOMATCH;
1079 }
1080 RRETURN(rrc);
1081 }
1082 ecode += GET(ecode, 1);
1083 md->mark = save_mark;
1084 if (*ecode != OP_ALT) break;
1085 }
1086
1087 RRETURN(MATCH_NOMATCH);
1088
1089 /* Handle possessive capturing brackets with an unlimited repeat. We come
1090 here from BRAPOSZERO with allow_zero set TRUE. The offset_vector values are
1091 handled similarly to the normal case above. However, the matching is
1092 different. The end of these brackets will always be OP_KETRPOS, which
1093 returns MATCH_KETRPOS without going further in the pattern. By this means
1094 we can handle the group by iteration rather than recursion, thereby
1095 reducing the amount of stack needed. */
1096
1097 case OP_CBRAPOS:
1098 case OP_SCBRAPOS:
1099 allow_zero = FALSE;
1100
1101 POSSESSIVE_CAPTURE:
1102 number = GET2(ecode, 1+LINK_SIZE);
1103 offset = number << 1;
1104
1105 #ifdef PCRE_DEBUG
1106 printf("start possessive bracket %d\n", number);
1107 printf("subject=");
1108 pchars(eptr, 16, TRUE, md);
1109 printf("\n");
1110 #endif
1111
1112 if (offset < md->offset_max)
1113 {
1114 matched_once = FALSE;
1115 code_offset = (int)(ecode - md->start_code);
1116
1117 save_offset1 = md->offset_vector[offset];
1118 save_offset2 = md->offset_vector[offset+1];
1119 save_offset3 = md->offset_vector[md->offset_end - number];
1120 save_capture_last = md->capture_last;
1121
1122 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1123
1124 /* Each time round the loop, save the current subject position for use
1125 when the group matches. For MATCH_MATCH, the group has matched, so we
1126 restart it with a new subject starting position, remembering that we had
1127 at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1128 usual. If we haven't matched any alternatives in any iteration, check to
1129 see if a previous iteration matched. If so, the group has matched;
1130 continue from afterwards. Otherwise it has failed; restore the previous
1131 capture values before returning NOMATCH. */
1132
1133 for (;;)
1134 {
1135 md->offset_vector[md->offset_end - number] =
1136 (int)(eptr - md->start_subject);
1137 if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1138 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1139 eptrb, RM63);
1140 if (rrc == MATCH_KETRPOS)
1141 {
1142 offset_top = md->end_offset_top;
1143 eptr = md->end_match_ptr;
1144 ecode = md->start_code + code_offset;
1145 save_capture_last = md->capture_last;
1146 matched_once = TRUE;
1147 continue;
1148 }
1149
1150 /* See comment in the code for capturing groups above about handling
1151 THEN. */
1152
1153 if (rrc == MATCH_THEN)
1154 {
1155 next = ecode + GET(ecode,1);
1156 if (md->start_match_ptr < next &&
1157 (*ecode == OP_ALT || *next == OP_ALT))
1158 rrc = MATCH_NOMATCH;
1159 }
1160
1161 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1162 md->capture_last = save_capture_last;
1163 ecode += GET(ecode, 1);
1164 if (*ecode != OP_ALT) break;
1165 }
1166
1167 if (!matched_once)
1168 {
1169 md->offset_vector[offset] = save_offset1;
1170 md->offset_vector[offset+1] = save_offset2;
1171 md->offset_vector[md->offset_end - number] = save_offset3;
1172 }
1173
1174 if (allow_zero || matched_once)
1175 {
1176 ecode += 1 + LINK_SIZE;
1177 break;
1178 }
1179
1180 RRETURN(MATCH_NOMATCH);
1181 }
1182
1183 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1184 as a non-capturing bracket. */
1185
1186 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1187 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1188
1189 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1190
1191 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1192 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1193
1194 /* Non-capturing possessive bracket with unlimited repeat. We come here
1195 from BRAPOSZERO with allow_zero = TRUE. The code is similar to the above,
1196 without the capturing complication. It is written out separately for speed
1197 and cleanliness. */
1198
1199 case OP_BRAPOS:
1200 case OP_SBRAPOS:
1201 allow_zero = FALSE;
1202
1203 POSSESSIVE_NON_CAPTURE:
1204 matched_once = FALSE;
1205 code_offset = (int)(ecode - md->start_code);
1206
1207 for (;;)
1208 {
1209 if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1210 RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1211 eptrb, RM48);
1212 if (rrc == MATCH_KETRPOS)
1213 {
1214 offset_top = md->end_offset_top;
1215 eptr = md->end_match_ptr;
1216 ecode = md->start_code + code_offset;
1217 matched_once = TRUE;
1218 continue;
1219 }
1220
1221 /* See comment in the code for capturing groups above about handling
1222 THEN. */
1223
1224 if (rrc == MATCH_THEN)
1225 {
1226 next = ecode + GET(ecode,1);
1227 if (md->start_match_ptr < next &&
1228 (*ecode == OP_ALT || *next == OP_ALT))
1229 rrc = MATCH_NOMATCH;
1230 }
1231
1232 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1233 ecode += GET(ecode, 1);
1234 if (*ecode != OP_ALT) break;
1235 }
1236
1237 if (matched_once || allow_zero)
1238 {
1239 ecode += 1 + LINK_SIZE;
1240 break;
1241 }
1242 RRETURN(MATCH_NOMATCH);
1243
1244 /* Control never reaches here. */
1245
1246 /* Conditional group: compilation checked that there are no more than
1247 two branches. If the condition is false, skipping the first branch takes us
1248 past the end if there is only one branch, but that's OK because that is
1249 exactly what going to the ket would do. */
1250
1251 case OP_COND:
1252 case OP_SCOND:
1253 codelink = GET(ecode, 1);
1254
1255 /* Because of the way auto-callout works during compile, a callout item is
1256 inserted between OP_COND and an assertion condition. */
1257
1258 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1259 {
1260 if (PUBL(callout) != NULL)
1261 {
1262 PUBL(callout_block) cb;
1263 cb.version = 2; /* Version 1 of the callout block */
1264 cb.callout_number = ecode[LINK_SIZE+2];
1265 cb.offset_vector = md->offset_vector;
1266 #ifdef COMPILE_PCRE8
1267 cb.subject = (PCRE_SPTR)md->start_subject;
1268 #else
1269 cb.subject = (PCRE_SPTR16)md->start_subject;
1270 #endif
1271 cb.subject_length = (int)(md->end_subject - md->start_subject);
1272 cb.start_match = (int)(mstart - md->start_subject);
1273 cb.current_position = (int)(eptr - md->start_subject);
1274 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1275 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1276 cb.capture_top = offset_top/2;
1277 cb.capture_last = md->capture_last;
1278 cb.callout_data = md->callout_data;
1279 cb.mark = md->nomatch_mark;
1280 if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1281 if (rrc < 0) RRETURN(rrc);
1282 }
1283 ecode += PRIV(OP_lengths)[OP_CALLOUT];
1284 }
1285
1286 condcode = ecode[LINK_SIZE+1];
1287
1288 /* Now see what the actual condition is */
1289
1290 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
1291 {
1292 if (md->recursive == NULL) /* Not recursing => FALSE */
1293 {
1294 condition = FALSE;
1295 ecode += GET(ecode, 1);
1296 }
1297 else
1298 {
1299 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
1300 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1301
1302 /* If the test is for recursion into a specific subpattern, and it is
1303 false, but the test was set up by name, scan the table to see if the
1304 name refers to any other numbers, and test them. The condition is true
1305 if any one is set. */
1306
1307 if (!condition && condcode == OP_NRREF)
1308 {
1309 pcre_uchar *slotA = md->name_table;
1310 for (i = 0; i < md->name_count; i++)
1311 {
1312 if (GET2(slotA, 0) == recno) break;
1313 slotA += md->name_entry_size;
1314 }
1315
1316 /* Found a name for the number - there can be only one; duplicate
1317 names for different numbers are allowed, but not vice versa. First
1318 scan down for duplicates. */
1319
1320 if (i < md->name_count)
1321 {
1322 pcre_uchar *slotB = slotA;
1323 while (slotB > md->name_table)
1324 {
1325 slotB -= md->name_entry_size;
1326 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1327 {
1328 condition = GET2(slotB, 0) == md->recursive->group_num;
1329 if (condition) break;
1330 }
1331 else break;
1332 }
1333
1334 /* Scan up for duplicates */
1335
1336 if (!condition)
1337 {
1338 slotB = slotA;
1339 for (i++; i < md->name_count; i++)
1340 {
1341 slotB += md->name_entry_size;
1342 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1343 {
1344 condition = GET2(slotB, 0) == md->recursive->group_num;
1345 if (condition) break;
1346 }
1347 else break;
1348 }
1349 }
1350 }
1351 }
1352
1353 /* Choose branch according to the condition */
1354
1355 ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1356 }
1357 }
1358
1359 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1360 {
1361 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1362 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1363
1364 /* If the numbered capture is unset, but the reference was by name,
1365 scan the table to see if the name refers to any other numbers, and test
1366 them. The condition is true if any one is set. This is tediously similar
1367 to the code above, but not close enough to try to amalgamate. */
1368
1369 if (!condition && condcode == OP_NCREF)
1370 {
1371 int refno = offset >> 1;
1372 pcre_uchar *slotA = md->name_table;
1373
1374 for (i = 0; i < md->name_count; i++)
1375 {
1376 if (GET2(slotA, 0) == refno) break;
1377 slotA += md->name_entry_size;
1378 }
1379
1380 /* Found a name for the number - there can be only one; duplicate names
1381 for different numbers are allowed, but not vice versa. First scan down
1382 for duplicates. */
1383
1384 if (i < md->name_count)
1385 {
1386 pcre_uchar *slotB = slotA;
1387 while (slotB > md->name_table)
1388 {
1389 slotB -= md->name_entry_size;
1390 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1391 {
1392 offset = GET2(slotB, 0) << 1;
1393 condition = offset < offset_top &&
1394 md->offset_vector[offset] >= 0;
1395 if (condition) break;
1396 }
1397 else break;
1398 }
1399
1400 /* Scan up for duplicates */
1401
1402 if (!condition)
1403 {
1404 slotB = slotA;
1405 for (i++; i < md->name_count; i++)
1406 {
1407 slotB += md->name_entry_size;
1408 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1409 {
1410 offset = GET2(slotB, 0) << 1;
1411 condition = offset < offset_top &&
1412 md->offset_vector[offset] >= 0;
1413 if (condition) break;
1414 }
1415 else break;
1416 }
1417 }
1418 }
1419 }
1420
1421 /* Choose branch according to the condition */
1422
1423 ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1424 }
1425
1426 else if (condcode == OP_DEF) /* DEFINE - always false */
1427 {
1428 condition = FALSE;
1429 ecode += GET(ecode, 1);
1430 }
1431
1432 /* The condition is an assertion. Call match() to evaluate it - setting
1433 md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1434 an assertion. */
1435
1436 else
1437 {
1438 md->match_function_type = MATCH_CONDASSERT;
1439 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1440 if (rrc == MATCH_MATCH)
1441 {
1442 if (md->end_offset_top > offset_top)
1443 offset_top = md->end_offset_top; /* Captures may have happened */
1444 condition = TRUE;
1445 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1446 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1447 }
1448
1449 /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1450 assertion; it is therefore treated as NOMATCH. */
1451
1452 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1453 {
1454 RRETURN(rrc); /* Need braces because of following else */
1455 }
1456 else
1457 {
1458 condition = FALSE;
1459 ecode += codelink;
1460 }
1461 }
1462
1463 /* We are now at the branch that is to be obeyed. As there is only one, can
1464 use tail recursion to avoid using another stack frame, except when there is
1465 unlimited repeat of a possibly empty group. In the latter case, a recursive
1466 call to match() is always required, unless the second alternative doesn't
1467 exist, in which case we can just plough on. Note that, for compatibility
1468 with Perl, the | in a conditional group is NOT treated as creating two
1469 alternatives. If a THEN is encountered in the branch, it propagates out to
1470 the enclosing alternative (unless nested in a deeper set of alternatives,
1471 of course). */
1472
1473 if (condition || *ecode == OP_ALT)
1474 {
1475 if (op != OP_SCOND)
1476 {
1477 ecode += 1 + LINK_SIZE;
1478 goto TAIL_RECURSE;
1479 }
1480
1481 md->match_function_type = MATCH_CBEGROUP;
1482 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1483 RRETURN(rrc);
1484 }
1485
1486 /* Condition false & no alternative; continue after the group. */
1487
1488 else
1489 {
1490 ecode += 1 + LINK_SIZE;
1491 }
1492 break;
1493
1494
1495 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1496 to close any currently open capturing brackets. */
1497
1498 case OP_CLOSE:
1499 number = GET2(ecode, 1);
1500 offset = number << 1;
1501
1502 #ifdef PCRE_DEBUG
1503 printf("end bracket %d at *ACCEPT", number);
1504 printf("\n");
1505 #endif
1506
1507 md->capture_last = number;
1508 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1509 {
1510 md->offset_vector[offset] =
1511 md->offset_vector[md->offset_end - number];
1512 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1513 if (offset_top <= offset) offset_top = offset + 2;
1514 }
1515 ecode += 1 + IMM2_SIZE;
1516 break;
1517
1518
1519 /* End of the pattern, either real or forced. */
1520
1521 case OP_END:
1522 case OP_ACCEPT:
1523 case OP_ASSERT_ACCEPT:
1524
1525 /* If we have matched an empty string, fail if not in an assertion and not
1526 in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1527 is set and we have matched at the start of the subject. In both cases,
1528 backtracking will then try other alternatives, if any. */
1529
1530 if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1531 md->recursive == NULL &&
1532 (md->notempty ||
1533 (md->notempty_atstart &&
1534 mstart == md->start_subject + md->start_offset)))
1535 RRETURN(MATCH_NOMATCH);
1536
1537 /* Otherwise, we have a match. */
1538
1539 md->end_match_ptr = eptr; /* Record where we ended */
1540 md->end_offset_top = offset_top; /* and how many extracts were taken */
1541 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1542
1543 /* For some reason, the macros don't work properly if an expression is
1544 given as the argument to RRETURN when the heap is in use. */
1545
1546 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1547 RRETURN(rrc);
1548
1549 /* Assertion brackets. Check the alternative branches in turn - the
1550 matching won't pass the KET for an assertion. If any one branch matches,
1551 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1552 start of each branch to move the current point backwards, so the code at
1553 this level is identical to the lookahead case. When the assertion is part
1554 of a condition, we want to return immediately afterwards. The caller of
1555 this incarnation of the match() function will have set MATCH_CONDASSERT in
1556 md->match_function_type, and one of these opcodes will be the first opcode
1557 that is processed. We use a local variable that is preserved over calls to
1558 match() to remember this case. */
1559
1560 case OP_ASSERT:
1561 case OP_ASSERTBACK:
1562 save_mark = md->mark;
1563 if (md->match_function_type == MATCH_CONDASSERT)
1564 {
1565 condassert = TRUE;
1566 md->match_function_type = 0;
1567 }
1568 else condassert = FALSE;
1569
1570 do
1571 {
1572 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1573 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1574 {
1575 mstart = md->start_match_ptr; /* In case \K reset it */
1576 break;
1577 }
1578
1579 /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1580 as NOMATCH. */
1581
1582 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1583 ecode += GET(ecode, 1);
1584 md->mark = save_mark;
1585 }
1586 while (*ecode == OP_ALT);
1587
1588 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1589
1590 /* If checking an assertion for a condition, return MATCH_MATCH. */
1591
1592 if (condassert) RRETURN(MATCH_MATCH);
1593
1594 /* Continue from after the assertion, updating the offsets high water
1595 mark, since extracts may have been taken during the assertion. */
1596
1597 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1598 ecode += 1 + LINK_SIZE;
1599 offset_top = md->end_offset_top;
1600 continue;
1601
1602 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1603 PRUNE, or COMMIT means we must assume failure without checking subsequent
1604 branches. */
1605
1606 case OP_ASSERT_NOT:
1607 case OP_ASSERTBACK_NOT:
1608 save_mark = md->mark;
1609 if (md->match_function_type == MATCH_CONDASSERT)
1610 {
1611 condassert = TRUE;
1612 md->match_function_type = 0;
1613 }
1614 else condassert = FALSE;
1615
1616 do
1617 {
1618 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1619 md->mark = save_mark;
1620 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1621 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1622 {
1623 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1624 break;
1625 }
1626
1627 /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1628 as NOMATCH. */
1629
1630 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1631 ecode += GET(ecode,1);
1632 }
1633 while (*ecode == OP_ALT);
1634
1635 if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
1636
1637 ecode += 1 + LINK_SIZE;
1638 continue;
1639
1640 /* Move the subject pointer back. This occurs only at the start of
1641 each branch of a lookbehind assertion. If we are too close to the start to
1642 move back, this match function fails. When working with UTF-8 we move
1643 back a number of characters, not bytes. */
1644
1645 case OP_REVERSE:
1646 #ifdef SUPPORT_UTF
1647 if (utf)
1648 {
1649 i = GET(ecode, 1);
1650 while (i-- > 0)
1651 {
1652 eptr--;
1653 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1654 BACKCHAR(eptr);
1655 }
1656 }
1657 else
1658 #endif
1659
1660 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1661
1662 {
1663 eptr -= GET(ecode, 1);
1664 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1665 }
1666
1667 /* Save the earliest consulted character, then skip to next op code */
1668
1669 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1670 ecode += 1 + LINK_SIZE;
1671 break;
1672
1673 /* The callout item calls an external function, if one is provided, passing
1674 details of the match so far. This is mainly for debugging, though the
1675 function is able to force a failure. */
1676
1677 case OP_CALLOUT:
1678 if (PUBL(callout) != NULL)
1679 {
1680 PUBL(callout_block) cb;
1681 cb.version = 2; /* Version 1 of the callout block */
1682 cb.callout_number = ecode[1];
1683 cb.offset_vector = md->offset_vector;
1684 #ifdef COMPILE_PCRE8
1685 cb.subject = (PCRE_SPTR)md->start_subject;
1686 #else
1687 cb.subject = (PCRE_SPTR16)md->start_subject;
1688 #endif
1689 cb.subject_length = (int)(md->end_subject - md->start_subject);
1690 cb.start_match = (int)(mstart - md->start_subject);
1691 cb.current_position = (int)(eptr - md->start_subject);
1692 cb.pattern_position = GET(ecode, 2);
1693 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1694 cb.capture_top = offset_top/2;
1695 cb.capture_last = md->capture_last;
1696 cb.callout_data = md->callout_data;
1697 cb.mark = md->nomatch_mark;
1698 if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1699 if (rrc < 0) RRETURN(rrc);
1700 }
1701 ecode += 2 + 2*LINK_SIZE;
1702 break;
1703
1704 /* Recursion either matches the current regex, or some subexpression. The
1705 offset data is the offset to the starting bracket from the start of the
1706 whole pattern. (This is so that it works from duplicated subpatterns.)
1707
1708 The state of the capturing groups is preserved over recursion, and
1709 re-instated afterwards. We don't know how many are started and not yet
1710 finished (offset_top records the completed total) so we just have to save
1711 all the potential data. There may be up to 65535 such values, which is too
1712 large to put on the stack, but using malloc for small numbers seems
1713 expensive. As a compromise, the stack is used when there are no more than
1714 REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1715
1716 There are also other values that have to be saved. We use a chained
1717 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1718 for the original version of this logic. It has, however, been hacked around
1719 a lot, so he is not to blame for the current way it works. */
1720
1721 case OP_RECURSE:
1722 {
1723 recursion_info *ri;
1724 int recno;
1725
1726 callpat = md->start_code + GET(ecode, 1);
1727 recno = (callpat == md->start_code)? 0 :
1728 GET2(callpat, 1 + LINK_SIZE);
1729
1730 /* Check for repeating a recursion without advancing the subject pointer.
1731 This should catch convoluted mutual recursions. (Some simple cases are
1732 caught at compile time.) */
1733
1734 for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1735 if (recno == ri->group_num && eptr == ri->subject_position)
1736 RRETURN(PCRE_ERROR_RECURSELOOP);
1737
1738 /* Add to "recursing stack" */
1739
1740 new_recursive.group_num = recno;
1741 new_recursive.subject_position = eptr;
1742 new_recursive.prevrec = md->recursive;
1743 md->recursive = &new_recursive;
1744
1745 /* Where to continue from afterwards */
1746
1747 ecode += 1 + LINK_SIZE;
1748
1749 /* Now save the offset data */
1750
1751 new_recursive.saved_max = md->offset_end;
1752 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1753 new_recursive.offset_save = stacksave;
1754 else
1755 {
1756 new_recursive.offset_save =
1757 (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1758 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1759 }
1760 memcpy(new_recursive.offset_save, md->offset_vector,
1761 new_recursive.saved_max * sizeof(int));
1762
1763 /* OK, now we can do the recursion. After processing each alternative,
1764 restore the offset data. If there were nested recursions, md->recursive
1765 might be changed, so reset it before looping. */
1766
1767 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1768 cbegroup = (*callpat >= OP_SBRA);
1769 do
1770 {
1771 if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1772 RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1773 md, eptrb, RM6);
1774 memcpy(md->offset_vector, new_recursive.offset_save,
1775 new_recursive.saved_max * sizeof(int));
1776 md->recursive = new_recursive.prevrec;
1777 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1778 {
1779 DPRINTF(("Recursion matched\n"));
1780 if (new_recursive.offset_save != stacksave)
1781 (PUBL(free))(new_recursive.offset_save);
1782
1783 /* Set where we got to in the subject, and reset the start in case
1784 it was changed by \K. This *is* propagated back out of a recursion,
1785 for Perl compatibility. */
1786
1787 eptr = md->end_match_ptr;
1788 mstart = md->start_match_ptr;
1789 goto RECURSION_MATCHED; /* Exit loop; end processing */
1790 }
1791
1792 /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1793 as NOMATCH. */
1794
1795 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1796 {
1797 DPRINTF(("Recursion gave error %d\n", rrc));
1798 if (new_recursive.offset_save != stacksave)
1799 (PUBL(free))(new_recursive.offset_save);
1800 RRETURN(rrc);
1801 }
1802
1803 md->recursive = &new_recursive;
1804 callpat += GET(callpat, 1);
1805 }
1806 while (*callpat == OP_ALT);
1807
1808 DPRINTF(("Recursion didn't match\n"));
1809 md->recursive = new_recursive.prevrec;
1810 if (new_recursive.offset_save != stacksave)
1811 (PUBL(free))(new_recursive.offset_save);
1812 RRETURN(MATCH_NOMATCH);
1813 }
1814
1815 RECURSION_MATCHED:
1816 break;
1817
1818 /* An alternation is the end of a branch; scan along to find the end of the
1819 bracketed group and go to there. */
1820
1821 case OP_ALT:
1822 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1823 break;
1824
1825 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1826 indicating that it may occur zero times. It may repeat infinitely, or not
1827 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1828 with fixed upper repeat limits are compiled as a number of copies, with the
1829 optional ones preceded by BRAZERO or BRAMINZERO. */
1830
1831 case OP_BRAZERO:
1832 next = ecode + 1;
1833 RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1834 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1835 do next += GET(next, 1); while (*next == OP_ALT);
1836 ecode = next + 1 + LINK_SIZE;
1837 break;
1838
1839 case OP_BRAMINZERO:
1840 next = ecode + 1;
1841 do next += GET(next, 1); while (*next == OP_ALT);
1842 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1843 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1844 ecode++;
1845 break;
1846
1847 case OP_SKIPZERO:
1848 next = ecode+1;
1849 do next += GET(next,1); while (*next == OP_ALT);
1850 ecode = next + 1 + LINK_SIZE;
1851 break;
1852
1853 /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1854 here; just jump to the group, with allow_zero set TRUE. */
1855
1856 case OP_BRAPOSZERO:
1857 op = *(++ecode);
1858 allow_zero = TRUE;
1859 if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1860 goto POSSESSIVE_NON_CAPTURE;
1861
1862 /* End of a group, repeated or non-repeating. */
1863
1864 case OP_KET:
1865 case OP_KETRMIN:
1866 case OP_KETRMAX:
1867 case OP_KETRPOS:
1868 prev = ecode - GET(ecode, 1);
1869
1870 /* If this was a group that remembered the subject start, in order to break
1871 infinite repeats of empty string matches, retrieve the subject start from
1872 the chain. Otherwise, set it NULL. */
1873
1874 if (*prev >= OP_SBRA || *prev == OP_ONCE)
1875 {
1876 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1877 eptrb = eptrb->epb_prev; /* Backup to previous group */
1878 }
1879 else saved_eptr = NULL;
1880
1881 /* If we are at the end of an assertion group or a non-capturing atomic
1882 group, stop matching and return MATCH_MATCH, but record the current high
1883 water mark for use by positive assertions. We also need to record the match
1884 start in case it was changed by \K. */
1885
1886 if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1887 *prev == OP_ONCE_NC)
1888 {
1889 md->end_match_ptr = eptr; /* For ONCE_NC */
1890 md->end_offset_top = offset_top;
1891 md->start_match_ptr = mstart;
1892 RRETURN(MATCH_MATCH); /* Sets md->mark */
1893 }
1894
1895 /* For capturing groups we have to check the group number back at the start
1896 and if necessary complete handling an extraction by setting the offsets and
1897 bumping the high water mark. Whole-pattern recursion is coded as a recurse
1898 into group 0, so it won't be picked up here. Instead, we catch it when the
1899 OP_END is reached. Other recursion is handled here. We just have to record
1900 the current subject position and start match pointer and give a MATCH
1901 return. */
1902
1903 if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1904 *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1905 {
1906 number = GET2(prev, 1+LINK_SIZE);
1907 offset = number << 1;
1908
1909 #ifdef PCRE_DEBUG
1910 printf("end bracket %d", number);
1911 printf("\n");
1912 #endif
1913
1914 /* Handle a recursively called group. */
1915
1916 if (md->recursive != NULL && md->recursive->group_num == number)
1917 {
1918 md->end_match_ptr = eptr;
1919 md->start_match_ptr = mstart;
1920 RRETURN(MATCH_MATCH);
1921 }
1922
1923 /* Deal with capturing */
1924
1925 md->capture_last = number;
1926 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1927 {
1928 /* If offset is greater than offset_top, it means that we are
1929 "skipping" a capturing group, and that group's offsets must be marked
1930 unset. In earlier versions of PCRE, all the offsets were unset at the
1931 start of matching, but this doesn't work because atomic groups and
1932 assertions can cause a value to be set that should later be unset.
1933 Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1934 part of the atomic group, but this is not on the final matching path,
1935 so must be unset when 2 is set. (If there is no group 2, there is no
1936 problem, because offset_top will then be 2, indicating no capture.) */
1937
1938 if (offset > offset_top)
1939 {
1940 register int *iptr = md->offset_vector + offset_top;
1941 register int *iend = md->offset_vector + offset;
1942 while (iptr < iend) *iptr++ = -1;
1943 }
1944
1945 /* Now make the extraction */
1946
1947 md->offset_vector[offset] =
1948 md->offset_vector[md->offset_end - number];
1949 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1950 if (offset_top <= offset) offset_top = offset + 2;
1951 }
1952 }
1953
1954 /* For an ordinary non-repeating ket, just continue at this level. This
1955 also happens for a repeating ket if no characters were matched in the
1956 group. This is the forcible breaking of infinite loops as implemented in
1957 Perl 5.005. For a non-repeating atomic group that includes captures,
1958 establish a backup point by processing the rest of the pattern at a lower
1959 level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1960 original OP_ONCE level, thereby bypassing intermediate backup points, but
1961 resetting any captures that happened along the way. */
1962
1963 if (*ecode == OP_KET || eptr == saved_eptr)
1964 {
1965 if (*prev == OP_ONCE)
1966 {
1967 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1968 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1969 md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
1970 RRETURN(MATCH_ONCE);
1971 }
1972 ecode += 1 + LINK_SIZE; /* Carry on at this level */
1973 break;
1974 }
1975
1976 /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1977 and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1978 at a time from the outer level, thus saving stack. */
1979
1980 if (*ecode == OP_KETRPOS)
1981 {
1982 md->end_match_ptr = eptr;
1983 md->end_offset_top = offset_top;
1984 RRETURN(MATCH_KETRPOS);
1985 }
1986
1987 /* The normal repeating kets try the rest of the pattern or restart from
1988 the preceding bracket, in the appropriate order. In the second case, we can
1989 use tail recursion to avoid using another stack frame, unless we have an
1990 atomic group or an unlimited repeat of a group that can match an empty
1991 string. */
1992
1993 if (*ecode == OP_KETRMIN)
1994 {
1995 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1996 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1997 if (*prev == OP_ONCE)
1998 {
1999 RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2000 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2001 md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
2002 RRETURN(MATCH_ONCE);
2003 }
2004 if (*prev >= OP_SBRA) /* Could match an empty string */
2005 {
2006 md->match_function_type = MATCH_CBEGROUP;
2007 RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2008 RRETURN(rrc);
2009 }
2010 ecode = prev;
2011 goto TAIL_RECURSE;
2012 }
2013 else /* OP_KETRMAX */
2014 {
2015 if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
2016 RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2017 if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2018 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2019 if (*prev == OP_ONCE)
2020 {
2021 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2022 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2023 md->once_target = prev;
2024 RRETURN(MATCH_ONCE);
2025 }
2026 ecode += 1 + LINK_SIZE;
2027 goto TAIL_RECURSE;
2028 }
2029 /* Control never gets here */
2030
2031 /* Not multiline mode: start of subject assertion, unless notbol. */
2032
2033 case OP_CIRC:
2034 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2035
2036 /* Start of subject assertion */
2037
2038 case OP_SOD:
2039 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2040 ecode++;
2041 break;
2042
2043 /* Multiline mode: start of subject unless notbol, or after any newline. */
2044
2045 case OP_CIRCM:
2046 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2047 if (eptr != md->start_subject &&
2048 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2049 RRETURN(MATCH_NOMATCH);
2050 ecode++;
2051 break;
2052
2053 /* Start of match assertion */
2054
2055 case OP_SOM:
2056 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2057 ecode++;
2058 break;
2059
2060 /* Reset the start of match point */
2061
2062 case OP_SET_SOM:
2063 mstart = eptr;
2064 ecode++;
2065 break;
2066
2067 /* Multiline mode: assert before any newline, or before end of subject
2068 unless noteol is set. */
2069
2070 case OP_DOLLM:
2071 if (eptr < md->end_subject)
2072 {
2073 if (!IS_NEWLINE(eptr))
2074 {
2075 if (md->partial != 0 &&
2076 eptr + 1 >= md->end_subject &&
2077 NLBLOCK->nltype == NLTYPE_FIXED &&
2078 NLBLOCK->nllen == 2 &&
2079 *eptr == NLBLOCK->nl[0])
2080 {
2081 md->hitend = TRUE;
2082 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2083 }
2084 RRETURN(MATCH_NOMATCH);
2085 }
2086 }
2087 else
2088 {
2089 if (md->noteol) RRETURN(MATCH_NOMATCH);
2090 SCHECK_PARTIAL();
2091 }
2092 ecode++;
2093 break;
2094
2095 /* Not multiline mode: assert before a terminating newline or before end of
2096 subject unless noteol is set. */
2097
2098 case OP_DOLL:
2099 if (md->noteol) RRETURN(MATCH_NOMATCH);
2100 if (!md->endonly) goto ASSERT_NL_OR_EOS;
2101
2102 /* ... else fall through for endonly */
2103
2104 /* End of subject assertion (\z) */
2105
2106 case OP_EOD:
2107 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2108 SCHECK_PARTIAL();
2109 ecode++;
2110 break;
2111
2112 /* End of subject or ending \n assertion (\Z) */
2113
2114 case OP_EODN:
2115 ASSERT_NL_OR_EOS:
2116 if (eptr < md->end_subject &&
2117 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2118 {
2119 if (md->partial != 0 &&
2120 eptr + 1 >= md->end_subject &&
2121 NLBLOCK->nltype == NLTYPE_FIXED &&
2122 NLBLOCK->nllen == 2 &&
2123 *eptr == NLBLOCK->nl[0])
2124 {
2125 md->hitend = TRUE;
2126 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2127 }
2128 RRETURN(MATCH_NOMATCH);
2129 }
2130
2131 /* Either at end of string or \n before end. */
2132
2133 SCHECK_PARTIAL();
2134 ecode++;
2135 break;
2136
2137 /* Word boundary assertions */
2138
2139 case OP_NOT_WORD_BOUNDARY:
2140 case OP_WORD_BOUNDARY:
2141 {
2142
2143 /* Find out if the previous and current characters are "word" characters.
2144 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2145 be "non-word" characters. Remember the earliest consulted character for
2146 partial matching. */
2147
2148 #ifdef SUPPORT_UTF
2149 if (utf)
2150 {
2151 /* Get status of previous character */
2152
2153 if (eptr == md->start_subject) prev_is_word = FALSE; else
2154 {
2155 PCRE_PUCHAR lastptr = eptr - 1;
2156 BACKCHAR(lastptr);
2157 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2158 GETCHAR(c, lastptr);
2159 #ifdef SUPPORT_UCP
2160 if (md->use_ucp)
2161 {
2162 if (c == '_') prev_is_word = TRUE; else
2163 {
2164 int cat = UCD_CATEGORY(c);
2165 prev_is_word = (cat == ucp_L || cat == ucp_N);
2166 }
2167 }
2168 else
2169 #endif
2170 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2171 }
2172
2173 /* Get status of next character */
2174
2175 if (eptr >= md->end_subject)
2176 {
2177 SCHECK_PARTIAL();
2178 cur_is_word = FALSE;
2179 }
2180 else
2181 {
2182 GETCHAR(c, eptr);
2183 #ifdef SUPPORT_UCP
2184 if (md->use_ucp)
2185 {
2186 if (c == '_') cur_is_word = TRUE; else
2187 {
2188 int cat = UCD_CATEGORY(c);
2189 cur_is_word = (cat == ucp_L || cat == ucp_N);
2190 }
2191 }
2192 else
2193 #endif
2194 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2195 }
2196 }
2197 else
2198 #endif
2199
2200 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2201 consistency with the behaviour of \w we do use it in this case. */
2202
2203 {
2204 /* Get status of previous character */
2205
2206 if (eptr == md->start_subject) prev_is_word = FALSE; else
2207 {
2208 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2209 #ifdef SUPPORT_UCP
2210 if (md->use_ucp)
2211 {
2212 c = eptr[-1];
2213 if (c == '_') prev_is_word = TRUE; else
2214 {
2215 int cat = UCD_CATEGORY(c);
2216 prev_is_word = (cat == ucp_L || cat == ucp_N);
2217 }
2218 }
2219 else
2220 #endif
2221 prev_is_word = MAX_255(eptr[-1])
2222 && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2223 }
2224
2225 /* Get status of next character */
2226
2227 if (eptr >= md->end_subject)
2228 {
2229 SCHECK_PARTIAL();
2230 cur_is_word = FALSE;
2231 }
2232 else
2233 #ifdef SUPPORT_UCP
2234 if (md->use_ucp)
2235 {
2236 c = *eptr;
2237 if (c == '_') cur_is_word = TRUE; else
2238 {
2239 int cat = UCD_CATEGORY(c);
2240 cur_is_word = (cat == ucp_L || cat == ucp_N);
2241 }
2242 }
2243 else
2244 #endif
2245 cur_is_word = MAX_255(*eptr)
2246 && ((md->ctypes[*eptr] & ctype_word) != 0);
2247 }
2248
2249 /* Now see if the situation is what we want */
2250
2251 if ((*ecode++ == OP_WORD_BOUNDARY)?
2252 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2253 RRETURN(MATCH_NOMATCH);
2254 }
2255 break;
2256
2257 /* Match any single character type except newline; have to take care with
2258 CRLF newlines and partial matching. */
2259
2260 case OP_ANY:
2261 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2262 if (md->partial != 0 &&
2263 eptr + 1 >= md->end_subject &&
2264 NLBLOCK->nltype == NLTYPE_FIXED &&
2265 NLBLOCK->nllen == 2 &&
2266 *eptr == NLBLOCK->nl[0])
2267 {
2268 md->hitend = TRUE;
2269 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2270 }
2271
2272 /* Fall through */
2273
2274 /* Match any single character whatsoever. */
2275
2276 case OP_ALLANY:
2277 if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2278 { /* not be updated before SCHECK_PARTIAL. */
2279 SCHECK_PARTIAL();
2280 RRETURN(MATCH_NOMATCH);
2281 }
2282 eptr++;
2283 #ifdef SUPPORT_UTF
2284 if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2285 #endif
2286 ecode++;
2287 break;
2288
2289 /* Match a single byte, even in UTF-8 mode. This opcode really does match
2290 any byte, even newline, independent of the setting of PCRE_DOTALL. */
2291
2292 case OP_ANYBYTE:
2293 if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2294 { /* not be updated before SCHECK_PARTIAL. */
2295 SCHECK_PARTIAL();
2296 RRETURN(MATCH_NOMATCH);
2297 }
2298 eptr++;
2299 ecode++;
2300 break;
2301
2302 case OP_NOT_DIGIT:
2303 if (eptr >= md->end_subject)
2304 {
2305 SCHECK_PARTIAL();
2306 RRETURN(MATCH_NOMATCH);
2307 }
2308 GETCHARINCTEST(c, eptr);
2309 if (
2310 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2311 c < 256 &&
2312 #endif
2313 (md->ctypes[c] & ctype_digit) != 0
2314 )
2315 RRETURN(MATCH_NOMATCH);
2316 ecode++;
2317 break;
2318
2319 case OP_DIGIT:
2320 if (eptr >= md->end_subject)
2321 {
2322 SCHECK_PARTIAL();
2323 RRETURN(MATCH_NOMATCH);
2324 }
2325 GETCHARINCTEST(c, eptr);
2326 if (
2327 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2328 c > 255 ||
2329 #endif
2330 (md->ctypes[c] & ctype_digit) == 0
2331 )
2332 RRETURN(MATCH_NOMATCH);
2333 ecode++;
2334 break;
2335
2336 case OP_NOT_WHITESPACE:
2337 if (eptr >= md->end_subject)
2338 {
2339 SCHECK_PARTIAL();
2340 RRETURN(MATCH_NOMATCH);
2341 }
2342 GETCHARINCTEST(c, eptr);
2343 if (
2344 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2345 c < 256 &&
2346 #endif
2347 (md->ctypes[c] & ctype_space) != 0
2348 )
2349 RRETURN(MATCH_NOMATCH);
2350 ecode++;
2351 break;
2352
2353 case OP_WHITESPACE:
2354 if (eptr >= md->end_subject)
2355 {
2356 SCHECK_PARTIAL();
2357 RRETURN(MATCH_NOMATCH);
2358 }
2359 GETCHARINCTEST(c, eptr);
2360 if (
2361 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2362 c > 255 ||
2363 #endif
2364 (md->ctypes[c] & ctype_space) == 0
2365 )
2366 RRETURN(MATCH_NOMATCH);
2367 ecode++;
2368 break;
2369
2370 case OP_NOT_WORDCHAR:
2371 if (eptr >= md->end_subject)
2372 {
2373 SCHECK_PARTIAL();
2374 RRETURN(MATCH_NOMATCH);
2375 }
2376 GETCHARINCTEST(c, eptr);
2377 if (
2378 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2379 c < 256 &&
2380 #endif
2381 (md->ctypes[c] & ctype_word) != 0
2382 )
2383 RRETURN(MATCH_NOMATCH);
2384 ecode++;
2385 break;
2386
2387 case OP_WORDCHAR:
2388 if (eptr >= md->end_subject)
2389 {
2390 SCHECK_PARTIAL();
2391 RRETURN(MATCH_NOMATCH);
2392 }
2393 GETCHARINCTEST(c, eptr);
2394 if (
2395 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2396 c > 255 ||
2397 #endif
2398 (md->ctypes[c] & ctype_word) == 0
2399 )
2400 RRETURN(MATCH_NOMATCH);
2401 ecode++;
2402 break;
2403
2404 case OP_ANYNL:
2405 if (eptr >= md->end_subject)
2406 {
2407 SCHECK_PARTIAL();
2408 RRETURN(MATCH_NOMATCH);
2409 }
2410 GETCHARINCTEST(c, eptr);
2411 switch(c)
2412 {
2413 default: RRETURN(MATCH_NOMATCH);
2414
2415 case 0x000d:
2416 if (eptr >= md->end_subject)
2417 {
2418 SCHECK_PARTIAL();
2419 }
2420 else if (*eptr == 0x0a) eptr++;
2421 break;
2422
2423 case 0x000a:
2424 break;
2425
2426 case 0x000b:
2427 case 0x000c:
2428 case 0x0085:
2429 case 0x2028:
2430 case 0x2029:
2431 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2432 break;
2433 }
2434 ecode++;
2435 break;
2436
2437 case OP_NOT_HSPACE:
2438 if (eptr >= md->end_subject)
2439 {
2440 SCHECK_PARTIAL();
2441 RRETURN(MATCH_NOMATCH);
2442 }
2443 GETCHARINCTEST(c, eptr);
2444 switch(c)
2445 {
2446 default: break;
2447 case 0x09: /* HT */
2448 case 0x20: /* SPACE */
2449 case 0xa0: /* NBSP */
2450 case 0x1680: /* OGHAM SPACE MARK */
2451 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2452 case 0x2000: /* EN QUAD */
2453 case 0x2001: /* EM QUAD */
2454 case 0x2002: /* EN SPACE */
2455 case 0x2003: /* EM SPACE */
2456 case 0x2004: /* THREE-PER-EM SPACE */
2457 case 0x2005: /* FOUR-PER-EM SPACE */
2458 case 0x2006: /* SIX-PER-EM SPACE */
2459 case 0x2007: /* FIGURE SPACE */
2460 case 0x2008: /* PUNCTUATION SPACE */
2461 case 0x2009: /* THIN SPACE */
2462 case 0x200A: /* HAIR SPACE */
2463 case 0x202f: /* NARROW NO-BREAK SPACE */
2464 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2465 case 0x3000: /* IDEOGRAPHIC SPACE */
2466 RRETURN(MATCH_NOMATCH);
2467 }
2468 ecode++;
2469 break;
2470
2471 case OP_HSPACE:
2472 if (eptr >= md->end_subject)
2473 {
2474 SCHECK_PARTIAL();
2475 RRETURN(MATCH_NOMATCH);
2476 }
2477 GETCHARINCTEST(c, eptr);
2478 switch(c)
2479 {
2480 default: RRETURN(MATCH_NOMATCH);
2481 case 0x09: /* HT */
2482 case 0x20: /* SPACE */
2483 case 0xa0: /* NBSP */
2484 case 0x1680: /* OGHAM SPACE MARK */
2485 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2486 case 0x2000: /* EN QUAD */
2487 case 0x2001: /* EM QUAD */
2488 case 0x2002: /* EN SPACE */
2489 case 0x2003: /* EM SPACE */
2490 case 0x2004: /* THREE-PER-EM SPACE */
2491 case 0x2005: /* FOUR-PER-EM SPACE */
2492 case 0x2006: /* SIX-PER-EM SPACE */
2493 case 0x2007: /* FIGURE SPACE */
2494 case 0x2008: /* PUNCTUATION SPACE */
2495 case 0x2009: /* THIN SPACE */
2496 case 0x200A: /* HAIR SPACE */
2497 case 0x202f: /* NARROW NO-BREAK SPACE */
2498 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2499 case 0x3000: /* IDEOGRAPHIC SPACE */
2500 break;
2501 }
2502 ecode++;
2503 break;
2504
2505 case OP_NOT_VSPACE:
2506 if (eptr >= md->end_subject)
2507 {
2508 SCHECK_PARTIAL();
2509 RRETURN(MATCH_NOMATCH);
2510 }
2511 GETCHARINCTEST(c, eptr);
2512 switch(c)
2513 {
2514 default: break;
2515 case 0x0a: /* LF */
2516 case 0x0b: /* VT */
2517 case 0x0c: /* FF */
2518 case 0x0d: /* CR */
2519 case 0x85: /* NEL */
2520 case 0x2028: /* LINE SEPARATOR */
2521 case 0x2029: /* PARAGRAPH SEPARATOR */
2522 RRETURN(MATCH_NOMATCH);
2523 }
2524 ecode++;
2525 break;
2526
2527 case OP_VSPACE:
2528 if (eptr >= md->end_subject)
2529 {
2530 SCHECK_PARTIAL();
2531 RRETURN(MATCH_NOMATCH);
2532 }
2533 GETCHARINCTEST(c, eptr);
2534 switch(c)
2535 {
2536 default: RRETURN(MATCH_NOMATCH);
2537 case 0x0a: /* LF */
2538 case 0x0b: /* VT */
2539 case 0x0c: /* FF */
2540 case 0x0d: /* CR */
2541 case 0x85: /* NEL */
2542 case 0x2028: /* LINE SEPARATOR */
2543 case 0x2029: /* PARAGRAPH SEPARATOR */
2544 break;
2545 }
2546 ecode++;
2547 break;
2548
2549 #ifdef SUPPORT_UCP
2550 /* Check the next character by Unicode property. We will get here only
2551 if the support is in the binary; otherwise a compile-time error occurs. */
2552
2553 case OP_PROP:
2554 case OP_NOTPROP:
2555 if (eptr >= md->end_subject)
2556 {
2557 SCHECK_PARTIAL();
2558 RRETURN(MATCH_NOMATCH);
2559 }
2560 GETCHARINCTEST(c, eptr);
2561 {
2562 const ucd_record *prop = GET_UCD(c);
2563
2564 switch(ecode[1])
2565 {
2566 case PT_ANY:
2567 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2568 break;
2569
2570 case PT_LAMP:
2571 if ((prop->chartype == ucp_Lu ||
2572 prop->chartype == ucp_Ll ||
2573 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2574 RRETURN(MATCH_NOMATCH);
2575 break;
2576
2577 case PT_GC:
2578 if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2579 RRETURN(MATCH_NOMATCH);
2580 break;
2581
2582 case PT_PC:
2583 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2584 RRETURN(MATCH_NOMATCH);
2585 break;
2586
2587 case PT_SC:
2588 if ((ecode[2] != prop->script) == (op == OP_PROP))
2589 RRETURN(MATCH_NOMATCH);
2590 break;
2591
2592 /* These are specials */
2593
2594 case PT_ALNUM:
2595 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2596 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2597 RRETURN(MATCH_NOMATCH);
2598 break;
2599
2600 case PT_SPACE: /* Perl space */
2601 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2602 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2603 == (op == OP_NOTPROP))
2604 RRETURN(MATCH_NOMATCH);
2605 break;
2606
2607 case PT_PXSPACE: /* POSIX space */
2608 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2609 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2610 c == CHAR_FF || c == CHAR_CR)
2611 == (op == OP_NOTPROP))
2612 RRETURN(MATCH_NOMATCH);
2613 break;
2614
2615 case PT_WORD:
2616 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2617 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2618 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2619 RRETURN(MATCH_NOMATCH);
2620 break;
2621
2622 /* This should never occur */
2623
2624 default:
2625 RRETURN(PCRE_ERROR_INTERNAL);
2626 }
2627
2628 ecode += 3;
2629 }
2630 break;
2631
2632 /* Match an extended Unicode sequence. We will get here only if the support
2633 is in the binary; otherwise a compile-time error occurs. */
2634
2635 case OP_EXTUNI:
2636 if (eptr >= md->end_subject)
2637 {
2638 SCHECK_PARTIAL();
2639 RRETURN(MATCH_NOMATCH);
2640 }
2641 GETCHARINCTEST(c, eptr);
2642 if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2643 while (eptr < md->end_subject)
2644 {
2645 int len = 1;
2646 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2647 if (UCD_CATEGORY(c) != ucp_M) break;
2648 eptr += len;
2649 }
2650 CHECK_PARTIAL();
2651 ecode++;
2652 break;
2653 #endif
2654
2655
2656 /* Match a back reference, possibly repeatedly. Look past the end of the
2657 item to see if there is repeat information following. The code is similar
2658 to that for character classes, but repeated for efficiency. Then obey
2659 similar code to character type repeats - written out again for speed.
2660 However, if the referenced string is the empty string, always treat
2661 it as matched, any number of times (otherwise there could be infinite
2662 loops). */
2663
2664 case OP_REF:
2665 case OP_REFI:
2666 caseless = op == OP_REFI;
2667 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2668 ecode += 1 + IMM2_SIZE;
2669
2670 /* If the reference is unset, there are two possibilities:
2671
2672 (a) In the default, Perl-compatible state, set the length negative;
2673 this ensures that every attempt at a match fails. We can't just fail
2674 here, because of the possibility of quantifiers with zero minima.
2675
2676 (b) If the JavaScript compatibility flag is set, set the length to zero
2677 so that the back reference matches an empty string.
2678
2679 Otherwise, set the length to the length of what was matched by the
2680 referenced subpattern. */
2681
2682 if (offset >= offset_top || md->offset_vector[offset] < 0)
2683 length = (md->jscript_compat)? 0 : -1;
2684 else
2685 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2686
2687 /* Set up for repetition, or handle the non-repeated case */
2688
2689 switch (*ecode)
2690 {
2691 case OP_CRSTAR:
2692 case OP_CRMINSTAR:
2693 case OP_CRPLUS:
2694 case OP_CRMINPLUS:
2695 case OP_CRQUERY:
2696 case OP_CRMINQUERY:
2697 c = *ecode++ - OP_CRSTAR;
2698 minimize = (c & 1) != 0;
2699 min = rep_min[c]; /* Pick up values from tables; */
2700 max = rep_max[c]; /* zero for max => infinity */
2701 if (max == 0) max = INT_MAX;
2702 break;
2703
2704 case OP_CRRANGE:
2705 case OP_CRMINRANGE:
2706 minimize = (*ecode == OP_CRMINRANGE);
2707 min = GET2(ecode, 1);
2708 max = GET2(ecode, 1 + IMM2_SIZE);
2709 if (max == 0) max = INT_MAX;
2710 ecode += 1 + 2 * IMM2_SIZE;
2711 break;
2712
2713 default: /* No repeat follows */
2714 if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2715 {
2716 if (length == -2) eptr = md->end_subject; /* Partial match */
2717 CHECK_PARTIAL();
2718 RRETURN(MATCH_NOMATCH);
2719 }
2720 eptr += length;
2721 continue; /* With the main loop */
2722 }
2723
2724 /* Handle repeated back references. If the length of the reference is
2725 zero, just continue with the main loop. If the length is negative, it
2726 means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2727 zero, we can continue at the same level without recursion. For any other
2728 minimum, carrying on will result in NOMATCH. */
2729
2730 if (length == 0) continue;
2731 if (length < 0 && min == 0) continue;
2732
2733 /* First, ensure the minimum number of matches are present. We get back
2734 the length of the reference string explicitly rather than passing the
2735 address of eptr, so that eptr can be a register variable. */
2736
2737 for (i = 1; i <= min; i++)
2738 {
2739 int slength;
2740 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2741 {
2742 if (slength == -2) eptr = md->end_subject; /* Partial match */
2743 CHECK_PARTIAL();
2744 RRETURN(MATCH_NOMATCH);
2745 }
2746 eptr += slength;
2747 }
2748
2749 /* If min = max, continue at the same level without recursion.
2750 They are not both allowed to be zero. */
2751
2752 if (min == max) continue;
2753
2754 /* If minimizing, keep trying and advancing the pointer */
2755
2756 if (minimize)
2757 {
2758 for (fi = min;; fi++)
2759 {
2760 int slength;
2761 RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2762 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2763 if (fi >= max) RRETURN(MATCH_NOMATCH);
2764 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2765 {
2766 if (slength == -2) eptr = md->end_subject; /* Partial match */
2767 CHECK_PARTIAL();
2768 RRETURN(MATCH_NOMATCH);
2769 }
2770 eptr += slength;
2771 }
2772 /* Control never gets here */
2773 }
2774
2775 /* If maximizing, find the longest string and work backwards */
2776
2777 else
2778 {
2779 pp = eptr;
2780 for (i = min; i < max; i++)
2781 {
2782 int slength;
2783 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2784 {
2785 /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2786 the soft partial matching case. */
2787
2788 if (slength == -2 && md->partial != 0 &&
2789 md->end_subject > md->start_used_ptr)
2790 {
2791 md->hitend = TRUE;
2792 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2793 }
2794 break;
2795 }
2796 eptr += slength;
2797 }
2798
2799 while (eptr >= pp)
2800 {
2801 RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2802 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2803 eptr -= length;
2804 }
2805 RRETURN(MATCH_NOMATCH);
2806 }
2807 /* Control never gets here */
2808
2809 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2810 used when all the characters in the class have values in the range 0-255,
2811 and either the matching is caseful, or the characters are in the range
2812 0-127 when UTF-8 processing is enabled. The only difference between
2813 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2814 encountered.
2815
2816 First, look past the end of the item to see if there is repeat information
2817 following. Then obey similar code to character type repeats - written out
2818 again for speed. */
2819
2820 case OP_NCLASS:
2821 case OP_CLASS:
2822 {
2823 /* The data variable is saved across frames, so the byte map needs to
2824 be stored there. */
2825 #define BYTE_MAP ((pcre_uint8 *)data)
2826 data = ecode + 1; /* Save for matching */
2827 ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2828
2829 switch (*ecode)
2830 {
2831 case OP_CRSTAR:
2832 case OP_CRMINSTAR:
2833 case OP_CRPLUS:
2834 case OP_CRMINPLUS:
2835 case OP_CRQUERY:
2836 case OP_CRMINQUERY:
2837 c = *ecode++ - OP_CRSTAR;
2838 minimize = (c & 1) != 0;
2839 min = rep_min[c]; /* Pick up values from tables; */
2840 max = rep_max[c]; /* zero for max => infinity */
2841 if (max == 0) max = INT_MAX;
2842 break;
2843
2844 case OP_CRRANGE:
2845 case OP_CRMINRANGE:
2846 minimize = (*ecode == OP_CRMINRANGE);
2847 min = GET2(ecode, 1);
2848 max = GET2(ecode, 1 + IMM2_SIZE);
2849 if (max == 0) max = INT_MAX;
2850 ecode += 1 + 2 * IMM2_SIZE;
2851 break;
2852
2853 default: /* No repeat follows */
2854 min = max = 1;
2855 break;
2856 }
2857
2858 /* First, ensure the minimum number of matches are present. */
2859
2860 #ifdef SUPPORT_UTF
2861 if (utf)
2862 {
2863 for (i = 1; i <= min; i++)
2864 {
2865 if (eptr >= md->end_subject)
2866 {
2867 SCHECK_PARTIAL();
2868 RRETURN(MATCH_NOMATCH);
2869 }
2870 GETCHARINC(c, eptr);
2871 if (c > 255)
2872 {
2873 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2874 }
2875 else
2876 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2877 }
2878 }
2879 else
2880 #endif
2881 /* Not UTF mode */
2882 {
2883 for (i = 1; i <= min; i++)
2884 {
2885 if (eptr >= md->end_subject)
2886 {
2887 SCHECK_PARTIAL();
2888 RRETURN(MATCH_NOMATCH);
2889 }
2890 c = *eptr++;
2891 #ifndef COMPILE_PCRE8
2892 if (c > 255)
2893 {
2894 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2895 }
2896 else
2897 #endif
2898 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2899 }
2900 }
2901
2902 /* If max == min we can continue with the main loop without the
2903 need to recurse. */
2904
2905 if (min == max) continue;
2906
2907 /* If minimizing, keep testing the rest of the expression and advancing
2908 the pointer while it matches the class. */
2909
2910 if (minimize)
2911 {
2912 #ifdef SUPPORT_UTF
2913 if (utf)
2914 {
2915 for (fi = min;; fi++)
2916 {
2917 RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2918 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2919 if (fi >= max) RRETURN(MATCH_NOMATCH);
2920 if (eptr >= md->end_subject)
2921 {
2922 SCHECK_PARTIAL();
2923 RRETURN(MATCH_NOMATCH);
2924 }
2925 GETCHARINC(c, eptr);
2926 if (c > 255)
2927 {
2928 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2929 }
2930 else
2931 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2932 }
2933 }
2934 else
2935 #endif
2936 /* Not UTF mode */
2937 {
2938 for (fi = min;; fi++)
2939 {
2940 RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2941 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2942 if (fi >= max) RRETURN(MATCH_NOMATCH);
2943 if (eptr >= md->end_subject)
2944 {
2945 SCHECK_PARTIAL();
2946 RRETURN(MATCH_NOMATCH);
2947 }
2948 c = *eptr++;
2949 #ifndef COMPILE_PCRE8
2950 if (c > 255)
2951 {
2952 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2953 }
2954 else
2955 #endif
2956 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2957 }
2958 }
2959 /* Control never gets here */
2960 }
2961
2962 /* If maximizing, find the longest possible run, then work backwards. */
2963
2964 else
2965 {
2966 pp = eptr;
2967
2968 #ifdef SUPPORT_UTF
2969 if (utf)
2970 {
2971 for (i = min; i < max; i++)
2972 {
2973 int len = 1;
2974 if (eptr >= md->end_subject)
2975 {
2976 SCHECK_PARTIAL();
2977 break;
2978 }
2979 GETCHARLEN(c, eptr, len);
2980 if (c > 255)
2981 {
2982 if (op == OP_CLASS) break;
2983 }
2984 else
2985 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2986 eptr += len;
2987 }
2988 for (;;)
2989 {
2990 RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2991 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2992 if (eptr-- == pp) break; /* Stop if tried at original pos */
2993 BACKCHAR(eptr);
2994 }
2995 }
2996 else
2997 #endif
2998 /* Not UTF mode */
2999 {
3000 for (i = min; i < max; i++)
3001 {
3002 if (eptr >= md->end_subject)
3003 {
3004 SCHECK_PARTIAL();
3005 break;
3006 }
3007 c = *eptr;
3008 #ifndef COMPILE_PCRE8
3009 if (c > 255)
3010 {
3011 if (op == OP_CLASS) break;
3012 }
3013 else
3014 #endif
3015 if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3016 eptr++;
3017 }
3018 while (eptr >= pp)
3019 {
3020 RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3021 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3022 eptr--;
3023 }
3024 }
3025
3026 RRETURN(MATCH_NOMATCH);
3027 }
3028 #undef BYTE_MAP
3029 }
3030 /* Control never gets here */
3031
3032
3033 /* Match an extended character class. This opcode is encountered only
3034 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3035 mode, because Unicode properties are supported in non-UTF-8 mode. */
3036
3037 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3038 case OP_XCLASS:
3039 {
3040 data = ecode + 1 + LINK_SIZE; /* Save for matching */
3041 ecode += GET(ecode, 1); /* Advance past the item */
3042
3043 switch (*ecode)
3044 {
3045 case OP_CRSTAR:
3046 case OP_CRMINSTAR:
3047 case OP_CRPLUS:
3048 case OP_CRMINPLUS:
3049 case OP_CRQUERY:
3050 case OP_CRMINQUERY:
3051 c = *ecode++ - OP_CRSTAR;
3052 minimize = (c & 1) != 0;
3053 min = rep_min[c]; /* Pick up values from tables; */
3054 max = rep_max[c]; /* zero for max => infinity */
3055 if (max == 0) max = INT_MAX;
3056 break;
3057
3058 case OP_CRRANGE:
3059 case OP_CRMINRANGE:
3060 minimize = (*ecode == OP_CRMINRANGE);
3061 min = GET2(ecode, 1);
3062 max = GET2(ecode, 1 + IMM2_SIZE);
3063 if (max == 0) max = INT_MAX;
3064 ecode += 1 + 2 * IMM2_SIZE;
3065 break;
3066
3067 default: /* No repeat follows */
3068 min = max = 1;
3069 break;
3070 }
3071
3072 /* First, ensure the minimum number of matches are present. */
3073
3074 for (i = 1; i <= min; i++)
3075 {
3076 if (eptr >= md->end_subject)
3077 {
3078 SCHECK_PARTIAL();
3079 RRETURN(MATCH_NOMATCH);
3080 }
3081 GETCHARINCTEST(c, eptr);
3082 if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3083 }
3084
3085 /* If max == min we can continue with the main loop without the
3086 need to recurse. */
3087
3088 if (min == max) continue;
3089
3090 /* If minimizing, keep testing the rest of the expression and advancing
3091 the pointer while it matches the class. */
3092
3093 if (minimize)
3094 {
3095 for (fi = min;; fi++)
3096 {
3097 RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3098 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3099 if (fi >= max) RRETURN(MATCH_NOMATCH);
3100 if (eptr >= md->end_subject)
3101 {
3102 SCHECK_PARTIAL();
3103 RRETURN(MATCH_NOMATCH);
3104 }
3105 GETCHARINCTEST(c, eptr);
3106 if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3107 }
3108 /* Control never gets here */
3109 }
3110
3111 /* If maximizing, find the longest possible run, then work backwards. */
3112
3113 else
3114 {
3115 pp = eptr;
3116 for (i = min; i < max; i++)
3117 {
3118 int len = 1;
3119 if (eptr >= md->end_subject)
3120 {
3121 SCHECK_PARTIAL();
3122 break;
3123 }
3124 #ifdef SUPPORT_UTF
3125 GETCHARLENTEST(c, eptr, len);
3126 #else
3127 c = *eptr;
3128 #endif
3129 if (!PRIV(xclass)(c, data, utf)) break;
3130 eptr += len;
3131 }
3132 for(;;)
3133 {
3134 RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3135 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3136 if (eptr-- == pp) break; /* Stop if tried at original pos */
3137 #ifdef SUPPORT_UTF
3138 if (utf) BACKCHAR(eptr);
3139 #endif
3140 }
3141 RRETURN(MATCH_NOMATCH);
3142 }
3143
3144 /* Control never gets here */
3145 }
3146 #endif /* End of XCLASS */
3147
3148 /* Match a single character, casefully */
3149
3150 case OP_CHAR:
3151 #ifdef SUPPORT_UTF
3152 if (utf)
3153 {
3154 length = 1;
3155 ecode++;
3156 GETCHARLEN(fc, ecode, length);
3157 if (length > md->end_subject - eptr)
3158 {
3159 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
3160 RRETURN(MATCH_NOMATCH);
3161 }
3162 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3163 }
3164 else
3165 #endif
3166 /* Not UTF mode */
3167 {
3168 if (md->end_subject - eptr < 1)
3169 {
3170 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
3171 RRETURN(MATCH_NOMATCH);
3172 }
3173 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3174 ecode += 2;
3175 }
3176 break;
3177
3178 /* Match a single character, caselessly. If we are at the end of the
3179 subject, give up immediately. */
3180
3181 case OP_CHARI:
3182 if (eptr >= md->end_subject)
3183 {
3184 SCHECK_PARTIAL();
3185 RRETURN(MATCH_NOMATCH);
3186 }
3187
3188 #ifdef SUPPORT_UTF
3189 if (utf)
3190 {
3191 length = 1;
3192 ecode++;
3193 GETCHARLEN(fc, ecode, length);
3194
3195 /* If the pattern character's value is < 128, we have only one byte, and
3196 we know that its other case must also be one byte long, so we can use the
3197 fast lookup table. We know that there is at least one byte left in the
3198 subject. */
3199
3200 if (fc < 128)
3201 {
3202 if (md->lcc[fc]
3203 != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3204 ecode++;
3205 eptr++;
3206 }
3207
3208 /* Otherwise we must pick up the subject character. Note that we cannot
3209 use the value of "length" to check for sufficient bytes left, because the
3210 other case of the character may have more or fewer bytes. */
3211
3212 else
3213 {
3214 unsigned int dc;
3215 GETCHARINC(dc, eptr);
3216 ecode += length;
3217
3218 /* If we have Unicode property support, we can use it to test the other
3219 case of the character, if there is one. */
3220
3221 if (fc != dc)
3222 {
3223 #ifdef SUPPORT_UCP
3224 if (dc != UCD_OTHERCASE(fc))
3225 #endif
3226 RRETURN(MATCH_NOMATCH);
3227 }
3228 }
3229 }
3230 else
3231 #endif /* SUPPORT_UTF */
3232
3233 /* Not UTF mode */
3234 {
3235 if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3236 != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3237 eptr++;
3238 ecode += 2;
3239 }
3240 break;
3241
3242 /* Match a single character repeatedly. */
3243
3244 case OP_EXACT:
3245 case OP_EXACTI:
3246 min = max = GET2(ecode, 1);
3247 ecode += 1 + IMM2_SIZE;
3248 goto REPEATCHAR;
3249
3250 case OP_POSUPTO:
3251 case OP_POSUPTOI:
3252 possessive = TRUE;
3253 /* Fall through */
3254
3255 case OP_UPTO:
3256 case OP_UPTOI:
3257 case OP_MINUPTO:
3258 case OP_MINUPTOI:
3259 min = 0;
3260 max = GET2(ecode, 1);
3261 minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3262 ecode += 1 + IMM2_SIZE;
3263 goto REPEATCHAR;
3264
3265 case OP_POSSTAR:
3266 case OP_POSSTARI:
3267 possessive = TRUE;
3268 min = 0;
3269 max = INT_MAX;
3270 ecode++;
3271 goto REPEATCHAR;
3272
3273 case OP_POSPLUS:
3274 case OP_POSPLUSI:
3275 possessive = TRUE;
3276 min = 1;
3277 max = INT_MAX;
3278 ecode++;
3279 goto REPEATCHAR;
3280
3281 case OP_POSQUERY:
3282 case OP_POSQUERYI:
3283 possessive = TRUE;
3284 min = 0;
3285 max = 1;
3286 ecode++;
3287 goto REPEATCHAR;
3288
3289 case OP_STAR:
3290 case OP_STARI:
3291 case OP_MINSTAR:
3292 case OP_MINSTARI:
3293 case OP_PLUS:
3294 case OP_PLUSI:
3295 case OP_MINPLUS:
3296 case OP_MINPLUSI:
3297 case OP_QUERY:
3298 case OP_QUERYI:
3299 case OP_MINQUERY:
3300 case OP_MINQUERYI:
3301 c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3302 minimize = (c & 1) != 0;
3303 min = rep_min[c]; /* Pick up values from tables; */
3304 max = rep_max[c]; /* zero for max => infinity */
3305 if (max == 0) max = INT_MAX;
3306
3307 /* Common code for all repeated single-character matches. */
3308
3309 REPEATCHAR:
3310 #ifdef SUPPORT_UTF
3311 if (utf)
3312 {
3313 length = 1;
3314 charptr = ecode;
3315 GETCHARLEN(fc, ecode, length);
3316 ecode += length;
3317
3318 /* Handle multibyte character matching specially here. There is
3319 support for caseless matching if UCP support is present. */
3320
3321 if (length > 1)
3322 {
3323 #ifdef SUPPORT_UCP
3324 unsigned int othercase;
3325 if (op >= OP_STARI && /* Caseless */
3326 (othercase = UCD_OTHERCASE(fc)) != fc)
3327 oclength = PRIV(ord2utf)(othercase, occhars);
3328 else oclength = 0;
3329 #endif /* SUPPORT_UCP */
3330
3331 for (i = 1; i <= min; i++)
3332 {
3333 if (eptr <= md->end_subject - length &&
3334 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3335 #ifdef SUPPORT_UCP
3336 else if (oclength > 0 &&
3337 eptr <= md->end_subject - oclength &&
3338 memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3339 #endif /* SUPPORT_UCP */
3340 else
3341 {
3342 CHECK_PARTIAL();
3343 RRETURN(MATCH_NOMATCH);
3344 }
3345 }
3346
3347 if (min == max) continue;
3348
3349 if (minimize)
3350 {
3351 for (fi = min;; fi++)
3352 {
3353 RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3354 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3355 if (fi >= max) RRETURN(MATCH_NOMATCH);
3356 if (eptr <= md->end_subject - length &&
3357 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3358 #ifdef SUPPORT_UCP
3359 else if (oclength > 0 &&
3360 eptr <= md->end_subject - oclength &&
3361 memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3362 #endif /* SUPPORT_UCP */
3363 else
3364 {
3365 CHECK_PARTIAL();
3366 RRETURN(MATCH_NOMATCH);
3367 }
3368 }
3369 /* Control never gets here */
3370 }
3371
3372 else /* Maximize */
3373 {
3374 pp = eptr;
3375 for (i = min; i < max; i++)
3376 {
3377 if (eptr <= md->end_subject - length &&
3378 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3379 #ifdef SUPPORT_UCP
3380 else if (oclength > 0 &&
3381 eptr <= md->end_subject - oclength &&
3382 memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3383 #endif /* SUPPORT_UCP */
3384 else
3385 {
3386 CHECK_PARTIAL();
3387 break;
3388 }
3389 }
3390
3391 if (possessive) continue;
3392
3393 for(;;)
3394 {
3395 RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3396 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3397 if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3398 #ifdef SUPPORT_UCP
3399 eptr--;
3400 BACKCHAR(eptr);
3401 #else /* without SUPPORT_UCP */
3402 eptr -= length;
3403 #endif /* SUPPORT_UCP */
3404 }
3405 }
3406 /* Control never gets here */
3407 }
3408
3409 /* If the length of a UTF-8 character is 1, we fall through here, and
3410 obey the code as for non-UTF-8 characters below, though in this case the
3411 value of fc will always be < 128. */
3412 }
3413 else
3414 #endif /* SUPPORT_UTF */
3415 /* When not in UTF-8 mode, load a single-byte character. */
3416 fc = *ecode++;
3417
3418 /* The value of fc at this point is always one character, though we may
3419 or may not be in UTF mode. The code is duplicated for the caseless and
3420 caseful cases, for speed, since matching characters is likely to be quite
3421 common. First, ensure the minimum number of matches are present. If min =
3422 max, continue at the same level without recursing. Otherwise, if
3423 minimizing, keep trying the rest of the expression and advancing one
3424 matching character if failing, up to the maximum. Alternatively, if
3425 maximizing, find the maximum number of characters and work backwards. */
3426
3427 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3428 max, eptr));
3429
3430 if (op >= OP_STARI) /* Caseless */
3431 {
3432 #ifdef COMPILE_PCRE8
3433 /* fc must be < 128 if UTF is enabled. */
3434 foc = md->fcc[fc];
3435 #else
3436 #ifdef SUPPORT_UTF
3437 #ifdef SUPPORT_UCP
3438 if (utf && fc > 127)
3439 foc = UCD_OTHERCASE(fc);
3440 #else
3441 if (utf && fc > 127)
3442 foc = fc;
3443 #endif /* SUPPORT_UCP */
3444 else
3445 #endif /* SUPPORT_UTF */
3446 foc = TABLE_GET(fc, md->fcc, fc);
3447 #endif /* COMPILE_PCRE8 */
3448
3449 for (i = 1; i <= min; i++)
3450 {
3451 if (eptr >= md->end_subject)
3452 {
3453 SCHECK_PARTIAL();
3454 RRETURN(MATCH_NOMATCH);
3455 }
3456 if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3457 eptr++;
3458 }
3459 if (min == max) continue;
3460 if (minimize)
3461 {
3462 for (fi = min;; fi++)
3463 {
3464 RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3465 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3466 if (fi >= max) RRETURN(MATCH_NOMATCH);
3467 if (eptr >= md->end_subject)
3468 {
3469 SCHECK_PARTIAL();
3470 RRETURN(MATCH_NOMATCH);
3471 }
3472 if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3473 eptr++;
3474 }
3475 /* Control never gets here */
3476 }
3477 else /* Maximize */
3478 {
3479 pp = eptr;
3480 for (i = min; i < max; i++)
3481 {
3482 if (eptr >= md->end_subject)
3483 {
3484 SCHECK_PARTIAL();
3485 break;
3486 }
3487 if (fc != *eptr && foc != *eptr) break;
3488 eptr++;
3489 }
3490
3491 if (possessive) continue;
3492
3493 while (eptr >= pp)
3494 {
3495 RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3496 eptr--;
3497 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498 }
3499 RRETURN(MATCH_NOMATCH);
3500 }
3501 /* Control never gets here */
3502 }
3503
3504 /* Caseful comparisons (includes all multi-byte characters) */
3505
3506 else
3507 {
3508 for (i = 1; i <= min; i++)
3509 {
3510 if (eptr >= md->end_subject)
3511 {
3512 SCHECK_PARTIAL();
3513 RRETURN(MATCH_NOMATCH);
3514 }
3515 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3516 }
3517
3518 if (min == max) continue;
3519
3520 if (minimize)
3521 {
3522 for (fi = min;; fi++)
3523 {
3524 RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3525 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3526 if (fi >= max) RRETURN(MATCH_NOMATCH);
3527 if (eptr >= md->end_subject)
3528 {
3529 SCHECK_PARTIAL();
3530 RRETURN(MATCH_NOMATCH);
3531 }
3532 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3533 }
3534 /* Control never gets here */
3535 }
3536 else /* Maximize */
3537 {
3538 pp = eptr;
3539 for (i = min; i < max; i++)
3540 {
3541 if (eptr >= md->end_subject)
3542 {
3543 SCHECK_PARTIAL();
3544 break;
3545 }
3546 if (fc != *eptr) break;
3547 eptr++;
3548 }
3549 if (possessive) continue;
3550
3551 while (eptr >= pp)
3552 {
3553 RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3554 eptr--;
3555 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3556 }
3557 RRETURN(MATCH_NOMATCH);
3558 }
3559 }
3560 /* Control never gets here */
3561
3562 /* Match a negated single character. In UTF mode, both the pattern
3563 character and the subject character being checked can be multibyte. */
3564
3565 case OP_NOT:
3566 case OP_NOTI:
3567 if (eptr >= md->end_subject)
3568 {
3569 SCHECK_PARTIAL();
3570 RRETURN(MATCH_NOMATCH);
3571 }
3572 #ifdef SUPPORT_UTF
3573 if (utf)
3574 {
3575 register unsigned int ch, och;
3576
3577 ecode++;
3578 GETCHARINC(ch, ecode);
3579 GETCHARINC(c, eptr);
3580
3581 if (op == OP_NOT)
3582 {
3583 if (ch == c) RRETURN(MATCH_NOMATCH);
3584 }
3585 else
3586 {
3587 #ifdef SUPPORT_UCP
3588 if (ch > 127)
3589 och = UCD_OTHERCASE(ch);
3590 #else
3591 if (ch > 127)
3592 och = ch;
3593 #endif /* SUPPORT_UCP */
3594 else
3595 och = TABLE_GET(ch, md->fcc, ch);
3596 if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3597 }
3598 }
3599 else
3600 #endif
3601 {
3602 register unsigned int ch = ecode[1];
3603 c = *eptr++;
3604 if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3605 RRETURN(MATCH_NOMATCH);
3606 ecode += 2;
3607 }
3608 break;
3609
3610 /* Match a negated single character repeatedly. This is almost a
3611 repeat of the code for a repeated single character, but I haven't found a
3612 nice way of commoning these up that doesn't require a test of the
3613 positive/negative option for each character match. Maybe that wouldn't add
3614 very much to the time taken, but character matching *is* what this is all
3615 about... */
3616
3617 case OP_NOTEXACT:
3618 case OP_NOTEXACTI:
3619 min = max = GET2(ecode, 1);
3620 ecode += 1 + IMM2_SIZE;
3621 goto REPEATNOTCHAR;
3622
3623 case OP_NOTUPTO:
3624 case OP_NOTUPTOI:
3625 case OP_NOTMINUPTO:
3626 case OP_NOTMINUPTOI:
3627 min = 0;
3628 max = GET2(ecode, 1);
3629 minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3630 ecode += 1 + IMM2_SIZE;
3631 goto REPEATNOTCHAR;
3632
3633 case OP_NOTPOSSTAR:
3634 case OP_NOTPOSSTARI:
3635 possessive = TRUE;
3636 min = 0;
3637 max = INT_MAX;
3638 ecode++;
3639 goto REPEATNOTCHAR;
3640
3641 case OP_NOTPOSPLUS:
3642 case OP_NOTPOSPLUSI:
3643 possessive = TRUE;
3644 min = 1;
3645 max = INT_MAX;
3646 ecode++;
3647 goto REPEATNOTCHAR;
3648
3649 case OP_NOTPOSQUERY:
3650 case OP_NOTPOSQUERYI:
3651 possessive = TRUE;
3652 min = 0;
3653 max = 1;
3654 ecode++;
3655 goto REPEATNOTCHAR;
3656
3657 case OP_NOTPOSUPTO:
3658 case OP_NOTPOSUPTOI:
3659 possessive = TRUE;
3660 min = 0;
3661 max = GET2(ecode, 1);
3662 ecode += 1 + IMM2_SIZE;
3663 goto REPEATNOTCHAR;
3664
3665 case OP_NOTSTAR:
3666 case OP_NOTSTARI:
3667 case OP_NOTMINSTAR:
3668 case OP_NOTMINSTARI:
3669 case OP_NOTPLUS:
3670 case OP_NOTPLUSI:
3671 case OP_NOTMINPLUS:
3672 case OP_NOTMINPLUSI:
3673 case OP_NOTQUERY:
3674 case OP_NOTQUERYI:
3675 case OP_NOTMINQUERY:
3676 case OP_NOTMINQUERYI:
3677 c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3678 minimize = (c & 1) != 0;
3679 min = rep_min[c]; /* Pick up values from tables; */
3680 max = rep_max[c]; /* zero for max => infinity */
3681 if (max == 0) max = INT_MAX;
3682
3683 /* Common code for all repeated single-byte matches. */
3684
3685 REPEATNOTCHAR:
3686 GETCHARINCTEST(fc, ecode);
3687
3688 /* The code is duplicated for the caseless and caseful cases, for speed,
3689 since matching characters is likely to be quite common. First, ensure the
3690 minimum number of matches are present. If min = max, continue at the same
3691 level without recursing. Otherwise, if minimizing, keep trying the rest of
3692 the expression and advancing one matching character if failing, up to the
3693 maximum. Alternatively, if maximizing, find the maximum number of
3694 characters and work backwards. */
3695
3696 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3697 max, eptr));
3698
3699 if (op >= OP_NOTSTARI) /* Caseless */
3700 {
3701 #ifdef SUPPORT_UTF
3702 #ifdef SUPPORT_UCP
3703 if (utf && fc > 127)
3704 foc = UCD_OTHERCASE(fc);
3705 #else
3706 if (utf && fc > 127)
3707 foc = fc;
3708 #endif /* SUPPORT_UCP */
3709 else
3710 #endif /* SUPPORT_UTF */
3711 foc = TABLE_GET(fc, md->fcc, fc);
3712
3713 #ifdef SUPPORT_UTF
3714 if (utf)
3715 {
3716 register unsigned int d;
3717 for (i = 1; i <= min; i++)
3718 {
3719 if (eptr >= md->end_subject)
3720 {
3721 SCHECK_PARTIAL();
3722 RRETURN(MATCH_NOMATCH);
3723 }
3724 GETCHARINC(d, eptr);
3725 if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3726 }
3727 }
3728 else
3729 #endif
3730 /* Not UTF mode */
3731 {
3732 for (i = 1; i <= min; i++)
3733 {
3734 if (eptr >= md->end_subject)
3735 {
3736 SCHECK_PARTIAL();
3737 RRETURN(MATCH_NOMATCH);
3738 }
3739 if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3740 eptr++;
3741 }
3742 }
3743
3744 if (min == max) continue;
3745
3746 if (minimize)
3747 {
3748 #ifdef SUPPORT_UTF
3749 if (utf)
3750 {
3751 register unsigned int d;
3752 for (fi = min;; fi++)
3753 {
3754 RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3755 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3756 if (fi >= max) RRETURN(MATCH_NOMATCH);
3757 if (eptr >= md->end_subject)
3758 {
3759 SCHECK_PARTIAL();
3760 RRETURN(MATCH_NOMATCH);
3761 }
3762 GETCHARINC(d, eptr);
3763 if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3764 }
3765 }
3766 else
3767 #endif
3768 /* Not UTF mode */
3769 {
3770 for (fi = min;; fi++)
3771 {
3772 RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3773 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3774 if (fi >= max) RRETURN(MATCH_NOMATCH);
3775 if (eptr >= md->end_subject)
3776 {
3777 SCHECK_PARTIAL();
3778 RRETURN(MATCH_NOMATCH);
3779 }
3780 if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3781 eptr++;
3782 }
3783 }
3784 /* Control never gets here */
3785 }
3786
3787 /* Maximize case */
3788
3789 else
3790 {
3791 pp = eptr;
3792
3793 #ifdef SUPPORT_UTF
3794 if (utf)
3795 {
3796 register unsigned int d;
3797 for (i = min; i < max; i++)
3798 {
3799 int len = 1;
3800 if (eptr >= md->end_subject)
3801 {
3802 SCHECK_PARTIAL();
3803 break;
3804 }
3805 GETCHARLEN(d, eptr, len);
3806 if (fc == d || (unsigned int)foc == d) break;
3807 eptr += len;
3808 }
3809 if (possessive) continue;
3810 for(;;)
3811 {
3812 RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3813 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3814 if (eptr-- == pp) break; /* Stop if tried at original pos */
3815 BACKCHAR(eptr);
3816 }
3817 }
3818 else
3819 #endif
3820 /* Not UTF mode */
3821 {
3822 for (i = min; i < max; i++)
3823 {
3824 if (eptr >= md->end_subject)
3825 {
3826 SCHECK_PARTIAL();
3827 break;
3828 }
3829 if (fc == *eptr || foc == *eptr) break;
3830 eptr++;
3831 }
3832 if (possessive) continue;
3833 while (eptr >= pp)
3834 {
3835 RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3836 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3837 eptr--;
3838 }
3839 }
3840
3841 RRETURN(MATCH_NOMATCH);
3842 }
3843 /* Control never gets here */
3844 }
3845
3846 /* Caseful comparisons */
3847
3848 else
3849 {
3850 #ifdef SUPPORT_UTF
3851 if (utf)
3852 {
3853 register unsigned int d;
3854 for (i = 1; i <= min; i++)
3855 {
3856 if (eptr >= md->end_subject)
3857 {
3858 SCHECK_PARTIAL();
3859 RRETURN(MATCH_NOMATCH);
3860 }
3861 GETCHARINC(d, eptr);
3862 if (fc == d) RRETURN(MATCH_NOMATCH);
3863 }
3864 }
3865 else
3866 #endif
3867 /* Not UTF mode */
3868 {
3869 for (i = 1; i <= min; i++)
3870 {
3871 if (eptr >= md->end_subject)
3872 {
3873 SCHECK_PARTIAL();
3874 RRETURN(MATCH_NOMATCH);
3875 }
3876 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3877 }
3878 }
3879
3880 if (min == max) continue;
3881
3882 if (minimize)
3883 {
3884 #ifdef SUPPORT_UTF
3885 if (utf)
3886 {
3887 register unsigned int d;
3888 for (fi = min;; fi++)
3889 {
3890 RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3891 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3892 if (fi >= max) RRETURN(MATCH_NOMATCH);
3893 if (eptr >= md->end_subject)
3894 {
3895 SCHECK_PARTIAL();
3896 RRETURN(MATCH_NOMATCH);
3897 }
3898 GETCHARINC(d, eptr);
3899 if (fc == d) RRETURN(MATCH_NOMATCH);
3900 }
3901 }
3902 else
3903 #endif
3904 /* Not UTF mode */
3905 {
3906 for (fi = min;; fi++)
3907 {
3908 RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3909 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910 if (fi >= max) RRETURN(MATCH_NOMATCH);
3911 if (eptr >= md->end_subject)
3912 {
3913 SCHECK_PARTIAL();
3914 RRETURN(MATCH_NOMATCH);
3915 }
3916 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3917 }
3918 }
3919 /* Control never gets here */
3920 }
3921
3922 /* Maximize case */
3923
3924 else
3925 {
3926 pp = eptr;
3927
3928 #ifdef SUPPORT_UTF
3929 if (utf)
3930 {
3931 register unsigned int d;
3932 for (i = min; i < max; i++)
3933 {
3934 int len = 1;
3935 if (eptr >= md->end_subject)
3936 {
3937 SCHECK_PARTIAL();
3938 break;
3939 }
3940 GETCHARLEN(d, eptr, len);
3941 if (fc == d) break;
3942 eptr += len;
3943 }
3944 if (possessive) continue;
3945 for(;;)
3946 {
3947 RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3948 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3949 if (eptr-- == pp) break; /* Stop if tried at original pos */
3950 BACKCHAR(eptr);
3951 }
3952 }
3953 else
3954 #endif
3955 /* Not UTF mode */
3956 {
3957 for (i = min; i < max; i++)
3958 {
3959 if (eptr >= md->end_subject)
3960 {
3961 SCHECK_PARTIAL();
3962 break;
3963 }
3964 if (fc == *eptr) break;
3965 eptr++;
3966 }
3967 if (possessive) continue;
3968 while (eptr >= pp)
3969 {
3970 RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3971 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3972 eptr--;
3973 }
3974 }
3975
3976 RRETURN(MATCH_NOMATCH);
3977 }
3978 }
3979 /* Control never gets here */
3980
3981 /* Match a single character type repeatedly; several different opcodes
3982 share code. This is very similar to the code for single characters, but we
3983 repeat it in the interests of efficiency. */
3984
3985 case OP_TYPEEXACT:
3986 min = max = GET2(ecode, 1);
3987 minimize = TRUE;
3988 ecode += 1 + IMM2_SIZE;
3989 goto REPEATTYPE;
3990
3991 case OP_TYPEUPTO:
3992 case OP_TYPEMINUPTO:
3993 min = 0;
3994 max = GET2(ecode, 1);
3995 minimize = *ecode == OP_TYPEMINUPTO;
3996 ecode += 1 + IMM2_SIZE;
3997 goto REPEATTYPE;
3998
3999 case OP_TYPEPOSSTAR:
4000 possessive = TRUE;
4001 min = 0;
4002 max = INT_MAX;
4003 ecode++;
4004 goto REPEATTYPE;
4005
4006 case OP_TYPEPOSPLUS:
4007 possessive = TRUE;
4008 min = 1;
4009 max = INT_MAX;
4010 ecode++;
4011 goto REPEATTYPE;
4012
4013 case OP_TYPEPOSQUERY:
4014 possessive = TRUE;
4015 min = 0;
4016 max = 1;
4017 ecode++;
4018 goto REPEATTYPE;
4019
4020 case OP_TYPEPOSUPTO:
4021 possessive = TRUE;
4022 min = 0;
4023 max = GET2(ecode, 1);
4024 ecode += 1 + IMM2_SIZE;
4025 goto REPEATTYPE;
4026
4027 case OP_TYPESTAR:
4028 case OP_TYPEMINSTAR:
4029 case OP_TYPEPLUS:
4030 case OP_TYPEMINPLUS:
4031 case OP_TYPEQUERY:
4032 case OP_TYPEMINQUERY:
4033 c = *ecode++ - OP_TYPESTAR;
4034 minimize = (c & 1) != 0;
4035 min = rep_min[c]; /* Pick up values from tables; */
4036 max = rep_max[c]; /* zero for max => infinity */
4037 if (max == 0) max = INT_MAX;
4038
4039 /* Common code for all repeated single character type matches. Note that
4040 in UTF-8 mode, '.' matches a character of any length, but for the other
4041 character types, the valid characters are all one-byte long. */
4042
4043 REPEATTYPE:
4044 ctype = *ecode++; /* Code for the character type */
4045
4046 #ifdef SUPPORT_UCP
4047 if (ctype == OP_PROP || ctype == OP_NOTPROP)
4048 {
4049 prop_fail_result = ctype == OP_NOTPROP;
4050 prop_type = *ecode++;
4051 prop_value = *ecode++;
4052 }
4053 else prop_type = -1;
4054 #endif
4055
4056 /* First, ensure the minimum number of matches are present. Use inline
4057 code for maximizing the speed, and do the type test once at the start
4058 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4059 is tidier. Also separate the UCP code, which can be the same for both UTF-8
4060 and single-bytes. */
4061
4062 if (min > 0)
4063 {
4064 #ifdef SUPPORT_UCP
4065 if (prop_type >= 0)
4066 {
4067 switch(prop_type)
4068 {
4069 case PT_ANY:
4070 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4071 for (i = 1; i <= min; i++)
4072 {
4073 if (eptr >= md->end_subject)
4074 {
4075 SCHECK_PARTIAL();
4076 RRETURN(MATCH_NOMATCH);
4077 }
4078 GETCHARINCTEST(c, eptr);
4079 }
4080 break;
4081
4082 case PT_LAMP:
4083 for (i = 1; i <= min; i++)
4084 {
4085 int chartype;
4086 if (eptr >= md->end_subject)
4087 {
4088 SCHECK_PARTIAL();
4089 RRETURN(MATCH_NOMATCH);
4090 }
4091 GETCHARINCTEST(c, eptr);
4092 chartype = UCD_CHARTYPE(c);
4093 if ((chartype == ucp_Lu ||
4094 chartype == ucp_Ll ||
4095 chartype == ucp_Lt) == prop_fail_result)
4096 RRETURN(MATCH_NOMATCH);
4097 }
4098 break;
4099
4100 case PT_GC:
4101 for (i = 1; i <= min; i++)
4102 {
4103 if (eptr >= md->end_subject)
4104 {
4105 SCHECK_PARTIAL();
4106 RRETURN(MATCH_NOMATCH);
4107 }
4108 GETCHARINCTEST(c, eptr);
4109 if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4110 RRETURN(MATCH_NOMATCH);
4111 }
4112 break;
4113
4114 case PT_PC:
4115 for (i = 1; i <= min; i++)
4116 {
4117 if (eptr >= md->end_subject)
4118 {
4119 SCHECK_PARTIAL();
4120 RRETURN(MATCH_NOMATCH);
4121 }
4122 GETCHARINCTEST(c, eptr);
4123 if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4124 RRETURN(MATCH_NOMATCH);
4125 }
4126 break;
4127
4128 case PT_SC:
4129 for (i = 1; i <= min; i++)
4130 {
4131 if (eptr >= md->end_subject)
4132 {
4133 SCHECK_PARTIAL();
4134 RRETURN(MATCH_NOMATCH);
4135 }
4136 GETCHARINCTEST(c, eptr);
4137 if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4138 RRETURN(MATCH_NOMATCH);
4139 }
4140 break;
4141
4142 case PT_ALNUM:
4143 for (i = 1; i <= min; i++)
4144 {
4145 int category;
4146 if (eptr >= md->end_subject)
4147 {
4148 SCHECK_PARTIAL();
4149 RRETURN(MATCH_NOMATCH);
4150 }
4151 GETCHARINCTEST(c, eptr);
4152 category = UCD_CATEGORY(c);
4153 if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4154 RRETURN(MATCH_NOMATCH);
4155 }
4156 break;
4157
4158 case PT_SPACE: /* Perl space */
4159 for (i = 1; i <= min; i++)
4160 {
4161 if (eptr >= md->end_subject)
4162 {
4163 SCHECK_PARTIAL();
4164 RRETURN(MATCH_NOMATCH);
4165 }
4166 GETCHARINCTEST(c, eptr);
4167 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4168 c == CHAR_FF || c == CHAR_CR)
4169 == prop_fail_result)
4170 RRETURN(MATCH_NOMATCH);
4171 }
4172 break;
4173
4174 case PT_PXSPACE: /* POSIX space */
4175 for (i = 1; i <= min; i++)
4176 {
4177 if (eptr >= md->end_subject)
4178 {
4179 SCHECK_PARTIAL();
4180 RRETURN(MATCH_NOMATCH);
4181 }
4182 GETCHARINCTEST(c, eptr);
4183 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4184 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4185 == prop_fail_result)
4186 RRETURN(MATCH_NOMATCH);
4187 }
4188 break;
4189
4190 case PT_WORD:
4191 for (i = 1; i <= min; i++)
4192 {
4193 int category;
4194 if (eptr >= md->end_subject)
4195 {
4196 SCHECK_PARTIAL();
4197 RRETURN(MATCH_NOMATCH);
4198 }
4199 GETCHARINCTEST(c, eptr);
4200 category = UCD_CATEGORY(c);
4201 if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4202 == prop_fail_result)
4203 RRETURN(MATCH_NOMATCH);
4204 }
4205 break;
4206
4207 /* This should not occur */
4208
4209 default:
4210 RRETURN(PCRE_ERROR_INTERNAL);
4211 }
4212 }
4213
4214 /* Match extended Unicode sequences. We will get here only if the
4215 support is in the binary; otherwise a compile-time error occurs. */
4216
4217 else if (ctype == OP_EXTUNI)
4218 {
4219 for (i = 1; i <= min; i++)
4220 {
4221 if (eptr >= md->end_subject)
4222 {
4223 SCHECK_PARTIAL();
4224 RRETURN(MATCH_NOMATCH);
4225 }
4226 GETCHARINCTEST(c, eptr);
4227 if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4228 while (eptr < md->end_subject)
4229 {
4230 int len = 1;
4231 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4232 if (UCD_CATEGORY(c) != ucp_M) break;
4233 eptr += len;
4234 }
4235 CHECK_PARTIAL();
4236 }
4237 }
4238
4239 else
4240 #endif /* SUPPORT_UCP */
4241
4242 /* Handle all other cases when the coding is UTF-8 */
4243
4244 #ifdef SUPPORT_UTF
4245 if (utf) switch(ctype)
4246 {
4247 case OP_ANY:
4248 for (i = 1; i <= min; i++)
4249 {
4250 if (eptr >= md->end_subject)
4251 {
4252 SCHECK_PARTIAL();
4253 RRETURN(MATCH_NOMATCH);
4254 }
4255 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4256 if (md->partial != 0 &&
4257 eptr + 1 >= md->end_subject &&
4258 NLBLOCK->nltype == NLTYPE_FIXED &&
4259 NLBLOCK->nllen == 2 &&
4260 *eptr == NLBLOCK->nl[0])
4261 {
4262 md->hitend = TRUE;
4263 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4264 }
4265 eptr++;
4266 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4267 }
4268 break;
4269
4270 case OP_ALLANY:
4271 for (i = 1; i <= min; i++)
4272 {
4273 if (eptr >= md->end_subject)
4274 {
4275 SCHECK_PARTIAL();
4276 RRETURN(MATCH_NOMATCH);
4277 }
4278 eptr++;
4279 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4280 }
4281 break;
4282
4283 case OP_ANYBYTE:
4284 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4285 eptr += min;
4286 break;
4287
4288 case OP_ANYNL:
4289 for (i = 1; i <= min; i++)
4290 {
4291 if (eptr >= md->end_subject)
4292 {
4293 SCHECK_PARTIAL();
4294 RRETURN(MATCH_NOMATCH);
4295 }
4296 GETCHARINC(c, eptr);
4297 switch(c)
4298 {
4299 default: RRETURN(MATCH_NOMATCH);
4300
4301 case 0x000d:
4302 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4303 break;
4304
4305 case 0x000a:
4306 break;
4307
4308 case 0x000b:
4309 case 0x000c:
4310 case 0x0085:
4311 case 0x2028:
4312 case 0x2029:
4313 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4314 break;
4315 }
4316 }
4317 break;
4318
4319 case OP_NOT_HSPACE:
4320 for (i = 1; i <= min; i++)
4321 {
4322 if (eptr >= md->end_subject)
4323 {
4324 SCHECK_PARTIAL();
4325 RRETURN(MATCH_NOMATCH);
4326 }
4327 GETCHARINC(c, eptr);
4328 switch(c)
4329 {
4330 default: break;
4331 case 0x09: /* HT */
4332 case 0x20: /* SPACE */
4333 case 0xa0: /* NBSP */
4334 case 0x1680: /* OGHAM SPACE MARK */
4335 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4336 case 0x2000: /* EN QUAD */
4337 case 0x2001: /* EM QUAD */
4338 case 0x2002: /* EN SPACE */
4339 case 0x2003: /* EM SPACE */
4340 case 0x2004: /* THREE-PER-EM SPACE */
4341 case 0x2005: /* FOUR-PER-EM SPACE */
4342 case 0x2006: /* SIX-PER-EM SPACE */
4343 case 0x2007: /* FIGURE SPACE */
4344 case 0x2008: /* PUNCTUATION SPACE */
4345 case 0x2009: /* THIN SPACE */
4346 case 0x200A: /* HAIR SPACE */
4347 case 0x202f: /* NARROW NO-BREAK SPACE */
4348 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4349 case 0x3000: /* IDEOGRAPHIC SPACE */
4350 RRETURN(MATCH_NOMATCH);
4351 }
4352 }
4353 break;
4354
4355 case OP_HSPACE:
4356 for (i = 1; i <= min; i++)
4357 {
4358 if (eptr >= md->end_subject)
4359 {
4360 SCHECK_PARTIAL();
4361 RRETURN(MATCH_NOMATCH);
4362 }
4363 GETCHARINC(c, eptr);
4364 switch(c)
4365 {
4366 default: RRETURN(MATCH_NOMATCH);
4367 case 0x09: /* HT */
4368 case 0x20: /* SPACE */
4369 case 0xa0: /* NBSP */
4370 case 0x1680: /* OGHAM SPACE MARK */
4371 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4372 case 0x2000: /* EN QUAD */
4373 case 0x2001: /* EM QUAD */
4374 case 0x2002: /* EN SPACE */
4375 case 0x2003: /* EM SPACE */
4376 case 0x2004: /* THREE-PER-EM SPACE */
4377 case 0x2005: /* FOUR-PER-EM SPACE */
4378 case 0x2006: /* SIX-PER-EM SPACE */
4379 case 0x2007: /* FIGURE SPACE */
4380 case 0x2008: /* PUNCTUATION SPACE */
4381 case 0x2009: /* THIN SPACE */
4382 case 0x200A: /* HAIR SPACE */
4383 case 0x202f: /* NARROW NO-BREAK SPACE */
4384 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4385 case 0x3000: /* IDEOGRAPHIC SPACE */
4386 break;
4387 }
4388 }
4389 break;
4390
4391 case OP_NOT_VSPACE:
4392 for (i = 1; i <= min; i++)
4393 {
4394 if (eptr >= md->end_subject)
4395 {
4396 SCHECK_PARTIAL();
4397 RRETURN(MATCH_NOMATCH);
4398 }
4399 GETCHARINC(c, eptr);
4400 switch(c)
4401 {
4402 default: break;
4403 case 0x0a: /* LF */
4404 case 0x0b: /* VT */
4405 case 0x0c: /* FF */
4406 case 0x0d: /* CR */
4407 case 0x85: /* NEL */
4408 case 0x2028: /* LINE SEPARATOR */
4409 case 0x2029: /* PARAGRAPH SEPARATOR */
4410 RRETURN(MATCH_NOMATCH);
4411 }
4412 }
4413 break;
4414
4415 case OP_VSPACE:
4416 for (i = 1; i <= min; i++)
4417 {
4418 if (eptr >= md->end_subject)
4419 {
4420 SCHECK_PARTIAL();
4421 RRETURN(MATCH_NOMATCH);
4422 }
4423 GETCHARINC(c, eptr);
4424 switch(c)
4425 {
4426 default: RRETURN(MATCH_NOMATCH);
4427 case 0x0a: /* LF */
4428 case 0x0b: /* VT */
4429 case 0x0c: /* FF */
4430 case 0x0d: /* CR */
4431 case 0x85: /* NEL */
4432 case 0x2028: /* LINE SEPARATOR */
4433 case 0x2029: /* PARAGRAPH SEPARATOR */
4434 break;
4435 }
4436 }
4437 break;
4438
4439 case OP_NOT_DIGIT:
4440 for (i = 1; i <= min; i++)
4441 {
4442 if (eptr >= md->end_subject)
4443 {
4444 SCHECK_PARTIAL();
4445 RRETURN(MATCH_NOMATCH);
4446 }
4447 GETCHARINC(c, eptr);
4448 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4449 RRETURN(MATCH_NOMATCH);
4450 }
4451 break;
4452
4453 case OP_DIGIT:
4454 for (i = 1; i <= min; i++)
4455 {
4456 if (eptr >= md->end_subject)
4457 {
4458 SCHECK_PARTIAL();
4459 RRETURN(MATCH_NOMATCH);
4460 }
4461 if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4462 RRETURN(MATCH_NOMATCH);
4463 eptr++;
4464 /* No need to skip more bytes - we know it's a 1-byte character */
4465 }
4466 break;
4467
4468 case OP_NOT_WHITESPACE:
4469 for (i = 1; i <= min; i++)
4470 {
4471 if (eptr >= md->end_subject)
4472 {
4473 SCHECK_PARTIAL();
4474 RRETURN(MATCH_NOMATCH);
4475 }
4476 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4477 RRETURN(MATCH_NOMATCH);
4478 eptr++;
4479 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4480 }
4481 break;
4482
4483 case OP_WHITESPACE:
4484 for (i = 1; i <= min; i++)
4485 {
4486 if (eptr >= md->end_subject)
4487 {
4488 SCHECK_PARTIAL();
4489 RRETURN(MATCH_NOMATCH);
4490 }
4491 if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4492 RRETURN(MATCH_NOMATCH);
4493 eptr++;
4494 /* No need to skip more bytes - we know it's a 1-byte character */
4495 }
4496 break;
4497
4498 case OP_NOT_WORDCHAR:
4499 for (i = 1; i <= min; i++)
4500 {
4501 if (eptr >= md->end_subject)
4502 {
4503 SCHECK_PARTIAL();
4504 RRETURN(MATCH_NOMATCH);
4505 }
4506 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4507 RRETURN(MATCH_NOMATCH);
4508 eptr++;
4509 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4510 }
4511 break;
4512
4513 case OP_WORDCHAR:
4514 for (i = 1; i <= min; i++)
4515 {
4516 if (eptr >= md->end_subject)
4517 {
4518 SCHECK_PARTIAL();
4519 RRETURN(MATCH_NOMATCH);
4520 }
4521 if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4522 RRETURN(MATCH_NOMATCH);
4523 eptr++;
4524 /* No need to skip more bytes - we know it's a 1-byte character */
4525 }
4526 break;
4527
4528 default:
4529 RRETURN(PCRE_ERROR_INTERNAL);
4530 } /* End switch(ctype) */
4531
4532 else
4533 #endif /* SUPPORT_UTF */
4534
4535 /* Code for the non-UTF-8 case for minimum matching of operators other
4536 than OP_PROP and OP_NOTPROP. */
4537
4538 switch(ctype)
4539 {
4540 case OP_ANY:
4541 for (i = 1; i <= min; i++)
4542 {
4543 if (eptr >= md->end_subject)
4544 {
4545 SCHECK_PARTIAL();
4546 RRETURN(MATCH_NOMATCH);
4547 }
4548 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4549 if (md->partial != 0 &&
4550 eptr + 1 >= md->end_subject &&
4551 NLBLOCK->nltype == NLTYPE_FIXED &&
4552 NLBLOCK->nllen == 2 &&
4553 *eptr == NLBLOCK->nl[0])
4554 {
4555 md->hitend = TRUE;
4556 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4557 }
4558 eptr++;
4559 }
4560 break;
4561
4562 case OP_ALLANY:
4563 if (eptr > md->end_subject - min)
4564 {
4565 SCHECK_PARTIAL();
4566 RRETURN(MATCH_NOMATCH);
4567 }
4568 eptr += min;
4569 break;
4570
4571 case OP_ANYBYTE:
4572 if (eptr > md->end_subject - min)
4573 {
4574 SCHECK_PARTIAL();
4575 RRETURN(MATCH_NOMATCH);
4576 }
4577 eptr += min;
4578 break;
4579
4580 case OP_ANYNL:
4581 for (i = 1; i <= min; i++)
4582 {
4583 if (eptr >= md->end_subject)
4584 {
4585 SCHECK_PARTIAL();
4586 RRETURN(MATCH_NOMATCH);
4587 }
4588 switch(*eptr++)
4589 {
4590 default: RRETURN(MATCH_NOMATCH);
4591
4592 case 0x000d:
4593 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4594 break;
4595
4596 case 0x000a:
4597 break;
4598
4599 case 0x000b:
4600 case 0x000c:
4601 case 0x0085:
4602 #ifdef COMPILE_PCRE16
4603 case 0x2028:
4604 case 0x2029:
4605 #endif
4606 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4607 break;
4608 }
4609 }
4610 break;
4611
4612 case OP_NOT_HSPACE:
4613 for (i = 1; i <= min; i++)
4614 {
4615 if (eptr >= md->end_subject)
4616 {
4617 SCHECK_PARTIAL();
4618 RRETURN(MATCH_NOMATCH);
4619 }
4620 switch(*eptr++)
4621 {
4622 default: break;
4623 case 0x09: /* HT */
4624 case 0x20: /* SPACE */
4625 case 0xa0: /* NBSP */
4626 #ifdef COMPILE_PCRE16
4627 case 0x1680: /* OGHAM SPACE MARK */
4628 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4629 case 0x2000: /* EN QUAD */
4630 case 0x2001: /* EM QUAD */
4631 case 0x2002: /* EN SPACE */
4632 case 0x2003: /* EM SPACE */
4633 case 0x2004: /* THREE-PER-EM SPACE */
4634 case 0x2005: /* FOUR-PER-EM SPACE */
4635 case 0x2006: /* SIX-PER-EM SPACE */
4636 case 0x2007: /* FIGURE SPACE */
4637 case 0x2008: /* PUNCTUATION SPACE */
4638 case 0x2009: /* THIN SPACE */
4639 case 0x200A: /* HAIR SPACE */
4640 case 0x202f: /* NARROW NO-BREAK SPACE */
4641 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4642 case 0x3000: /* IDEOGRAPHIC SPACE */
4643 #endif
4644 RRETURN(MATCH_NOMATCH);
4645 }
4646 }
4647 break;
4648
4649 case OP_HSPACE:
4650 for (i = 1; i <= min; i++)
4651 {
4652 if (eptr >= md->end_subject)
4653 {
4654 SCHECK_PARTIAL();
4655 RRETURN(MATCH_NOMATCH);
4656 }
4657 switch(*eptr++)
4658 {
4659 default: RRETURN(MATCH_NOMATCH);
4660 case 0x09: /* HT */
4661 case 0x20: /* SPACE */
4662 case 0xa0: /* NBSP */
4663 #ifdef COMPILE_PCRE16
4664 case 0x1680: /* OGHAM SPACE MARK */
4665 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4666 case 0x2000: /* EN QUAD */
4667 case 0x2001: /* EM QUAD */
4668 case 0x2002: /* EN SPACE */
4669 case 0x2003: /* EM SPACE */
4670 case 0x2004: /* THREE-PER-EM SPACE */
4671 case 0x2005: /* FOUR-PER-EM SPACE */
4672 case 0x2006: /* SIX-PER-EM SPACE */
4673 case 0x2007: /* FIGURE SPACE */
4674 case 0x2008: /* PUNCTUATION SPACE */
4675 case 0x2009: /* THIN SPACE */
4676 case 0x200A: /* HAIR SPACE */
4677 case 0x202f: /* NARROW NO-BREAK SPACE */
4678 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4679 case 0x3000: /* IDEOGRAPHIC SPACE */
4680 #endif
4681 break;
4682 }
4683 }
4684 break;
4685
4686 case OP_NOT_VSPACE:
4687 for (i = 1; i <= min; i++)
4688 {
4689 if (eptr >= md->end_subject)
4690 {
4691 SCHECK_PARTIAL();
4692 RRETURN(MATCH_NOMATCH);
4693 }
4694 switch(*eptr++)
4695 {
4696 default: break;
4697 case 0x0a: /* LF */
4698 case 0x0b: /* VT */
4699 case 0x0c: /* FF */
4700 case 0x0d: /* CR */
4701 case 0x85: /* NEL */
4702 #ifdef COMPILE_PCRE16
4703 case 0x2028: /* LINE SEPARATOR */
4704 case 0x2029: /* PARAGRAPH SEPARATOR */
4705 #endif
4706 RRETURN(MATCH_NOMATCH);
4707 }
4708 }
4709 break;
4710
4711 case OP_VSPACE:
4712 for (i = 1; i <= min; i++)
4713 {
4714 if (eptr >= md->end_subject)
4715 {
4716 SCHECK_PARTIAL();
4717 RRETURN(MATCH_NOMATCH);
4718 }
4719 switch(*eptr++)
4720 {
4721 default: RRETURN(MATCH_NOMATCH);
4722 case 0x0a: /* LF */
4723 case 0x0b: /* VT */
4724 case 0x0c: /* FF */
4725 case 0x0d: /* CR */
4726 case 0x85: /* NEL */
4727 #ifdef COMPILE_PCRE16
4728 case 0x2028: /* LINE SEPARATOR */
4729 case 0x2029: /* PARAGRAPH SEPARATOR */
4730 #endif
4731 break;
4732 }
4733 }
4734 break;
4735
4736 case OP_NOT_DIGIT:
4737 for (i = 1; i <= min; i++)
4738 {
4739 if (eptr >= md->end_subject)
4740 {
4741 SCHECK_PARTIAL();
4742 RRETURN(MATCH_NOMATCH);
4743 }
4744 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4745 RRETURN(MATCH_NOMATCH);
4746 eptr++;
4747 }
4748 break;
4749
4750 case OP_DIGIT:
4751 for (i = 1; i <= min; i++)
4752 {
4753 if (eptr >= md->end_subject)
4754 {
4755 SCHECK_PARTIAL();
4756 RRETURN(MATCH_NOMATCH);
4757 }
4758 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4759 RRETURN(MATCH_NOMATCH);
4760 eptr++;
4761 }
4762 break;
4763
4764 case OP_NOT_WHITESPACE:
4765 for (i = 1; i <= min; i++)
4766 {
4767 if (eptr >= md->end_subject)
4768 {
4769 SCHECK_PARTIAL();
4770 RRETURN(MATCH_NOMATCH);
4771 }
4772 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4773 RRETURN(MATCH_NOMATCH);
4774 eptr++;
4775 }
4776 break;
4777
4778 case OP_WHITESPACE:
4779 for (i = 1; i <= min; i++)
4780 {
4781 if (eptr >= md->end_subject)
4782 {
4783 SCHECK_PARTIAL();
4784 RRETURN(MATCH_NOMATCH);
4785 }
4786 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4787 RRETURN(MATCH_NOMATCH);
4788 eptr++;
4789 }
4790 break;
4791
4792 case OP_NOT_WORDCHAR:
4793 for (i = 1; i <= min; i++)
4794 {
4795 if (eptr >= md->end_subject)
4796 {
4797 SCHECK_PARTIAL();
4798 RRETURN(MATCH_NOMATCH);
4799 }
4800 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4801 RRETURN(MATCH_NOMATCH);
4802 eptr++;
4803 }
4804 break;
4805
4806 case OP_WORDCHAR:
4807 for (i = 1; i <= min; i++)
4808 {
4809 if (eptr >= md->end_subject)
4810 {
4811 SCHECK_PARTIAL();
4812 RRETURN(MATCH_NOMATCH);
4813 }
4814 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4815 RRETURN(MATCH_NOMATCH);
4816 eptr++;
4817 }
4818 break;
4819
4820 default:
4821 RRETURN(PCRE_ERROR_INTERNAL);
4822 }
4823 }
4824
4825 /* If min = max, continue at the same level without recursing */
4826
4827 if (min == max) continue;
4828
4829 /* If minimizing, we have to test the rest of the pattern before each
4830 subsequent match. Again, separate the UTF-8 case for speed, and also
4831 separate the UCP cases. */
4832
4833 if (minimize)
4834 {
4835 #ifdef SUPPORT_UCP
4836 if (prop_type >= 0)
4837 {
4838 switch(prop_type)
4839 {
4840 case PT_ANY:
4841 for (fi = min;; fi++)
4842 {
4843 RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4844 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4845 if (fi >= max) RRETURN(MATCH_NOMATCH);
4846 if (eptr >= md->end_subject)
4847 {
4848 SCHECK_PARTIAL();
4849 RRETURN(MATCH_NOMATCH);
4850 }
4851 GETCHARINCTEST(c, eptr);
4852 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4853 }
4854 /* Control never gets here */
4855
4856 case PT_LAMP:
4857 for (fi = min;; fi++)
4858 {
4859 int chartype;
4860 RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4861 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4862 if (fi >= max) RRETURN(MATCH_NOMATCH);
4863 if (eptr >= md->end_subject)
4864 {
4865 SCHECK_PARTIAL();
4866 RRETURN(MATCH_NOMATCH);
4867 }
4868 GETCHARINCTEST(c, eptr);
4869 chartype = UCD_CHARTYPE(c);
4870 if ((chartype == ucp_Lu ||
4871 chartype == ucp_Ll ||
4872 chartype == ucp_Lt) == prop_fail_result)
4873 RRETURN(MATCH_NOMATCH);
4874 }
4875 /* Control never gets here */
4876
4877 case PT_GC:
4878 for (fi = min;; fi++)
4879 {
4880 RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4881 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4882 if (fi >= max) RRETURN(MATCH_NOMATCH);
4883 if (eptr >= md->end_subject)
4884 {
4885 SCHECK_PARTIAL();
4886 RRETURN(MATCH_NOMATCH);
4887 }
4888 GETCHARINCTEST(c, eptr);
4889 if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4890 RRETURN(MATCH_NOMATCH);
4891 }
4892 /* Control never gets here */
4893
4894 case PT_PC:
4895 for (fi = min;; fi++)
4896 {
4897 RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4898 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4899 if (fi >= max) RRETURN(MATCH_NOMATCH);
4900 if (eptr >= md->end_subject)
4901 {
4902 SCHECK_PARTIAL();
4903 RRETURN(MATCH_NOMATCH);
4904 }
4905 GETCHARINCTEST(c, eptr);
4906 if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4907 RRETURN(MATCH_NOMATCH);
4908 }
4909 /* Control never gets here */
4910
4911 case PT_SC:
4912 for (fi = min;; fi++)
4913 {
4914 RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4915 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4916 if (fi >= max) RRETURN(MATCH_NOMATCH);
4917 if (eptr >= md->end_subject)
4918 {
4919 SCHECK_PARTIAL();
4920 RRETURN(MATCH_NOMATCH);
4921 }
4922 GETCHARINCTEST(c, eptr);
4923 if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4924 RRETURN(MATCH_NOMATCH);
4925 }
4926 /* Control never gets here */
4927
4928 case PT_ALNUM:
4929 for (fi = min;; fi++)
4930 {
4931 int category;
4932 RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4933 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934 if (fi >= max) RRETURN(MATCH_NOMATCH);
4935 if (eptr >= md->end_subject)
4936 {
4937 SCHECK_PARTIAL();
4938 RRETURN(MATCH_NOMATCH);
4939 }
4940 GETCHARINCTEST(c, eptr);
4941 category = UCD_CATEGORY(c);
4942 if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4943 RRETURN(MATCH_NOMATCH);
4944 }
4945 /* Control never gets here */
4946
4947 case PT_SPACE: /* Perl space */
4948 for (fi = min;; fi++)
4949 {
4950 RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4951 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4952 if (fi >= max) RRETURN(MATCH_NOMATCH);
4953 if (eptr >= md->end_subject)
4954 {
4955 SCHECK_PARTIAL();
4956 RRETURN(MATCH_NOMATCH);
4957 }
4958 GETCHARINCTEST(c, eptr);
4959 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4960 c == CHAR_FF || c == CHAR_CR)
4961 == prop_fail_result)
4962 RRETURN(MATCH_NOMATCH);
4963 }
4964 /* Control never gets here */
4965
4966 case PT_PXSPACE: /* POSIX space */
4967 for (fi = min;; fi++)
4968 {
4969 RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4970 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4971 if (fi >= max) RRETURN(MATCH_NOMATCH);
4972 if (eptr >= md->end_subject)
4973 {
4974 SCHECK_PARTIAL();
4975 RRETURN(MATCH_NOMATCH);
4976 }
4977 GETCHARINCTEST(c, eptr);
4978 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4979 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4980 == prop_fail_result)
4981 RRETURN(MATCH_NOMATCH);
4982 }
4983 /* Control never gets here */
4984
4985 case PT_WORD:
4986 for (fi = min;; fi++)
4987 {
4988 int category;
4989 RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4990 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4991 if (fi >= max) RRETURN(MATCH_NOMATCH);
4992 if (eptr >= md->end_subject)
4993 {
4994 SCHECK_PARTIAL();
4995 RRETURN(MATCH_NOMATCH);
4996 }
4997 GETCHARINCTEST(c, eptr);
4998 category = UCD_CATEGORY(c);
4999 if ((category == ucp_L ||
5000 category == ucp_N ||
5001 c == CHAR_UNDERSCORE)
5002 == prop_fail_result)
5003 RRETURN(MATCH_NOMATCH);
5004 }
5005 /* Control never gets here */
5006
5007 /* This should never occur */
5008
5009 default:
5010 RRETURN(PCRE_ERROR_INTERNAL);
5011 }
5012 }
5013
5014 /* Match extended Unicode sequences. We will get here only if the
5015 support is in the binary; otherwise a compile-time error occurs. */
5016
5017 else if (ctype == OP_EXTUNI)
5018 {
5019 for (fi = min;; fi++)
5020 {
5021 RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5022 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5023 if (fi >= max) RRETURN(MATCH_NOMATCH);
5024 if (eptr >= md->end_subject)
5025 {
5026 SCHECK_PARTIAL();
5027 RRETURN(MATCH_NOMATCH);
5028 }
5029 GETCHARINCTEST(c, eptr);
5030 if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
5031 while (eptr < md->end_subject)
5032 {
5033 int len = 1;
5034 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5035 if (UCD_CATEGORY(c) != ucp_M) break;
5036 eptr += len;
5037 }
5038 CHECK_PARTIAL();
5039 }
5040 }
5041 else
5042 #endif /* SUPPORT_UCP */
5043
5044 #ifdef SUPPORT_UTF
5045 if (utf)
5046 {
5047 for (fi = min;; fi++)
5048 {
5049 RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5050 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5051 if (fi >= max) RRETURN(MATCH_NOMATCH);
5052 if (eptr >= md->end_subject)
5053 {
5054 SCHECK_PARTIAL();
5055 RRETURN(MATCH_NOMATCH);
5056 }
5057 if (ctype == OP_ANY && IS_NEWLINE(eptr))
5058 RRETURN(MATCH_NOMATCH);
5059 GETCHARINC(c, eptr);
5060 switch(ctype)
5061 {
5062 case OP_ANY: /* This is the non-NL case */
5063 if (md->partial != 0 && /* Take care with CRLF partial */
5064 eptr >= md->end_subject &&
5065 NLBLOCK->nltype == NLTYPE_FIXED &&
5066 NLBLOCK->nllen == 2 &&
5067 c == NLBLOCK->nl[0])
5068 {
5069 md->hitend = TRUE;
5070 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5071 }
5072 break;
5073
5074 case OP_ALLANY:
5075 case OP_ANYBYTE:
5076 break;
5077
5078 case OP_ANYNL:
5079 switch(c)
5080 {
5081 default: RRETURN(MATCH_NOMATCH);
5082 case 0x000d:
5083 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5084 break;
5085 case 0x000a:
5086 break;
5087
5088 case 0x000b:
5089 case 0x000c:
5090 case 0x0085:
5091 case 0x2028:
5092 case 0x2029:
5093 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5094 break;
5095 }
5096 break;
5097
5098 case OP_NOT_HSPACE:
5099 switch(c)
5100 {
5101 default: break;
5102 case 0x09: /* HT */
5103 case 0x20: /* SPACE */
5104 case 0xa0: /* NBSP */
5105 case 0x1680: /* OGHAM SPACE MARK */
5106 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5107 case 0x2000: /* EN QUAD */
5108 case 0x2001: /* EM QUAD */
5109 case 0x2002: /* EN SPACE */
5110 case 0x2003: /* EM SPACE */
5111 case 0x2004: /* THREE-PER-EM SPACE */
5112 case 0x2005: /* FOUR-PER-EM SPACE */
5113 case 0x2006: /* SIX-PER-EM SPACE */
5114 case 0x2007: /* FIGURE SPACE */
5115 case 0x2008: /* PUNCTUATION SPACE */
5116 case 0x2009: /* THIN SPACE */
5117 case 0x200A: /* HAIR SPACE */
5118 case 0x202f: /* NARROW NO-BREAK SPACE */
5119 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5120 case 0x3000: /* IDEOGRAPHIC SPACE */
5121 RRETURN(MATCH_NOMATCH);
5122 }
5123 break;
5124
5125 case OP_HSPACE:
5126 switch(c)
5127 {
5128 default: RRETURN(MATCH_NOMATCH);
5129 case 0x09: /* HT */
5130 case 0x20: /* SPACE */
5131 case 0xa0: /* NBSP */
5132 case 0x1680: /* OGHAM SPACE MARK */
5133 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5134 case 0x2000: /* EN QUAD */
5135 case 0x2001: /* EM QUAD */
5136 case 0x2002: /* EN SPACE */
5137 case 0x2003: /* EM SPACE */
5138 case 0x2004: /* THREE-PER-EM SPACE */
5139 case 0x2005: /* FOUR-PER-EM SPACE */
5140 case 0x2006: /* SIX-PER-EM SPACE */
5141 case 0x2007: /* FIGURE SPACE */
5142 case 0x2008: /* PUNCTUATION SPACE */
5143 case 0x2009: /* THIN SPACE */
5144 case 0x200A: /* HAIR SPACE */
5145 case 0x202f: /* NARROW NO-BREAK SPACE */
5146 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5147 case 0x3000: /* IDEOGRAPHIC SPACE */
5148 break;
5149 }
5150 break;
5151
5152 case OP_NOT_VSPACE:
5153 switch(c)
5154 {
5155 default: break;
5156 case 0x0a: /* LF */
5157 case 0x0b: /* VT */
5158 case 0x0c: /* FF */
5159 case 0x0d: /* CR */
5160 case 0x85: /* NEL */
5161 case 0x2028: /* LINE SEPARATOR */
5162 case 0x2029: /* PARAGRAPH SEPARATOR */
5163 RRETURN(MATCH_NOMATCH);
5164 }
5165 break;
5166
5167 case OP_VSPACE:
5168 switch(c)
5169 {
5170 default: RRETURN(MATCH_NOMATCH);
5171 case 0x0a: /* LF */
5172 case 0x0b: /* VT */
5173 case 0x0c: /* FF */
5174 case 0x0d: /* CR */
5175 case 0x85: /* NEL */
5176 case 0x2028: /* LINE SEPARATOR */
5177 case 0x2029: /* PARAGRAPH SEPARATOR */
5178 break;
5179 }
5180 break;
5181
5182 case OP_NOT_DIGIT:
5183 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5184 RRETURN(MATCH_NOMATCH);
5185 break;
5186
5187 case OP_DIGIT:
5188 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5189 RRETURN(MATCH_NOMATCH);
5190 break;
5191
5192 case OP_NOT_WHITESPACE:
5193 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5194 RRETURN(MATCH_NOMATCH);
5195 break;
5196
5197 case OP_WHITESPACE:
5198 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5199 RRETURN(MATCH_NOMATCH);
5200 break;
5201
5202 case OP_NOT_WORDCHAR:
5203 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5204 RRETURN(MATCH_NOMATCH);
5205 break;
5206
5207 case OP_WORDCHAR:
5208 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5209 RRETURN(MATCH_NOMATCH);
5210 break;
5211
5212 default:
5213 RRETURN(PCRE_ERROR_INTERNAL);
5214 }
5215 }
5216 }
5217 else
5218 #endif
5219 /* Not UTF mode */
5220 {
5221 for (fi = min;; fi++)
5222 {
5223 RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5224 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5225 if (fi >= max) RRETURN(MATCH_NOMATCH);
5226 if (eptr >= md->end_subject)
5227 {
5228 SCHECK_PARTIAL();
5229 RRETURN(MATCH_NOMATCH);
5230 }
5231 if (ctype == OP_ANY && IS_NEWLINE(eptr))
5232 RRETURN(MATCH_NOMATCH);
5233 c = *eptr++;
5234 switch(ctype)
5235 {
5236 case OP_ANY: /* This is the non-NL case */
5237 if (md->partial != 0 && /* Take care with CRLF partial */
5238 eptr >= md->end_subject &&
5239 NLBLOCK->nltype == NLTYPE_FIXED &&
5240 NLBLOCK->nllen == 2 &&
5241 c == NLBLOCK->nl[0])
5242 {
5243 md->hitend = TRUE;
5244 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5245 }
5246 break;
5247
5248 case OP_ALLANY:
5249 case OP_ANYBYTE:
5250 break;
5251
5252 case OP_ANYNL:
5253 switch(c)
5254 {
5255 default: RRETURN(MATCH_NOMATCH);
5256 case 0x000d:
5257 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5258 break;
5259
5260 case 0x000a:
5261 break;
5262
5263 case 0x000b:
5264 case 0x000c:
5265 case 0x0085:
5266 #ifdef COMPILE_PCRE16
5267 case 0x2028:
5268 case 0x2029:
5269 #endif
5270 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5271 break;
5272 }
5273 break;
5274
5275 case OP_NOT_HSPACE:
5276 switch(c)
5277 {
5278 default: break;
5279 case 0x09: /* HT */
5280 case 0x20: /* SPACE */
5281 case 0xa0: /* NBSP */
5282 #ifdef COMPILE_PCRE16
5283 case 0x1680: /* OGHAM SPACE MARK */
5284 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5285 case 0x2000: /* EN QUAD */
5286 case 0x2001: /* EM QUAD */
5287 case 0x2002: /* EN SPACE */
5288 case 0x2003: /* EM SPACE */
5289 case 0x2004: /* THREE-PER-EM SPACE */
5290 case 0x2005: /* FOUR-PER-EM SPACE */
5291 case 0x2006: /* SIX-PER-EM SPACE */
5292 case 0x2007: /* FIGURE SPACE */
5293 case 0x2008: /* PUNCTUATION SPACE */
5294 case 0x2009: /* THIN SPACE */
5295 case 0x200A: /* HAIR SPACE */
5296 case 0x202f: /* NARROW NO-BREAK SPACE */
5297 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5298 case 0x3000: /* IDEOGRAPHIC SPACE */
5299 #endif
5300 RRETURN(MATCH_NOMATCH);
5301 }
5302 break;
5303
5304 case OP_HSPACE:
5305 switch(c)
5306 {
5307 default: RRETURN(MATCH_NOMATCH);
5308 case 0x09: /* HT */
5309 case 0x20: /* SPACE */
5310 case 0xa0: /* NBSP */
5311 #ifdef COMPILE_PCRE16
5312 case 0x1680: /* OGHAM SPACE MARK */
5313 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5314 case 0x2000: /* EN QUAD */
5315 case 0x2001: /* EM QUAD */
5316 case 0x2002: /* EN SPACE */
5317 case 0x2003: /* EM SPACE */
5318 case 0x2004: /* THREE-PER-EM SPACE */
5319 case 0x2005: /* FOUR-PER-EM SPACE */
5320 case 0x2006: /* SIX-PER-EM SPACE */
5321 case 0x2007: /* FIGURE SPACE */
5322 case 0x2008: /* PUNCTUATION SPACE */
5323 case 0x2009: /* THIN SPACE */
5324 case 0x200A: /* HAIR SPACE */
5325 case 0x202f: /* NARROW NO-BREAK SPACE */
5326 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5327 case 0x3000: /* IDEOGRAPHIC SPACE */
5328 #endif
5329 break;
5330 }
5331 break;
5332
5333 case OP_NOT_VSPACE:
5334 switch(c)
5335 {
5336 default: break;
5337 case 0x0a: /* LF */
5338 case 0x0b: /* VT */
5339 case 0x0c: /* FF */
5340 case 0x0d: /* CR */
5341 case 0x85: /* NEL */
5342 #ifdef COMPILE_PCRE16
5343 case 0x2028: /* LINE SEPARATOR */
5344 case 0x2029: /* PARAGRAPH SEPARATOR */
5345 #endif
5346 RRETURN(MATCH_NOMATCH);
5347 }
5348 break;
5349
5350 case OP_VSPACE:
5351 switch(c)
5352 {
5353 default: RRETURN(MATCH_NOMATCH);
5354 case 0x0a: /* LF */
5355 case 0x0b: /* VT */
5356 case 0x0c: /* FF */
5357 case 0x0d: /* CR */
5358 case 0x85: /* NEL */
5359 #ifdef COMPILE_PCRE16
5360 case 0x2028: /* LINE SEPARATOR */
5361 case 0x2029: /* PARAGRAPH SEPARATOR */
5362 #endif
5363 break;
5364 }
5365 break;
5366
5367 case OP_NOT_DIGIT:
5368 if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5369 break;
5370
5371 case OP_DIGIT:
5372 if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5373 break;
5374
5375 case OP_NOT_WHITESPACE:
5376 if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5377 break;
5378
5379 case OP_WHITESPACE:
5380 if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5381 break;
5382
5383 case OP_NOT_WORDCHAR:
5384 if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5385 break;
5386
5387 case OP_WORDCHAR:
5388 if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5389 break;
5390
5391 default:
5392 RRETURN(PCRE_ERROR_INTERNAL);
5393 }
5394 }
5395 }
5396 /* Control never gets here */
5397 }
5398
5399 /* If maximizing, it is worth using inline code for speed, doing the type
5400 test once at the start (i.e. keep it out of the loop). Again, keep the
5401 UTF-8 and UCP stuff separate. */
5402
5403 else
5404 {
5405 pp = eptr; /* Remember where we started */
5406
5407 #ifdef SUPPORT_UCP
5408 if (prop_type >= 0)
5409 {
5410 switch(prop_type)
5411 {
5412 case PT_ANY:
5413 for (i = min; i < max; i++)
5414 {
5415 int len = 1;
5416 if (eptr >= md->end_subject)
5417 {
5418 SCHECK_PARTIAL();
5419 break;
5420 }
5421 GETCHARLENTEST(c, eptr, len);
5422 if (prop_fail_result) break;
5423 eptr+= len;
5424 }
5425 break;
5426
5427 case PT_LAMP:
5428 for (i = min; i < max; i++)
5429 {
5430 int chartype;
5431 int len = 1;
5432 if (eptr >= md->end_subject)
5433 {
5434 SCHECK_PARTIAL();
5435 break;
5436 }
5437 GETCHARLENTEST(c, eptr, len);
5438 chartype = UCD_CHARTYPE(c);
5439 if ((chartype == ucp_Lu ||
5440 chartype == ucp_Ll ||
5441 chartype == ucp_Lt) == prop_fail_result)
5442 break;
5443 eptr+= len;
5444 }
5445 break;
5446
5447 case PT_GC:
5448 for (i = min; i < max; i++)
5449 {
5450 int len = 1;
5451 if (eptr >= md->end_subject)
5452 {
5453 SCHECK_PARTIAL();
5454 break;
5455 }
5456 GETCHARLENTEST(c, eptr, len);
5457 if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5458 eptr+= len;
5459 }
5460 break;
5461
5462 case PT_PC:
5463 for (i = min; i < max; i++)
5464 {
5465 int len = 1;
5466 if (eptr >= md->end_subject)
5467 {
5468 SCHECK_PARTIAL();
5469 break;
5470 }
5471 GETCHARLENTEST(c, eptr, len);
5472 if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5473 eptr+= len;
5474 }
5475 break;
5476
5477 case PT_SC:
5478 for (i = min; i < max; i++)
5479 {
5480 int len = 1;
5481 if (eptr >= md->end_subject)
5482 {
5483 SCHECK_PARTIAL();
5484 break;
5485 }
5486 GETCHARLENTEST(c, eptr, len);
5487 if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5488 eptr+= len;
5489 }
5490 break;
5491
5492 case PT_ALNUM:
5493 for (i = min; i < max; i++)
5494 {
5495 int category;
5496 int len = 1;
5497 if (eptr >= md->end_subject)
5498 {
5499 SCHECK_PARTIAL();
5500 break;
5501 }
5502 GETCHARLENTEST(c, eptr, len);
5503 category = UCD_CATEGORY(c);
5504 if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5505 break;
5506 eptr+= len;
5507 }
5508 break;
5509
5510 case PT_SPACE: /* Perl space */
5511 for (i = min; i < max; i++)
5512 {
5513 int len = 1;
5514 if (eptr >= md->end_subject)
5515 {
5516 SCHECK_PARTIAL();
5517 break;
5518 }
5519 GETCHARLENTEST(c, eptr, len);
5520 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5521 c == CHAR_FF || c == CHAR_CR)
5522 == prop_fail_result)
5523 break;
5524 eptr+= len;
5525 }
5526 break;
5527
5528 case PT_PXSPACE: /* POSIX space */
5529 for (i = min; i < max; i++)
5530 {
5531 int len = 1;
5532 if (eptr >= md->end_subject)
5533 {
5534 SCHECK_PARTIAL();
5535 break;
5536 }
5537 GETCHARLENTEST(c, eptr, len);
5538 if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5539 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5540 == prop_fail_result)
5541 break;
5542 eptr+= len;
5543 }
5544 break;
5545
5546 case PT_WORD:
5547 for (i = min; i < max; i++)
5548 {
5549 int category;
5550 int len = 1;
5551 if (eptr >= md->end_subject)
5552 {
5553 SCHECK_PARTIAL();
5554 break;
5555 }
5556 GETCHARLENTEST(c, eptr, len);
5557 category = UCD_CATEGORY(c);
5558 if ((category == ucp_L || category == ucp_N ||
5559 c == CHAR_UNDERSCORE) == prop_fail_result)
5560 break;
5561 eptr+= len;
5562 }
5563 break;
5564
5565 default:
5566 RRETURN(PCRE_ERROR_INTERNAL);
5567 }
5568
5569 /* eptr is now past the end of the maximum run */
5570
5571 if (possessive) continue;
5572 for(;;)
5573 {
5574 RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5575 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5576 if (eptr-- == pp) break; /* Stop if tried at original pos */
5577 if (utf) BACKCHAR(eptr);
5578 }
5579 }
5580
5581 /* Match extended Unicode sequences. We will get here only if the
5582 support is in the binary; otherwise a compile-time error occurs. */
5583
5584 else if (ctype == OP_EXTUNI)
5585 {
5586 for (i = min; i < max; i++)
5587 {
5588 int len = 1;
5589 if (eptr >= md->end_subject)
5590 {
5591 SCHECK_PARTIAL();
5592 break;
5593 }
5594 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5595 if (UCD_CATEGORY(c) == ucp_M) break;
5596 eptr += len;
5597 while (eptr < md->end_subject)
5598 {
5599 len = 1;
5600 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5601 if (UCD_CATEGORY(c) != ucp_M) break;
5602 eptr += len;
5603 }
5604 CHECK_PARTIAL();
5605 }
5606
5607 /* eptr is now past the end of the maximum run */
5608
5609 if (possessive) continue;
5610
5611 for(;;)
5612 {
5613 RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5614 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5615 if (eptr-- == pp) break; /* Stop if tried at original pos */
5616 for (;;) /* Move back over one extended */
5617 {
5618 if (!utf) c = *eptr; else
5619 {
5620 BACKCHAR(eptr);
5621 GETCHAR(c, eptr);
5622 }
5623 if (UCD_CATEGORY(c) != ucp_M) break;
5624 eptr--;
5625 }
5626 }
5627 }
5628
5629 else
5630 #endif /* SUPPORT_UCP */
5631
5632 #ifdef SUPPORT_UTF
5633 if (utf)
5634 {
5635 switch(ctype)
5636 {
5637 case OP_ANY:
5638 if (max < INT_MAX)
5639 {
5640 for (i = min; i < max; i++)
5641 {
5642 if (eptr >= md->end_subject)
5643 {
5644 SCHECK_PARTIAL();
5645 break;
5646 }
5647 if (IS_NEWLINE(eptr)) break;
5648 if (md->partial != 0 && /* Take care with CRLF partial */
5649 eptr + 1 >= md->end_subject &&
5650 NLBLOCK->nltype == NLTYPE_FIXED &&
5651 NLBLOCK->nllen == 2 &&
5652 *eptr == NLBLOCK->nl[0])
5653 {
5654 md->hitend = TRUE;
5655 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5656 }
5657 eptr++;
5658 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5659 }
5660 }
5661
5662 /* Handle unlimited UTF-8 repeat */
5663
5664 else
5665 {
5666 for (i = min; i < max; i++)
5667 {
5668 if (eptr >= md->end_subject)
5669 {
5670 SCHECK_PARTIAL();
5671 break;
5672 }
5673 if (IS_NEWLINE(eptr)) break;
5674 if (md->partial != 0 && /* Take care with CRLF partial */
5675 eptr + 1 >= md->end_subject &&
5676 NLBLOCK->nltype == NLTYPE_FIXED &&
5677 NLBLOCK->nllen == 2 &&
5678 *eptr == NLBLOCK->nl[0])
5679 {
5680 md->hitend = TRUE;
5681 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5682 }
5683 eptr++;
5684 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5685 }
5686 }
5687 break;
5688
5689 case OP_ALLANY:
5690 if (max < INT_MAX)
5691 {
5692 for (i = min; i < max; i++)
5693 {
5694 if (eptr >= md->end_subject)
5695 {
5696 SCHECK_PARTIAL();
5697 break;
5698 }
5699 eptr++;
5700 ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5701 }
5702 }
5703 else
5704 {
5705 eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5706 SCHECK_PARTIAL();
5707 }
5708 break;
5709
5710 /* The byte case is the same as non-UTF8 */
5711
5712 case OP_ANYBYTE:
5713 c = max - min;
5714 if (c > (unsigned int)(md->end_subject - eptr))
5715 {
5716 eptr = md->end_subject;
5717 SCHECK_PARTIAL();
5718 }
5719 else eptr += c;
5720 break;
5721
5722 case OP_ANYNL:
5723 for (i = min; i < max; i++)
5724 {
5725 int len = 1;
5726 if (eptr >= md->end_subject)
5727 {
5728 SCHECK_PARTIAL();
5729 break;
5730 }
5731 GETCHARLEN(c, eptr, len);
5732 if (c == 0x000d)
5733 {
5734 if (++eptr >= md->end_subject) break;
5735 if (*eptr == 0x000a) eptr++;
5736 }
5737 else
5738 {
5739 if (c != 0x000a &&
5740 (md->bsr_anycrlf ||
5741 (c != 0x000b && c != 0x000c &&
5742 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5743 break;
5744 eptr += len;
5745 }
5746 }
5747 break;
5748
5749 case OP_NOT_HSPACE:
5750 case OP_HSPACE:
5751 for (i = min; i < max; i++)
5752 {
5753 BOOL gotspace;
5754 int len = 1;
5755 if (eptr >= md->end_subject)
5756 {
5757 SCHECK_PARTIAL();
5758 break;
5759 }
5760 GETCHARLEN(c, eptr, len);
5761 switch(c)
5762 {
5763 default: gotspace = FALSE; break;
5764 case 0x09: /* HT */
5765 case 0x20: /* SPACE */
5766 case 0xa0: /* NBSP */
5767 case 0x1680: /* OGHAM SPACE MARK */
5768 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5769 case 0x2000: /* EN QUAD */
5770 case 0x2001: /* EM QUAD */
5771 case 0x2002: /* EN SPACE */
5772 case 0x2003: /* EM SPACE */
5773 case 0x2004: /* THREE-PER-EM SPACE */
5774 case 0x2005: /* FOUR-PER-EM SPACE */
5775 case 0x2006: /* SIX-PER-EM SPACE */
5776 case 0x2007: /* FIGURE SPACE */
5777 case 0x2008: /* PUNCTUATION SPACE */
5778 case 0x2009: /* THIN SPACE */
5779 case 0x200A: /* HAIR SPACE */
5780 case 0x202f: /* NARROW NO-BREAK SPACE */
5781 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5782 case 0x3000: /* IDEOGRAPHIC SPACE */
5783 gotspace = TRUE;
5784 break;
5785 }
5786 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5787 eptr += len;
5788 }
5789 break;
5790
5791 case OP_NOT_VSPACE:
5792 case OP_VSPACE:
5793 for (i = min; i < max; i++)
5794 {
5795 BOOL gotspace;
5796 int len = 1;
5797 if (eptr >= md->end_subject)
5798 {
5799 SCHECK_PARTIAL();
5800 break;
5801 }
5802 GETCHARLEN(c, eptr, len);
5803 switch(c)
5804 {
5805 default: gotspace = FALSE; break;
5806 case 0x0a: /* LF */
5807 case 0x0b: /* VT */
5808 case 0x0c: /* FF */
5809 case 0x0d: /* CR */
5810 case 0x85: /* NEL */
5811 case 0x2028: /* LINE SEPARATOR */
5812 case 0x2029: /* PARAGRAPH SEPARATOR */
5813 gotspace = TRUE;
5814 break;
5815 }
5816 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5817 eptr += len;
5818 }
5819 break;
5820
5821 case OP_NOT_DIGIT:
5822 for (i = min; i < max; i++)
5823 {
5824 int len = 1;
5825 if (eptr >= md->end_subject)
5826 {
5827 SCHECK_PARTIAL();
5828 break;
5829 }
5830 GETCHARLEN(c, eptr, len);
5831 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5832 eptr+= len;
5833 }
5834 break;
5835
5836 case OP_DIGIT:
5837 for (i = min; i < max; i++)
5838 {
5839 int len = 1;
5840 if (eptr >= md->end_subject)
5841 {
5842 SCHECK_PARTIAL();
5843 break;
5844 }
5845 GETCHARLEN(c, eptr, len);
5846 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5847 eptr+= len;
5848 }
5849 break;
5850
5851 case OP_NOT_WHITESPACE:
5852 for (i = min; i < max; i++)
5853 {
5854 int len = 1;
5855 if (eptr >= md->end_subject)
5856 {
5857 SCHECK_PARTIAL();
5858 break;
5859 }
5860 GETCHARLEN(c, eptr, len);
5861 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5862 eptr+= len;
5863 }
5864 break;
5865
5866 case OP_WHITESPACE:
5867 for (i = min; i < max; i++)
5868 {
5869 int len = 1;
5870 if (eptr >= md->end_subject)
5871 {
5872 SCHECK_PARTIAL();
5873 break;
5874 }
5875 GETCHARLEN(c, eptr, len);
5876 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5877 eptr+= len;
5878 }
5879 break;
5880
5881 case OP_NOT_WORDCHAR:
5882 for (i = min; i < max; i++)
5883 {
5884 int len = 1;
5885 if (eptr >= md->end_subject)
5886 {
5887 SCHECK_PARTIAL();
5888 break;
5889 }
5890 GETCHARLEN(c, eptr, len);
5891 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5892 eptr+= len;
5893 }
5894 break;
5895
5896 case OP_WORDCHAR:
5897 for (i = min; i < max; i++)
5898 {
5899 int len = 1;
5900 if (eptr >= md->end_subject)
5901 {
5902 SCHECK_PARTIAL();
5903 break;
5904 }
5905 GETCHARLEN(c, eptr, len);
5906 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5907 eptr+= len;
5908 }
5909 break;
5910
5911 default:
5912 RRETURN(PCRE_ERROR_INTERNAL);
5913 }
5914
5915 /* eptr is now past the end of the maximum run. If possessive, we are
5916 done (no backing up). Otherwise, match at this position; anything other
5917 than no match is immediately returned. For nomatch, back up one
5918 character, unless we are matching \R and the last thing matched was
5919 \r\n, in which case, back up two bytes. */
5920
5921 if (possessive) continue;
5922 for(;;)
5923 {
5924 RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5925 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5926 if (eptr-- == pp) break; /* Stop if tried at original pos */
5927 BACKCHAR(eptr);
5928 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
5929 eptr[-1] == '\r') eptr--;
5930 }
5931 }
5932 else
5933 #endif /* SUPPORT_UTF */
5934 /* Not UTF mode */
5935 {
5936 switch(ctype)
5937 {
5938 case OP_ANY:
5939 for (i = min; i < max; i++)
5940 {
5941 if (eptr >= md->end_subject)
5942 {
5943 SCHECK_PARTIAL();
5944 break;
5945 }
5946 if (IS_NEWLINE(eptr)) break;
5947 if (md->partial != 0 && /* Take care with CRLF partial */
5948 eptr + 1 >= md->end_subject &&
5949 NLBLOCK->nltype == NLTYPE_FIXED &&
5950 NLBLOCK->nllen == 2 &&
5951 *eptr == NLBLOCK->nl[0])
5952 {
5953 md->hitend = TRUE;
5954 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5955 }
5956 eptr++;
5957 }
5958 break;
5959
5960 case OP_ALLANY:
5961 case OP_ANYBYTE:
5962 c = max - min;
5963 if (c > (unsigned int)(md->end_subject - eptr))
5964 {
5965 eptr = md->end_subject;
5966 SCHECK_PARTIAL();
5967 }
5968 else eptr += c;
5969 break;
5970
5971 case OP_ANYNL:
5972 for (i = min; i < max; i++)
5973 {
5974 if (eptr >= md->end_subject)
5975 {
5976 SCHECK_PARTIAL();
5977 break;
5978 }
5979 c = *eptr;
5980 if (c == 0x000d)
5981 {
5982 if (++eptr >= md->end_subject) break;
5983 if (*eptr == 0x000a) eptr++;
5984 }
5985 else
5986 {
5987 if (c != 0x000a && (md->bsr_anycrlf ||
5988 (c != 0x000b && c != 0x000c && c != 0x0085
5989 #ifdef COMPILE_PCRE16
5990 && c != 0x2028 && c != 0x2029
5991 #endif
5992 ))) break;
5993 eptr++;
5994 }
5995 }
5996 break;
5997
5998 case OP_NOT_HSPACE:
5999 for (i = min; i < max; i++)
6000 {
6001 if (eptr >= md->end_subject)
6002 {
6003 SCHECK_PARTIAL();
6004 break;
6005 }
6006 c = *eptr;
6007 if (c == 0x09 || c == 0x20 || c == 0xa0
6008 #ifdef COMPILE_PCRE16
6009 || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
6010 || c == 0x202f || c == 0x205f || c == 0x3000
6011 #endif
6012 ) break;
6013 eptr++;
6014 }
6015 break;
6016
6017 case OP_HSPACE:
6018 for (i = min; i < max; i++)
6019 {
6020 if (eptr >= md->end_subject)
6021 {
6022 SCHECK_PARTIAL();
6023 break;
6024 }
6025 c = *eptr;
6026 if (c != 0x09 && c != 0x20 && c != 0xa0
6027 #ifdef COMPILE_PCRE16
6028 && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
6029 && c != 0x202f && c != 0x205f && c != 0x3000
6030 #endif
6031 ) break;
6032 eptr++;
6033 }
6034 break;
6035
6036 case OP_NOT_VSPACE:
6037 for (i = min; i < max; i++)
6038 {
6039 if (eptr >= md->end_subject)
6040 {
6041 SCHECK_PARTIAL();
6042 break;
6043 }
6044 c = *eptr;
6045 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
6046 #ifdef COMPILE_PCRE16
6047 || c == 0x2028 || c == 0x2029
6048 #endif
6049 ) break;
6050 eptr++;
6051 }
6052 break;
6053
6054 case OP_VSPACE:
6055 for (i = min; i < max; i++)
6056 {
6057 if (eptr >= md->end_subject)
6058 {
6059 SCHECK_PARTIAL();
6060 break;
6061 }
6062 c = *eptr;
6063 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
6064 #ifdef COMPILE_PCRE16
6065 && c != 0x2028 && c != 0x2029
6066 #endif
6067 ) break;
6068 eptr++;
6069 }
6070 break;
6071
6072 case OP_NOT_DIGIT:
6073 for (i = min; i < max; i++)
6074 {
6075 if (eptr >= md->end_subject)
6076 {
6077 SCHECK_PARTIAL();
6078 break;
6079 }
6080 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6081 eptr++;
6082 }
6083 break;
6084
6085 case OP_DIGIT:
6086 for (i = min; i < max; i++)
6087 {
6088 if (eptr >= md->end_subject)
6089 {
6090 SCHECK_PARTIAL();
6091 break;
6092 }
6093 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6094 eptr++;
6095 }
6096 break;
6097
6098 case OP_NOT_WHITESPACE:
6099 for (i = min; i < max; i++)
6100 {
6101 if (eptr >= md->end_subject)
6102 {
6103 SCHECK_PARTIAL();
6104 break;
6105 }
6106 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6107 eptr++;
6108 }
6109 break;
6110
6111 case OP_WHITESPACE:
6112 for (i = min; i < max; i++)
6113 {
6114 if (eptr >= md->end_subject)
6115 {
6116 SCHECK_PARTIAL();
6117 break;
6118 }
6119 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6120 eptr++;
6121 }
6122 break;
6123
6124 case OP_NOT_WORDCHAR:
6125 for (i = min; i < max; i++)
6126 {
6127 if (eptr >= md->end_subject)
6128 {
6129 SCHECK_PARTIAL();
6130 break;
6131 }
6132 if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6133 eptr++;
6134 }
6135 break;
6136
6137 case OP_WORDCHAR:
6138 for (i = min; i < max; i++)
6139 {
6140 if (eptr >= md->end_subject)
6141 {
6142 SCHECK_PARTIAL();
6143 break;
6144 }
6145 if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6146 eptr++;
6147 }
6148 break;
6149
6150 default:
6151 RRETURN(PCRE_ERROR_INTERNAL);
6152 }
6153
6154 /* eptr is now past the end of the maximum run. If possessive, we are
6155 done (no backing up). Otherwise, match at this position; anything other
6156 than no match is immediately returned. For nomatch, back up one
6157 character (byte), unless we are matching \R and the last thing matched
6158 was \r\n, in which case, back up two bytes. */
6159
6160 if (possessive) continue;
6161 while (eptr >= pp)
6162 {
6163 RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6164 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6165 eptr--;
6166 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
6167 eptr[-1] == '\r') eptr--;
6168 }
6169 }
6170
6171 /* Get here if we can't make it match with any permitted repetitions */
6172
6173 RRETURN(MATCH_NOMATCH);
6174 }
6175 /* Control never gets here */
6176
6177 /* There's been some horrible disaster. Arrival here can only mean there is
6178 something seriously wrong in the code above or the OP_xxx definitions. */
6179
6180 default:
6181 DPRINTF(("Unknown opcode %d\n", *ecode));
6182 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6183 }
6184
6185 /* Do not stick any code in here without much thought; it is assumed
6186 that "continue" in the code above comes out to here to repeat the main
6187 loop. */
6188
6189 } /* End of main loop */
6190 /* Control never reaches here */
6191
6192
6193 /* When compiling to use the heap rather than the stack for recursive calls to
6194 match(), the RRETURN() macro jumps here. The number that is saved in
6195 frame->Xwhere indicates which label we actually want to return to. */
6196
6197 #ifdef NO_RECURSE
6198 #define LBL(val) case val: goto L_RM##val;
6199 HEAP_RETURN:
6200 switch (frame->Xwhere)
6201 {
6202 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6203 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6204 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6205 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6206 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6207 LBL(65) LBL(66)
6208 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6209 LBL(21)
6210 #endif
6211 #ifdef SUPPORT_UTF
6212 LBL(16) LBL(18) LBL(20)
6213 LBL(22) LBL(23) LBL(28) LBL(30)
6214 LBL(32) LBL(34) LBL(42) LBL(46)
6215 #ifdef SUPPORT_UCP
6216 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6217 LBL(59) LBL(60) LBL(61) LBL(62)
6218 #endif /* SUPPORT_UCP */
6219 #endif /* SUPPORT_UTF */
6220 default:
6221 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6222
6223 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6224
6225 return PCRE_ERROR_INTERNAL;
6226 }
6227 #undef LBL
6228 #endif /* NO_RECURSE */
6229 }
6230
6231
6232 /***************************************************************************
6233 ****************************************************************************
6234 RECURSION IN THE match() FUNCTION
6235
6236 Undefine all the macros that were defined above to handle this. */
6237
6238 #ifdef NO_RECURSE /* heap-frame build: the names below were macros inside match() */
6239 #undef eptr /* NOTE(review): first group looks like match()'s argument names - confirm against the #define block */
6240 #undef ecode
6241 #undef mstart
6242 #undef offset_top
6243 #undef eptrb
6244 #undef flags
6245
6246 #undef callpat /* NOTE(review): remaining groups are presumably match()'s local working variables - confirm */
6247 #undef charptr
6248 #undef data
6249 #undef next
6250 #undef pp
6251 #undef prev
6252 #undef saved_eptr
6253
6254 #undef new_recursive
6255
6256 #undef cur_is_word
6257 #undef condition
6258 #undef prev_is_word
6259
6260 #undef ctype
6261 #undef length
6262 #undef max
6263 #undef min
6264 #undef number
6265 #undef offset
6266 #undef op
6267 #undef save_capture_last
6268 #undef save_offset1
6269 #undef save_offset2
6270 #undef save_offset3
6271 #undef stacksave
6272
6273 #undef newptrb
6274
6275 #endif /* NO_RECURSE */
6276
6277 /* These two are defined as macros whether or not NO_RECURSE is set, so they are undefined unconditionally */
6278
6279 #undef fc
6280 #undef fi
6281
6282 /***************************************************************************
6283 ***************************************************************************/
6284
6285
6286 #ifdef NO_RECURSE
6287 /*************************************************
6288 * Release allocated heap frames *
6289 *************************************************/
6290
6291 /* Walk the chain that hangs off the base frame via Xnextframe and pass every
6292 frame on it to PUBL(stack_free). The base frame itself lives on the machine
6293 stack, so it is skipped and never freed; its Xnextframe field is not altered.
6294 Argument: the address of the base frame
6295 Returns: nothing
6296 */
6297
6298 static void
6299 release_match_heapframes (heapframe *frame_base)
6300 {
6301 heapframe *doomed = frame_base->Xnextframe; /* first frame after the base */
6302 heapframe *following; /* successor, saved before the free */
6303 for (; doomed != NULL; doomed = following)
6304   {
6305   following = doomed->Xnextframe; /* read the link while the frame is valid */
6306   (PUBL(stack_free))(doomed);
6307   }
6308 }
6309 #endif
6310
6311
6312 /*************************************************
6313 * Execute a Regular Expression *
6314 *************************************************/
6315
6316 /* This function applies a compiled re to a subject string and picks out
6317 portions of the string if it matches. Two elements in the vector are set for
6318 each substring: the offsets to the start and end of the substring.
6319
6320 Arguments:
6321 argument_re points to the compiled expression
6322 extra_data points to extra data or is NULL
6323 subject points to the subject string
6324 length length of subject string (may contain binary zeros)
6325 start_offset where to start in the subject string
6326 options option bits
6327 offsets points to a vector of ints to be filled in with offsets
6328 offsetcount the number of elements in the vector
6329
6330 Returns: > 0 => success; value is the number of elements filled in
6331 = 0 => success, but offsets is not big enough
6332 -1 => failed to match
6333 < -1 => some kind of unexpected problem
6334 */
6335
6336 #ifdef COMPILE_PCRE8
6337 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6338 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6339 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6340 int offsetcount)
6341 #else
6342 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6343 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6344 PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6345 int offsetcount)
6346 #endif
6347 {
6348 int rc, ocount, arg_offset_max;
6349 int newline;
6350 BOOL using_temporary_offsets = FALSE;
6351 BOOL anchored;
6352 BOOL startline;
6353 BOOL firstline;
6354 BOOL utf;
6355 BOOL has_first_char = FALSE;
6356 BOOL has_req_char = FALSE;
6357 pcre_uchar first_char = 0;
6358 pcre_uchar first_char2 = 0;
6359 pcre_uchar req_char = 0;
6360 pcre_uchar req_char2 = 0;
6361 match_data match_block;
6362 match_data *md = &match_block;
6363 const pcre_uint8 *tables;
6364 const pcre_uint8 *start_bits = NULL;
6365 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6366 PCRE_PUCHAR end_subject;
6367 PCRE_PUCHAR start_partial = NULL;
6368 PCRE_PUCHAR req_char_ptr = start_match - 1;
6369
6370 const pcre_study_data *study;
6371 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6372
6373 #ifdef NO_RECURSE
6374 heapframe frame_zero;
6375 frame_zero.Xprevframe = NULL; /* Marks the top level */
6376 frame_zero.Xnextframe = NULL; /* None are allocated yet */
6377 md->match_frames_base = &frame_zero;
6378 #endif
6379
6380 /* Check for the special magic call that measures the size of the stack used
6381 per recursive call of match(). Without the funny casting for sizeof, a Windows
6382 compiler gave this error: "unary minus operator applied to unsigned type,
6383 result still unsigned". Hopefully the cast fixes that. */
6384
6385 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6386 start_offset == -999)
6387 #ifdef NO_RECURSE
6388 return -((int)sizeof(heapframe));
6389 #else
6390 return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6391 #endif
6392
6393 /* Plausibility checks */
6394
6395 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6396 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6397 return PCRE_ERROR_NULL;
6398 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6399 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6400
6401 /* Check that the first field in the block is the magic number. If it is not,
6402 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6403 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6404 means that the pattern is likely compiled with different endianness. */
6405
6406 if (re->magic_number != MAGIC_NUMBER)
6407 return re->magic_number == REVERSED_MAGIC_NUMBER?
6408 PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6409 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6410
6411 /* These two settings are used in the code for checking a UTF-8 string that
6412 follows immediately afterwards. Other values in the md block are used only
6413 during "normal" pcre_exec() processing, not when the JIT support is in use,
6414 so they are set up later. */
6415
6416 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6417 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6418 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6419 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6420
6421 /* Check a UTF-8 string if required. Pass back the character offset and error
6422 code for an invalid string if a results vector is available. */
6423
6424 #ifdef SUPPORT_UTF
6425 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6426 {
6427 int erroroffset;
6428 int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6429 if (errorcode != 0)
6430 {
6431 if (offsetcount >= 2)
6432 {
6433 offsets[0] = erroroffset;
6434 offsets[1] = errorcode;
6435 }
6436 #ifdef COMPILE_PCRE16
6437 return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6438 PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6439 #else
6440 return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6441 PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6442 #endif
6443 }
6444
6445 /* Check that a start_offset points to the start of a UTF character. */
6446 if (start_offset > 0 && start_offset < length &&
6447 NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6448 return PCRE_ERROR_BADUTF8_OFFSET;
6449 }
6450 #endif
6451
6452 /* If the pattern was successfully studied with JIT support, run the JIT
6453 executable instead of the rest of this function. Most options must be set at
6454 compile time for the JIT code to be usable. Fall back to the normal code path if
6455 an unsupported flag is set. */
6456
6457 #ifdef SUPPORT_JIT
6458 if (extra_data != NULL
6459 && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6460 PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6461 && extra_data->executable_jit != NULL
6462 && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6463 PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6464 PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6465 {
6466 rc = PRIV(jit_exec)(re, extra_data->executable_jit,
6467 (const pcre_uchar *)subject, length, start_offset, options,
6468 ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6469 ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount,
6470 ((extra_data->flags & PCRE_EXTRA_MARK) != 0) ? extra_data->mark : NULL);
6471
6472 /* PCRE_ERROR_NULL means that the selected normal or partial matching
6473 mode is not compiled. In this case we simply fall back to the interpreter. */
6474
6475 if (rc != PCRE_ERROR_NULL) return rc;
6476 }
6477 #endif
6478
6479 /* Carry on with non-JIT matching. This information is for finding all the
6480 numbers associated with a given name, for condition testing. */
6481
6482 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6483 md->name_count = re->name_count;
6484 md->name_entry_size = re->name_entry_size;
6485
6486 /* Fish out the optional data from the extra_data structure, first setting
6487 the default values. */
6488
6489 study = NULL;
6490 md->match_limit = MATCH_LIMIT;
6491 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6492 md->callout_data = NULL;
6493
6494 /* The table pointer is always in native byte order. */
6495
6496 tables = re->tables;
6497
6498 if (extra_data != NULL)
6499 {
6500 register unsigned int flags = extra_data->flags;
6501 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6502 study = (const pcre_study_data *)extra_data->study_data;
6503 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6504 md->match_limit = extra_data->match_limit;
6505 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6506 md->match_limit_recursion = extra_data->match_limit_recursion;
6507 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6508 md->callout_data = extra_data->callout_data;
6509 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6510 }
6511
6512 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6513 is a feature that makes it possible to save compiled regex and re-use them
6514 in other programs later. */
6515
6516 if (tables == NULL) tables = PRIV(default_tables);
6517
6518 /* Set up other data */
6519
6520 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6521 startline = (re->flags & PCRE_STARTLINE) != 0;
6522 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6523
6524 /* The code starts after the real_pcre block and the capture name table. */
6525
6526 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6527 re->name_count * re->name_entry_size;
6528
6529 md->start_subject = (PCRE_PUCHAR)subject;
6530 md->start_offset = start_offset;
6531 md->end_subject = md->start_subject + length;
6532 end_subject = md->end_subject;
6533
6534 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6535 md->use_ucp = (re->options & PCRE_UCP) != 0;
6536 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6537 md->ignore_skip_arg = FALSE;
6538
6539 /* Some options are unpacked into BOOL variables in the hope that testing
6540 them will be faster than individual option bits. */
6541
6542 md->notbol = (options & PCRE_NOTBOL) != 0;
6543 md->noteol = (options & PCRE_NOTEOL) != 0;
6544 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6545 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6546
6547 md->hitend = FALSE;
6548 md->mark = md->nomatch_mark = NULL; /* In case never set */
6549
6550 md->recursive = NULL; /* No recursion at top level */
6551 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6552
6553 md->lcc = tables + lcc_offset;
6554 md->fcc = tables + fcc_offset;
6555 md->ctypes = tables + ctypes_offset;
6556
6557 /* Handle different \R options. */
6558
6559 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6560 {
6561 case 0:
6562 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6563 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6564 else
6565 #ifdef BSR_ANYCRLF
6566 md->bsr_anycrlf = TRUE;
6567 #else
6568 md->bsr_anycrlf = FALSE;
6569 #endif
6570 break;
6571
6572 case PCRE_BSR_ANYCRLF:
6573 md->bsr_anycrlf = TRUE;
6574 break;
6575
6576 case PCRE_BSR_UNICODE:
6577 md->bsr_anycrlf = FALSE;
6578 break;
6579
6580 default: return PCRE_ERROR_BADNEWLINE;
6581 }
6582
6583 /* Handle different types of newline. The three bits give eight cases. If
6584 nothing is set at run time, whatever was used at compile time applies. */
6585
6586 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6587 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6588 {
6589 case 0: newline = NEWLINE; break; /* Compile-time default */
6590 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6591 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6592 case PCRE_NEWLINE_CR+
6593 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6594 case PCRE_NEWLINE_ANY: newline = -1; break;
6595 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6596 default: return PCRE_ERROR_BADNEWLINE;
6597 }
6598
6599 if (newline == -2)
6600 {
6601 md->nltype = NLTYPE_ANYCRLF;
6602 }
6603 else if (newline < 0)
6604 {
6605 md->nltype = NLTYPE_ANY;
6606 }
6607 else
6608 {
6609 md->nltype = NLTYPE_FIXED;
6610 if (newline > 255)
6611 {
6612 md->nllen = 2;
6613 md->nl[0] = (newline >> 8) & 255;
6614 md->nl[1] = newline & 255;
6615 }
6616 else
6617 {
6618 md->nllen = 1;
6619 md->nl[0] = newline;
6620 }
6621 }
6622
6623 /* Partial matching was originally supported only for a restricted set of
6624 regexes; from release 8.00 there are no restrictions, but the bits are still
6625 defined (though never set). So there's no harm in leaving this code. */
6626
6627 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6628 return PCRE_ERROR_BADPARTIAL;
6629
6630 /* If the expression has got more back references than the offsets supplied can
6631 hold, we get a temporary chunk of working store to use during the matching.
6632 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6633 of 3. */
6634
6635 ocount = offsetcount - (offsetcount % 3);
6636 arg_offset_max = (2*ocount)/3;
6637
6638 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6639 {
6640 ocount = re->top_backref * 3 + 3;
6641 md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6642 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6643 using_temporary_offsets = TRUE;
6644 DPRINTF(("Got memory to hold back references\n"));
6645 }
6646 else md->offset_vector = offsets;
6647
6648 md->offset_end = ocount;
6649 md->offset_max = (2*ocount)/3;
6650 md->offset_overflow = FALSE;
6651 md->capture_last = -1;
6652
6653 /* Reset the working variable associated with each extraction. These should
6654 never be used unless previously set, but they get saved and restored, and so we
6655 initialize them to avoid reading uninitialized locations. Also, unset the
6656 offsets for the matched string. This is really just for tidiness with callouts,
6657 in case they inspect these fields. */
6658
6659 if (md->offset_vector != NULL)
6660 {
6661 register int *iptr = md->offset_vector + ocount;
6662 register int *iend = iptr - re->top_bracket;
6663 if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6664 while (--iptr >= iend) *iptr = -1;
6665 md->offset_vector[0] = md->offset_vector[1] = -1;
6666 }
6667
6668 /* Set up the first character to match, if available. The first_char value is
6669 never set for an anchored regular expression, but the anchoring may be forced
6670 at run time, so we have to test for anchoring. The first char may be unset for
6671 an unanchored pattern, of course. If there's no first char and the pattern was
6672 studied, there may be a bitmap of possible first characters. */
6673
6674 if (!anchored)
6675 {
6676 if ((re->flags & PCRE_FIRSTSET) != 0)
6677 {
6678 has_first_char = TRUE;
6679 first_char = first_char2 = (pcre_uchar)(re->first_char);
6680 if ((re->flags & PCRE_FCH_CASELESS) != 0)
6681 {
6682 first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6683 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6684 if (utf && first_char > 127)
6685 first_char2 = UCD_OTHERCASE(first_char);
6686 #endif
6687 }
6688 }
6689 else
6690 if (!startline && study != NULL &&
6691 (study->flags & PCRE_STUDY_MAPPED) != 0)
6692 start_bits = study->start_bits;
6693 }
6694
6695 /* For anchored or unanchored matches, there may be a "last known required
6696 character" set. */
6697
6698 if ((re->flags & PCRE_REQCHSET) != 0)
6699 {
6700 has_req_char = TRUE;
6701 req_char = req_char2 = (pcre_uchar)(re->req_char);
6702 if ((re->flags & PCRE_RCH_CASELESS) != 0)
6703 {
6704 req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6705 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6706 if (utf && req_char > 127)
6707 req_char2 = UCD_OTHERCASE(req_char);
6708 #endif
6709 }
6710 }
6711
6712
6713 /* ==========================================================================*/
6714
6715 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6716 the loop runs just once. */
6717
6718 for(;;)
6719 {
6720 PCRE_PUCHAR save_end_subject = end_subject;
6721 PCRE_PUCHAR new_start_match;
6722
6723 /* If firstline is TRUE, the start of the match is constrained to the first
6724 line of a multiline string. That is, the match must be before or at the first
6725 newline. Implement this by temporarily adjusting end_subject so that we stop
6726 scanning at a newline. If the match fails at the newline, later code breaks
6727 this loop. */
6728
6729 if (firstline)
6730 {
6731 PCRE_PUCHAR t = start_match;
6732 #ifdef SUPPORT_UTF
6733 if (utf)
6734 {
6735 while (t < md->end_subject && !IS_NEWLINE(t))
6736 {
6737 t++;
6738 ACROSSCHAR(t < end_subject, *t, t++);
6739 }
6740 }
6741 else
6742 #endif
6743 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6744 end_subject = t;