/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 602 - (show annotations)
Wed May 25 08:29:03 2011 UTC (4 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 188067 byte(s)
Error occurred while calculating annotation data.
Remove OP_OPT by handling /i and /m entirely at compile time. Fixes bug with 
patterns like /(?i:([^b]))(?1)/, where the /i option was mishandled.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Flag bits for the match() function */
61
62 #define match_condassert 0x01 /* Called to check a condition assertion */
63 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64
65 /* Non-error returns from the match() function. Error returns are externally
66 defined PCRE_ERROR_xxx codes, which are all negative. */
67
68 #define MATCH_MATCH 1
69 #define MATCH_NOMATCH 0
70
71 /* Special internal returns from the match() function. Make them sufficiently
72 negative to avoid the external error codes. */
73
74 #define MATCH_ACCEPT (-999)
75 #define MATCH_COMMIT (-998)
76 #define MATCH_PRUNE (-997)
77 #define MATCH_SKIP (-996)
78 #define MATCH_SKIP_ARG (-995)
79 #define MATCH_THEN (-994)
80
81 /* This is a convenience macro for code that occurs many times. */
82
83 #define MRRETURN(ra) \
84 { \
85 md->mark = markptr; \
86 RRETURN(ra); \
87 }
88
89 /* Maximum number of ints of offset to save on the stack for recursive calls.
90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91 because the offset vector is always a multiple of 3 long. */
92
93 #define REC_STACK_SAVE_MAX 30
94
95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96
97 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99
100
101
102 #ifdef PCRE_DEBUG
103 /*************************************************
104 * Debugging function to print chars *
105 *************************************************/
106
107 /* Print a sequence of chars in printable format, stopping at the end of the
108 subject if the requested.
109
110 Arguments:
111 p points to characters
112 length number to print
113 is_subject TRUE if printing from within md->start_subject
114 md pointer to matching data block, if is_subject is TRUE
115
116 Returns: nothing
117 */
118
119 static void
120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121 {
122 unsigned int c;
123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124 while (length-- > 0)
125 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126 }
127 #endif
128
129
130
131 /*************************************************
132 * Match a back-reference *
133 *************************************************/
134
135 /* Normally, if a back reference hasn't been set, the length that is passed is
136 negative, so the match always fails. However, in JavaScript compatibility mode,
137 the length passed is zero. Note that in caseless UTF-8 mode, the number of
138 subject bytes matched may be different to the number of reference bytes.
139
140 Arguments:
141 offset index into the offset vector
142 eptr pointer into the subject
143 length length of reference to be matched (number of bytes)
144 md points to match data block
145 caseless TRUE if caseless
146
147 Returns: < 0 if not matched, otherwise the number of subject bytes matched
148 */
149
150 static int
151 match_ref(int offset, register USPTR eptr, int length, match_data *md,
152 BOOL caseless)
153 {
154 USPTR eptr_start = eptr;
155 register USPTR p = md->start_subject + md->offset_vector[offset];
156
157 #ifdef PCRE_DEBUG
158 if (eptr >= md->end_subject)
159 printf("matching subject <null>");
160 else
161 {
162 printf("matching subject ");
163 pchars(eptr, length, TRUE, md);
164 }
165 printf(" against backref ");
166 pchars(p, length, FALSE, md);
167 printf("\n");
168 #endif
169
170 /* Always fail if reference not set (and not JavaScript compatible). */
171
172 if (length < 0) return -1;
173
174 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175 properly if Unicode properties are supported. Otherwise, we can check only
176 ASCII characters. */
177
178 if (caseless)
179 {
180 #ifdef SUPPORT_UTF8
181 #ifdef SUPPORT_UCP
182 if (md->utf8)
183 {
184 /* Match characters up to the end of the reference. NOTE: the number of
185 bytes matched may differ, because there are some characters whose upper and
186 lower case versions code as different numbers of bytes. For example, U+023A
187 (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188 a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189 the latter. It is important, therefore, to check the length along the
190 reference, not along the subject (earlier code did this wrong). */
191
192 USPTR endptr = p + length;
193 while (p < endptr)
194 {
195 int c, d;
196 if (eptr >= md->end_subject) return -1;
197 GETCHARINC(c, eptr);
198 GETCHARINC(d, p);
199 if (c != d && c != UCD_OTHERCASE(d)) return -1;
200 }
201 }
202 else
203 #endif
204 #endif
205
206 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207 is no UCP support. */
208 {
209 if (eptr + length > md->end_subject) return -1;
210 while (length-- > 0)
211 { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212 }
213 }
214
215 /* In the caseful case, we can just compare the bytes, whether or not we
216 are in UTF-8 mode. */
217
218 else
219 {
220 if (eptr + length > md->end_subject) return -1;
221 while (length-- > 0) if (*p++ != *eptr++) return -1;
222 }
223
224 return eptr - eptr_start;
225 }
226
227
228
229 /***************************************************************************
230 ****************************************************************************
231 RECURSION IN THE match() FUNCTION
232
233 The match() function is highly recursive, though not every recursive call
234 increases the recursive depth. Nevertheless, some regular expressions can cause
235 it to recurse to a great depth. I was writing for Unix, so I just let it call
236 itself recursively. This uses the stack for saving everything that has to be
237 saved for a recursive call. On Unix, the stack can be large, and this works
238 fine.
239
240 It turns out that on some non-Unix-like systems there are problems with
241 programs that use a lot of stack. (This despite the fact that every last chip
242 has oodles of memory these days, and techniques for extending the stack have
243 been known for decades.) So....
244
245 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
246 calls by keeping local variables that need to be preserved in blocks of memory
247 obtained from malloc() instead of on the stack. Macros are used to
248 achieve this so that the actual code doesn't look very different to what it
249 always used to.
250
251 The original heap-recursive code used longjmp(). However, it seems that this
252 can be very slow on some operating systems. Following a suggestion from Stan
253 Switzer, the use of longjmp() has been abolished, at the cost of having to
254 provide a unique number for each call to RMATCH. There is no way of generating
255 a sequence of numbers at compile time in C. I have given them names, to make
256 them stand out more clearly.
257
258 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260 tests. Furthermore, not using longjmp() means that local dynamic variables
261 don't have indeterminate values; this has meant that the frame size can be
262 reduced because the result can be "passed back" by straight setting of the
263 variable instead of being passed in the frame.
264 ****************************************************************************
265 ***************************************************************************/
266
267 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268 below must be updated in sync. */
269
/* One identifier per RMATCH call site, numbered consecutively from 1. The
first entry of each row carries its value explicitly, purely as a reading aid;
the values are exactly those that implicit numbering would give. */

enum {
  RM1 = 1,   RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51 = 51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
  RM61 = 61, RM62
};
277
278 /* These versions of the macros use the stack, as normal. There are debugging
279 versions and production versions. Note that the "rw" argument of RMATCH isn't
280 actually used in this definition. */
281
282 #ifndef NO_RECURSE
283 #define REGISTER register
284
285 #ifdef PCRE_DEBUG
286 #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
287 { \
288 printf("match() called in line %d\n", __LINE__); \
289 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1); \
290 printf("to line %d\n", __LINE__); \
291 }
292 #define RRETURN(ra) \
293 { \
294 printf("match() returned %d from line %d ", ra, __LINE__); \
295 return ra; \
296 }
297 #else
298 #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
299 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1)
300 #define RRETURN(ra) return ra
301 #endif
302
303 #else
304
305
306 /* These versions of the macros manage a private stack on the heap. Note that
307 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308 argument of match(), which never changes. */
309
310 #define REGISTER
311
312 #define RMATCH(ra,rb,rc,rd,re,rf,rw)\
313 {\
314 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316 frame->Xwhere = rw; \
317 newframe->Xeptr = ra;\
318 newframe->Xecode = rb;\
319 newframe->Xmstart = mstart;\
320 newframe->Xmarkptr = markptr;\
321 newframe->Xoffset_top = rc;\
322 newframe->Xeptrb = re;\
323 newframe->Xflags = rf;\
324 newframe->Xrdepth = frame->Xrdepth + 1;\
325 newframe->Xprevframe = frame;\
326 frame = newframe;\
327 DPRINTF(("restarting from line %d\n", __LINE__));\
328 goto HEAP_RECURSE;\
329 L_##rw:\
330 DPRINTF(("jumped back to line %d\n", __LINE__));\
331 }
332
333 #define RRETURN(ra)\
334 {\
335 heapframe *oldframe = frame;\
336 frame = oldframe->Xprevframe;\
337 (pcre_stack_free)(oldframe);\
338 if (frame != NULL)\
339 {\
340 rrc = ra;\
341 goto HEAP_RETURN;\
342 }\
343 return ra;\
344 }
345
346
347 /* Structure for remembering the local variables in a private frame */
348
349 typedef struct heapframe {
350 struct heapframe *Xprevframe;
351
352 /* Function arguments that may change */
353
354 USPTR Xeptr;
355 const uschar *Xecode;
356 USPTR Xmstart;
357 USPTR Xmarkptr;
358 int Xoffset_top;
359 eptrblock *Xeptrb;
360 int Xflags;
361 unsigned int Xrdepth;
362
363 /* Function local variables */
364
365 USPTR Xcallpat;
366 #ifdef SUPPORT_UTF8
367 USPTR Xcharptr;
368 #endif
369 USPTR Xdata;
370 USPTR Xnext;
371 USPTR Xpp;
372 USPTR Xprev;
373 USPTR Xsaved_eptr;
374
375 recursion_info Xnew_recursive;
376
377 BOOL Xcur_is_word;
378 BOOL Xcondition;
379 BOOL Xprev_is_word;
380
381 #ifdef SUPPORT_UCP
382 int Xprop_type;
383 int Xprop_value;
384 int Xprop_fail_result;
385 int Xprop_category;
386 int Xprop_chartype;
387 int Xprop_script;
388 int Xoclength;
389 uschar Xocchars[8];
390 #endif
391
392 int Xcodelink;
393 int Xctype;
394 unsigned int Xfc;
395 int Xfi;
396 int Xlength;
397 int Xmax;
398 int Xmin;
399 int Xnumber;
400 int Xoffset;
401 int Xop;
402 int Xsave_capture_last;
403 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
404 int Xstacksave[REC_STACK_SAVE_MAX];
405
406 eptrblock Xnewptrb;
407
408 /* Where to jump back to */
409
410 int Xwhere;
411
412 } heapframe;
413
414 #endif
415
416
417 /***************************************************************************
418 ***************************************************************************/
419
420
421
422 /*************************************************
423 * Match from current position *
424 *************************************************/
425
426 /* This function is called recursively in many circumstances. Whenever it
427 returns a negative (error) response, the outer incarnation must also return the
428 same response. */
429
430 /* These macros pack up tests that are used for partial matching, and which
431 appear several times in the code. We set the "hit end" flag if the pointer is
432 at the end of the subject and also past the start of the subject (i.e.
433 something has been matched). For hard partial matching, we then return
434 immediately. The second one is used when we already know we are past the end of
435 the subject. */
436
437 #define CHECK_PARTIAL()\
438 if (md->partial != 0 && eptr >= md->end_subject && \
439 eptr > md->start_used_ptr) \
440 { \
441 md->hitend = TRUE; \
442 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
443 }
444
445 #define SCHECK_PARTIAL()\
446 if (md->partial != 0 && eptr > md->start_used_ptr) \
447 { \
448 md->hitend = TRUE; \
449 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
450 }
451
452
453 /* Performance note: It might be tempting to extract commonly used fields from
454 the md structure (e.g. utf8, end_subject) into individual variables to improve
455 performance. Tests using gcc on a SPARC disproved this; in the first case, it
456 made performance worse.
457
458 Arguments:
459 eptr pointer to current character in subject
460 ecode pointer to current position in compiled code
461 mstart pointer to the current match start position (can be modified
462 by encountering \K)
463 markptr pointer to the most recent MARK name, or NULL
464 offset_top current top pointer
465 md pointer to "static" info for the match
466 eptrb pointer to chain of blocks containing eptr at start of
467 brackets - for testing for empty matches
468 flags can contain
469 match_condassert - this is an assertion condition
470 match_cbegroup - this is the start of an unlimited repeat
471 group that can match an empty string
472 rdepth the recursion depth
473
474 Returns: MATCH_MATCH if matched ) these values are >= 0
475 MATCH_NOMATCH if failed to match )
476 a negative MATCH_xxx value for PRUNE, SKIP, etc
477 a negative PCRE_ERROR_xxx value if aborted by an error condition
478 (e.g. stopped by repeated call or recursion limit)
479 */
480
481 static int
482 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
483 const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,
484 int flags, unsigned int rdepth)
485 {
486 /* These variables do not need to be preserved over recursion in this function,
487 so they can be ordinary variables in all cases. Mark some of them with
488 "register" because they are used a lot in loops. */
489
490 register int rrc; /* Returns from recursive calls */
491 register int i; /* Used for loops not involving calls to RMATCH() */
492 register unsigned int c; /* Character values not kept over RMATCH() calls */
493 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
494
495 BOOL minimize, possessive; /* Quantifier options */
496 BOOL caseless;
497 int condcode;
498
499 /* When recursion is not being used, all "local" variables that have to be
500 preserved over calls to RMATCH() are part of a "frame" which is obtained from
501 heap storage. Set up the top-level frame here; others are obtained from the
502 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
503
504 #ifdef NO_RECURSE
505 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
506 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
507 frame->Xprevframe = NULL; /* Marks the top level */
508
509 /* Copy in the original argument variables */
510
511 frame->Xeptr = eptr;
512 frame->Xecode = ecode;
513 frame->Xmstart = mstart;
514 frame->Xmarkptr = markptr;
515 frame->Xoffset_top = offset_top;
516 frame->Xeptrb = eptrb;
517 frame->Xflags = flags;
518 frame->Xrdepth = rdepth;
519
520 /* This is where control jumps back to in order to effect "recursion" */
521
522 HEAP_RECURSE:
523
524 /* Macros make the argument variables come from the current frame */
525
526 #define eptr frame->Xeptr
527 #define ecode frame->Xecode
528 #define mstart frame->Xmstart
529 #define markptr frame->Xmarkptr
530 #define offset_top frame->Xoffset_top
531 #define eptrb frame->Xeptrb
532 #define flags frame->Xflags
533 #define rdepth frame->Xrdepth
534
535 /* Ditto for the local variables */
536
537 #ifdef SUPPORT_UTF8
538 #define charptr frame->Xcharptr
539 #endif
540 #define callpat frame->Xcallpat
541 #define codelink frame->Xcodelink
542 #define data frame->Xdata
543 #define next frame->Xnext
544 #define pp frame->Xpp
545 #define prev frame->Xprev
546 #define saved_eptr frame->Xsaved_eptr
547
548 #define new_recursive frame->Xnew_recursive
549
550 #define cur_is_word frame->Xcur_is_word
551 #define condition frame->Xcondition
552 #define prev_is_word frame->Xprev_is_word
553
554 #ifdef SUPPORT_UCP
555 #define prop_type frame->Xprop_type
556 #define prop_value frame->Xprop_value
557 #define prop_fail_result frame->Xprop_fail_result
558 #define prop_category frame->Xprop_category
559 #define prop_chartype frame->Xprop_chartype
560 #define prop_script frame->Xprop_script
561 #define oclength frame->Xoclength
562 #define occhars frame->Xocchars
563 #endif
564
565 #define ctype frame->Xctype
566 #define fc frame->Xfc
567 #define fi frame->Xfi
568 #define length frame->Xlength
569 #define max frame->Xmax
570 #define min frame->Xmin
571 #define number frame->Xnumber
572 #define offset frame->Xoffset
573 #define op frame->Xop
574 #define save_capture_last frame->Xsave_capture_last
575 #define save_offset1 frame->Xsave_offset1
576 #define save_offset2 frame->Xsave_offset2
577 #define save_offset3 frame->Xsave_offset3
578 #define stacksave frame->Xstacksave
579
580 #define newptrb frame->Xnewptrb
581
582 /* When recursion is being used, local variables are allocated on the stack and
583 get preserved during recursion in the normal way. In this environment, fi and
584 i, and fc and c, can be the same variables. */
585
586 #else /* NO_RECURSE not defined */
587 #define fi i
588 #define fc c
589
590
591 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
592 const uschar *charptr; /* in small blocks of the code. My normal */
593 #endif /* style of coding would have declared */
594 const uschar *callpat; /* them within each of those blocks. */
595 const uschar *data; /* However, in order to accommodate the */
596 const uschar *next; /* version of this code that uses an */
597 USPTR pp; /* external "stack" implemented on the */
598 const uschar *prev; /* heap, it is easier to declare them all */
599 USPTR saved_eptr; /* here, so the declarations can be cut */
600 /* out in a block. The only declarations */
601 recursion_info new_recursive; /* within blocks below are for variables */
602 /* that do not have to be preserved over */
603 BOOL cur_is_word; /* a recursive call to RMATCH(). */
604 BOOL condition;
605 BOOL prev_is_word;
606
607 #ifdef SUPPORT_UCP
608 int prop_type;
609 int prop_value;
610 int prop_fail_result;
611 int prop_category;
612 int prop_chartype;
613 int prop_script;
614 int oclength;
615 uschar occhars[8];
616 #endif
617
618 int codelink;
619 int ctype;
620 int length;
621 int max;
622 int min;
623 int number;
624 int offset;
625 int op;
626 int save_capture_last;
627 int save_offset1, save_offset2, save_offset3;
628 int stacksave[REC_STACK_SAVE_MAX];
629
630 eptrblock newptrb;
631 #endif /* NO_RECURSE */
632
633 /* These statements are here to stop the compiler complaining about uninitialized
634 variables. */
635
636 #ifdef SUPPORT_UCP
637 prop_value = 0;
638 prop_fail_result = 0;
639 #endif
640
641
642 /* This label is used for tail recursion, which is used in a few cases even
643 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
644 used. Thanks to Ian Taylor for noticing this possibility and sending the
645 original patch. */
646
647 TAIL_RECURSE:
648
649 /* OK, now we can get on with the real code of the function. Recursive calls
650 are specified by the macro RMATCH and RRETURN is used to return. When
651 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
652 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
653 defined). However, RMATCH isn't like a function call because it's quite a
654 complicated macro. It has to be used in one particular way. This shouldn't,
655 however, impact performance when true recursion is being used. */
656
657 #ifdef SUPPORT_UTF8
658 utf8 = md->utf8; /* Local copy of the flag */
659 #else
660 utf8 = FALSE;
661 #endif
662
663 /* First check that we haven't called match() too many times, or that we
664 haven't exceeded the recursive call limit. */
665
666 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
667 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
668
669 /* At the start of a group with an unlimited repeat that may match an empty
670 string, the match_cbegroup flag is set. When this is the case, add the current
671 subject pointer to the chain of such remembered pointers, to be checked when we
672 hit the closing ket, in order to break infinite loops that match no characters.
673 When match() is called in other circumstances, don't add to the chain. The
674 match_cbegroup flag must NOT be used with tail recursion, because the memory
675 block that is used is on the stack, so a new one may be required for each
676 match(). */
677
678 if ((flags & match_cbegroup) != 0)
679 {
680 newptrb.epb_saved_eptr = eptr;
681 newptrb.epb_prev = eptrb;
682 eptrb = &newptrb;
683 }
684
685 /* Now start processing the opcodes. */
686
687 for (;;)
688 {
689 minimize = possessive = FALSE;
690 op = *ecode;
691
692 switch(op)
693 {
694 case OP_MARK:
695 markptr = ecode + 2;
696 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
697 eptrb, flags, RM55);
698
699 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
700 argument, and we must check whether that argument matches this MARK's
701 argument. It is passed back in md->start_match_ptr (an overloading of that
702 variable). If it does match, we reset that variable to the current subject
703 position and return MATCH_SKIP. Otherwise, pass back the return code
704 unaltered. */
705
706 if (rrc == MATCH_SKIP_ARG &&
707 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
708 {
709 md->start_match_ptr = eptr;
710 RRETURN(MATCH_SKIP);
711 }
712
713 if (md->mark == NULL) md->mark = markptr;
714 RRETURN(rrc);
715
716 case OP_FAIL:
717 MRRETURN(MATCH_NOMATCH);
718
719 /* COMMIT overrides PRUNE, SKIP, and THEN */
720
721 case OP_COMMIT:
722 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
723 eptrb, flags, RM52);
724 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
725 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
726 rrc != MATCH_THEN)
727 RRETURN(rrc);
728 MRRETURN(MATCH_COMMIT);
729
730 /* PRUNE overrides THEN */
731
732 case OP_PRUNE:
733 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734 eptrb, flags, RM51);
735 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736 MRRETURN(MATCH_PRUNE);
737
738 case OP_PRUNE_ARG:
739 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
740 eptrb, flags, RM56);
741 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742 md->mark = ecode + 2;
743 RRETURN(MATCH_PRUNE);
744
745 /* SKIP overrides PRUNE and THEN */
746
747 case OP_SKIP:
748 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
749 eptrb, flags, RM53);
750 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
751 RRETURN(rrc);
752 md->start_match_ptr = eptr; /* Pass back current position */
753 MRRETURN(MATCH_SKIP);
754
755 case OP_SKIP_ARG:
756 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757 eptrb, flags, RM57);
758 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
759 RRETURN(rrc);
760
761 /* Pass back the current skip name by overloading md->start_match_ptr and
762 returning the special MATCH_SKIP_ARG return code. This will either be
763 caught by a matching MARK, or get to the top, where it is treated the same
764 as PRUNE. */
765
766 md->start_match_ptr = ecode + 2;
767 RRETURN(MATCH_SKIP_ARG);
768
769 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
770 the alt that is at the start of the current branch. This makes it possible
771 to skip back past alternatives that precede the THEN within the current
772 branch. */
773
774 case OP_THEN:
775 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
776 eptrb, flags, RM54);
777 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
778 md->start_match_ptr = ecode - GET(ecode, 1);
779 MRRETURN(MATCH_THEN);
780
781 case OP_THEN_ARG:
782 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
783 offset_top, md, eptrb, flags, RM58);
784 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
785 md->start_match_ptr = ecode - GET(ecode, 1);
786 md->mark = ecode + LINK_SIZE + 2;
787 RRETURN(MATCH_THEN);
788
789 /* Handle a capturing bracket. If there is space in the offset vector, save
790 the current subject position in the working slot at the top of the vector.
791 We mustn't change the current values of the data slot, because they may be
792 set from a previous iteration of this group, and be referred to by a
793 reference inside the group.
794
795 If the bracket fails to match, we need to restore this value and also the
796 values of the final offsets, in case they were set by a previous iteration
797 of the same bracket.
798
799 If there isn't enough space in the offset vector, treat this as if it were
800 a non-capturing bracket. Don't worry about setting the flag for the error
801 case here; that is handled in the code for KET. */
802
803 case OP_CBRA:
804 case OP_SCBRA:
805 number = GET2(ecode, 1+LINK_SIZE);
806 offset = number << 1;
807
808 #ifdef PCRE_DEBUG
809 printf("start bracket %d\n", number);
810 printf("subject=");
811 pchars(eptr, 16, TRUE, md);
812 printf("\n");
813 #endif
814
815 if (offset < md->offset_max)
816 {
817 save_offset1 = md->offset_vector[offset];
818 save_offset2 = md->offset_vector[offset+1];
819 save_offset3 = md->offset_vector[md->offset_end - number];
820 save_capture_last = md->capture_last;
821
822 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
823 md->offset_vector[md->offset_end - number] =
824 (int)(eptr - md->start_subject);
825
826 flags = (op == OP_SCBRA)? match_cbegroup : 0;
827 do
828 {
829 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
830 eptrb, flags, RM1);
831 if (rrc != MATCH_NOMATCH &&
832 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
833 RRETURN(rrc);
834 md->capture_last = save_capture_last;
835 ecode += GET(ecode, 1);
836 }
837 while (*ecode == OP_ALT);
838
839 DPRINTF(("bracket %d failed\n", number));
840
841 md->offset_vector[offset] = save_offset1;
842 md->offset_vector[offset+1] = save_offset2;
843 md->offset_vector[md->offset_end - number] = save_offset3;
844
845 if (rrc != MATCH_THEN) md->mark = markptr;
846 RRETURN(MATCH_NOMATCH);
847 }
848
849 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
850 as a non-capturing bracket. */
851
852 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
854
855 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
856
857 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
858 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
859
860 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
861 final alternative within the brackets, we would return the result of a
862 recursive call to match() whatever happened. We can reduce stack usage by
863 turning this into a tail recursion, except in the case when match_cbegroup
864 is set.*/
865
866 case OP_BRA:
867 case OP_SBRA:
868 DPRINTF(("start non-capturing bracket\n"));
869 flags = (op >= OP_SBRA)? match_cbegroup : 0;
870 for (;;)
871 {
872 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
873 {
874 if (flags == 0) /* Not a possibly empty group */
875 {
876 ecode += _pcre_OP_lengths[*ecode];
877 DPRINTF(("bracket 0 tail recursion\n"));
878 goto TAIL_RECURSE;
879 }
880
881 /* Possibly empty group; can't use tail recursion. */
882
883 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
884 flags, RM48);
885 if (rrc == MATCH_NOMATCH) md->mark = markptr;
886 RRETURN(rrc);
887 }
888
889 /* For non-final alternatives, continue the loop for a NOMATCH result;
890 otherwise return. */
891
892 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
893 flags, RM2);
894 if (rrc != MATCH_NOMATCH &&
895 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
896 RRETURN(rrc);
897 ecode += GET(ecode, 1);
898 }
899 /* Control never reaches here. */
900
901 /* Conditional group: compilation checked that there are no more than
902 two branches. If the condition is false, skipping the first branch takes us
903 past the end if there is only one branch, but that's OK because that is
904 exactly what going to the ket would do. As there is only one branch to be
905 obeyed, we can use tail recursion to avoid using another stack frame. */
906
907 case OP_COND:
908 case OP_SCOND:
909 codelink= GET(ecode, 1);
910
911 /* Because of the way auto-callout works during compile, a callout item is
912 inserted between OP_COND and an assertion condition. */
913
914 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
915 {
916 if (pcre_callout != NULL)
917 {
918 pcre_callout_block cb;
919 cb.version = 1; /* Version 1 of the callout block */
920 cb.callout_number = ecode[LINK_SIZE+2];
921 cb.offset_vector = md->offset_vector;
922 cb.subject = (PCRE_SPTR)md->start_subject;
923 cb.subject_length = (int)(md->end_subject - md->start_subject);
924 cb.start_match = (int)(mstart - md->start_subject);
925 cb.current_position = (int)(eptr - md->start_subject);
926 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
927 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
928 cb.capture_top = offset_top/2;
929 cb.capture_last = md->capture_last;
930 cb.callout_data = md->callout_data;
931 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
932 if (rrc < 0) RRETURN(rrc);
933 }
934 ecode += _pcre_OP_lengths[OP_CALLOUT];
935 }
936
937 condcode = ecode[LINK_SIZE+1];
938
939 /* Now see what the actual condition is */
940
941 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
942 {
943 if (md->recursive == NULL) /* Not recursing => FALSE */
944 {
945 condition = FALSE;
946 ecode += GET(ecode, 1);
947 }
948 else
949 {
950 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
951 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
952
953 /* If the test is for recursion into a specific subpattern, and it is
954 false, but the test was set up by name, scan the table to see if the
955 name refers to any other numbers, and test them. The condition is true
956 if any one is set. */
957
958 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
959 {
960 uschar *slotA = md->name_table;
961 for (i = 0; i < md->name_count; i++)
962 {
963 if (GET2(slotA, 0) == recno) break;
964 slotA += md->name_entry_size;
965 }
966
967 /* Found a name for the number - there can be only one; duplicate
968 names for different numbers are allowed, but not vice versa. First
969 scan down for duplicates. */
970
971 if (i < md->name_count)
972 {
973 uschar *slotB = slotA;
974 while (slotB > md->name_table)
975 {
976 slotB -= md->name_entry_size;
977 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
978 {
979 condition = GET2(slotB, 0) == md->recursive->group_num;
980 if (condition) break;
981 }
982 else break;
983 }
984
985 /* Scan up for duplicates */
986
987 if (!condition)
988 {
989 slotB = slotA;
990 for (i++; i < md->name_count; i++)
991 {
992 slotB += md->name_entry_size;
993 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
994 {
995 condition = GET2(slotB, 0) == md->recursive->group_num;
996 if (condition) break;
997 }
998 else break;
999 }
1000 }
1001 }
1002 }
1003
1004 /* Choose the branch according to the condition */
1005
1006 ecode += condition? 3 : GET(ecode, 1);
1007 }
1008 }
1009
1010 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1011 {
1012 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1013 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1014
1015 /* If the numbered capture is unset, but the reference was by name,
1016 scan the table to see if the name refers to any other numbers, and test
1017 them. The condition is true if any one is set. This is tediously similar
1018 to the code above, but not close enough to try to amalgamate. */
1019
1020 if (!condition && condcode == OP_NCREF)
1021 {
1022 int refno = offset >> 1;
1023 uschar *slotA = md->name_table;
1024
1025 for (i = 0; i < md->name_count; i++)
1026 {
1027 if (GET2(slotA, 0) == refno) break;
1028 slotA += md->name_entry_size;
1029 }
1030
1031 /* Found a name for the number - there can be only one; duplicate names
1032 for different numbers are allowed, but not vice versa. First scan down
1033 for duplicates. */
1034
1035 if (i < md->name_count)
1036 {
1037 uschar *slotB = slotA;
1038 while (slotB > md->name_table)
1039 {
1040 slotB -= md->name_entry_size;
1041 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1042 {
1043 offset = GET2(slotB, 0) << 1;
1044 condition = offset < offset_top &&
1045 md->offset_vector[offset] >= 0;
1046 if (condition) break;
1047 }
1048 else break;
1049 }
1050
1051 /* Scan up for duplicates */
1052
1053 if (!condition)
1054 {
1055 slotB = slotA;
1056 for (i++; i < md->name_count; i++)
1057 {
1058 slotB += md->name_entry_size;
1059 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1060 {
1061 offset = GET2(slotB, 0) << 1;
1062 condition = offset < offset_top &&
1063 md->offset_vector[offset] >= 0;
1064 if (condition) break;
1065 }
1066 else break;
1067 }
1068 }
1069 }
1070 }
1071
1072 /* Choose the branch according to the condition */
1073
1074 ecode += condition? 3 : GET(ecode, 1);
1075 }
1076
1077 else if (condcode == OP_DEF) /* DEFINE - always false */
1078 {
1079 condition = FALSE;
1080 ecode += GET(ecode, 1);
1081 }
1082
1083 /* The condition is an assertion. Call match() to evaluate it - passing
1084 match_condassert in the flags argument causes it to stop at the end of an
1085 assertion. */
1086
1087 else
1088 {
1089 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL,
1090 match_condassert, RM3);
1091 if (rrc == MATCH_MATCH)
1092 {
1093 condition = TRUE;
1094 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1095 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1096 }
1097 else if (rrc != MATCH_NOMATCH &&
1098 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1099 {
1100 RRETURN(rrc); /* Need braces because of following else */
1101 }
1102 else
1103 {
1104 condition = FALSE;
1105 ecode += codelink;
1106 }
1107 }
1108
1109 /* We are now at the branch that is to be obeyed. As there is only one,
1110 we can use tail recursion to avoid using another stack frame, except when
1111 match_cbegroup is required for an unlimited repeat of a possibly empty
1112 group. If the second alternative doesn't exist, we can just plough on. */
1113
1114 if (condition || *ecode == OP_ALT)
1115 {
1116 ecode += 1 + LINK_SIZE;
1117 if (op == OP_SCOND) /* Possibly empty group */
1118 {
1119 RMATCH(eptr, ecode, offset_top, md, eptrb, match_cbegroup, RM49);
1120 RRETURN(rrc);
1121 }
1122 else /* Group must match something */
1123 {
1124 flags = 0;
1125 goto TAIL_RECURSE;
1126 }
1127 }
1128 else /* Condition false & no alternative */
1129 {
1130 ecode += 1 + LINK_SIZE;
1131 }
1132 break;
1133
1134
1135 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1136 to close any currently open capturing brackets. */
1137
1138 case OP_CLOSE:
1139 number = GET2(ecode, 1);
1140 offset = number << 1;
1141
1142 #ifdef PCRE_DEBUG
1143 printf("end bracket %d at *ACCEPT", number);
1144 printf("\n");
1145 #endif
1146
1147 md->capture_last = number;
1148 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1149 {
1150 md->offset_vector[offset] =
1151 md->offset_vector[md->offset_end - number];
1152 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1153 if (offset_top <= offset) offset_top = offset + 2;
1154 }
1155 ecode += 3;
1156 break;
1157
1158
1159 /* End of the pattern, either real or forced. If we are in a top-level
1160 recursion, we should restore the offsets appropriately and continue from
1161 after the call. */
1162
1163 case OP_ACCEPT:
1164 case OP_END:
1165 if (md->recursive != NULL && md->recursive->group_num == 0)
1166 {
1167 recursion_info *rec = md->recursive;
1168 DPRINTF(("End of pattern in a (?0) recursion\n"));
1169 md->recursive = rec->prevrec;
1170 memmove(md->offset_vector, rec->offset_save,
1171 rec->saved_max * sizeof(int));
1172 offset_top = rec->save_offset_top;
1173 ecode = rec->after_call;
1174 break;
1175 }
1176
1177 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1178 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1179 the subject. In both cases, backtracking will then try other alternatives,
1180 if any. */
1181
1182 if (eptr == mstart &&
1183 (md->notempty ||
1184 (md->notempty_atstart &&
1185 mstart == md->start_subject + md->start_offset)))
1186 MRRETURN(MATCH_NOMATCH);
1187
1188 /* Otherwise, we have a match. */
1189
1190 md->end_match_ptr = eptr; /* Record where we ended */
1191 md->end_offset_top = offset_top; /* and how many extracts were taken */
1192 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1193
1194 /* For some reason, the macros don't work properly if an expression is
1195 given as the argument to MRRETURN when the heap is in use. */
1196
1197 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1198 MRRETURN(rrc);
1199
1200 /* Assertion brackets. Check the alternative branches in turn - the
1201 matching won't pass the KET for an assertion. If any one branch matches,
1202 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1203 start of each branch to move the current point backwards, so the code at
1204 this level is identical to the lookahead case. */
1205
1206 case OP_ASSERT:
1207 case OP_ASSERTBACK:
1208 do
1209 {
1210 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1211 RM4);
1212 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1213 {
1214 mstart = md->start_match_ptr; /* In case \K reset it */
1215 break;
1216 }
1217 if (rrc != MATCH_NOMATCH &&
1218 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1219 RRETURN(rrc);
1220 ecode += GET(ecode, 1);
1221 }
1222 while (*ecode == OP_ALT);
1223 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1224
1225 /* If checking an assertion for a condition, return MATCH_MATCH. */
1226
1227 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1228
1229 /* Continue from after the assertion, updating the offsets high water
1230 mark, since extracts may have been taken during the assertion. */
1231
1232 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1233 ecode += 1 + LINK_SIZE;
1234 offset_top = md->end_offset_top;
1235 continue;
1236
1237 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1238 PRUNE, or COMMIT means we must assume failure without checking subsequent
1239 branches. */
1240
1241 case OP_ASSERT_NOT:
1242 case OP_ASSERTBACK_NOT:
1243 do
1244 {
1245 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1246 RM5);
1247 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1248 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1249 {
1250 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1251 break;
1252 }
1253 if (rrc != MATCH_NOMATCH &&
1254 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1255 RRETURN(rrc);
1256 ecode += GET(ecode,1);
1257 }
1258 while (*ecode == OP_ALT);
1259
1260 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1261
1262 ecode += 1 + LINK_SIZE;
1263 continue;
1264
1265 /* Move the subject pointer back. This occurs only at the start of
1266 each branch of a lookbehind assertion. If we are too close to the start to
1267 move back, this match function fails. When working with UTF-8 we move
1268 back a number of characters, not bytes. */
1269
1270 case OP_REVERSE:
1271 #ifdef SUPPORT_UTF8
1272 if (utf8)
1273 {
1274 i = GET(ecode, 1);
1275 while (i-- > 0)
1276 {
1277 eptr--;
1278 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1279 BACKCHAR(eptr);
1280 }
1281 }
1282 else
1283 #endif
1284
1285 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1286
1287 {
1288 eptr -= GET(ecode, 1);
1289 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1290 }
1291
1292 /* Save the earliest consulted character, then skip to next op code */
1293
1294 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1295 ecode += 1 + LINK_SIZE;
1296 break;
1297
1298 /* The callout item calls an external function, if one is provided, passing
1299 details of the match so far. This is mainly for debugging, though the
1300 function is able to force a failure. */
1301
1302 case OP_CALLOUT:
1303 if (pcre_callout != NULL)
1304 {
1305 pcre_callout_block cb;
1306 cb.version = 1; /* Version 1 of the callout block */
1307 cb.callout_number = ecode[1];
1308 cb.offset_vector = md->offset_vector;
1309 cb.subject = (PCRE_SPTR)md->start_subject;
1310 cb.subject_length = (int)(md->end_subject - md->start_subject);
1311 cb.start_match = (int)(mstart - md->start_subject);
1312 cb.current_position = (int)(eptr - md->start_subject);
1313 cb.pattern_position = GET(ecode, 2);
1314 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1315 cb.capture_top = offset_top/2;
1316 cb.capture_last = md->capture_last;
1317 cb.callout_data = md->callout_data;
1318 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1319 if (rrc < 0) RRETURN(rrc);
1320 }
1321 ecode += 2 + 2*LINK_SIZE;
1322 break;
1323
1324 /* Recursion either matches the current regex, or some subexpression. The
1325 offset data is the offset to the starting bracket from the start of the
1326 whole pattern. (This is so that it works from duplicated subpatterns.)
1327
1328 If there are any capturing brackets started but not finished, we have to
1329 save their starting points and reinstate them after the recursion. However,
1330 we don't know how many such there are (offset_top records the completed
1331 total) so we just have to save all the potential data. There may be up to
1332 65535 such values, which is too large to put on the stack, but using malloc
1333 for small numbers seems expensive. As a compromise, the stack is used when
1334 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1335 is used. If the malloc fails, the offsets cannot be saved, so the match is
1336 abandoned by returning PCRE_ERROR_NOMEMORY rather than carrying on with
1337 incomplete data.
1338
1339 There are also other values that have to be saved. We use a chained
1340 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1341 for the original version of this logic. */
1342
1343 case OP_RECURSE:
1344 {
1345 callpat = md->start_code + GET(ecode, 1);
1346 new_recursive.group_num = (callpat == md->start_code)? 0 :
1347 GET2(callpat, 1 + LINK_SIZE);
1348
1349 /* Add to "recursing stack" */
1350
1351 new_recursive.prevrec = md->recursive;
1352 md->recursive = &new_recursive;
1353
1354 /* Find where to continue from afterwards */
1355
1356 ecode += 1 + LINK_SIZE;
1357 new_recursive.after_call = ecode;
1358
1359 /* Now save the offset data. */
1360
1361 new_recursive.saved_max = md->offset_end;
1362 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1363 new_recursive.offset_save = stacksave;
1364 else
1365 {
1366 new_recursive.offset_save =
1367 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1368 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1369 }
1370
1371 memcpy(new_recursive.offset_save, md->offset_vector,
1372 new_recursive.saved_max * sizeof(int));
1373 new_recursive.save_offset_top = offset_top;
1374
1375 /* OK, now we can do the recursion. For each top-level alternative we
1376 restore the offset and recursion data. */
1377
1378 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1379 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1380 do
1381 {
1382 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1383 md, eptrb, flags, RM6);
1384 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1385 {
1386 DPRINTF(("Recursion matched\n"));
1387 md->recursive = new_recursive.prevrec;
1388 if (new_recursive.offset_save != stacksave)
1389 (pcre_free)(new_recursive.offset_save);
1390 MRRETURN(MATCH_MATCH);
1391 }
1392 else if (rrc != MATCH_NOMATCH &&
1393 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1394 {
1395 DPRINTF(("Recursion gave error %d\n", rrc));
1396 if (new_recursive.offset_save != stacksave)
1397 (pcre_free)(new_recursive.offset_save);
1398 RRETURN(rrc);
1399 }
1400
1401 md->recursive = &new_recursive;
1402 memcpy(md->offset_vector, new_recursive.offset_save,
1403 new_recursive.saved_max * sizeof(int));
1404 callpat += GET(callpat, 1);
1405 }
1406 while (*callpat == OP_ALT);
1407
1408 DPRINTF(("Recursion didn't match\n"));
1409 md->recursive = new_recursive.prevrec;
1410 if (new_recursive.offset_save != stacksave)
1411 (pcre_free)(new_recursive.offset_save);
1412 MRRETURN(MATCH_NOMATCH);
1413 }
1414 /* Control never reaches here */
1415
1416 /* "Once" brackets are like assertion brackets except that after a match,
1417 the point in the subject string is not moved back. Thus there can never be
1418 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1419 Check the alternative branches in turn - the matching won't pass the KET
1420 for this kind of subpattern. If any one branch matches, we carry on as at
1421 the end of a normal bracket, leaving the subject pointer, but resetting
1422 the start-of-match value in case it was changed by \K. */
1423
1424 case OP_ONCE:
1425 prev = ecode;
1426 saved_eptr = eptr;
1427
1428 do
1429 {
1430 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM7);
1431 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1432 {
1433 mstart = md->start_match_ptr;
1434 break;
1435 }
1436 if (rrc != MATCH_NOMATCH &&
1437 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1438 RRETURN(rrc);
1439 ecode += GET(ecode,1);
1440 }
1441 while (*ecode == OP_ALT);
1442
1443 /* If hit the end of the group (which could be repeated), fail */
1444
1445 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1446
1447 /* Continue as from after the assertion, updating the offsets high water
1448 mark, since extracts may have been taken. */
1449
1450 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1451
1452 offset_top = md->end_offset_top;
1453 eptr = md->end_match_ptr;
1454
1455 /* For a non-repeating ket, just continue at this level. This also
1456 happens for a repeating ket if no characters were matched in the group.
1457 This is the forcible breaking of infinite loops as implemented in Perl
1458 5.005. If there is an options reset, it will get obeyed in the normal
1459 course of events. */
1460
1461 if (*ecode == OP_KET || eptr == saved_eptr)
1462 {
1463 ecode += 1+LINK_SIZE;
1464 break;
1465 }
1466
1467 /* The repeating kets try the rest of the pattern or restart from the
1468 preceding bracket, in the appropriate order. The second "call" of match()
1469 uses tail recursion, to avoid using another stack frame. */
1470
1471 if (*ecode == OP_KETRMIN)
1472 {
1473 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM8);
1474 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1475 ecode = prev;
1476 flags = 0;
1477 goto TAIL_RECURSE;
1478 }
1479 else /* OP_KETRMAX */
1480 {
1481 RMATCH(eptr, prev, offset_top, md, eptrb, match_cbegroup, RM9);
1482 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1483 ecode += 1 + LINK_SIZE;
1484 flags = 0;
1485 goto TAIL_RECURSE;
1486 }
1487 /* Control never gets here */
1488
1489 /* An alternation is the end of a branch; scan along to find the end of the
1490 bracketed group and go to there. */
1491
1492 case OP_ALT:
1493 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1494 break;
1495
1496 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1497 indicating that it may occur zero times. It may repeat infinitely, or not
1498 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1499 with fixed upper repeat limits are compiled as a number of copies, with the
1500 optional ones preceded by BRAZERO or BRAMINZERO. */
1501
1502 case OP_BRAZERO:
1503 {
1504 next = ecode+1;
1505 RMATCH(eptr, next, offset_top, md, eptrb, 0, RM10);
1506 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1507 do next += GET(next,1); while (*next == OP_ALT);
1508 ecode = next + 1 + LINK_SIZE;
1509 }
1510 break;
1511
1512 case OP_BRAMINZERO:
1513 {
1514 next = ecode+1;
1515 do next += GET(next, 1); while (*next == OP_ALT);
1516 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, 0, RM11);
1517 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518 ecode++;
1519 }
1520 break;
1521
1522 case OP_SKIPZERO:
1523 {
1524 next = ecode+1;
1525 do next += GET(next,1); while (*next == OP_ALT);
1526 ecode = next + 1 + LINK_SIZE;
1527 }
1528 break;
1529
1530 /* End of a group, repeated or non-repeating. */
1531
1532 case OP_KET:
1533 case OP_KETRMIN:
1534 case OP_KETRMAX:
1535 prev = ecode - GET(ecode, 1);
1536
1537 /* If this was a group that remembered the subject start, in order to break
1538 infinite repeats of empty string matches, retrieve the subject start from
1539 the chain. Otherwise, set it NULL. */
1540
1541 if (*prev >= OP_SBRA)
1542 {
1543 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1544 eptrb = eptrb->epb_prev; /* Backup to previous group */
1545 }
1546 else saved_eptr = NULL;
1547
1548 /* If we are at the end of an assertion group or an atomic group, stop
1549 matching and return MATCH_MATCH, but record the current high water mark for
1550 use by positive assertions. We also need to record the match start in case
1551 it was changed by \K. */
1552
1553 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1554 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1555 *prev == OP_ONCE)
1556 {
1557 md->end_match_ptr = eptr; /* For ONCE */
1558 md->end_offset_top = offset_top;
1559 md->start_match_ptr = mstart;
1560 MRRETURN(MATCH_MATCH);
1561 }
1562
1563 /* For capturing groups we have to check the group number back at the start
1564 and if necessary complete handling an extraction by setting the offsets and
1565 bumping the high water mark. Note that whole-pattern recursion is coded as
1566 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1567 when the OP_END is reached. Other recursion is handled here. */
1568
1569 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1570 {
1571 number = GET2(prev, 1+LINK_SIZE);
1572 offset = number << 1;
1573
1574 #ifdef PCRE_DEBUG
1575 printf("end bracket %d", number);
1576 printf("\n");
1577 #endif
1578
1579 md->capture_last = number;
1580 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1581 {
1582 md->offset_vector[offset] =
1583 md->offset_vector[md->offset_end - number];
1584 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1585 if (offset_top <= offset) offset_top = offset + 2;
1586 }
1587
1588 /* Handle a recursively called group. Restore the offsets
1589 appropriately and continue from after the call. */
1590
1591 if (md->recursive != NULL && md->recursive->group_num == number)
1592 {
1593 recursion_info *rec = md->recursive;
1594 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1595 md->recursive = rec->prevrec;
1596 memcpy(md->offset_vector, rec->offset_save,
1597 rec->saved_max * sizeof(int));
1598 offset_top = rec->save_offset_top;
1599 ecode = rec->after_call;
1600 break;
1601 }
1602 }
1603
1604 /* For a non-repeating ket, just continue at this level. This also
1605 happens for a repeating ket if no characters were matched in the group.
1606 This is the forcible breaking of infinite loops as implemented in Perl
1607 5.005. If there is an options reset, it will get obeyed in the normal
1608 course of events. */
1609
1610 if (*ecode == OP_KET || eptr == saved_eptr)
1611 {
1612 ecode += 1 + LINK_SIZE;
1613 break;
1614 }
1615
1616 /* The repeating kets try the rest of the pattern or restart from the
1617 preceding bracket, in the appropriate order. In the second case, we can use
1618 tail recursion to avoid using another stack frame, unless we have an
1619 unlimited repeat of a group that can match an empty string. */
1620
1621 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1622
1623 if (*ecode == OP_KETRMIN)
1624 {
1625 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM12);
1626 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1627 if (flags != 0) /* Could match an empty string */
1628 {
1629 RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM50);
1630 RRETURN(rrc);
1631 }
1632 ecode = prev;
1633 goto TAIL_RECURSE;
1634 }
1635 else /* OP_KETRMAX */
1636 {
1637 RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM13);
1638 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1639 ecode += 1 + LINK_SIZE;
1640 flags = 0;
1641 goto TAIL_RECURSE;
1642 }
1643 /* Control never gets here */
1644
1645 /* Not multiline mode: start of subject assertion, unless notbol. */
1646
1647 case OP_CIRC:
1648 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1649
1650 /* Start of subject assertion */
1651
1652 case OP_SOD:
1653 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1654 ecode++;
1655 break;
1656
1657 /* Multiline mode: start of subject unless notbol, or after any newline. */
1658
1659 case OP_CIRCM:
1660 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1661 if (eptr != md->start_subject &&
1662 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1663 MRRETURN(MATCH_NOMATCH);
1664 ecode++;
1665 break;
1666
1667 /* Start of match assertion */
1668
1669 case OP_SOM:
1670 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1671 ecode++;
1672 break;
1673
1674 /* Reset the start of match point */
1675
1676 case OP_SET_SOM:
1677 mstart = eptr;
1678 ecode++;
1679 break;
1680
1681 /* Multiline mode: assert before any newline, or before end of subject
1682 unless noteol is set. */
1683
1684 case OP_DOLLM:
1685 if (eptr < md->end_subject)
1686 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1687 else
1688 {
1689 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1690 SCHECK_PARTIAL();
1691 }
1692 ecode++;
1693 break;
1694
1695 /* Not multiline mode: assert before a terminating newline or before end of
1696 subject unless noteol is set. */
1697
1698 case OP_DOLL:
1699 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1700 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1701
1702 /* ... else fall through for endonly */
1703
1704 /* End of subject assertion (\z) */
1705
1706 case OP_EOD:
1707 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1708 SCHECK_PARTIAL();
1709 ecode++;
1710 break;
1711
1712 /* End of subject or ending \n assertion (\Z) */
1713
1714 case OP_EODN:
1715 ASSERT_NL_OR_EOS:
1716 if (eptr < md->end_subject &&
1717 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718 MRRETURN(MATCH_NOMATCH);
1719
1720 /* Either at end of string or \n before end. */
1721
1722 SCHECK_PARTIAL();
1723 ecode++;
1724 break;
1725
1726 /* Word boundary assertions */
1727
1728 case OP_NOT_WORD_BOUNDARY:
1729 case OP_WORD_BOUNDARY:
1730 {
1731
1732 /* Find out if the previous and current characters are "word" characters.
1733 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1734 be "non-word" characters. Remember the earliest consulted character for
1735 partial matching. */
1736
1737 #ifdef SUPPORT_UTF8
1738 if (utf8)
1739 {
1740 /* Get status of previous character */
1741
1742 if (eptr == md->start_subject) prev_is_word = FALSE; else
1743 {
1744 USPTR lastptr = eptr - 1;
1745 while((*lastptr & 0xc0) == 0x80) lastptr--;
1746 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1747 GETCHAR(c, lastptr);
1748 #ifdef SUPPORT_UCP
1749 if (md->use_ucp)
1750 {
1751 if (c == '_') prev_is_word = TRUE; else
1752 {
1753 int cat = UCD_CATEGORY(c);
1754 prev_is_word = (cat == ucp_L || cat == ucp_N);
1755 }
1756 }
1757 else
1758 #endif
1759 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1760 }
1761
1762 /* Get status of next character */
1763
1764 if (eptr >= md->end_subject)
1765 {
1766 SCHECK_PARTIAL();
1767 cur_is_word = FALSE;
1768 }
1769 else
1770 {
1771 GETCHAR(c, eptr);
1772 #ifdef SUPPORT_UCP
1773 if (md->use_ucp)
1774 {
1775 if (c == '_') cur_is_word = TRUE; else
1776 {
1777 int cat = UCD_CATEGORY(c);
1778 cur_is_word = (cat == ucp_L || cat == ucp_N);
1779 }
1780 }
1781 else
1782 #endif
1783 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1784 }
1785 }
1786 else
1787 #endif
1788
1789 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1790 consistency with the behaviour of \w we do use it in this case. */
1791
1792 {
1793 /* Get status of previous character */
1794
1795 if (eptr == md->start_subject) prev_is_word = FALSE; else
1796 {
1797 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1798 #ifdef SUPPORT_UCP
1799 if (md->use_ucp)
1800 {
1801 c = eptr[-1];
1802 if (c == '_') prev_is_word = TRUE; else
1803 {
1804 int cat = UCD_CATEGORY(c);
1805 prev_is_word = (cat == ucp_L || cat == ucp_N);
1806 }
1807 }
1808 else
1809 #endif
1810 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1811 }
1812
1813 /* Get status of next character */
1814
1815 if (eptr >= md->end_subject)
1816 {
1817 SCHECK_PARTIAL();
1818 cur_is_word = FALSE;
1819 }
1820 else
1821 #ifdef SUPPORT_UCP
1822 if (md->use_ucp)
1823 {
1824 c = *eptr;
1825 if (c == '_') cur_is_word = TRUE; else
1826 {
1827 int cat = UCD_CATEGORY(c);
1828 cur_is_word = (cat == ucp_L || cat == ucp_N);
1829 }
1830 }
1831 else
1832 #endif
1833 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1834 }
1835
1836 /* Now see if the situation is what we want */
1837
1838 if ((*ecode++ == OP_WORD_BOUNDARY)?
1839 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1840 MRRETURN(MATCH_NOMATCH);
1841 }
1842 break;
1843
1844 /* Match a single character type; inline for speed */
1845
1846 case OP_ANY:
1847 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1848 /* Fall through */
1849
1850 case OP_ALLANY:
1851 if (eptr++ >= md->end_subject)
1852 {
1853 SCHECK_PARTIAL();
1854 MRRETURN(MATCH_NOMATCH);
1855 }
1856 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1857 ecode++;
1858 break;
1859
1860 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1861 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1862
1863 case OP_ANYBYTE:
1864 if (eptr++ >= md->end_subject)
1865 {
1866 SCHECK_PARTIAL();
1867 MRRETURN(MATCH_NOMATCH);
1868 }
1869 ecode++;
1870 break;
1871
1872 case OP_NOT_DIGIT:
1873 if (eptr >= md->end_subject)
1874 {
1875 SCHECK_PARTIAL();
1876 MRRETURN(MATCH_NOMATCH);
1877 }
1878 GETCHARINCTEST(c, eptr);
1879 if (
1880 #ifdef SUPPORT_UTF8
1881 c < 256 &&
1882 #endif
1883 (md->ctypes[c] & ctype_digit) != 0
1884 )
1885 MRRETURN(MATCH_NOMATCH);
1886 ecode++;
1887 break;
1888
1889 case OP_DIGIT:
1890 if (eptr >= md->end_subject)
1891 {
1892 SCHECK_PARTIAL();
1893 MRRETURN(MATCH_NOMATCH);
1894 }
1895 GETCHARINCTEST(c, eptr);
1896 if (
1897 #ifdef SUPPORT_UTF8
1898 c >= 256 ||
1899 #endif
1900 (md->ctypes[c] & ctype_digit) == 0
1901 )
1902 MRRETURN(MATCH_NOMATCH);
1903 ecode++;
1904 break;
1905
1906 case OP_NOT_WHITESPACE:
1907 if (eptr >= md->end_subject)
1908 {
1909 SCHECK_PARTIAL();
1910 MRRETURN(MATCH_NOMATCH);
1911 }
1912 GETCHARINCTEST(c, eptr);
1913 if (
1914 #ifdef SUPPORT_UTF8
1915 c < 256 &&
1916 #endif
1917 (md->ctypes[c] & ctype_space) != 0
1918 )
1919 MRRETURN(MATCH_NOMATCH);
1920 ecode++;
1921 break;
1922
1923 case OP_WHITESPACE:
1924 if (eptr >= md->end_subject)
1925 {
1926 SCHECK_PARTIAL();
1927 MRRETURN(MATCH_NOMATCH);
1928 }
1929 GETCHARINCTEST(c, eptr);
1930 if (
1931 #ifdef SUPPORT_UTF8
1932 c >= 256 ||
1933 #endif
1934 (md->ctypes[c] & ctype_space) == 0
1935 )
1936 MRRETURN(MATCH_NOMATCH);
1937 ecode++;
1938 break;
1939
1940 case OP_NOT_WORDCHAR:
1941 if (eptr >= md->end_subject)
1942 {
1943 SCHECK_PARTIAL();
1944 MRRETURN(MATCH_NOMATCH);
1945 }
1946 GETCHARINCTEST(c, eptr);
1947 if (
1948 #ifdef SUPPORT_UTF8
1949 c < 256 &&
1950 #endif
1951 (md->ctypes[c] & ctype_word) != 0
1952 )
1953 MRRETURN(MATCH_NOMATCH);
1954 ecode++;
1955 break;
1956
1957 case OP_WORDCHAR:
1958 if (eptr >= md->end_subject)
1959 {
1960 SCHECK_PARTIAL();
1961 MRRETURN(MATCH_NOMATCH);
1962 }
1963 GETCHARINCTEST(c, eptr);
1964 if (
1965 #ifdef SUPPORT_UTF8
1966 c >= 256 ||
1967 #endif
1968 (md->ctypes[c] & ctype_word) == 0
1969 )
1970 MRRETURN(MATCH_NOMATCH);
1971 ecode++;
1972 break;
1973
1974 case OP_ANYNL:
1975 if (eptr >= md->end_subject)
1976 {
1977 SCHECK_PARTIAL();
1978 MRRETURN(MATCH_NOMATCH);
1979 }
1980 GETCHARINCTEST(c, eptr);
1981 switch(c)
1982 {
1983 default: MRRETURN(MATCH_NOMATCH);
1984
1985 case 0x000d:
1986 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1987 break;
1988
1989 case 0x000a:
1990 break;
1991
1992 case 0x000b:
1993 case 0x000c:
1994 case 0x0085:
1995 case 0x2028:
1996 case 0x2029:
1997 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1998 break;
1999 }
2000 ecode++;
2001 break;
2002
2003 case OP_NOT_HSPACE:
2004 if (eptr >= md->end_subject)
2005 {
2006 SCHECK_PARTIAL();
2007 MRRETURN(MATCH_NOMATCH);
2008 }
2009 GETCHARINCTEST(c, eptr);
2010 switch(c)
2011 {
2012 default: break;
2013 case 0x09: /* HT */
2014 case 0x20: /* SPACE */
2015 case 0xa0: /* NBSP */
2016 case 0x1680: /* OGHAM SPACE MARK */
2017 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2018 case 0x2000: /* EN QUAD */
2019 case 0x2001: /* EM QUAD */
2020 case 0x2002: /* EN SPACE */
2021 case 0x2003: /* EM SPACE */
2022 case 0x2004: /* THREE-PER-EM SPACE */
2023 case 0x2005: /* FOUR-PER-EM SPACE */
2024 case 0x2006: /* SIX-PER-EM SPACE */
2025 case 0x2007: /* FIGURE SPACE */
2026 case 0x2008: /* PUNCTUATION SPACE */
2027 case 0x2009: /* THIN SPACE */
2028 case 0x200A: /* HAIR SPACE */
2029 case 0x202f: /* NARROW NO-BREAK SPACE */
2030 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2031 case 0x3000: /* IDEOGRAPHIC SPACE */
2032 MRRETURN(MATCH_NOMATCH);
2033 }
2034 ecode++;
2035 break;
2036
2037 case OP_HSPACE:
2038 if (eptr >= md->end_subject)
2039 {
2040 SCHECK_PARTIAL();
2041 MRRETURN(MATCH_NOMATCH);
2042 }
2043 GETCHARINCTEST(c, eptr);
2044 switch(c)
2045 {
2046 default: MRRETURN(MATCH_NOMATCH);
2047 case 0x09: /* HT */
2048 case 0x20: /* SPACE */
2049 case 0xa0: /* NBSP */
2050 case 0x1680: /* OGHAM SPACE MARK */
2051 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2052 case 0x2000: /* EN QUAD */
2053 case 0x2001: /* EM QUAD */
2054 case 0x2002: /* EN SPACE */
2055 case 0x2003: /* EM SPACE */
2056 case 0x2004: /* THREE-PER-EM SPACE */
2057 case 0x2005: /* FOUR-PER-EM SPACE */
2058 case 0x2006: /* SIX-PER-EM SPACE */
2059 case 0x2007: /* FIGURE SPACE */
2060 case 0x2008: /* PUNCTUATION SPACE */
2061 case 0x2009: /* THIN SPACE */
2062 case 0x200A: /* HAIR SPACE */
2063 case 0x202f: /* NARROW NO-BREAK SPACE */
2064 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2065 case 0x3000: /* IDEOGRAPHIC SPACE */
2066 break;
2067 }
2068 ecode++;
2069 break;
2070
2071 case OP_NOT_VSPACE:
2072 if (eptr >= md->end_subject)
2073 {
2074 SCHECK_PARTIAL();
2075 MRRETURN(MATCH_NOMATCH);
2076 }
2077 GETCHARINCTEST(c, eptr);
2078 switch(c)
2079 {
2080 default: break;
2081 case 0x0a: /* LF */
2082 case 0x0b: /* VT */
2083 case 0x0c: /* FF */
2084 case 0x0d: /* CR */
2085 case 0x85: /* NEL */
2086 case 0x2028: /* LINE SEPARATOR */
2087 case 0x2029: /* PARAGRAPH SEPARATOR */
2088 MRRETURN(MATCH_NOMATCH);
2089 }
2090 ecode++;
2091 break;
2092
2093 case OP_VSPACE:
2094 if (eptr >= md->end_subject)
2095 {
2096 SCHECK_PARTIAL();
2097 MRRETURN(MATCH_NOMATCH);
2098 }
2099 GETCHARINCTEST(c, eptr);
2100 switch(c)
2101 {
2102 default: MRRETURN(MATCH_NOMATCH);
2103 case 0x0a: /* LF */
2104 case 0x0b: /* VT */
2105 case 0x0c: /* FF */
2106 case 0x0d: /* CR */
2107 case 0x85: /* NEL */
2108 case 0x2028: /* LINE SEPARATOR */
2109 case 0x2029: /* PARAGRAPH SEPARATOR */
2110 break;
2111 }
2112 ecode++;
2113 break;
2114
2115 #ifdef SUPPORT_UCP
2116 /* Check the next character by Unicode property. We will get here only
2117 if the support is in the binary; otherwise a compile-time error occurs. */
2118
2119 case OP_PROP:
2120 case OP_NOTPROP:
2121 if (eptr >= md->end_subject)
2122 {
2123 SCHECK_PARTIAL();
2124 MRRETURN(MATCH_NOMATCH);
2125 }
2126 GETCHARINCTEST(c, eptr);
2127 {
2128 const ucd_record *prop = GET_UCD(c);
2129
2130 switch(ecode[1])
2131 {
2132 case PT_ANY:
2133 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2134 break;
2135
2136 case PT_LAMP:
2137 if ((prop->chartype == ucp_Lu ||
2138 prop->chartype == ucp_Ll ||
2139 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2140 MRRETURN(MATCH_NOMATCH);
2141 break;
2142
2143 case PT_GC:
2144 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2145 MRRETURN(MATCH_NOMATCH);
2146 break;
2147
2148 case PT_PC:
2149 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2150 MRRETURN(MATCH_NOMATCH);
2151 break;
2152
2153 case PT_SC:
2154 if ((ecode[2] != prop->script) == (op == OP_PROP))
2155 MRRETURN(MATCH_NOMATCH);
2156 break;
2157
2158 /* These are specials */
2159
2160 case PT_ALNUM:
2161 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2162 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2163 MRRETURN(MATCH_NOMATCH);
2164 break;
2165
2166 case PT_SPACE: /* Perl space */
2167 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2168 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2169 == (op == OP_NOTPROP))
2170 MRRETURN(MATCH_NOMATCH);
2171 break;
2172
2173 case PT_PXSPACE: /* POSIX space */
2174 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2175 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2176 c == CHAR_FF || c == CHAR_CR)
2177 == (op == OP_NOTPROP))
2178 MRRETURN(MATCH_NOMATCH);
2179 break;
2180
2181 case PT_WORD:
2182 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2183 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2184 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2185 MRRETURN(MATCH_NOMATCH);
2186 break;
2187
2188 /* This should never occur */
2189
2190 default:
2191 RRETURN(PCRE_ERROR_INTERNAL);
2192 }
2193
2194 ecode += 3;
2195 }
2196 break;
2197
2198 /* Match an extended Unicode sequence. We will get here only if the support
2199 is in the binary; otherwise a compile-time error occurs. */
2200
2201 case OP_EXTUNI:
2202 if (eptr >= md->end_subject)
2203 {
2204 SCHECK_PARTIAL();
2205 MRRETURN(MATCH_NOMATCH);
2206 }
2207 GETCHARINCTEST(c, eptr);
2208 {
2209 int category = UCD_CATEGORY(c);
2210 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2211 while (eptr < md->end_subject)
2212 {
2213 int len = 1;
2214 if (!utf8) c = *eptr; else
2215 {
2216 GETCHARLEN(c, eptr, len);
2217 }
2218 category = UCD_CATEGORY(c);
2219 if (category != ucp_M) break;
2220 eptr += len;
2221 }
2222 }
2223 ecode++;
2224 break;
2225 #endif
2226
2227
2228 /* Match a back reference, possibly repeatedly. Look past the end of the
2229 item to see if there is repeat information following. The code is similar
2230 to that for character classes, but repeated for efficiency. Then obey
2231 similar code to character type repeats - written out again for speed.
2232 However, if the referenced string is the empty string, always treat
2233 it as matched, any number of times (otherwise there could be infinite
2234 loops). */
2235
2236 case OP_REF:
2237 case OP_REFI:
2238 caseless = op == OP_REFI;
2239 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2240 ecode += 3;
2241
2242 /* If the reference is unset, there are two possibilities:
2243
2244 (a) In the default, Perl-compatible state, set the length negative;
2245 this ensures that every attempt at a match fails. We can't just fail
2246 here, because of the possibility of quantifiers with zero minima.
2247
2248 (b) If the JavaScript compatibility flag is set, set the length to zero
2249 so that the back reference matches an empty string.
2250
2251 Otherwise, set the length to the length of what was matched by the
2252 referenced subpattern. */
2253
2254 if (offset >= offset_top || md->offset_vector[offset] < 0)
2255 length = (md->jscript_compat)? 0 : -1;
2256 else
2257 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2258
2259 /* Set up for repetition, or handle the non-repeated case */
2260
2261 switch (*ecode)
2262 {
2263 case OP_CRSTAR:
2264 case OP_CRMINSTAR:
2265 case OP_CRPLUS:
2266 case OP_CRMINPLUS:
2267 case OP_CRQUERY:
2268 case OP_CRMINQUERY:
2269 c = *ecode++ - OP_CRSTAR;
2270 minimize = (c & 1) != 0;
2271 min = rep_min[c]; /* Pick up values from tables; */
2272 max = rep_max[c]; /* zero for max => infinity */
2273 if (max == 0) max = INT_MAX;
2274 break;
2275
2276 case OP_CRRANGE:
2277 case OP_CRMINRANGE:
2278 minimize = (*ecode == OP_CRMINRANGE);
2279 min = GET2(ecode, 1);
2280 max = GET2(ecode, 3);
2281 if (max == 0) max = INT_MAX;
2282 ecode += 5;
2283 break;
2284
2285 default: /* No repeat follows */
2286 if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2287 {
2288 CHECK_PARTIAL();
2289 MRRETURN(MATCH_NOMATCH);
2290 }
2291 eptr += length;
2292 continue; /* With the main loop */
2293 }
2294
2295 /* Handle repeated back references. If the length of the reference is
2296 zero, just continue with the main loop. */
2297
2298 if (length == 0) continue;
2299
2300 /* First, ensure the minimum number of matches are present. We get back
2301 the length of the reference string explicitly rather than passing the
2302 address of eptr, so that eptr can be a register variable. */
2303
2304 for (i = 1; i <= min; i++)
2305 {
2306 int slength;
2307 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2308 {
2309 CHECK_PARTIAL();
2310 MRRETURN(MATCH_NOMATCH);
2311 }
2312 eptr += slength;
2313 }
2314
2315 /* If min = max, continue at the same level without recursion.
2316 They are not both allowed to be zero. */
2317
2318 if (min == max) continue;
2319
2320 /* If minimizing, keep trying and advancing the pointer */
2321
2322 if (minimize)
2323 {
2324 for (fi = min;; fi++)
2325 {
2326 int slength;
2327 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM14);
2328 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2329 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2330 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2331 {
2332 CHECK_PARTIAL();
2333 MRRETURN(MATCH_NOMATCH);
2334 }
2335 eptr += slength;
2336 }
2337 /* Control never gets here */
2338 }
2339
2340 /* If maximizing, find the longest string and work backwards */
2341
2342 else
2343 {
2344 pp = eptr;
2345 for (i = min; i < max; i++)
2346 {
2347 int slength;
2348 if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2349 {
2350 CHECK_PARTIAL();
2351 break;
2352 }
2353 eptr += slength;
2354 }
2355 while (eptr >= pp)
2356 {
2357 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM15);
2358 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2359 eptr -= length;
2360 }
2361 MRRETURN(MATCH_NOMATCH);
2362 }
2363 /* Control never gets here */
2364
2365 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2366 used when all the characters in the class have values in the range 0-255,
2367 and either the matching is caseful, or the characters are in the range
2368 0-127 when UTF-8 processing is enabled. The only difference between
2369 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2370 encountered.
2371
2372 First, look past the end of the item to see if there is repeat information
2373 following. Then obey similar code to character type repeats - written out
2374 again for speed. */
2375
2376 case OP_NCLASS:
2377 case OP_CLASS:
2378 {
2379 data = ecode + 1; /* Save for matching */
2380 ecode += 33; /* Advance past the item */
2381
2382 switch (*ecode)
2383 {
2384 case OP_CRSTAR:
2385 case OP_CRMINSTAR:
2386 case OP_CRPLUS:
2387 case OP_CRMINPLUS:
2388 case OP_CRQUERY:
2389 case OP_CRMINQUERY:
2390 c = *ecode++ - OP_CRSTAR;
2391 minimize = (c & 1) != 0;
2392 min = rep_min[c]; /* Pick up values from tables; */
2393 max = rep_max[c]; /* zero for max => infinity */
2394 if (max == 0) max = INT_MAX;
2395 break;
2396
2397 case OP_CRRANGE:
2398 case OP_CRMINRANGE:
2399 minimize = (*ecode == OP_CRMINRANGE);
2400 min = GET2(ecode, 1);
2401 max = GET2(ecode, 3);
2402 if (max == 0) max = INT_MAX;
2403 ecode += 5;
2404 break;
2405
2406 default: /* No repeat follows */
2407 min = max = 1;
2408 break;
2409 }
2410
2411 /* First, ensure the minimum number of matches are present. */
2412
2413 #ifdef SUPPORT_UTF8
2414 /* UTF-8 mode */
2415 if (utf8)
2416 {
2417 for (i = 1; i <= min; i++)
2418 {
2419 if (eptr >= md->end_subject)
2420 {
2421 SCHECK_PARTIAL();
2422 MRRETURN(MATCH_NOMATCH);
2423 }
2424 GETCHARINC(c, eptr);
2425 if (c > 255)
2426 {
2427 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2428 }
2429 else
2430 {
2431 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2432 }
2433 }
2434 }
2435 else
2436 #endif
2437 /* Not UTF-8 mode */
2438 {
2439 for (i = 1; i <= min; i++)
2440 {
2441 if (eptr >= md->end_subject)
2442 {
2443 SCHECK_PARTIAL();
2444 MRRETURN(MATCH_NOMATCH);
2445 }
2446 c = *eptr++;
2447 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2448 }
2449 }
2450
2451 /* If max == min we can continue with the main loop without the
2452 need to recurse. */
2453
2454 if (min == max) continue;
2455
2456 /* If minimizing, keep testing the rest of the expression and advancing
2457 the pointer while it matches the class. */
2458
2459 if (minimize)
2460 {
2461 #ifdef SUPPORT_UTF8
2462 /* UTF-8 mode */
2463 if (utf8)
2464 {
2465 for (fi = min;; fi++)
2466 {
2467 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM16);
2468 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2469 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2470 if (eptr >= md->end_subject)
2471 {
2472 SCHECK_PARTIAL();
2473 MRRETURN(MATCH_NOMATCH);
2474 }
2475 GETCHARINC(c, eptr);
2476 if (c > 255)
2477 {
2478 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2479 }
2480 else
2481 {
2482 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2483 }
2484 }
2485 }
2486 else
2487 #endif
2488 /* Not UTF-8 mode */
2489 {
2490 for (fi = min;; fi++)
2491 {
2492 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM17);
2493 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2494 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2495 if (eptr >= md->end_subject)
2496 {
2497 SCHECK_PARTIAL();
2498 MRRETURN(MATCH_NOMATCH);
2499 }
2500 c = *eptr++;
2501 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2502 }
2503 }
2504 /* Control never gets here */
2505 }
2506
2507 /* If maximizing, find the longest possible run, then work backwards. */
2508
2509 else
2510 {
2511 pp = eptr;
2512
2513 #ifdef SUPPORT_UTF8
2514 /* UTF-8 mode */
2515 if (utf8)
2516 {
2517 for (i = min; i < max; i++)
2518 {
2519 int len = 1;
2520 if (eptr >= md->end_subject)
2521 {
2522 SCHECK_PARTIAL();
2523 break;
2524 }
2525 GETCHARLEN(c, eptr, len);
2526 if (c > 255)
2527 {
2528 if (op == OP_CLASS) break;
2529 }
2530 else
2531 {
2532 if ((data[c/8] & (1 << (c&7))) == 0) break;
2533 }
2534 eptr += len;
2535 }
2536 for (;;)
2537 {
2538 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM18);
2539 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540 if (eptr-- == pp) break; /* Stop if tried at original pos */
2541 BACKCHAR(eptr);
2542 }
2543 }
2544 else
2545 #endif
2546 /* Not UTF-8 mode */
2547 {
2548 for (i = min; i < max; i++)
2549 {
2550 if (eptr >= md->end_subject)
2551 {
2552 SCHECK_PARTIAL();
2553 break;
2554 }
2555 c = *eptr;
2556 if ((data[c/8] & (1 << (c&7))) == 0) break;
2557 eptr++;
2558 }
2559 while (eptr >= pp)
2560 {
2561 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM19);
2562 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2563 eptr--;
2564 }
2565 }
2566
2567 MRRETURN(MATCH_NOMATCH);
2568 }
2569 }
2570 /* Control never gets here */
2571
2572
2573 /* Match an extended character class. This opcode is encountered only
2574 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2575 mode, because Unicode properties are supported in non-UTF-8 mode. */
2576
2577 #ifdef SUPPORT_UTF8
2578 case OP_XCLASS:
2579 {
2580 data = ecode + 1 + LINK_SIZE; /* Save for matching */
2581 ecode += GET(ecode, 1); /* Advance past the item */
2582
2583 switch (*ecode)
2584 {
2585 case OP_CRSTAR:
2586 case OP_CRMINSTAR:
2587 case OP_CRPLUS:
2588 case OP_CRMINPLUS:
2589 case OP_CRQUERY:
2590 case OP_CRMINQUERY:
2591 c = *ecode++ - OP_CRSTAR;
2592 minimize = (c & 1) != 0;
2593 min = rep_min[c]; /* Pick up values from tables; */
2594 max = rep_max[c]; /* zero for max => infinity */
2595 if (max == 0) max = INT_MAX;
2596 break;
2597
2598 case OP_CRRANGE:
2599 case OP_CRMINRANGE:
2600 minimize = (*ecode == OP_CRMINRANGE);
2601 min = GET2(ecode, 1);
2602 max = GET2(ecode, 3);
2603 if (max == 0) max = INT_MAX;
2604 ecode += 5;
2605 break;
2606
2607 default: /* No repeat follows */
2608 min = max = 1;
2609 break;
2610 }
2611
2612 /* First, ensure the minimum number of matches are present. */
2613
2614 for (i = 1; i <= min; i++)
2615 {
2616 if (eptr >= md->end_subject)
2617 {
2618 SCHECK_PARTIAL();
2619 MRRETURN(MATCH_NOMATCH);
2620 }
2621 GETCHARINCTEST(c, eptr);
2622 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2623 }
2624
2625 /* If max == min we can continue with the main loop without the
2626 need to recurse. */
2627
2628 if (min == max) continue;
2629
2630 /* If minimizing, keep testing the rest of the expression and advancing
2631 the pointer while it matches the class. */
2632
2633 if (minimize)
2634 {
2635 for (fi = min;; fi++)
2636 {
2637 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM20);
2638 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2639 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2640 if (eptr >= md->end_subject)
2641 {
2642 SCHECK_PARTIAL();
2643 MRRETURN(MATCH_NOMATCH);
2644 }
2645 GETCHARINCTEST(c, eptr);
2646 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2647 }
2648 /* Control never gets here */
2649 }
2650
2651 /* If maximizing, find the longest possible run, then work backwards. */
2652
2653 else
2654 {
2655 pp = eptr;
2656 for (i = min; i < max; i++)
2657 {
2658 int len = 1;
2659 if (eptr >= md->end_subject)
2660 {
2661 SCHECK_PARTIAL();
2662 break;
2663 }
2664 GETCHARLENTEST(c, eptr, len);
2665 if (!_pcre_xclass(c, data)) break;
2666 eptr += len;
2667 }
2668 for(;;)
2669 {
2670 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM21);
2671 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2672 if (eptr-- == pp) break; /* Stop if tried at original pos */
2673 if (utf8) BACKCHAR(eptr);
2674 }
2675 MRRETURN(MATCH_NOMATCH);
2676 }
2677
2678 /* Control never gets here */
2679 }
2680 #endif /* End of XCLASS */
2681
2682 /* Match a single character, casefully */
2683
2684 case OP_CHAR:
2685 #ifdef SUPPORT_UTF8
2686 if (utf8)
2687 {
2688 length = 1;
2689 ecode++;
2690 GETCHARLEN(fc, ecode, length);
2691 if (length > md->end_subject - eptr)
2692 {
2693 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2694 MRRETURN(MATCH_NOMATCH);
2695 }
2696 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2697 }
2698 else
2699 #endif
2700
2701 /* Non-UTF-8 mode */
2702 {
2703 if (md->end_subject - eptr < 1)
2704 {
2705 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2706 MRRETURN(MATCH_NOMATCH);
2707 }
2708 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2709 ecode += 2;
2710 }
2711 break;
2712
2713 /* Match a single character, caselessly */
2714
2715 case OP_CHARI:
2716 #ifdef SUPPORT_UTF8
2717 if (utf8)
2718 {
2719 length = 1;
2720 ecode++;
2721 GETCHARLEN(fc, ecode, length);
2722
2723 if (length > md->end_subject - eptr)
2724 {
2725 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2726 MRRETURN(MATCH_NOMATCH);
2727 }
2728
2729 /* If the pattern character's value is < 128, we have only one byte, and
2730 can use the fast lookup table. */
2731
2732 if (fc < 128)
2733 {
2734 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2735 }
2736
2737 /* Otherwise we must pick up the subject character */
2738
2739 else
2740 {
2741 unsigned int dc;
2742 GETCHARINC(dc, eptr);
2743 ecode += length;
2744
2745 /* If we have Unicode property support, we can use it to test the other
2746 case of the character, if there is one. */
2747
2748 if (fc != dc)
2749 {
2750 #ifdef SUPPORT_UCP
2751 if (dc != UCD_OTHERCASE(fc))
2752 #endif
2753 MRRETURN(MATCH_NOMATCH);
2754 }
2755 }
2756 }
2757 else
2758 #endif /* SUPPORT_UTF8 */
2759
2760 /* Non-UTF-8 mode */
2761 {
2762 if (md->end_subject - eptr < 1)
2763 {
2764 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2765 MRRETURN(MATCH_NOMATCH);
2766 }
2767 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2768 ecode += 2;
2769 }
2770 break;
2771
2772 /* Match a single character repeatedly. */
2773
2774 case OP_EXACT:
2775 case OP_EXACTI:
2776 min = max = GET2(ecode, 1);
2777 ecode += 3;
2778 goto REPEATCHAR;
2779
2780 case OP_POSUPTO:
2781 case OP_POSUPTOI:
2782 possessive = TRUE;
2783 /* Fall through */
2784
2785 case OP_UPTO:
2786 case OP_UPTOI:
2787 case OP_MINUPTO:
2788 case OP_MINUPTOI:
2789 min = 0;
2790 max = GET2(ecode, 1);
2791 minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
2792 ecode += 3;
2793 goto REPEATCHAR;
2794
2795 case OP_POSSTAR:
2796 case OP_POSSTARI:
2797 possessive = TRUE;
2798 min = 0;
2799 max = INT_MAX;
2800 ecode++;
2801 goto REPEATCHAR;
2802
2803 case OP_POSPLUS:
2804 case OP_POSPLUSI:
2805 possessive = TRUE;
2806 min = 1;
2807 max = INT_MAX;
2808 ecode++;
2809 goto REPEATCHAR;
2810
2811 case OP_POSQUERY:
2812 case OP_POSQUERYI:
2813 possessive = TRUE;
2814 min = 0;
2815 max = 1;
2816 ecode++;
2817 goto REPEATCHAR;
2818
2819 case OP_STAR:
2820 case OP_STARI:
2821 case OP_MINSTAR:
2822 case OP_MINSTARI:
2823 case OP_PLUS:
2824 case OP_PLUSI:
2825 case OP_MINPLUS:
2826 case OP_MINPLUSI:
2827 case OP_QUERY:
2828 case OP_QUERYI:
2829 case OP_MINQUERY:
2830 case OP_MINQUERYI:
2831 c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
2832 minimize = (c & 1) != 0;
2833 min = rep_min[c]; /* Pick up values from tables; */
2834 max = rep_max[c]; /* zero for max => infinity */
2835 if (max == 0) max = INT_MAX;
2836
2837 /* Common code for all repeated single-character matches. */
2838
2839 REPEATCHAR:
2840 #ifdef SUPPORT_UTF8
2841 if (utf8)
2842 {
2843 length = 1;
2844 charptr = ecode;
2845 GETCHARLEN(fc, ecode, length);
2846 ecode += length;
2847
2848 /* Handle multibyte character matching specially here. There is
2849 support for caseless matching if UCP support is present. */
2850
2851 if (length > 1)
2852 {
2853 #ifdef SUPPORT_UCP
2854 unsigned int othercase;
2855 if (op >= OP_STARI && /* Caseless */
2856 (othercase = UCD_OTHERCASE(fc)) != fc)
2857 oclength = _pcre_ord2utf8(othercase, occhars);
2858 else oclength = 0;
2859 #endif /* SUPPORT_UCP */
2860
2861 for (i = 1; i <= min; i++)
2862 {
2863 if (eptr <= md->end_subject - length &&
2864 memcmp(eptr, charptr, length) == 0) eptr += length;
2865 #ifdef SUPPORT_UCP
2866 else if (oclength > 0 &&
2867 eptr <= md->end_subject - oclength &&
2868 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2869 #endif /* SUPPORT_UCP */
2870 else
2871 {
2872 CHECK_PARTIAL();
2873 MRRETURN(MATCH_NOMATCH);
2874 }
2875 }
2876
2877 if (min == max) continue;
2878
2879 if (minimize)
2880 {
2881 for (fi = min;; fi++)
2882 {
2883 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM22);
2884 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886 if (eptr <= md->end_subject - length &&
2887 memcmp(eptr, charptr, length) == 0) eptr += length;
2888 #ifdef SUPPORT_UCP
2889 else if (oclength > 0 &&
2890 eptr <= md->end_subject - oclength &&
2891 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2892 #endif /* SUPPORT_UCP */
2893 else
2894 {
2895 CHECK_PARTIAL();
2896 MRRETURN(MATCH_NOMATCH);
2897 }
2898 }
2899 /* Control never gets here */
2900 }
2901
2902 else /* Maximize */
2903 {
2904 pp = eptr;
2905 for (i = min; i < max; i++)
2906 {
2907 if (eptr <= md->end_subject - length &&
2908 memcmp(eptr, charptr, length) == 0) eptr += length;
2909 #ifdef SUPPORT_UCP
2910 else if (oclength > 0 &&
2911 eptr <= md->end_subject - oclength &&
2912 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913 #endif /* SUPPORT_UCP */
2914 else
2915 {
2916 CHECK_PARTIAL();
2917 break;
2918 }
2919 }
2920
2921 if (possessive) continue;
2922
2923 for(;;)
2924 {
2925 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM23);
2926 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928 #ifdef SUPPORT_UCP
2929 eptr--;
2930 BACKCHAR(eptr);
2931 #else /* without SUPPORT_UCP */
2932 eptr -= length;
2933 #endif /* SUPPORT_UCP */
2934 }
2935 }
2936 /* Control never gets here */
2937 }
2938
2939 /* If the length of a UTF-8 character is 1, we fall through here, and
2940 obey the code as for non-UTF-8 characters below, though in this case the
2941 value of fc will always be < 128. */
2942 }
2943 else
2944 #endif /* SUPPORT_UTF8 */
2945
2946 /* When not in UTF-8 mode, load a single-byte character. */
2947
2948 fc = *ecode++;
2949
2950 /* The value of fc at this point is always less than 256, though we may or
2951 may not be in UTF-8 mode. The code is duplicated for the caseless and
2952 caseful cases, for speed, since matching characters is likely to be quite
2953 common. First, ensure the minimum number of matches are present. If min =
2954 max, continue at the same level without recursing. Otherwise, if
2955 minimizing, keep trying the rest of the expression and advancing one
2956 matching character if failing, up to the maximum. Alternatively, if
2957 maximizing, find the maximum number of characters and work backwards. */
2958
2959 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2960 max, eptr));
2961
2962 if (op >= OP_STARI) /* Caseless */
2963 {
2964 fc = md->lcc[fc];
2965 for (i = 1; i <= min; i++)
2966 {
2967 if (eptr >= md->end_subject)
2968 {
2969 SCHECK_PARTIAL();
2970 MRRETURN(MATCH_NOMATCH);
2971 }
2972 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2973 }
2974 if (min == max) continue;
2975 if (minimize)
2976 {
2977 for (fi = min;; fi++)
2978 {
2979 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM24);
2980 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982 if (eptr >= md->end_subject)
2983 {
2984 SCHECK_PARTIAL();
2985 MRRETURN(MATCH_NOMATCH);
2986 }
2987 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988 }
2989 /* Control never gets here */
2990 }
2991 else /* Maximize */
2992 {
2993 pp = eptr;
2994 for (i = min; i < max; i++)
2995 {
2996 if (eptr >= md->end_subject)
2997 {
2998 SCHECK_PARTIAL();
2999 break;
3000 }
3001 if (fc != md->lcc[*eptr]) break;
3002 eptr++;
3003 }
3004
3005 if (possessive) continue;
3006
3007 while (eptr >= pp)
3008 {
3009 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM25);
3010 eptr--;
3011 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012 }
3013 MRRETURN(MATCH_NOMATCH);
3014 }
3015 /* Control never gets here */
3016 }
3017
3018 /* Caseful comparisons (includes all multi-byte characters) */
3019
3020 else
3021 {
3022 for (i = 1; i <= min; i++)
3023 {
3024 if (eptr >= md->end_subject)
3025 {
3026 SCHECK_PARTIAL();
3027 MRRETURN(MATCH_NOMATCH);
3028 }
3029 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3030 }
3031
3032 if (min == max) continue;
3033
3034 if (minimize)
3035 {
3036 for (fi = min;; fi++)
3037 {
3038 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM26);
3039 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041 if (eptr >= md->end_subject)
3042 {
3043 SCHECK_PARTIAL();
3044 MRRETURN(MATCH_NOMATCH);
3045 }
3046 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3047 }
3048 /* Control never gets here */
3049 }
3050 else /* Maximize */
3051 {
3052 pp = eptr;
3053 for (i = min; i < max; i++)
3054 {
3055 if (eptr >= md->end_subject)
3056 {
3057 SCHECK_PARTIAL();
3058 break;
3059 }
3060 if (fc != *eptr) break;
3061 eptr++;
3062 }
3063 if (possessive) continue;
3064
3065 while (eptr >= pp)
3066 {
3067 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM27);
3068 eptr--;
3069 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070 }
3071 MRRETURN(MATCH_NOMATCH);
3072 }
3073 }
3074 /* Control never gets here */
3075
3076 /* Match a negated single one-byte character. The character we are
3077 checking can be multibyte. */
3078
3079 case OP_NOT:
3080 case OP_NOTI:
3081 if (eptr >= md->end_subject)
3082 {
3083 SCHECK_PARTIAL();
3084 MRRETURN(MATCH_NOMATCH);
3085 }
3086 ecode++;
3087 GETCHARINCTEST(c, eptr);
3088 if (op == OP_NOTI) /* The caseless case */
3089 {
3090 #ifdef SUPPORT_UTF8
3091 if (c < 256)
3092 #endif
3093 c = md->lcc[c];
3094 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3095 }
3096 else /* Caseful */
3097 {
3098 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3099 }
3100 break;
3101
3102 /* Match a negated single one-byte character repeatedly. This is almost a
3103 repeat of the code for a repeated single character, but I haven't found a
3104 nice way of commoning these up that doesn't require a test of the
3105 positive/negative option for each character match. Maybe that wouldn't add
3106 very much to the time taken, but character matching *is* what this is all
3107 about... */
3108
3109 case OP_NOTEXACT:
3110 case OP_NOTEXACTI:
3111 min = max = GET2(ecode, 1);
3112 ecode += 3;
3113 goto REPEATNOTCHAR;
3114
3115 case OP_NOTUPTO:
3116 case OP_NOTUPTOI:
3117 case OP_NOTMINUPTO:
3118 case OP_NOTMINUPTOI:
3119 min = 0;
3120 max = GET2(ecode, 1);
3121 minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3122 ecode += 3;
3123 goto REPEATNOTCHAR;
3124
3125 case OP_NOTPOSSTAR:
3126 case OP_NOTPOSSTARI:
3127 possessive = TRUE;
3128 min = 0;
3129 max = INT_MAX;
3130 ecode++;
3131 goto REPEATNOTCHAR;
3132
3133 case OP_NOTPOSPLUS:
3134 case OP_NOTPOSPLUSI:
3135 possessive = TRUE;
3136 min = 1;
3137 max = INT_MAX;
3138 ecode++;
3139 goto REPEATNOTCHAR;
3140
3141 case OP_NOTPOSQUERY:
3142 case OP_NOTPOSQUERYI:
3143 possessive = TRUE;
3144 min = 0;
3145 max = 1;
3146 ecode++;
3147 goto REPEATNOTCHAR;
3148
3149 case OP_NOTPOSUPTO:
3150 case OP_NOTPOSUPTOI:
3151 possessive = TRUE;
3152 min = 0;
3153 max = GET2(ecode, 1);
3154 ecode += 3;
3155 goto REPEATNOTCHAR;
3156
3157 case OP_NOTSTAR:
3158 case OP_NOTSTARI:
3159 case OP_NOTMINSTAR:
3160 case OP_NOTMINSTARI:
3161 case OP_NOTPLUS:
3162 case OP_NOTPLUSI:
3163 case OP_NOTMINPLUS:
3164 case OP_NOTMINPLUSI:
3165 case OP_NOTQUERY:
3166 case OP_NOTQUERYI:
3167 case OP_NOTMINQUERY:
3168 case OP_NOTMINQUERYI:
3169 c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3170 minimize = (c & 1) != 0;
3171 min = rep_min[c]; /* Pick up values from tables; */
3172 max = rep_max[c]; /* zero for max => infinity */
3173 if (max == 0) max = INT_MAX;
3174
3175 /* Common code for all repeated single-byte matches. */
3176
3177 REPEATNOTCHAR:
3178 fc = *ecode++;
3179
3180 /* The code is duplicated for the caseless and caseful cases, for speed,
3181 since matching characters is likely to be quite common. First, ensure the
3182 minimum number of matches are present. If min = max, continue at the same
3183 level without recursing. Otherwise, if minimizing, keep trying the rest of
3184 the expression and advancing one matching character if failing, up to the
3185 maximum. Alternatively, if maximizing, find the maximum number of
3186 characters and work backwards. */
3187
3188 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3189 max, eptr));
3190
3191 if (op >= OP_NOTSTARI) /* Caseless */
3192 {
3193 fc = md->lcc[fc];
3194
3195 #ifdef SUPPORT_UTF8
3196 /* UTF-8 mode */
3197 if (utf8)
3198 {
3199 register unsigned int d;
3200 for (i = 1; i <= min; i++)
3201 {
3202 if (eptr >= md->end_subject)
3203 {
3204 SCHECK_PARTIAL();
3205 MRRETURN(MATCH_NOMATCH);
3206 }
3207 GETCHARINC(d, eptr);
3208 if (d < 256) d = md->lcc[d];
3209 if (fc == d) MRRETURN(MATCH_NOMATCH);
3210 }
3211 }
3212 else
3213 #endif
3214
3215 /* Not UTF-8 mode */
3216 {
3217 for (i = 1; i <= min; i++)
3218 {
3219 if (eptr >= md->end_subject)
3220 {
3221 SCHECK_PARTIAL();
3222 MRRETURN(MATCH_NOMATCH);
3223 }
3224 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3225 }
3226 }
3227
3228 if (min == max) continue;
3229
3230 if (minimize)
3231 {
3232 #ifdef SUPPORT_UTF8
3233 /* UTF-8 mode */
3234 if (utf8)
3235 {
3236 register unsigned int d;
3237 for (fi = min;; fi++)
3238 {
3239 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM28);
3240 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3241 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3242 if (eptr >= md->end_subject)
3243 {
3244 SCHECK_PARTIAL();
3245 MRRETURN(MATCH_NOMATCH);
3246 }
3247 GETCHARINC(d, eptr);
3248 if (d < 256) d = md->lcc[d];
3249 if (fc == d) MRRETURN(MATCH_NOMATCH);
3250 }
3251 }
3252 else
3253 #endif
3254 /* Not UTF-8 mode */
3255 {
3256 for (fi = min;; fi++)
3257 {
3258 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM29);
3259 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3260 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3261 if (eptr >= md->end_subject)
3262 {
3263 SCHECK_PARTIAL();
3264 MRRETURN(MATCH_NOMATCH);
3265 }
3266 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3267 }
3268 }
3269 /* Control never gets here */
3270 }
3271
3272 /* Maximize case */
3273
3274 else
3275 {
3276 pp = eptr;
3277
3278 #ifdef SUPPORT_UTF8
3279 /* UTF-8 mode */
3280 if (utf8)
3281 {
3282 register unsigned int d;
3283 for (i = min; i < max; i++)
3284 {
3285 int len = 1;
3286 if (eptr >= md->end_subject)
3287 {
3288 SCHECK_PARTIAL();
3289 break;
3290 }
3291 GETCHARLEN(d, eptr, len);
3292 if (d < 256) d = md->lcc[d];
3293 if (fc == d) break;
3294 eptr += len;
3295 }
3296 if (possessive) continue;
3297 for(;;)
3298 {
3299 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM30);
3300 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301 if (eptr-- == pp) break; /* Stop if tried at original pos */
3302 BACKCHAR(eptr);
3303 }
3304 }
3305 else
3306 #endif
3307 /* Not UTF-8 mode */
3308 {
3309 for (i = min; i < max; i++)
3310 {
3311 if (eptr >= md->end_subject)
3312 {
3313 SCHECK_PARTIAL();
3314 break;
3315 }
3316 if (fc == md->lcc[*eptr]) break;
3317 eptr++;
3318 }
3319 if (possessive) continue;
3320 while (eptr >= pp)
3321 {
3322 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM31);
3323 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3324 eptr--;
3325 }
3326 }
3327
3328 MRRETURN(MATCH_NOMATCH);
3329 }
3330 /* Control never gets here */
3331 }
3332
3333 /* Caseful comparisons */
3334
3335 else
3336 {
3337 #ifdef SUPPORT_UTF8
3338 /* UTF-8 mode */
3339 if (utf8)
3340 {
3341 register unsigned int d;
3342 for (i = 1; i <= min; i++)
3343 {
3344 if (eptr >= md->end_subject)
3345 {
3346 SCHECK_PARTIAL();
3347 MRRETURN(MATCH_NOMATCH);
3348 }
3349 GETCHARINC(d, eptr);
3350 if (fc == d) MRRETURN(MATCH_NOMATCH);
3351 }
3352 }
3353 else
3354 #endif
3355 /* Not UTF-8 mode */
3356 {
3357 for (i = 1; i <= min; i++)
3358 {
3359 if (eptr >= md->end_subject)
3360 {
3361 SCHECK_PARTIAL();
3362 MRRETURN(MATCH_NOMATCH);
3363 }
3364 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3365 }
3366 }
3367
3368 if (min == max) continue;
3369
3370 if (minimize)
3371 {
3372 #ifdef SUPPORT_UTF8
3373 /* UTF-8 mode */
3374 if (utf8)
3375 {
3376 register unsigned int d;
3377 for (fi = min;; fi++)
3378 {
3379 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM32);
3380 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3381 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3382 if (eptr >= md->end_subject)
3383 {
3384 SCHECK_PARTIAL();
3385 MRRETURN(MATCH_NOMATCH);
3386 }
3387 GETCHARINC(d, eptr);
3388 if (fc == d) MRRETURN(MATCH_NOMATCH);
3389 }
3390 }
3391 else
3392 #endif
3393 /* Not UTF-8 mode */
3394 {
3395 for (fi = min;; fi++)
3396 {
3397 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM33);
3398 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3399 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3400 if (eptr >= md->end_subject)
3401 {
3402 SCHECK_PARTIAL();
3403 MRRETURN(MATCH_NOMATCH);
3404 }
3405 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3406 }
3407 }
3408 /* Control never gets here */
3409 }
3410
3411 /* Maximize case */
3412
3413 else
3414 {
3415 pp = eptr;
3416
3417 #ifdef SUPPORT_UTF8
3418 /* UTF-8 mode */
3419 if (utf8)
3420 {
3421 register unsigned int d;
3422 for (i = min; i < max; i++)
3423 {
3424 int len = 1;
3425 if (eptr >= md->end_subject)
3426 {
3427 SCHECK_PARTIAL();
3428 break;
3429 }
3430 GETCHARLEN(d, eptr, len);
3431 if (fc == d) break;
3432 eptr += len;
3433 }
3434 if (possessive) continue;
3435 for(;;)
3436 {
3437 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM34);
3438 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3439 if (eptr-- == pp) break; /* Stop if tried at original pos */
3440 BACKCHAR(eptr);
3441 }
3442 }
3443 else
3444 #endif
3445 /* Not UTF-8 mode */
3446 {
3447 for (i = min; i < max; i++)
3448 {
3449 if (eptr >= md->end_subject)
3450 {
3451 SCHECK_PARTIAL();
3452 break;
3453 }
3454 if (fc == *eptr) break;
3455 eptr++;
3456 }
3457 if (possessive) continue;
3458 while (eptr >= pp)
3459 {
3460 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM35);
3461 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462 eptr--;
3463 }
3464 }
3465
3466 MRRETURN(MATCH_NOMATCH);
3467 }
3468 }
3469 /* Control never gets here */
3470
3471 /* Match a single character type repeatedly; several different opcodes
3472 share code. This is very similar to the code for single characters, but we
3473 repeat it in the interests of efficiency. */
3474
3475 case OP_TYPEEXACT:
3476 min = max = GET2(ecode, 1);
3477 minimize = TRUE;
3478 ecode += 3;
3479 goto REPEATTYPE;
3480
3481 case OP_TYPEUPTO:
3482 case OP_TYPEMINUPTO:
3483 min = 0;
3484 max = GET2(ecode, 1);
3485 minimize = *ecode == OP_TYPEMINUPTO;
3486 ecode += 3;
3487 goto REPEATTYPE;
3488
3489 case OP_TYPEPOSSTAR:
3490 possessive = TRUE;
3491 min = 0;
3492 max = INT_MAX;
3493 ecode++;
3494 goto REPEATTYPE;
3495
3496 case OP_TYPEPOSPLUS:
3497 possessive = TRUE;
3498 min = 1;
3499 max = INT_MAX;
3500 ecode++;
3501 goto REPEATTYPE;
3502
3503 case OP_TYPEPOSQUERY:
3504 possessive = TRUE;
3505 min = 0;
3506 max = 1;
3507 ecode++;
3508 goto REPEATTYPE;
3509
3510 case OP_TYPEPOSUPTO:
3511 possessive = TRUE;
3512 min = 0;
3513 max = GET2(ecode, 1);
3514 ecode += 3;
3515 goto REPEATTYPE;
3516
3517 case OP_TYPESTAR:
3518 case OP_TYPEMINSTAR:
3519 case OP_TYPEPLUS:
3520 case OP_TYPEMINPLUS:
3521 case OP_TYPEQUERY:
3522 case OP_TYPEMINQUERY:
3523 c = *ecode++ - OP_TYPESTAR;
3524 minimize = (c & 1) != 0;
3525 min = rep_min[c]; /* Pick up values from tables; */
3526 max = rep_max[c]; /* zero for max => infinity */
3527 if (max == 0) max = INT_MAX;
3528
3529 /* Common code for all repeated single character type matches. Note that
3530 in UTF-8 mode, '.' and the negated types can match a character of any
3531 length, as can \h, \v and \R; \d, \s and \w match only one-byte characters. */
3532
3533 REPEATTYPE:
3534 ctype = *ecode++; /* Code for the character type */
3535
3536 #ifdef SUPPORT_UCP
3537 if (ctype == OP_PROP || ctype == OP_NOTPROP)
3538 {
3539 prop_fail_result = ctype == OP_NOTPROP;
3540 prop_type = *ecode++;
3541 prop_value = *ecode++;
3542 }
3543 else prop_type = -1;
3544 #endif
3545
3546 /* First, ensure the minimum number of matches are present. Use inline
3547 code for maximizing the speed, and do the type test once at the start
3548 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3549 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3550 and single-bytes. */
3551
3552 if (min > 0)
3553 {
3554 #ifdef SUPPORT_UCP
3555 if (prop_type >= 0)
3556 {
3557 switch(prop_type)
3558 {
3559 case PT_ANY:
3560 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3561 for (i = 1; i <= min; i++)
3562 {
3563 if (eptr >= md->end_subject)
3564 {
3565 SCHECK_PARTIAL();
3566 MRRETURN(MATCH_NOMATCH);
3567 }
3568 GETCHARINCTEST(c, eptr);
3569 }
3570 break;
3571
3572 case PT_LAMP:
3573 for (i = 1; i <= min; i++)
3574 {
3575 if (eptr >= md->end_subject)
3576 {
3577 SCHECK_PARTIAL();
3578 MRRETURN(MATCH_NOMATCH);
3579 }
3580 GETCHARINCTEST(c, eptr);
3581 prop_chartype = UCD_CHARTYPE(c);
3582 if ((prop_chartype == ucp_Lu ||
3583 prop_chartype == ucp_Ll ||
3584 prop_chartype == ucp_Lt) == prop_fail_result)
3585 MRRETURN(MATCH_NOMATCH);
3586 }
3587 break;
3588
3589 case PT_GC:
3590 for (i = 1; i <= min; i++)
3591 {
3592 if (eptr >= md->end_subject)
3593 {
3594 SCHECK_PARTIAL();
3595 MRRETURN(MATCH_NOMATCH);
3596 }
3597 GETCHARINCTEST(c, eptr);
3598 prop_category = UCD_CATEGORY(c);
3599 if ((prop_category == prop_value) == prop_fail_result)
3600 MRRETURN(MATCH_NOMATCH);
3601 }
3602 break;
3603
3604 case PT_PC:
3605 for (i = 1; i <= min; i++)
3606 {
3607 if (eptr >= md->end_subject)
3608 {
3609 SCHECK_PARTIAL();
3610 MRRETURN(MATCH_NOMATCH);
3611 }
3612 GETCHARINCTEST(c, eptr);
3613 prop_chartype = UCD_CHARTYPE(c);
3614 if ((prop_chartype == prop_value) == prop_fail_result)
3615 MRRETURN(MATCH_NOMATCH);
3616 }
3617 break;
3618
3619 case PT_SC:
3620 for (i = 1; i <= min; i++)
3621 {
3622 if (eptr >= md->end_subject)
3623 {
3624 SCHECK_PARTIAL();
3625 MRRETURN(MATCH_NOMATCH);
3626 }
3627 GETCHARINCTEST(c, eptr);
3628 prop_script = UCD_SCRIPT(c);
3629 if ((prop_script == prop_value) == prop_fail_result)
3630 MRRETURN(MATCH_NOMATCH);
3631 }
3632 break;
3633
3634 case PT_ALNUM:
3635 for (i = 1; i <= min; i++)
3636 {
3637 if (eptr >= md->end_subject)
3638 {
3639 SCHECK_PARTIAL();
3640 MRRETURN(MATCH_NOMATCH);
3641 }
3642 GETCHARINCTEST(c, eptr);
3643 prop_category = UCD_CATEGORY(c);
3644 if ((prop_category == ucp_L || prop_category == ucp_N)
3645 == prop_fail_result)
3646 MRRETURN(MATCH_NOMATCH);
3647 }
3648 break;
3649
3650 case PT_SPACE: /* Perl space */
3651 for (i = 1; i <= min; i++)
3652 {
3653 if (eptr >= md->end_subject)
3654 {
3655 SCHECK_PARTIAL();
3656 MRRETURN(MATCH_NOMATCH);
3657 }
3658 GETCHARINCTEST(c, eptr);
3659 prop_category = UCD_CATEGORY(c);
3660 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3661 c == CHAR_FF || c == CHAR_CR)
3662 == prop_fail_result)
3663 MRRETURN(MATCH_NOMATCH);
3664 }
3665 break;
3666
3667 case PT_PXSPACE: /* POSIX space */
3668 for (i = 1; i <= min; i++)
3669 {
3670 if (eptr >= md->end_subject)
3671 {
3672 SCHECK_PARTIAL();
3673 MRRETURN(MATCH_NOMATCH);
3674 }
3675 GETCHARINCTEST(c, eptr);
3676 prop_category = UCD_CATEGORY(c);
3677 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3678 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3679 == prop_fail_result)
3680 MRRETURN(MATCH_NOMATCH);
3681 }
3682 break;
3683
3684 case PT_WORD:
3685 for (i = 1; i <= min; i++)
3686 {
3687 if (eptr >= md->end_subject)
3688 {
3689 SCHECK_PARTIAL();
3690 MRRETURN(MATCH_NOMATCH);
3691 }
3692 GETCHARINCTEST(c, eptr);
3693 prop_category = UCD_CATEGORY(c);
3694 if ((prop_category == ucp_L || prop_category == ucp_N ||
3695 c == CHAR_UNDERSCORE)
3696 == prop_fail_result)
3697 MRRETURN(MATCH_NOMATCH);
3698 }
3699 break;
3700
3701 /* This should not occur */
3702
3703 default:
3704 RRETURN(PCRE_ERROR_INTERNAL);
3705 }
3706 }
3707
3708 /* Match extended Unicode sequences. We will get here only if the
3709 support is in the binary; otherwise a compile-time error occurs. */
3710
3711 else if (ctype == OP_EXTUNI)
3712 {
3713 for (i = 1; i <= min; i++)
3714 {
3715 if (eptr >= md->end_subject)
3716 {
3717 SCHECK_PARTIAL();
3718 MRRETURN(MATCH_NOMATCH);
3719 }
3720 GETCHARINCTEST(c, eptr);
3721 prop_category = UCD_CATEGORY(c);
3722 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3723 while (eptr < md->end_subject)
3724 {
3725 int len = 1;
3726 if (!utf8) c = *eptr;
3727 else { GETCHARLEN(c, eptr, len); }
3728 prop_category = UCD_CATEGORY(c);
3729 if (prop_category != ucp_M) break;
3730 eptr += len;
3731 }
3732 }
3733 }
3734
3735 else
3736 #endif /* SUPPORT_UCP */
3737
3738 /* Handle all other cases when the coding is UTF-8 */
3739
3740 #ifdef SUPPORT_UTF8
3741 if (utf8) switch(ctype)
3742 {
3743 case OP_ANY:
3744 for (i = 1; i <= min; i++)
3745 {
3746 if (eptr >= md->end_subject)
3747 {
3748 SCHECK_PARTIAL();
3749 MRRETURN(MATCH_NOMATCH);
3750 }
3751 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3752 eptr++;
3753 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3754 }
3755 break;
3756
3757 case OP_ALLANY:
3758 for (i = 1; i <= min; i++)
3759 {
3760 if (eptr >= md->end_subject)
3761 {
3762 SCHECK_PARTIAL();
3763 MRRETURN(MATCH_NOMATCH);
3764 }
3765 eptr++;
3766 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3767 }
3768 break;
3769
3770 case OP_ANYBYTE:
3771 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3772 eptr += min;
3773 break;
3774
3775 case OP_ANYNL:
3776 for (i = 1; i <= min; i++)
3777 {
3778 if (eptr >= md->end_subject)
3779 {
3780 SCHECK_PARTIAL();
3781 MRRETURN(MATCH_NOMATCH);
3782 }
3783 GETCHARINC(c, eptr);
3784 switch(c)
3785 {
3786 default: MRRETURN(MATCH_NOMATCH);
3787
3788 case 0x000d:
3789 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790 break;
3791
3792 case 0x000a:
3793 break;
3794
3795 case 0x000b:
3796 case 0x000c:
3797 case 0x0085:
3798 case 0x2028:
3799 case 0x2029:
3800 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3801 break;
3802 }
3803 }
3804 break;
3805
3806 case OP_NOT_HSPACE:
3807 for (i = 1; i <= min; i++)
3808 {
3809 if (eptr >= md->end_subject)
3810 {
3811 SCHECK_PARTIAL();
3812 MRRETURN(MATCH_NOMATCH);
3813 }
3814 GETCHARINC(c, eptr);
3815 switch(c)
3816 {
3817 default: break;
3818 case 0x09: /* HT */
3819 case 0x20: /* SPACE */
3820 case 0xa0: /* NBSP */
3821 case 0x1680: /* OGHAM SPACE MARK */
3822 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3823 case 0x2000: /* EN QUAD */
3824 case 0x2001: /* EM QUAD */
3825 case 0x2002: /* EN SPACE */
3826 case 0x2003: /* EM SPACE */
3827 case 0x2004: /* THREE-PER-EM SPACE */
3828 case 0x2005: /* FOUR-PER-EM SPACE */
3829 case 0x2006: /* SIX-PER-EM SPACE */
3830 case 0x2007: /* FIGURE SPACE */
3831 case 0x2008: /* PUNCTUATION SPACE */
3832 case 0x2009: /* THIN SPACE */
3833 case 0x200A: /* HAIR SPACE */
3834 case 0x202f: /* NARROW NO-BREAK SPACE */
3835 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3836 case 0x3000: /* IDEOGRAPHIC SPACE */
3837 MRRETURN(MATCH_NOMATCH);
3838 }
3839 }
3840 break;
3841
3842 case OP_HSPACE:
3843 for (i = 1; i <= min; i++)
3844 {
3845 if (eptr >= md->end_subject)
3846 {
3847 SCHECK_PARTIAL();
3848 MRRETURN(MATCH_NOMATCH);
3849 }
3850 GETCHARINC(c, eptr);
3851 switch(c)
3852 {
3853 default: MRRETURN(MATCH_NOMATCH);
3854 case 0x09: /* HT */
3855 case 0x20: /* SPACE */
3856 case 0xa0: /* NBSP */
3857 case 0x1680: /* OGHAM SPACE MARK */
3858 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3859 case 0x2000: /* EN QUAD */
3860 case 0x2001: /* EM QUAD */
3861 case 0x2002: /* EN SPACE */
3862 case 0x2003: /* EM SPACE */
3863 case 0x2004: /* THREE-PER-EM SPACE */
3864 case 0x2005: /* FOUR-PER-EM SPACE */
3865 case 0x2006: /* SIX-PER-EM SPACE */
3866 case 0x2007: /* FIGURE SPACE */
3867 case 0x2008: /* PUNCTUATION SPACE */
3868 case 0x2009: /* THIN SPACE */
3869 case 0x200A: /* HAIR SPACE */
3870 case 0x202f: /* NARROW NO-BREAK SPACE */
3871 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3872 case 0x3000: /* IDEOGRAPHIC SPACE */
3873 break;
3874 }
3875 }
3876 break;
3877
3878 case OP_NOT_VSPACE:
3879 for (i = 1; i <= min; i++)
3880 {
3881 if (eptr >= md->end_subject)
3882 {
3883 SCHECK_PARTIAL();
3884 MRRETURN(MATCH_NOMATCH);
3885 }
3886 GETCHARINC(c, eptr);
3887 switch(c)
3888 {
3889 default: break;
3890 case 0x0a: /* LF */
3891 case 0x0b: /* VT */
3892 case 0x0c: /* FF */
3893 case 0x0d: /* CR */
3894 case 0x85: /* NEL */
3895 case 0x2028: /* LINE SEPARATOR */
3896 case 0x2029: /* PARAGRAPH SEPARATOR */
3897 MRRETURN(MATCH_NOMATCH);
3898 }
3899 }
3900 break;
3901
3902 case OP_VSPACE:
3903 for (i = 1; i <= min; i++)
3904 {
3905 if (eptr >= md->end_subject)
3906 {
3907 SCHECK_PARTIAL();
3908 MRRETURN(MATCH_NOMATCH);
3909 }
3910 GETCHARINC(c, eptr);
3911 switch(c)
3912 {
3913 default: MRRETURN(MATCH_NOMATCH);
3914 case 0x0a: /* LF */
3915 case 0x0b: /* VT */
3916 case 0x0c: /* FF */
3917 case 0x0d: /* CR */
3918 case 0x85: /* NEL */
3919 case 0x2028: /* LINE SEPARATOR */
3920 case 0x2029: /* PARAGRAPH SEPARATOR */
3921 break;
3922 }
3923 }
3924 break;
3925
3926 case OP_NOT_DIGIT:
3927 for (i = 1; i <= min; i++)
3928 {
3929 if (eptr >= md->end_subject)
3930 {
3931 SCHECK_PARTIAL();
3932 MRRETURN(MATCH_NOMATCH);
3933 }
3934 GETCHARINC(c, eptr);
3935 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3936 MRRETURN(MATCH_NOMATCH);
3937 }
3938 break;
3939
3940 case OP_DIGIT:
3941 for (i = 1; i <= min; i++)
3942 {
3943 if (eptr >= md->end_subject)
3944 {
3945 SCHECK_PARTIAL();
3946 MRRETURN(MATCH_NOMATCH);
3947 }
3948 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3949 MRRETURN(MATCH_NOMATCH);
3950 /* No need to skip more bytes - we know it's a 1-byte character */
3951 }
3952 break;
3953
3954 case OP_NOT_WHITESPACE:
3955 for (i = 1; i <= min; i++)
3956 {
3957 if (eptr >= md->end_subject)
3958 {
3959 SCHECK_PARTIAL();
3960 MRRETURN(MATCH_NOMATCH);
3961 }
3962 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3963 MRRETURN(MATCH_NOMATCH);
3964 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3965 }
3966 break;
3967
3968 case OP_WHITESPACE:
3969 for (i = 1; i <= min; i++)
3970 {
3971 if (eptr >= md->end_subject)
3972 {
3973 SCHECK_PARTIAL();
3974 MRRETURN(MATCH_NOMATCH);
3975 }
3976 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3977 MRRETURN(MATCH_NOMATCH);
3978 /* No need to skip more bytes - we know it's a 1-byte character */
3979 }
3980 break;
3981
3982 case OP_NOT_WORDCHAR:
3983 for (i = 1; i <= min; i++)
3984 {
3985 if (eptr >= md->end_subject)
3986 {
3987 SCHECK_PARTIAL();
3988 MRRETURN(MATCH_NOMATCH);
3989 }
3990 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3991 MRRETURN(MATCH_NOMATCH);
3992 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3993 }
3994 break;
3995
3996 case OP_WORDCHAR:
3997 for (i = 1; i <= min; i++)
3998 {
3999 if (eptr >= md->end_subject)
4000 {
4001 SCHECK_PARTIAL();
4002 MRRETURN(MATCH_NOMATCH);
4003 }
4004 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
4005 MRRETURN(MATCH_NOMATCH);
4006 /* No need to skip more bytes - we know it's a 1-byte character */
4007 }
4008 break;
4009
4010 default:
4011 RRETURN(PCRE_ERROR_INTERNAL);
4012 } /* End switch(ctype) */
4013
4014 else
4015 #endif /* SUPPORT_UTF8 */
4016
4017 /* Code for the non-UTF-8 case for minimum matching of operators other
4018 than OP_PROP and OP_NOTPROP. */
4019
4020 switch(ctype)
4021 {
4022 case OP_ANY:
4023 for (i = 1; i <= min; i++)
4024 {
4025 if (eptr >= md->end_subject)
4026 {
4027 SCHECK_PARTIAL();
4028 MRRETURN(MATCH_NOMATCH);
4029 }
4030 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4031 eptr++;
4032 }
4033 break;
4034
4035 case OP_ALLANY:
4036 if (eptr > md->end_subject - min)
4037 {
4038 SCHECK_PARTIAL();
4039 MRRETURN(MATCH_NOMATCH);
4040 }
4041 eptr += min;
4042 break;
4043
4044 case OP_ANYBYTE:
4045 if (eptr > md->end_subject - min)
4046 {
4047 SCHECK_PARTIAL();
4048 MRRETURN(MATCH_NOMATCH);
4049 }
4050 eptr += min;
4051 break;
4052
4053 case OP_ANYNL:
4054 for (i = 1; i <= min; i++)
4055 {
4056 if (eptr >= md->end_subject)
4057 {
4058 SCHECK_PARTIAL();
4059 MRRETURN(MATCH_NOMATCH);
4060 }
4061 switch(*eptr++)
4062 {
4063 default: MRRETURN(MATCH_NOMATCH);
4064
4065 case 0x000d:
4066 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4067 break;
4068
4069 case 0x000a:
4070 break;
4071
4072 case 0x000b:
4073 case 0x000c:
4074 case 0x0085:
4075 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4076 break;
4077 }
4078 }
4079 break;
4080
4081 case OP_NOT_HSPACE:
4082 for (i = 1; i <= min; i++)
4083 {
4084 if (eptr >= md->end_subject)
4085 {
4086 SCHECK_PARTIAL();
4087 MRRETURN(MATCH_NOMATCH);
4088 }
4089 switch(*eptr++)
4090 {
4091 default: break;
4092 case 0x09: /* HT */
4093 case 0x20: /* SPACE */
4094 case 0xa0: /* NBSP */
4095 MRRETURN(MATCH_NOMATCH);
4096 }
4097 }
4098 break;
4099
4100 case OP_HSPACE:
4101 for (i = 1; i <= min; i++)
4102 {
4103 if (eptr >= md->end_subject)
4104 {
4105 SCHECK_PARTIAL();
4106 MRRETURN(MATCH_NOMATCH);
4107 }
4108 switch(*eptr++)
4109 {
4110 default: MRRETURN(MATCH_NOMATCH);
4111 case 0x09: /* HT */
4112 case 0x20: /* SPACE */
4113 case 0xa0: /* NBSP */
4114 break;
4115 }
4116 }
4117 break;
4118
4119 case OP_NOT_VSPACE:
4120 for (i = 1; i <= min; i++)
4121 {
4122 if (eptr >= md->end_subject)
4123 {
4124 SCHECK_PARTIAL();
4125 MRRETURN(MATCH_NOMATCH);
4126 }
4127 switch(*eptr++)
4128 {
4129 default: break;
4130 case 0x0a: /* LF */
4131 case 0x0b: /* VT */
4132 case 0x0c: /* FF */
4133 case 0x0d: /* CR */
4134 case 0x85: /* NEL */
4135 MRRETURN(MATCH_NOMATCH);
4136 }
4137 }
4138 break;
4139
4140 case OP_VSPACE:
4141 for (i = 1; i <= min; i++)
4142 {
4143 if (eptr >= md->end_subject)
4144 {
4145 SCHECK_PARTIAL();
4146 MRRETURN(MATCH_NOMATCH);
4147 }
4148 switch(*eptr++)
4149 {
4150 default: MRRETURN(MATCH_NOMATCH);
4151 case 0x0a: /* LF */
4152 case 0x0b: /* VT */
4153 case 0x0c: /* FF */
4154 case 0x0d: /* CR */
4155 case 0x85: /* NEL */
4156 break;
4157 }
4158 }
4159 break;
4160
4161 case OP_NOT_DIGIT:
4162 for (i = 1; i <= min; i++)
4163 {
4164 if (eptr >= md->end_subject)
4165 {
4166 SCHECK_PARTIAL();
4167 MRRETURN(MATCH_NOMATCH);
4168 }
4169 if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4170 }
4171 break;
4172
4173 case OP_DIGIT:
4174 for (i = 1; i <= min; i++)
4175 {
4176 if (eptr >= md->end_subject)
4177 {
4178 SCHECK_PARTIAL();
4179 MRRETURN(MATCH_NOMATCH);
4180 }
4181 if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4182 }
4183 break;
4184
4185 case OP_NOT_WHITESPACE:
4186 for (i = 1; i <= min; i++)
4187 {
4188 if (eptr >= md->end_subject)
4189 {
4190 SCHECK_PARTIAL();
4191 MRRETURN(MATCH_NOMATCH);
4192 }
4193 if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4194 }
4195 break;
4196
4197 case OP_WHITESPACE:
4198 for (i = 1; i <= min; i++)
4199 {
4200 if (eptr >= md->end_subject)
4201 {
4202 SCHECK_PARTIAL();
4203 MRRETURN(MATCH_NOMATCH);
4204 }
4205 if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4206 }
4207 break;
4208
4209 case OP_NOT_WORDCHAR:
4210 for (i = 1; i <= min; i++)
4211 {
4212 if (eptr >= md->end_subject)
4213 {
4214 SCHECK_PARTIAL();
4215 MRRETURN(MATCH_NOMATCH);
4216 }
4217 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4218 MRRETURN(MATCH_NOMATCH);
4219 }
4220 break;
4221
4222 case OP_WORDCHAR:
4223 for (i = 1; i <= min; i++)
4224 {
4225 if (eptr >= md->end_subject)
4226 {
4227 SCHECK_PARTIAL();
4228 MRRETURN(MATCH_NOMATCH);
4229 }
4230 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4231 MRRETURN(MATCH_NOMATCH);
4232 }
4233 break;
4234
4235 default:
4236 RRETURN(PCRE_ERROR_INTERNAL);
4237 }
4238 }
4239
4240 /* If min = max, continue at the same level without recursing */
4241
4242 if (min == max) continue;
4243
4244 /* If minimizing, we have to test the rest of the pattern before each
4245 subsequent match. Again, separate the UTF-8 case for speed, and also
4246 separate the UCP cases. */
4247
4248 if (minimize)
4249 {
4250 #ifdef SUPPORT_UCP
4251 if (prop_type >= 0)
4252 {
4253 switch(prop_type)
4254 {
4255 case PT_ANY:
4256 for (fi = min;; fi++)
4257 {
4258 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM36);
4259 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4260 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4261 if (eptr >= md->end_subject)
4262 {
4263 SCHECK_PARTIAL();
4264 MRRETURN(MATCH_NOMATCH);
4265 }
4266 GETCHARINCTEST(c, eptr);
4267 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4268 }
4269 /* Control never gets here */
4270
4271 case PT_LAMP:
4272 for (fi = min;; fi++)
4273 {
4274 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM37);
4275 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4276 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4277 if (eptr >= md->end_subject)
4278 {
4279 SCHECK_PARTIAL();
4280 MRRETURN(MATCH_NOMATCH);
4281 }
4282 GETCHARINCTEST(c, eptr);
4283 prop_chartype = UCD_CHARTYPE(c);
4284 if ((prop_chartype == ucp_Lu ||
4285 prop_chartype == ucp_Ll ||
4286 prop_chartype == ucp_Lt) == prop_fail_result)
4287 MRRETURN(MATCH_NOMATCH);
4288 }
4289 /* Control never gets here */
4290
4291 case PT_GC:
4292 for (fi = min;; fi++)
4293 {
4294 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM38);
4295 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4296 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4297 if (eptr >= md->end_subject)
4298 {
4299 SCHECK_PARTIAL();
4300 MRRETURN(MATCH_NOMATCH);
4301 }
4302 GETCHARINCTEST(c, eptr);
4303 prop_category = UCD_CATEGORY(c);
4304 if ((prop_category == prop_value) == prop_fail_result)
4305 MRRETURN(MATCH_NOMATCH);
4306 }
4307 /* Control never gets here */
4308
4309 case PT_PC:
4310 for (fi = min;; fi++)
4311 {
4312 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM39);
4313 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4314 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4315 if (eptr >= md->end_subject)
4316 {
4317 SCHECK_PARTIAL();
4318 MRRETURN(MATCH_NOMATCH);
4319 }
4320 GETCHARINCTEST(c, eptr);
4321 prop_chartype = UCD_CHARTYPE(c);
4322 if ((prop_chartype == prop_value) == prop_fail_result)
4323 MRRETURN(MATCH_NOMATCH);
4324 }
4325 /* Control never gets here */
4326
4327 case PT_SC:
4328 for (fi = min;; fi++)
4329 {
4330 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM40);
4331 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4332 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4333 if (eptr >= md->end_subject)
4334 {
4335 SCHECK_PARTIAL();
4336 MRRETURN(MATCH_NOMATCH);
4337 }
4338 GETCHARINCTEST(c, eptr);
4339 prop_script = UCD_SCRIPT(c);
4340 if ((prop_script == prop_value) == prop_fail_result)
4341 MRRETURN(MATCH_NOMATCH);
4342 }
4343 /* Control never gets here */
4344
4345 case PT_ALNUM:
4346 for (fi = min;; fi++)
4347 {
4348 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM59);
4349 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4350 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4351 if (eptr >= md->end_subject)
4352 {
4353 SCHECK_PARTIAL();
4354 MRRETURN(MATCH_NOMATCH);
4355 }
4356 GETCHARINCTEST(c, eptr);
4357 prop_category = UCD_CATEGORY(c);
4358 if ((prop_category == ucp_L || prop_category == ucp_N)
4359 == prop_fail_result)
4360 MRRETURN(MATCH_NOMATCH);
4361 }
4362 /* Control never gets here */
4363
4364 case PT_SPACE: /* Perl space */
4365 for (fi = min;; fi++)
4366 {
4367 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM60);
4368 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4369 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4370 if (eptr >= md->end_subject)
4371 {
4372 SCHECK_PARTIAL();
4373 MRRETURN(MATCH_NOMATCH);
4374 }
4375 GETCHARINCTEST(c, eptr);
4376 prop_category = UCD_CATEGORY(c);
4377 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4378 c == CHAR_FF || c == CHAR_CR)
4379 == prop_fail_result)
4380 MRRETURN(MATCH_NOMATCH);
4381 }
4382 /* Control never gets here */
4383
4384 case PT_PXSPACE: /* POSIX space */
4385 for (fi = min;; fi++)
4386 {
4387 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM61);
4388 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4389 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4390 if (eptr >= md->end_subject)
4391 {
4392 SCHECK_PARTIAL();
4393 MRRETURN(MATCH_NOMATCH);
4394 }
4395 GETCHARINCTEST(c, eptr);
4396 prop_category = UCD_CATEGORY(c);
4397 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4398 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4399 == prop_fail_result)
4400 MRRETURN(MATCH_NOMATCH);
4401 }
4402 /* Control never gets here */
4403
4404 case PT_WORD:
4405 for (fi = min;; fi++)
4406 {
4407 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM62);
4408 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4409 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4410 if (eptr >= md->end_subject)
4411 {
4412 SCHECK_PARTIAL();
4413 MRRETURN(MATCH_NOMATCH);
4414 }
4415 GETCHARINCTEST(c, eptr);
4416 prop_category = UCD_CATEGORY(c);
4417 if ((prop_category == ucp_L ||
4418 prop_category == ucp_N ||
4419 c == CHAR_UNDERSCORE)
4420 == prop_fail_result)
4421 MRRETURN(MATCH_NOMATCH);
4422 }
4423 /* Control never gets here */
4424
4425 /* This should never occur */
4426
4427 default:
4428 RRETURN(PCRE_ERROR_INTERNAL);
4429 }
4430 }
4431
4432 /* Match extended Unicode sequences. We will get here only if the
4433 support is in the binary; otherwise a compile-time error occurs. */
4434
4435 else if (ctype == OP_EXTUNI)
4436 {
4437 for (fi = min;; fi++)
4438 {
4439 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM41);
4440 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4441 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4442 if (eptr >= md->end_subject)
4443 {
4444 SCHECK_PARTIAL();
4445 MRRETURN(MATCH_NOMATCH);
4446 }
4447 GETCHARINCTEST(c, eptr);
4448 prop_category = UCD_CATEGORY(c);
4449 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4450 while (eptr < md->end_subject)
4451 {
4452 int len = 1;
4453 if (!utf8) c = *eptr;
4454 else { GETCHARLEN(c, eptr, len); }
4455 prop_category = UCD_CATEGORY(c);
4456 if (prop_category != ucp_M) break;
4457 eptr += len;
4458 }
4459 }
4460 }
4461
4462 else
4463 #endif /* SUPPORT_UCP */
4464
4465 #ifdef SUPPORT_UTF8
4466 /* UTF-8 mode */
4467 if (utf8)
4468 {
4469 for (fi = min;; fi++)
4470 {
4471 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM42);
4472 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4473 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4474 if (eptr >= md->end_subject)
4475 {
4476 SCHECK_PARTIAL();
4477 MRRETURN(MATCH_NOMATCH);
4478 }
4479 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4480 MRRETURN(MATCH_NOMATCH);
4481 GETCHARINC(c, eptr);
4482 switch(ctype)
4483 {
4484 case OP_ANY: /* This is the non-NL case */
4485 case OP_ALLANY:
4486 case OP_ANYBYTE:
4487 break;
4488
4489 case OP_ANYNL:
4490 switch(c)
4491 {
4492 default: MRRETURN(MATCH_NOMATCH);
4493 case 0x000d:
4494 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4495 break;
4496 case 0x000a:
4497 break;
4498
4499 case 0x000b:
4500 case 0x000c:
4501 case 0x0085:
4502 case 0x2028:
4503 case 0x2029:
4504 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4505 break;
4506 }
4507 break;
4508
4509 case OP_NOT_HSPACE:
4510 switch(c)
4511 {
4512 default: break;
4513 case 0x09: /* HT */
4514 case 0x20: /* SPACE */
4515 case 0xa0: /* NBSP */
4516 case 0x1680: /* OGHAM SPACE MARK */
4517 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4518 case 0x2000: /* EN QUAD */
4519 case 0x2001: /* EM QUAD */
4520 case 0x2002: /* EN SPACE */
4521 case 0x2003: /* EM SPACE */
4522 case 0x2004: /* THREE-PER-EM SPACE */
4523 case 0x2005: /* FOUR-PER-EM SPACE */
4524 case 0x2006: /* SIX-PER-EM SPACE */
4525 case 0x2007: /* FIGURE SPACE */
4526 case 0x2008: /* PUNCTUATION SPACE */
4527 case 0x2009: /* THIN SPACE */
4528 case 0x200A: /* HAIR SPACE */
4529 case 0x202f: /* NARROW NO-BREAK SPACE */
4530 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4531 case 0x3000: /* IDEOGRAPHIC SPACE */
4532 MRRETURN(MATCH_NOMATCH);
4533 }
4534 break;
4535
4536 case OP_HSPACE:
4537 switch(c)
4538 {
4539 default: MRRETURN(MATCH_NOMATCH);
4540 case 0x09: /* HT */
4541 case 0x20: /* SPACE */
4542 case 0xa0: /* NBSP */
4543 case 0x1680: /* OGHAM SPACE MARK */
4544 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4545 case 0x2000: /* EN QUAD */
4546 case 0x2001: /* EM QUAD */
4547 case 0x2002: /* EN SPACE */
4548 case 0x2003: /* EM SPACE */
4549 case 0x2004: /* THREE-PER-EM SPACE */
4550 case 0x2005: /* FOUR-PER-EM SPACE */
4551 case 0x2006: /* SIX-PER-EM SPACE */
4552 case 0x2007: /* FIGURE SPACE */
4553 case 0x2008: /* PUNCTUATION SPACE */
4554 case 0x2009: /* THIN SPACE */
4555 case 0x200A: /* HAIR SPACE */
4556 case 0x202f: /* NARROW NO-BREAK SPACE */
4557 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4558 case 0x3000: /* IDEOGRAPHIC SPACE */
4559 break;
4560 }
4561 break;
4562
4563 case OP_NOT_VSPACE:
4564 switch(c)
4565 {
4566 default: break;
4567 case 0x0a: /* LF */
4568 case 0x0b: /* VT */
4569 case 0x0c: /* FF */
4570 case 0x0d: /* CR */
4571 case 0x85: /* NEL */
4572 case 0x2028: /* LINE SEPARATOR */
4573 case 0x2029: /* PARAGRAPH SEPARATOR */
4574 MRRETURN(MATCH_NOMATCH);
4575 }
4576 break;
4577
4578 case OP_VSPACE:
4579 switch(c)
4580 {
4581 default: MRRETURN(MATCH_NOMATCH);
4582 case 0x0a: /* LF */
4583 case 0x0b: /* VT */
4584 case 0x0c: /* FF */
4585 case 0x0d: /* CR */
4586 case 0x85: /* NEL */
4587 case 0x2028: /* LINE SEPARATOR */
4588 case 0x2029: /* PARAGRAPH SEPARATOR */
4589 break;
4590 }
4591 break;
4592
4593 case OP_NOT_DIGIT:
4594 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4595 MRRETURN(MATCH_NOMATCH);
4596 break;
4597
4598 case OP_DIGIT:
4599 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4600 MRRETURN(MATCH_NOMATCH);
4601 break;
4602
4603 case OP_NOT_WHITESPACE:
4604 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4605 MRRETURN(MATCH_NOMATCH);
4606 break;
4607
4608 case OP_WHITESPACE:
4609 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4610 MRRETURN(MATCH_NOMATCH);
4611 break;
4612
4613 case OP_NOT_WORDCHAR:
4614 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4615 MRRETURN(MATCH_NOMATCH);
4616 break;
4617
4618 case OP_WORDCHAR:
4619 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4620 MRRETURN(MATCH_NOMATCH);
4621 break;
4622
4623 default:
4624 RRETURN(PCRE_ERROR_INTERNAL);
4625 }
4626 }
4627 }
4628 else
4629 #endif
4630 /* Not UTF-8 mode */
4631 {
4632 for (fi = min;; fi++)
4633 {
4634 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM43);
4635 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4636 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4637 if (eptr >= md->end_subject)
4638 {
4639 SCHECK_PARTIAL();
4640 MRRETURN(MATCH_NOMATCH);
4641 }
4642 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4643 MRRETURN(MATCH_NOMATCH);
4644 c = *eptr++;
4645 switch(ctype)
4646 {
4647 case OP_ANY: /* This is the non-NL case */
4648 case OP_ALLANY:
4649 case OP_ANYBYTE:
4650 break;
4651
4652 case OP_ANYNL:
4653 switch(c)
4654 {
4655 default: MRRETURN(MATCH_NOMATCH);
4656 case 0x000d:
4657 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4658 break;
4659
4660 case 0x000a:
4661 break;
4662
4663 case 0x000b:
4664 case 0x000c:
4665 case 0x0085:
4666 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4667 break;
4668 }
4669 break;
4670
4671 case OP_NOT_HSPACE:
4672 switch(c)
4673 {
4674 default: break;
4675 case 0x09: /* HT */
4676 case 0x20: /* SPACE */
4677 case 0xa0: /* NBSP */
4678 MRRETURN(MATCH_NOMATCH);
4679 }
4680 break;
4681
4682 case OP_HSPACE:
4683 switch(c)
4684 {
4685 default: MRRETURN(MATCH_NOMATCH);
4686 case 0x09: /* HT */
4687 case 0x20: /* SPACE */
4688 case 0xa0: /* NBSP */
4689 break;
4690 }
4691 break;
4692
4693 case OP_NOT_VSPACE:
4694 switch(c)
4695 {
4696 default: break;
4697 case 0x0a: /* LF */
4698 case 0x0b: /* VT */
4699 case 0x0c: /* FF */
4700 case 0x0d: /* CR */
4701 case 0x85: /* NEL */
4702 MRRETURN(MATCH_NOMATCH);
4703 }
4704 break;
4705
4706 case OP_VSPACE:
4707 switch(c)
4708 {
4709 default: MRRETURN(MATCH_NOMATCH);
4710 case 0x0a: /* LF */
4711 case 0x0b: /* VT */
4712 case 0x0c: /* FF */
4713 case 0x0d: /* CR */
4714 case 0x85: /* NEL */
4715 break;
4716 }
4717 break;
4718
4719 case OP_NOT_DIGIT:
4720 if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4721 break;
4722
4723 case OP_DIGIT:
4724 if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4725 break;
4726
4727 case OP_NOT_WHITESPACE:
4728 if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4729 break;
4730
4731 case OP_WHITESPACE:
4732 if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4733 break;
4734
4735 case OP_NOT_WORDCHAR:
4736 if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4737 break;
4738
4739 case OP_WORDCHAR:
4740 if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4741 break;
4742
4743 default:
4744 RRETURN(PCRE_ERROR_INTERNAL);
4745 }
4746 }
4747 }
4748 /* Control never gets here */
4749 }
4750
4751 /* If maximizing, it is worth using inline code for speed, doing the type
4752 test once at the start (i.e. keep it out of the loop). Again, keep the
4753 UTF-8 and UCP stuff separate. */
4754
4755 else
4756 {
4757 pp = eptr; /* Remember where we started */
4758
4759 #ifdef SUPPORT_UCP
4760 if (prop_type >= 0)
4761 {
4762 switch(prop_type)
4763 {
4764 case PT_ANY:
4765 for (i = min; i < max; i++)
4766 {
4767 int len = 1;
4768 if (eptr >= md->end_subject)
4769 {
4770 SCHECK_PARTIAL();
4771 break;
4772 }
4773 GETCHARLENTEST(c, eptr, len);
4774 if (prop_fail_result) break;
4775 eptr+= len;
4776 }
4777 break;
4778
4779 case PT_LAMP:
4780 for (i = min; i < max; i++)
4781 {
4782 int len = 1;
4783 if (eptr >= md->end_subject)
4784 {
4785 SCHECK_PARTIAL();
4786 break;
4787 }
4788 GETCHARLENTEST(c, eptr, len);
4789 prop_chartype = UCD_CHARTYPE(c);
4790 if ((prop_chartype == ucp_Lu ||
4791 prop_chartype == ucp_Ll ||
4792 prop_chartype == ucp_Lt) == prop_fail_result)
4793 break;
4794 eptr+= len;
4795 }
4796 break;
4797
4798 case PT_GC:
4799 for (i = min; i < max; i++)
4800 {
4801 int len = 1;
4802 if (eptr >= md->end_subject)
4803 {
4804 SCHECK_PARTIAL();
4805 break;
4806 }
4807 GETCHARLENTEST(c, eptr, len);
4808 prop_category = UCD_CATEGORY(c);
4809 if ((prop_category == prop_value) == prop_fail_result)
4810 break;
4811 eptr+= len;
4812 }
4813 break;
4814
4815 case PT_PC:
4816 for (i = min; i < max; i++)
4817 {
4818 int len = 1;
4819 if (eptr >= md->end_subject)
4820 {
4821 SCHECK_PARTIAL();
4822 break;
4823 }
4824 GETCHARLENTEST(c, eptr, len);
4825 prop_chartype = UCD_CHARTYPE(c);
4826 if ((prop_chartype == prop_value) == prop_fail_result)
4827 break;
4828 eptr+= len;
4829 }
4830 break;
4831
4832 case PT_SC:
4833 for (i = min; i < max; i++)
4834 {
4835 int len = 1;
4836 if (eptr >= md->end_subject)
4837 {
4838 SCHECK_PARTIAL();
4839 break;
4840 }
4841 GETCHARLENTEST(c, eptr, len);
4842 prop_script = UCD_SCRIPT(c);
4843 if ((prop_script == prop_value) == prop_fail_result)
4844 break;
4845 eptr+= len;
4846 }
4847 break;
4848
4849 case PT_ALNUM:
4850 for (i = min; i < max; i++)
4851 {
4852 int len = 1;
4853 if (eptr >= md->end_subject)
4854 {
4855 SCHECK_PARTIAL();
4856 break;
4857 }
4858 GETCHARLENTEST(c, eptr, len);
4859 prop_category = UCD_CATEGORY(c);
4860 if ((prop_category == ucp_L || prop_category == ucp_N)
4861 == prop_fail_result)
4862 break;
4863 eptr+= len;
4864 }
4865 break;
4866
4867 case PT_SPACE: /* Perl space */
4868 for (i = min; i < max; i++)
4869 {
4870 int len = 1;
4871 if (eptr >= md->end_subject)
4872 {
4873 SCHECK_PARTIAL();
4874 break;
4875 }
4876 GETCHARLENTEST(c, eptr, len);
4877 prop_category = UCD_CATEGORY(c);
4878 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4879 c == CHAR_FF || c == CHAR_CR)
4880 == prop_fail_result)
4881 break;
4882 eptr+= len;
4883 }
4884 break;
4885
4886 case PT_PXSPACE: /* POSIX space */
4887 for (i = min; i < max; i++)
4888 {
4889 int len = 1;
4890 if (eptr >= md->end_subject)
4891 {
4892 SCHECK_PARTIAL();
4893 break;
4894 }
4895 GETCHARLENTEST(c, eptr, len);
4896 prop_category = UCD_CATEGORY(c);
4897 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4898 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4899 == prop_fail_result)
4900 break;
4901 eptr+= len;
4902 }
4903 break;
4904
4905 case PT_WORD:
4906 for (i = min; i < max; i++)
4907 {
4908 int len = 1;
4909 if (eptr >= md->end_subject)
4910 {
4911 SCHECK_PARTIAL();
4912 break;
4913 }
4914 GETCHARLENTEST(c, eptr, len);
4915 prop_category = UCD_CATEGORY(c);
4916 if ((prop_category == ucp_L || prop_category == ucp_N ||
4917 c == CHAR_UNDERSCORE) == prop_fail_result)
4918 break;
4919 eptr+= len;
4920 }
4921 break;
4922
4923 default:
4924 RRETURN(PCRE_ERROR_INTERNAL);
4925 }
4926
4927 /* eptr is now past the end of the maximum run */
4928
4929 if (possessive) continue;
4930 for(;;)
4931 {
4932 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM44);
4933 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934 if (eptr-- == pp) break; /* Stop if tried at original pos */
4935 if (utf8) BACKCHAR(eptr);
4936 }
4937 }
4938
4939 /* Match extended Unicode sequences. We will get here only if the
4940 support is in the binary; otherwise a compile-time error occurs. */
4941
4942 else if (ctype == OP_EXTUNI)
4943 {
4944 for (i = min; i < max; i++)
4945 {
4946 if (eptr >= md->end_subject)
4947 {
4948 SCHECK_PARTIAL();
4949 break;
4950 }
4951 GETCHARINCTEST(c, eptr);
4952 prop_category = UCD_CATEGORY(c);
4953 if (prop_category == ucp_M) break;
4954 while (eptr < md->end_subject)
4955 {
4956 int len = 1;
4957 if (!utf8) c = *eptr; else
4958 {
4959 GETCHARLEN(c, eptr, len);
4960 }
4961 prop_category = UCD_CATEGORY(c);
4962 if (prop_category != ucp_M) break;
4963 eptr += len;
4964 }
4965 }
4966
4967 /* eptr is now past the end of the maximum run */
4968
4969 if (possessive) continue;
4970
4971 for(;;)
4972 {
4973 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM45);
4974 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4975 if (eptr-- == pp) break; /* Stop if tried at original pos */
4976 for (;;) /* Move back over one extended */
4977 {
4978 int len = 1;
4979 if (!utf8) c = *eptr; else
4980 {
4981 BACKCHAR(eptr);
4982 GETCHARLEN(c, eptr, len);
4983 }
4984 prop_category = UCD_CATEGORY(c);
4985 if (prop_category != ucp_M) break;
4986 eptr--;
4987 }
4988 }
4989 }
4990
4991 else
4992 #endif /* SUPPORT_UCP */
4993
4994 #ifdef SUPPORT_UTF8
4995 /* UTF-8 mode */
4996
4997 if (utf8)
4998 {
4999 switch(ctype)
5000 {
5001 case OP_ANY:
5002 if (max < INT_MAX)
5003 {
5004 for (i = min; i < max; i++)
5005 {
5006 if (eptr >= md->end_subject)
5007 {
5008 SCHECK_PARTIAL();
5009 break;
5010 }
5011 if (IS_NEWLINE(eptr)) break;
5012 eptr++;
5013 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5014 }
5015 }
5016
5017 /* Handle unlimited UTF-8 repeat */
5018
5019 else
5020 {
5021 for (i = min; i < max; i++)
5022 {
5023 if (eptr >= md->end_subject)
5024 {
5025 SCHECK_PARTIAL();
5026 break;
5027 }
5028 if (IS_NEWLINE(eptr)) break;
5029 eptr++;
5030 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5031 }
5032 }
5033 break;
5034
5035 case OP_ALLANY:
5036 if (max < INT_MAX)
5037 {
5038 for (i = min; i < max; i++)
5039 {
5040 if (eptr >= md->end_subject)
5041 {
5042 SCHECK_PARTIAL();
5043 break;
5044 }
5045 eptr++;
5046 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5047 }
5048 }
5049 else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5050 break;
5051
5052 /* The byte case is the same as non-UTF8 */
5053
5054 case OP_ANYBYTE:
5055 c = max - min;
5056 if (c > (unsigned int)(md->end_subject - eptr))
5057 {
5058 eptr = md->end_subject;
5059 SCHECK_PARTIAL();
5060 }
5061 else eptr += c;
5062 break;
5063
5064 case OP_ANYNL:
5065 for (i = min; i < max; i++)
5066 {
5067 int len = 1;
5068 if (eptr >= md->end_subject)
5069 {
5070 SCHECK_PARTIAL();
5071 break;
5072 }
5073 GETCHARLEN(c, eptr, len);
5074 if (c == 0x000d)
5075 {
5076 if (++eptr >= md->end_subject) break;
5077 if (*eptr == 0x000a) eptr++;
5078 }
5079 else
5080 {
5081 if (c != 0x000a &&
5082 (md->bsr_anycrlf ||
5083 (c != 0x000b && c != 0x000c &&
5084 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5085 break;
5086 eptr += len;
5087 }
5088 }
5089 break;
5090
5091 case OP_NOT_HSPACE:
5092 case OP_HSPACE:
5093 for (i = min; i < max; i++)
5094 {
5095 BOOL gotspace;
5096 int len = 1;
5097 if (eptr >= md->end_subject)
5098 {
5099 SCHECK_PARTIAL();
5100 break;
5101 }
5102 GETCHARLEN(c, eptr, len);
5103 switch(c)
5104 {
5105 default: gotspace = FALSE; break;
5106 case 0x09: /* HT */
5107 case 0x20: /* SPACE */
5108 case 0xa0: /* NBSP */
5109 case 0x1680: /* OGHAM SPACE MARK */
5110 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5111 case 0x2000: /* EN QUAD */
5112 case 0x2001: /* EM QUAD */
5113 case 0x2002: /* EN SPACE */
5114 case 0x2003: /* EM SPACE */
5115 case 0x2004: /* THREE-PER-EM SPACE */
5116 case 0x2005: /* FOUR-PER-EM SPACE */
5117 case 0x2006: /* SIX-PER-EM SPACE */
5118 case 0x2007: /* FIGURE SPACE */
5119 case 0x2008: /* PUNCTUATION SPACE */
5120 case 0x2009: /* THIN SPACE */
5121 case 0x200A: /* HAIR SPACE */
5122 case 0x202f: /* NARROW NO-BREAK SPACE */
5123 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5124 case 0x3000: /* IDEOGRAPHIC SPACE */
5125 gotspace = TRUE;
5126 break;
5127 }
5128 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5129 eptr += len;
5130 }
5131 break;
5132
5133 case OP_NOT_VSPACE:
5134 case OP_VSPACE:
5135 for (i = min; i < max; i++)
5136 {
5137 BOOL gotspace;
5138 int len = 1;
5139 if (eptr >= md->end_subject)
5140 {
5141 SCHECK_PARTIAL();
5142 break;
5143 }
5144 GETCHARLEN(c, eptr, len);
5145 switch(c)
5146 {
5147 default: gotspace = FALSE; break;
5148 case 0x0a: /* LF */
5149 case 0x0b: /* VT */
5150 case 0x0c: /* FF */
5151 case 0x0d: /* CR */
5152 case 0x85: /* NEL */
5153 case 0x2028: /* LINE SEPARATOR */
5154 case 0x2029: /* PARAGRAPH SEPARATOR */
5155 gotspace = TRUE;
5156 break;
5157 }
5158 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5159 eptr += len;
5160 }
5161 break;
5162
5163 case OP_NOT_DIGIT:
5164 for (i = min; i < max; i++)
5165 {
5166 int len = 1;
5167 if (eptr >= md->end_subject)
5168 {
5169 SCHECK_PARTIAL();
5170 break;
5171 }
5172 GETCHARLEN(c, eptr, len);
5173 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5174 eptr+= len;
5175 }
5176 break;
5177
5178 case OP_DIGIT:
5179 for (i = min; i < max; i++)
5180 {
5181 int len = 1;
5182 if (eptr >= md->end_subject)
5183 {
5184 SCHECK_PARTIAL();
5185 break;
5186 }
5187 GETCHARLEN(c, eptr, len);
5188 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5189 eptr+= len;
5190 }
5191 break;
5192
5193 case OP_NOT_WHITESPACE:
5194 for (i = min; i < max; i++)
5195 {
5196 int len = 1;
5197 if (eptr >= md->end_subject)
5198 {
5199 SCHECK_PARTIAL();
5200 break;
5201 }
5202 GETCHARLEN(c, eptr, len);
5203 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5204 eptr+= len;
5205 }
5206 break;
5207
5208 case OP_WHITESPACE:
5209 for (i = min; i < max; i++)
5210 {
5211 int len = 1;
5212 if (eptr >= md->end_subject)
5213 {
5214 SCHECK_PARTIAL();
5215 break;
5216 }
5217 GETCHARLEN(c, eptr, len);
5218 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5219 eptr+= len;
5220 }
5221 break;
5222
5223 case OP_NOT_WORDCHAR:
5224 for (i = min; i < max; i++)
5225 {
5226 int len = 1;
5227 if (eptr >= md->end_subject)
5228 {
5229 SCHECK_PARTIAL();
5230 break;
5231 }
5232 GETCHARLEN(c, eptr, len);
5233 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5234 eptr+= len;
5235 }
5236 break;
5237
5238 case OP_WORDCHAR:
5239 for (i = min; i < max; i++)
5240 {
5241 int len = 1;
5242 if (eptr >= md->end_subject)
5243 {
5244 SCHECK_PARTIAL();
5245 break;
5246 }
5247 GETCHARLEN(c, eptr, len);
5248 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5249 eptr+= len;
5250 }
5251 break;
5252
5253 default:
5254 RRETURN(PCRE_ERROR_INTERNAL);
5255 }
5256
5257 /* eptr is now past the end of the maximum run. If possessive, we are
5258 done (no backing up). Otherwise, match at this position; anything other
5259 than no match is immediately returned. For nomatch, back up one
5260 character, unless we are matching \R and the last thing matched was
5261 \r\n, in which case, back up two bytes. */
5262
5263 if (possessive) continue;
5264 for(;;)
5265 {
5266 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM46);
5267 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5268 if (eptr-- == pp) break; /* Stop if tried at original pos */
5269 BACKCHAR(eptr);
5270 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
5271 eptr[-1] == '\r') eptr--;
5272 }
5273 }
5274 else
5275 #endif /* SUPPORT_UTF8 */
5276
5277 /* Not UTF-8 mode */
5278 {
5279 switch(ctype)
5280 {
5281 case OP_ANY:
5282 for (i = min; i < max; i++)
5283 {
5284 if (eptr >= md->end_subject)
5285 {
5286 SCHECK_PARTIAL();
5287 break;
5288 }
5289 if (IS_NEWLINE(eptr)) break;
5290 eptr++;
5291 }
5292 break;
5293
5294 case OP_ALLANY:
5295 case OP_ANYBYTE:
5296 c = max - min;
5297 if (c > (unsigned int)(md->end_subject - eptr))
5298 {
5299 eptr = md->end_subject;
5300 SCHECK_PARTIAL();
5301 }
5302 else eptr += c;
5303 break;
5304
5305 case OP_ANYNL:
5306 for (i = min; i < max; i++)
5307 {
5308 if (eptr >= md->end_subject)
5309 {
5310 SCHECK_PARTIAL();
5311 break;
5312 }
5313 c = *eptr;
5314 if (c == 0x000d)
5315 {
5316 if (++eptr >= md->end_subject) break;
5317 if (*eptr == 0x000a) eptr++;
5318 }
5319 else
5320 {
5321 if (c != 0x000a &&
5322 (md->bsr_anycrlf ||
5323 (c != 0x000b && c != 0x000c && c != 0x0085)))
5324 break;
5325 eptr++;
5326 }
5327 }
5328 break;
5329
5330 case OP_NOT_HSPACE:
5331 for (i = min; i < max; i++)
5332 {
5333 if (eptr >= md->end_subject)
5334 {
5335 SCHECK_PARTIAL();
5336 break;
5337 }
5338 c = *eptr;
5339 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
5340 eptr++;
5341 }
5342 break;
5343
5344 case OP_HSPACE:
5345 for (i = min; i < max; i++)
5346 {
5347 if (eptr >= md->end_subject)
5348 {
5349 SCHECK_PARTIAL();
5350 break;
5351 }
5352 c = *eptr;
5353 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
5354 eptr++;
5355 }
5356 break;
5357
5358 case OP_NOT_VSPACE:
5359 for (i = min; i < max; i++)
5360 {
5361 if (eptr >= md->end_subject)
5362 {
5363 SCHECK_PARTIAL();
5364 break;
5365 }
5366 c = *eptr;
5367 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
5368 break;
5369 eptr++;
5370 }
5371 break;
5372
5373 case OP_VSPACE:
5374 for (i = min; i < max; i++)
5375 {
5376 if (eptr >= md->end_subject)
5377 {
5378 SCHECK_PARTIAL();
5379 break;
5380 }
5381 c = *eptr;
5382 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
5383 break;
5384 eptr++;
5385 }
5386 break;
5387
5388 case OP_NOT_DIGIT:
5389 for (i = min; i < max; i++)
5390 {
5391 if (eptr >= md->end_subject)
5392 {
5393 SCHECK_PARTIAL();
5394 break;
5395 }
5396 if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
5397 eptr++;
5398 }
5399 break;
5400
5401 case OP_DIGIT:
5402 for (i = min; i < max; i++)
5403 {
5404 if (eptr >= md->end_subject)
5405 {
5406 SCHECK_PARTIAL();
5407 break;
5408 }
5409 if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
5410 eptr++;
5411 }
5412 break;
5413
5414 case OP_NOT_WHITESPACE:
5415 for (i = min; i < max; i++)
5416 {
5417 if (eptr >= md->end_subject)
5418 {
5419 SCHECK_PARTIAL();
5420 break;
5421 }
5422 if ((md->ctypes[*eptr] & ctype_space) != 0) break;
5423 eptr++;
5424 }
5425 break;
5426
5427 case OP_WHITESPACE:
5428 for (i = min; i < max; i++)
5429 {
5430 if (eptr >= md->end_subject)
5431 {
5432 SCHECK_PARTIAL();
5433 break;
5434 }
5435 if ((md->ctypes[*eptr] & ctype_space) == 0) break;
5436 eptr++;
5437 }
5438 break;
5439
5440 case OP_NOT_WORDCHAR:
5441 for (i = min; i < max; i++)
5442 {
5443 if (eptr >= md->end_subject)
5444 {
5445 SCHECK_PARTIAL();
5446 break;
5447 }
5448 if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5449 eptr++;
5450 }
5451 break;
5452
5453 case OP_WORDCHAR:
5454 for (i = min; i < max; i++)
5455 {
5456 if (eptr >= md->end_subject)
5457 {
5458 SCHECK_PARTIAL();
5459 break;
5460 }
5461 if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5462 eptr++;
5463 }
5464 break;
5465
5466 default:
5467 RRETURN(PCRE_ERROR_INTERNAL);
5468 }
5469
5470 /* eptr is now past the end of the maximum run. If possessive, we are
5471 done (no backing up). Otherwise, match at this position; anything other
5472 than no match is immediately returned. For nomatch, back up one
5473 character (byte), unless we are matching \R and the last thing matched
5474 was \r\n, in which case, back up two bytes. */
5475
5476 if (possessive) continue;
5477 while (eptr >= pp)
5478 {
5479 RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM47);
5480 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5481 eptr--;
5482 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
5483 eptr[-1] == '\r') eptr--;
5484 }
5485 }
5486
5487 /* Get here if we can't make it match with any permitted repetitions */
5488
5489 MRRETURN(MATCH_NOMATCH);
5490 }
5491 /* Control never gets here */
5492
5493 /* There's been some horrible disaster. Arrival here can only mean there is
5494 something seriously wrong in the code above or the OP_xxx definitions. */
5495
5496 default:
5497 DPRINTF(("Unknown opcode %d\n", *ecode));
5498 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5499 }
5500
5501 /* Do not stick any code in here without much thought; it is assumed
5502 that "continue" in the code above comes out to here to repeat the main
5503 loop. */
5504
5505 } /* End of main loop */
5506 /* Control never reaches here */
5507
5508
5509 /* When compiling to use the heap rather than the stack for recursive calls to
5510 match(), the RRETURN() macro jumps here. The number that is saved in
5511 frame->Xwhere indicates which label we actually want to return to. */
5512
5513 #ifdef NO_RECURSE
5514 #define LBL(val) case val: goto L_RM##val;
5515 HEAP_RETURN:
5516 switch (frame->Xwhere)
5517 {
5518 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5519 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5520 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5521 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5522 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5523 #ifdef SUPPORT_UTF8
5524 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5525 LBL(32) LBL(34) LBL(42) LBL(46)
5526 #ifdef SUPPORT_UCP
5527 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5528 LBL(59) LBL(60) LBL(61) LBL(62)
5529 #endif /* SUPPORT_UCP */
5530 #endif /* SUPPORT_UTF8 */
5531 default:
5532 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5533 return PCRE_ERROR_INTERNAL;
5534 }
5535 #undef LBL
5536 #endif /* NO_RECURSE */
5537 }
5538
5539
5540 /***************************************************************************
5541 ****************************************************************************
5542 RECURSION IN THE match() FUNCTION
5543
5544 Undefine all the macros that were defined above to handle this. */
5545
5546 #ifdef NO_RECURSE /* these names are macros only in the NO_RECURSE (heap frame) build of match() */
5547 #undef eptr
5548 #undef ecode
5549 #undef mstart
5550 #undef offset_top
5551 #undef eptrb
5552 #undef flags /* pcre_exec() below declares its own local called "flags" */
5553
5554 #undef callpat
5555 #undef charptr
5556 #undef data
5557 #undef next
5558 #undef pp
5559 #undef prev
5560 #undef saved_eptr
5561
5562 #undef new_recursive
5563
5564 #undef cur_is_word
5565 #undef condition
5566 #undef prev_is_word
5567
5568 #undef ctype
5569 #undef length /* also the name of a pcre_exec() parameter below */
5570 #undef max
5571 #undef min
5572 #undef number
5573 #undef offset
5574 #undef op
5575 #undef save_capture_last
5576 #undef save_offset1
5577 #undef save_offset2
5578 #undef save_offset3
5579 #undef stacksave
5580
5581 #undef newptrb
5582
5583 #endif /* NO_RECURSE */
5584
5585 /* These two are defined as macros in both cases, so they are undefined outside the NO_RECURSE conditional */
5586
5587 #undef fc
5588 #undef fi
5589
5590 /***************************************************************************
5591 ***************************************************************************/
5592
5593
5594
5595 /*************************************************
5596 * Execute a Regular Expression *
5597 *************************************************/
5598
5599 /* This function applies a compiled re to a subject string and picks out
5600 portions of the string if it matches. Two elements in the vector are set for
5601 each substring: the offsets to the start and end of the substring.
5602
5603 Arguments:
5604 argument_re points to the compiled expression
5605 extra_data points to extra data or is NULL
5606 subject points to the subject string
5607 length length of subject string (may contain binary zeros)
5608 start_offset where to start in the subject string
5609 options option bits
5610 offsets points to a vector of ints to be filled in with offsets
5611 offsetcount the number of elements in the vector
5612
5613 Returns: > 0 => success; value is the number of elements filled in
5614 = 0 => success, but offsets is not big enough
5615 -1 => failed to match
5616 < -1 => some kind of unexpected problem
5617 */
5618
5619 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5620 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5621 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5622 int offsetcount)
5623 {
5624 int rc, resetcount, ocount;
5625 int first_byte = -1;
5626 int req_byte = -1;
5627 int req_byte2 = -1;
5628 int newline;
5629 BOOL using_temporary_offsets = FALSE;
5630 BOOL anchored;
5631 BOOL startline;
5632 BOOL firstline;
5633 BOOL first_byte_caseless = FALSE;
5634 BOOL req_byte_caseless = FALSE;
5635 BOOL utf8;
5636 match_data match_block;
5637 match_data *md = &match_block;
5638 const uschar *tables;
5639 const uschar *start_bits = NULL;
5640 USPTR start_match = (USPTR)subject + start_offset;
5641 USPTR end_subject;
5642 USPTR start_partial = NULL;
5643 USPTR req_byte_ptr = start_match - 1;
5644
5645 pcre_study_data internal_study;
5646 const pcre_study_data *study;
5647
5648 real_pcre internal_re;
5649 const real_pcre *external_re = (const real_pcre *)argument_re;
5650 const real_pcre *re = external_re;
5651
5652 /* Plausibility checks */
5653
5654 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5655 if (re == NULL || subject == NULL ||
5656 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5657 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5658 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5659
5660 /* This information is for finding all the numbers associated with a given
5661 name, for condition testing. */
5662
5663 md->name_table = (uschar *)re + re->name_table_offset;
5664 md->name_count = re->name_count;
5665 md->name_entry_size = re->name_entry_size;
5666
5667 /* Fish out the optional data from the extra_data structure, first setting
5668 the default values. */
5669
5670 study = NULL;
5671 md->match_limit = MATCH_LIMIT;
5672 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5673 md->callout_data = NULL;
5674
5675 /* The table pointer is always in native byte order. */
5676
5677 tables = external_re->tables;
5678
5679 if (extra_data != NULL)
5680 {
5681 register unsigned int flags = extra_data->flags;
5682 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5683 study = (const pcre_study_data *)extra_data->study_data;
5684 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5685 md->match_limit = extra_data->match_limit;
5686 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5687 md->match_limit_recursion = extra_data->match_limit_recursion;
5688 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5689 md->callout_data = extra_data->callout_data;
5690 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5691 }
5692
5693 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5694 is a feature that makes it possible to save compiled regexes and re-use them
5695 in other programs later. */
5696
5697 if (tables == NULL) tables = _pcre_default_tables;
5698
5699 /* Check that the first field in the block is the magic number. If it is not,
5700 test for a regex that was compiled on a host of opposite endianness. If this is
5701 the case, flipped values are put in internal_re and internal_study if there was
5702 study data too. */
5703
5704 if (re->magic_number != MAGIC_NUMBER)
5705 {
5706 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5707 if (re == NULL) return PCRE_ERROR_BADMAGIC;
5708 if (study != NULL) study = &internal_study;
5709 }
5710
5711 /* Set up other data */
5712
5713 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5714 startline = (re->flags & PCRE_STARTLINE) != 0;
5715 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5716
5717 /* The code starts after the real_pcre block and the capture name table. */
5718
5719 md->start_code = (const uschar *)external_re + re->name_table_offset +
5720 re->name_count * re->name_entry_size;
5721
5722 md->start_subject = (USPTR)subject;
5723 md->start_offset = start_offset;
5724 md->end_subject = md->start_subject + length;
5725 end_subject = md->end_subject;
5726
5727 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5728 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5729 md->use_ucp = (re->options & PCRE_UCP) != 0;
5730 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5731
5732 md->notbol = (options & PCRE_NOTBOL) != 0;
5733 md->noteol = (options & PCRE_NOTEOL) != 0;
5734 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5735 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5736 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5737 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5738 md->hitend = FALSE;
5739 md->mark = NULL; /* In case never set */
5740
5741 md->recursive = NULL; /* No recursion at top level */
5742
5743 md->lcc = tables + lcc_offset;
5744 md->ctypes = tables + ctypes_offset;
5745
5746 /* Handle different \R options. */
5747
5748 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5749 {
5750 case 0:
5751 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5752 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5753 else
5754 #ifdef BSR_ANYCRLF
5755 md->bsr_anycrlf = TRUE;
5756 #else
5757 md->bsr_anycrlf = FALSE;
5758 #endif
5759 break;
5760
5761 case PCRE_BSR_ANYCRLF:
5762 md->bsr_anycrlf = TRUE;
5763 break;
5764
5765 case PCRE_BSR_UNICODE:
5766 md->bsr_anycrlf = FALSE;
5767 break;
5768
5769 default: return PCRE_ERROR_BADNEWLINE;
5770 }
5771
5772 /* Handle different types of newline. The three bits give eight cases. If
5773 nothing is set at run time, whatever was used at compile time applies. */
5774
5775 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5776 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5777 {
5778 case 0: newline = NEWLINE; break; /* Compile-time default */
5779 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5780 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5781 case PCRE_NEWLINE_CR+
5782 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5783 case PCRE_NEWLINE_ANY: newline = -1; break;
5784 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5785 default: return PCRE_ERROR_BADNEWLINE;
5786 }
5787
5788 if (newline == -2)
5789 {
5790 md->nltype = NLTYPE_ANYCRLF;
5791 }
5792 else if (newline < 0)
5793 {
5794 md->nltype = NLTYPE_ANY;
5795 }
5796 else
5797 {
5798 md->nltype = NLTYPE_FIXED;
5799 if (newline > 255)
5800 {
5801 md->nllen = 2;
5802 md->nl[0] = (newline >> 8) & 255;
5803 md->nl[1] = newline & 255;
5804 }
5805 else
5806 {
5807 md->nllen = 1;
5808 md->nl[0] = newline;
5809 }
5810 }
5811
5812 /* Partial matching was originally supported only for a restricted set of
5813 regexes; from release 8.00 there are no restrictions, but the bits are still
5814 defined (though never set). So there's no harm in leaving this code. */
5815
5816 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5817 return PCRE_ERROR_BADPARTIAL;
5818
5819 /* Check a UTF-8 string if required. Pass back the character offset and error
5820 code if a results vector is available. */
5821
5822 #ifdef SUPPORT_UTF8
5823 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5824 {
5825 int errorcode;
5826 int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);
5827 if (tb >= 0)
5828 {
5829 if (offsetcount >= 2)
5830 {
5831 offsets[0] = tb;
5832 offsets[1] = errorcode;
5833 }
5834 return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5835 PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5836 }
5837 if (start_offset > 0 && start_offset < length)
5838 {
5839 tb = ((USPTR)subject)[start_offset] & 0xc0;
5840 if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5841 }
5842 }
5843 #endif
5844
5845 /* If the expression has got more back references than the offsets supplied can
5846 hold, we get a temporary chunk of working store to use during the matching.
5847 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5848 of 3. */
5849
5850 ocount = offsetcount - (offsetcount % 3);
5851
5852 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5853 {
5854 ocount = re->top_backref * 3 + 3;
5855 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5856 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5857 using_temporary_offsets = TRUE;
5858 DPRINTF(("Got memory to hold back references\n"));
5859 }
5860 else md->offset_vector = offsets;
5861
5862 md->offset_end = ocount;
5863 md->offset_max = (2*ocount)/3;
5864 md->offset_overflow = FALSE;
5865 md->capture_last = -1;
5866
5867 /* Compute the minimum number of offsets that we need to reset each time. Doing
5868 this makes a huge difference to execution time when there aren't many brackets
5869 in the pattern. */
5870
5871 resetcount = 2 + re->top_bracket * 2;
5872 if (resetcount > offsetcount) resetcount = ocount;
5873
5874 /* Reset the working variables associated with each extraction. These should
5875 never be used unless previously set, but they get saved and restored, and so we
5876 initialize them to avoid reading uninitialized locations. */
5877
5878 if (md->offset_vector != NULL)
5879 {
5880 register int *iptr = md->offset_vector + ocount;
5881 register int *iend = iptr - resetcount/2 + 1;
5882 while (--iptr >= iend) *iptr = -1;
5883 }
5884
5885 /* Set up the first character to match, if available. The first_byte value is
5886 never set for an anchored regular expression, but the anchoring may be forced
5887 at run time, so we have to test for anchoring. The first char may be unset for
5888 an unanchored pattern, of course. If there's no first char and the pattern was
5889 studied, there may be a bitmap of possible first characters. */
5890
5891 if (!anchored)
5892 {
5893 if ((re->flags & PCRE_FIRSTSET) != 0)
5894 {
5895 first_byte = re->first_byte & 255;
5896 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5897 first_byte = md->lcc[first_byte];
5898 }
5899 else
5900 if (!startline && study != NULL &&
5901 (study->flags & PCRE_STUDY_MAPPED) != 0)
5902 start_bits = study->start_bits;
5903 }
5904
5905 /* For anchored or unanchored matches, there may be a "last known required
5906 character" set. */
5907
5908 if ((re->flags & PCRE_REQCHSET) != 0)
5909 {
5910 req_byte = re->req_byte & 255;
5911 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5912 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
5913 }
5914
5915
5916 /* ==========================================================================*/
5917
5918 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
5919 the loop runs just once. */
5920
5921 for(;;)
5922 {
5923 USPTR save_end_subject = end_subject;
5924 USPTR new_start_match;
5925
5926 /* Reset the maximum number of extractions we might see. */
5927
5928 if (md->offset_vector != NULL)
5929 {
5930 register int *iptr = md->offset_vector;
5931 register int *iend = iptr + resetcount;
5932 while (iptr < iend) *iptr++ = -1;
5933 }
5934
5935 /* If firstline is TRUE, the start of the match is constrained to the first
5936 line of a multiline string. That is, the match must be before or at the first
5937 newline. Implement this by temporarily adjusting end_subject so that we stop
5938 scanning at a newline. If the match fails at the newline, later code breaks
5939 this loop. */
5940
5941 if (firstline)
5942 {
5943 USPTR t = start_match;
5944 #ifdef SUPPORT_UTF8
5945 if (utf8)
5946 {
5947 while (t < md->end_subject && !IS_NEWLINE(t))
5948 {
5949 t++;
5950 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5951 }
5952 }
5953 else
5954 #endif
5955 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5956 end_subject = t;
5957 }
5958
5959 /* There are some optimizations that avoid running the match if a known
5960 starting point is not found, or if a known later character is not present.
5961 However, there is an option that disables these, for testing and for ensuring
5962 that all callouts do actually occur. The option can be set in the regex by
5963 (*NO_START_OPT) or passed in match-time options. */
5964
5965 if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5966 {
5967 /* Advance to a unique first byte if there is one. */
5968
5969 if (first_byte >= 0)
5970 {
5971 if (first_byte_caseless)
5972 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5973 start_match++;
5974 else
5975 while (start_match < end_subject && *start_match != first_byte)
5976 start_match++;
5977 }
5978
5979 /* Or to just after a linebreak for a multiline match */
5980
5981 else if (startline)
5982 {
5983 if (start_match > md->start_subject + start_offset)
5984 {
5985 #ifdef SUPPORT_UTF8
5986 if (utf8)
5987 {
5988 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5989 {
5990 start_match++;
5991 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5992 start_match++;
5993 }
5994 }
5995 else
5996 #endif
5997 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5998 start_match++;
5999
6000 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6001 and we are now at a LF, advance the match position by one more character.
6002 */
6003
6004 if (start_match[-1] == CHAR_CR &&
6005 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6006 start_match < end_subject &&
6007 *start_match == CHAR_NL)
6008 start_match++;
6009 }
6010 }
6011
6012 /* Or to a non-unique first byte after study */
6013
6014 else if (start_bits != NULL)
6015 {
6016 while (start_match < end_subject)
6017 {
6018 register unsigned int c = *start_match;
6019 if ((start_bits[c/8] & (1 << (c&7))) == 0)
6020 {
6021 start_match++;
6022 #ifdef SUPPORT_UTF8
6023 if (utf8)
6024 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6025 start_match++;
6026 #endif
6027 }
6028 else break;
6029 }
6030 }
6031 } /* Starting optimizations */
6032
6033 /* Restore fudged end_subject */
6034
6035 end_subject = save_end_subject;
6036
6037 /* The following two optimizations are disabled for partial matching or if
6038 disabling is explicitly requested. */
6039
6040 if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6041 {
6042 /* If the pattern was studied, a minimum subject length may be set. This is
6043 a lower bound only; there need not be any string of exactly that length that
6044 matches the pattern. Although the value is, strictly, in characters, we treat
6045 it as bytes to avoid spending too much time in this optimization. */
6046
6047 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6048 (pcre_uint32)(end_subject - start_match) < study->minlength)
6049 {
6050 rc = MATCH_NOMATCH;
6051 break;
6052 }
6053
6054 /* If req_byte is set, we know that that character must appear in the
6055 subject for the match to succeed. If the first character is set, req_byte
6056 must be later in the subject; otherwise the test starts at the match point.
6057 This optimization can save a huge amount of backtracking in patterns with
6058 nested unlimited repeats that aren't going to match. Writing separate code
6059 for cased/caseless versions makes it go faster, as does using an
6060 autoincrement and backing off on a match.
6061
6062 HOWEVER: when the subject string is very, very long, searching to its end
6063 can take a long time, and give bad performance on quite ordinary patterns.
6064 This showed up when somebody was matching something like /^\d+C/ on a
6065 32-megabyte string... so we don't do this when the string is sufficiently
6066 long. */
6067
6068 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
6069 {
6070 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
6071
6072 /* We don't need to repeat the search if we haven't yet reached the
6073 place we found it at last time. */
6074
6075 if (p > req_byte_ptr)
6076 {
6077 if (req_byte_caseless)
6078 {
6079 while (p < end_subject)
6080 {
6081 register int pp = *p++;
6082 if (pp == req_byte || pp == req_byte2) { p--; break; }
6083 }
6084 }
6085 else
6086 {
6087 while (p < end_subject)
6088 {
6089 if (*p++ == req_byte) { p--; break; }
6090 }
6091 }
6092
6093 /* If we can't find the required character, break the matching loop,
6094 forcing a match failure. */
6095
6096 if (p >= end_subject)
6097 {
6098 rc = MATCH_NOMATCH;
6099 break;
6100 }
6101
6102 /* If we have found the required character, save the point where we
6103 found it, so that we don't search again next time round the loop if
6104 the start hasn't passed this character yet. */
6105
6106 req_byte_ptr = p;
6107 }
6108 }
6109 }
6110
6111 #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6112 printf(">>>> Match against: ");
6113 pchars(start_match, end_subject - start_match, TRUE, md);
6114 printf("\n");
6115 #endif
6116
6117 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6118 first starting point for which a partial match was found. */
6119
6120 md->start_match_ptr = start_match;
6121 md->start_used_ptr = start_match;
6122 md->match_call_count = 0;
6123 rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL,
6124 0, 0);
6125 if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6126
6127 switch(rc)
6128 {
6129 /* SKIP passes back the next starting point explicitly, but if it is the
6130 same as the match we have just done, treat it as NOMATCH. */
6131
6132 case MATCH_SKIP:
6133 if (md->start_match_ptr != start_match)
6134 {
6135 new_start_match = md->start_match_ptr;
6136 break;
6137 }
6138 /* Fall through */
6139
6140 /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6141 the SKIP's arg was not found. We also treat this as NOMATCH. */
6142
6143 case MATCH_SKIP_ARG:
6144 /* Fall through */
6145
6146 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6147 exactly like PRUNE. */
6148
6149 case MATCH_NOMATCH:
6150 case MATCH_PRUNE:
6151 case MATCH_THEN:
6152 new_start_match = start_match + 1;
6153 #ifdef SUPPORT_UTF8
6154 if (utf8)
6155 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6156 new_start_match++;
6157 #endif
6158 break;
6159
6160 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6161
6162 case MATCH_COMMIT:
6163 rc = MATCH_NOMATCH;
6164 goto ENDLOOP;
6165
6166 /* Any other return is either a match, or some kind of error. */
6167
6168 default:
6169 goto ENDLOOP;
6170 }
6171
6172 /* Control reaches here for the various types of "no match at this point"
6173 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6174
6175 rc = MATCH_NOMATCH;
6176
6177 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6178 newline in the subject (though it may continue over the newline). Therefore,
6179 if we have just failed to match, starting at a newline, do not continue. */
6180
6181 if (firstline && IS_NEWLINE(start_match)) break;
6182
6183 /* Advance to new matching position */
6184
6185 start_match = new_start_match;
6186
6187 /* Break the loop if the pattern is anchored or if we have passed the end of
6188 the subject. */
6189
6190 if (anchored || start_match > end_subject) break;
6191
6192 /* If we have just passed a CR and we are now at a LF, and the pattern does
6193 not contain any explicit matches for \r or \n, and the newline option is CRLF
6194 or ANY or ANYCRLF, advance the match position by one more character. */
6195
6196 if (start_match[-1] == CHAR_CR &&
6197 start_match < end_subject &&
6198 *start_match == CHAR_NL &&
6199 (re->flags & PCRE_HASCRORLF) == 0 &&
6200 (md->nltype == NLTYPE_ANY ||
6201 md->nltype == NLTYPE_ANYCRLF ||
6202 md->nllen == 2))
6203 start_match++;
6204
6205 md->mark = NULL; /* Reset for start of next match attempt */
6206 } /* End of for(;;) "bumpalong" loop */
6207
6208 /* ==========================================================================*/
6209
6210 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6211 conditions is true:
6212
6213 (1) The pattern is anchored or the match was failed by (*COMMIT);
6214
6215 (2) We are past the end of the subject;
6216
6217 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6218 this option requests that a match occur at or before the first newline in
6219 the subject.
6220
6221 When we have a match and the offset vector is big enough to deal with any
6222 backreferences, captured substring offsets will already be set up. In the case
6223 where we had to get some local store to hold offsets for backreference
6224 processing, copy those that we can. In this case there need not be overflow if
6225 certain parts of the pattern were not used, even though there are more
6226 capturing parentheses than vector slots. */
6227
6228 ENDLOOP:
6229
6230 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6231 {
6232 if (using_temporary_offsets)
6233 {
6234 if (offsetcount >= 4)
6235 {
6236 memcpy(offsets + 2, md->offset_vector + 2,
6237 (offsetcount - 2) * sizeof(int));
6238 DPRINTF(("Copied offsets from temporary memory\n"));
6239 }
6240 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6241 DPRINTF(("Freeing temporary memory\n"));
6242 (pcre_free)(md->offset_vector);
6243 }
6244
6245 /* Set the return code to the number of captured strings, or 0 if there are
6246 too many to fit into the vector. */
6247
6248 rc = md->offset_overflow? 0 : md->end_offset_top/2;
6249
6250 /* If there is space, set up the whole thing as substring 0. The value of
6251 md->start_match_ptr might be modified if \K was encountered on the successful
6252 matching path. */
6253
6254 if (offsetcount < 2) rc = 0; else
6255 {
6256 offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6257 offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6258 }
6259
6260 DPRINTF((">>>> returning %d\n", rc));
6261 goto RETURN_MARK;
6262 }
6263
6264 /* Control gets here if there has been an error, or if the overall match
6265 attempt has failed at all permitted starting positions. */
6266
6267 if (using_temporary_offsets)
6268 {
6269 DPRINTF(("Freeing temporary memory\n"));
6270 (pcre_free)(md->offset_vector);
6271 }
6272
6273 /* For anything other than nomatch or partial match, just return the code. */
6274
6275 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6276 {
6277 DPRINTF((">>>> error: returning %d\n", rc));
6278 return rc;
6279 }
6280
6281 /* Handle partial matches - disable any mark data */
6282
6283 if (start_partial != NULL)
6284 {
6285 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6286 md->mark = NULL;
6287 if (offsetcount > 1)
6288 {
6289 offsets[0] = (int)(start_partial - (USPTR)subject);
6290 offsets[1] = (int)(end_subject - (USPTR)subject);
6291 }
6292 rc = PCRE_ERROR_PARTIAL;
6293 }
6294
6295 /* This is the classic nomatch case */
6296
6297 else
6298 {
6299 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6300 rc = PCRE_ERROR_NOMATCH;
6301 }
6302
6303 /* Return the MARK data if it has been requested. */
6304
6305 RETURN_MARK:
6306
6307 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6308 *(extra_data->mark) = (unsigned char *)(md->mark);
6309 return rc;
6310 }
6311
6312 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5