/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 551 - (show annotations)
Sun Oct 10 17:33:07 2010 UTC (8 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 186694 byte(s)
Make (*COMMIT) override (*THEN) and similar.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Flag bits for the match() function */
61
/* Bits that may be set in the "flags" argument of match(). */

#define match_condassert  (1 << 0)  /* Call is checking an assertion used as a condition */
#define match_cbegroup    (1 << 1)  /* Unlimited repeat group that could match nothing */

/* Ordinary results of match(). The externally defined PCRE_ERROR_xxx codes are
all negative, so these two non-negative values can never be mistaken for one. */

#define MATCH_NOMATCH     0
#define MATCH_MATCH       1

/* Results that are used only inside this module. They are pushed a long way
below zero so that they stay clear of the external error codes. */

#define MATCH_ACCEPT      (-999)
#define MATCH_COMMIT      (-998)
#define MATCH_PRUNE       (-997)
#define MATCH_SKIP        (-996)
#define MATCH_SKIP_ARG    (-995)
#define MATCH_THEN        (-994)
80
81 /* This is a convenience macro for code that occurs many times. */
82
/* Leave match() with result "ra", first copying the current mark pointer
(markptr) into the match data block so that the caller can retrieve it. The
expansion is a braced block that hands over to RRETURN for the actual exit. */

#define MRRETURN(ra) { md->mark = markptr; RRETURN(ra); }
88
89 /* Maximum number of ints of offset to save on the stack for recursive calls.
90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91 because the offset vector is always a multiple of 3 long. */
92
/* Number of ints in the on-stack save area (stacksave[]). Spelled as a product
so that it is plainly a multiple of 3. */
#define REC_STACK_SAVE_MAX (3 * 10)
94
95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96
/* Parallel six-entry tables giving the minimum and maximum repeat count for
each of the common repeat forms. In rep_max a value of 0 means "no upper
limit". NOTE(review): the entries appear to be ordered as pairs (greedy,
minimizing) for "zero or more", "one or more" and "zero or one" -- confirm
against the code that indexes these tables. */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
99
100
101
102 #ifdef PCRE_DEBUG
103 /*************************************************
104 * Debugging function to print chars *
105 *************************************************/
106
107 /* Print a sequence of chars in printable format, stopping at the end of the
108 subject if requested.
109
110 Arguments:
111 p points to characters
112 length number to print
113 is_subject TRUE if printing from within md->start_subject
114 md pointer to matching data block, if is_subject is TRUE
115
116 Returns: nothing
117 */
118
119 static void
120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121 {
122 unsigned int c;
123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124 while (length-- > 0)
125 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126 }
127 #endif
128
129
130
131 /*************************************************
132 * Match a back-reference *
133 *************************************************/
134
135 /* If a back reference hasn't been set, the length that is passed is greater
136 than the number of characters left in the string, so the match fails.
137
138 Arguments:
139 offset index into the offset vector
140 eptr points into the subject
141 length length to be matched
142 md points to match data block
143 ims the ims flags
144
145 Returns: TRUE if matched
146 */
147
148 static BOOL
149 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 unsigned long int ims)
151 {
152 USPTR p = md->start_subject + md->offset_vector[offset];
153
154 #ifdef PCRE_DEBUG
155 if (eptr >= md->end_subject)
156 printf("matching subject <null>");
157 else
158 {
159 printf("matching subject ");
160 pchars(eptr, length, TRUE, md);
161 }
162 printf(" against backref ");
163 pchars(p, length, FALSE, md);
164 printf("\n");
165 #endif
166
167 /* Always fail if not enough characters left */
168
169 if (length > md->end_subject - eptr) return FALSE;
170
171 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172 properly if Unicode properties are supported. Otherwise, we can check only
173 ASCII characters. */
174
175 if ((ims & PCRE_CASELESS) != 0)
176 {
177 #ifdef SUPPORT_UTF8
178 #ifdef SUPPORT_UCP
179 if (md->utf8)
180 {
181 USPTR endptr = eptr + length;
182 while (eptr < endptr)
183 {
184 int c, d;
185 GETCHARINC(c, eptr);
186 GETCHARINC(d, p);
187 if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 }
189 }
190 else
191 #endif
192 #endif
193
194 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195 is no UCP support. */
196
197 while (length-- > 0)
198 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 }
200
201 /* In the caseful case, we can just compare the bytes, whether or not we
202 are in UTF-8 mode. */
203
204 else
205 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206
207 return TRUE;
208 }
209
210
211
212 /***************************************************************************
213 ****************************************************************************
214 RECURSION IN THE match() FUNCTION
215
216 The match() function is highly recursive, though not every recursive call
217 increases the recursive depth. Nevertheless, some regular expressions can cause
218 it to recurse to a great depth. I was writing for Unix, so I just let it call
219 itself recursively. This uses the stack for saving everything that has to be
220 saved for a recursive call. On Unix, the stack can be large, and this works
221 fine.
222
223 It turns out that on some non-Unix-like systems there are problems with
224 programs that use a lot of stack. (This despite the fact that every last chip
225 has oodles of memory these days, and techniques for extending the stack have
226 been known for decades.) So....
227
228 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229 calls by keeping local variables that need to be preserved in blocks of memory
230 obtained from malloc() instead of on the stack. Macros are used to
231 achieve this so that the actual code doesn't look very different to what it
232 always used to.
233
234 The original heap-recursive code used longjmp(). However, it seems that this
235 can be very slow on some operating systems. Following a suggestion from Stan
236 Switzer, the use of longjmp() has been abolished, at the cost of having to
237 provide a unique number for each call to RMATCH. There is no way of generating
238 a sequence of numbers at compile time in C. I have given them names, to make
239 them stand out more clearly.
240
241 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 tests. Furthermore, not using longjmp() means that local dynamic variables
244 don't have indeterminate values; this has meant that the frame size can be
245 reduced because the result can be "passed back" by straight setting of the
246 variable instead of being passed in the frame.
247 ****************************************************************************
248 ***************************************************************************/
249
250 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251 below must be updated in sync. */
252
/* One identifier per RMATCH call site, numbered consecutively from 1 (RM1)
up to 62 (RM62). */

enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,     RM10, RM11, RM12, RM13, RM14, RM15, RM16,
  RM17,    RM18, RM19, RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30, RM31, RM32,
  RM33,    RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41,    RM42, RM43, RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57,    RM58, RM59, RM60, RM61, RM62
};
260
261 /* These versions of the macros use the stack, as normal. There are debugging
262 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 actually used in this definition. */
264
265 #ifndef NO_RECURSE
#define REGISTER register

/* The genuine recursive call that both forms of RMATCH below make. The
caller's current mstart and markptr are passed on, and the depth count is one
more than the caller's rdepth. */

#define RMATCH_CALL(ra,rb,rc,rd,re,rf,rg) \
  match(ra, rb, mstart, markptr, rc, rd, re, rf, rg, rdepth + 1)

#ifndef PCRE_DEBUG

/* Production forms: RMATCH stores the result of the call in rrc and RRETURN
is a plain return. The "rw" argument is accepted but not used. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = RMATCH_CALL(ra,rb,rc,rd,re,rf,rg)

#define RRETURN(ra) return ra

#else

/* Debugging forms: the same actions, with the source line printed on stdout
around each call and before each return. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = RMATCH_CALL(ra,rb,rc,rd,re,rf,rg); \
  printf("to line %d\n", __LINE__); \
  }

#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }

#endif
285
286 #else
287
288
289 /* These versions of the macros manage a private stack on the heap. Note that
290 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291 argument of match(), which never changes. */
292
/* Nothing is declared "register" in the heap-frame build. */

#define REGISTER

/* RMATCH stands in for a recursive call of match(). It obtains a new frame
via pcre_stack_malloc, records in the current frame which call site (rw) to
come back to, fills the new frame's argument fields (mstart and markptr are
copied from the current frame, the depth is one more than the current depth),
links it to the current frame, makes it current and jumps to HEAP_RECURSE.
The label L_<rw> that the macro plants is where execution continues once the
simulated call has returned. If no memory can be obtained the current
"call" is abandoned with PCRE_ERROR_NOMEMORY. The "rd" argument is accepted
but not used. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN stands in for "return ra". It releases the current frame and steps
back to the one below it. If there is a frame below, the result is placed in
rrc and control goes to HEAP_RETURN so that the simulated caller can resume;
otherwise this was the outermost level and match() really returns.

The macro is also used when the very first frame could not be allocated, at
which point "frame" is NULL. That case is handled explicitly: nothing is
dereferenced, pcre_stack_free is not called, and match() just returns. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  if (oldframe != NULL)\
    {\
    frame = oldframe->Xprevframe;\
    (pcre_stack_free)(oldframe);\
    }\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
329
330
331 /* Structure for remembering the local variables in a private frame */
332
333 typedef struct heapframe {
334 struct heapframe *Xprevframe;
335
336 /* Function arguments that may change */
337
338 USPTR Xeptr;
339 const uschar *Xecode;
340 USPTR Xmstart;
341 USPTR Xmarkptr;
342 int Xoffset_top;
343 long int Xims;
344 eptrblock *Xeptrb;
345 int Xflags;
346 unsigned int Xrdepth;
347
348 /* Function local variables */
349
350 USPTR Xcallpat;
351 #ifdef SUPPORT_UTF8
352 USPTR Xcharptr;
353 #endif
354 USPTR Xdata;
355 USPTR Xnext;
356 USPTR Xpp;
357 USPTR Xprev;
358 USPTR Xsaved_eptr;
359
360 recursion_info Xnew_recursive;
361
362 BOOL Xcur_is_word;
363 BOOL Xcondition;
364 BOOL Xprev_is_word;
365
366 unsigned long int Xoriginal_ims;
367
368 #ifdef SUPPORT_UCP
369 int Xprop_type;
370 int Xprop_value;
371 int Xprop_fail_result;
372 int Xprop_category;
373 int Xprop_chartype;
374 int Xprop_script;
375 int Xoclength;
376 uschar Xocchars[8];
377 #endif
378
379 int Xcodelink;
380 int Xctype;
381 unsigned int Xfc;
382 int Xfi;
383 int Xlength;
384 int Xmax;
385 int Xmin;
386 int Xnumber;
387 int Xoffset;
388 int Xop;
389 int Xsave_capture_last;
390 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
391 int Xstacksave[REC_STACK_SAVE_MAX];
392
393 eptrblock Xnewptrb;
394
395 /* Where to jump back to */
396
397 int Xwhere;
398
399 } heapframe;
400
401 #endif
402
403
404 /***************************************************************************
405 ***************************************************************************/
406
407
408
409 /*************************************************
410 * Match from current position *
411 *************************************************/
412
413 /* This function is called recursively in many circumstances. Whenever it
414 returns a negative (error) response, the outer incarnation must also return the
415 same response. */
416
417 /* These macros pack up tests that are used for partial matching, and which
418 appear several times in the code. We set the "hit end" flag if the pointer is
419 at the end of the subject and also past the start of the subject (i.e.
420 something has been matched). For hard partial matching, we then return
421 immediately. The second one is used when we already know we are past the end of
422 the subject. */
423
/* Common tail of the two partial-match checks below: note in the match data
that the end of the subject was hit and, when md->partial is greater than 1,
leave match() immediately with PCRE_ERROR_PARTIAL. */

#define PARTIAL_HIT_END()\
  {\
  md->hitend = TRUE;\
  if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
  }

/* Full check: partial matching is enabled, eptr has reached the end of the
subject, and eptr is beyond the match start point. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
    PARTIAL_HIT_END()

/* Short check, which omits the test of eptr against md->end_subject. */

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > mstart)\
    PARTIAL_HIT_END()
437
438
439 /* Performance note: It might be tempting to extract commonly used fields from
440 the md structure (e.g. utf8, end_subject) into individual variables to improve
441 performance. Tests using gcc on a SPARC disproved this; in the first case, it
442 made performance worse.
443
444 Arguments:
445 eptr pointer to current character in subject
446 ecode pointer to current position in compiled code
447 mstart pointer to the current match start position (can be modified
448 by encountering \K)
449 markptr pointer to the most recent MARK name, or NULL
450 offset_top current top pointer
451 md pointer to "static" info for the match
452 ims current /i, /m, and /s options
453 eptrb pointer to chain of blocks containing eptr at start of
454 brackets - for testing for empty matches
455 flags can contain
456 match_condassert - this is an assertion condition
457 match_cbegroup - this is the start of an unlimited repeat
458 group that can match an empty string
459 rdepth the recursion depth
460
461 Returns: MATCH_MATCH if matched ) these values are >= 0
462 MATCH_NOMATCH if failed to match )
463 a negative MATCH_xxx value for PRUNE, SKIP, etc
464 a negative PCRE_ERROR_xxx value if aborted by an error condition
465 (e.g. stopped by repeated call or recursion limit)
466 */
467
468 static int
469 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470 const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471 eptrblock *eptrb, int flags, unsigned int rdepth)
472 {
473 /* These variables do not need to be preserved over recursion in this function,
474 so they can be ordinary variables in all cases. Mark some of them with
475 "register" because they are used a lot in loops. */
476
477 register int rrc; /* Returns from recursive calls */
478 register int i; /* Used for loops not involving calls to RMATCH() */
479 register unsigned int c; /* Character values not kept over RMATCH() calls */
480 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
481
482 BOOL minimize, possessive; /* Quantifier options */
483 int condcode;
484
485 /* When recursion is not being used, all "local" variables that have to be
486 preserved over calls to RMATCH() are part of a "frame" which is obtained from
487 heap storage. Set up the top-level frame here; others are obtained from the
488 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
489
490 #ifdef NO_RECURSE
491 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493 frame->Xprevframe = NULL; /* Marks the top level */
494
495 /* Copy in the original argument variables */
496
497 frame->Xeptr = eptr;
498 frame->Xecode = ecode;
499 frame->Xmstart = mstart;
500 frame->Xmarkptr = markptr;
501 frame->Xoffset_top = offset_top;
502 frame->Xims = ims;
503 frame->Xeptrb = eptrb;
504 frame->Xflags = flags;
505 frame->Xrdepth = rdepth;
506
507 /* This is where control jumps back to in order to effect "recursion" */
508
509 HEAP_RECURSE:
510
511 /* Macros make the argument variables come from the current frame */
512
513 #define eptr frame->Xeptr
514 #define ecode frame->Xecode
515 #define mstart frame->Xmstart
516 #define markptr frame->Xmarkptr
517 #define offset_top frame->Xoffset_top
518 #define ims frame->Xims
519 #define eptrb frame->Xeptrb
520 #define flags frame->Xflags
521 #define rdepth frame->Xrdepth
522
523 /* Ditto for the local variables */
524
525 #ifdef SUPPORT_UTF8
526 #define charptr frame->Xcharptr
527 #endif
528 #define callpat frame->Xcallpat
529 #define codelink frame->Xcodelink
530 #define data frame->Xdata
531 #define next frame->Xnext
532 #define pp frame->Xpp
533 #define prev frame->Xprev
534 #define saved_eptr frame->Xsaved_eptr
535
536 #define new_recursive frame->Xnew_recursive
537
538 #define cur_is_word frame->Xcur_is_word
539 #define condition frame->Xcondition
540 #define prev_is_word frame->Xprev_is_word
541
542 #define original_ims frame->Xoriginal_ims
543
544 #ifdef SUPPORT_UCP
545 #define prop_type frame->Xprop_type
546 #define prop_value frame->Xprop_value
547 #define prop_fail_result frame->Xprop_fail_result
548 #define prop_category frame->Xprop_category
549 #define prop_chartype frame->Xprop_chartype
550 #define prop_script frame->Xprop_script
551 #define oclength frame->Xoclength
552 #define occhars frame->Xocchars
553 #endif
554
555 #define ctype frame->Xctype
556 #define fc frame->Xfc
557 #define fi frame->Xfi
558 #define length frame->Xlength
559 #define max frame->Xmax
560 #define min frame->Xmin
561 #define number frame->Xnumber
562 #define offset frame->Xoffset
563 #define op frame->Xop
564 #define save_capture_last frame->Xsave_capture_last
565 #define save_offset1 frame->Xsave_offset1
566 #define save_offset2 frame->Xsave_offset2
567 #define save_offset3 frame->Xsave_offset3
568 #define stacksave frame->Xstacksave
569
570 #define newptrb frame->Xnewptrb
571
572 /* When recursion is being used, local variables are allocated on the stack and
573 get preserved during recursion in the normal way. In this environment, fi and
574 i, and fc and c, can be the same variables. */
575
576 #else /* NO_RECURSE not defined */
577 #define fi i
578 #define fc c
579
580
581 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
582 const uschar *charptr; /* in small blocks of the code. My normal */
583 #endif /* style of coding would have declared */
584 const uschar *callpat; /* them within each of those blocks. */
585 const uschar *data; /* However, in order to accommodate the */
586 const uschar *next; /* version of this code that uses an */
587 USPTR pp; /* external "stack" implemented on the */
588 const uschar *prev; /* heap, it is easier to declare them all */
589 USPTR saved_eptr; /* here, so the declarations can be cut */
590 /* out in a block. The only declarations */
591 recursion_info new_recursive; /* within blocks below are for variables */
592 /* that do not have to be preserved over */
593 BOOL cur_is_word; /* a recursive call to RMATCH(). */
594 BOOL condition;
595 BOOL prev_is_word;
596
597 unsigned long int original_ims;
598
599 #ifdef SUPPORT_UCP
600 int prop_type;
601 int prop_value;
602 int prop_fail_result;
603 int prop_category;
604 int prop_chartype;
605 int prop_script;
606 int oclength;
607 uschar occhars[8];
608 #endif
609
610 int codelink;
611 int ctype;
612 int length;
613 int max;
614 int min;
615 int number;
616 int offset;
617 int op;
618 int save_capture_last;
619 int save_offset1, save_offset2, save_offset3;
620 int stacksave[REC_STACK_SAVE_MAX];
621
622 eptrblock newptrb;
623 #endif /* NO_RECURSE */
624
625 /* These statements are here to stop the compiler complaining about uninitialized
626 variables. */
627
628 #ifdef SUPPORT_UCP
629 prop_value = 0;
630 prop_fail_result = 0;
631 #endif
632
633
634 /* This label is used for tail recursion, which is used in a few cases even
635 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
636 used. Thanks to Ian Taylor for noticing this possibility and sending the
637 original patch. */
638
639 TAIL_RECURSE:
640
641 /* OK, now we can get on with the real code of the function. Recursive calls
642 are specified by the macro RMATCH and RRETURN is used to return. When
643 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645 defined). However, RMATCH isn't like a function call because it's quite a
646 complicated macro. It has to be used in one particular way. This shouldn't,
647 however, impact performance when true recursion is being used. */
648
649 #ifdef SUPPORT_UTF8
650 utf8 = md->utf8; /* Local copy of the flag */
651 #else
652 utf8 = FALSE;
653 #endif
654
655 /* First check that we haven't called match() too many times, or that we
656 haven't exceeded the recursive call limit. */
657
658 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
659 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
660
661 original_ims = ims; /* Save for resetting on ')' */
662
663 /* At the start of a group with an unlimited repeat that may match an empty
664 string, the match_cbegroup flag is set. When this is the case, add the current
665 subject pointer to the chain of such remembered pointers, to be checked when we
666 hit the closing ket, in order to break infinite loops that match no characters.
667 When match() is called in other circumstances, don't add to the chain. The
668 match_cbegroup flag must NOT be used with tail recursion, because the memory
669 block that is used is on the stack, so a new one may be required for each
670 match(). */
671
672 if ((flags & match_cbegroup) != 0)
673 {
674 newptrb.epb_saved_eptr = eptr;
675 newptrb.epb_prev = eptrb;
676 eptrb = &newptrb;
677 }
678
679 /* Now start processing the opcodes. */
680
681 for (;;)
682 {
683 minimize = possessive = FALSE;
684 op = *ecode;
685
686 switch(op)
687 {
688 case OP_MARK:
689 markptr = ecode + 2;
690 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691 ims, eptrb, flags, RM55);
692
693 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694 argument, and we must check whether that argument matches this MARK's
695 argument. It is passed back in md->start_match_ptr (an overloading of that
696 variable). If it does match, we reset that variable to the current subject
697 position and return MATCH_SKIP. Otherwise, pass back the return code
698 unaltered. */
699
700 if (rrc == MATCH_SKIP_ARG &&
701 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702 {
703 md->start_match_ptr = eptr;
704 RRETURN(MATCH_SKIP);
705 }
706
707 if (md->mark == NULL) md->mark = markptr;
708 RRETURN(rrc);
709
710 case OP_FAIL:
711 MRRETURN(MATCH_NOMATCH);
712
713 /* COMMIT overrides PRUNE, SKIP, and THEN */
714
715 case OP_COMMIT:
716 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717 ims, eptrb, flags, RM52);
718 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
719 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
720 rrc != MATCH_THEN)
721 RRETURN(rrc);
722 MRRETURN(MATCH_COMMIT);
723
724 /* PRUNE overrides THEN */
725
726 case OP_PRUNE:
727 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
728 ims, eptrb, flags, RM51);
729 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
730 MRRETURN(MATCH_PRUNE);
731
732 case OP_PRUNE_ARG:
733 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
734 ims, eptrb, flags, RM56);
735 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736 md->mark = ecode + 2;
737 RRETURN(MATCH_PRUNE);
738
739 /* SKIP overrides PRUNE and THEN */
740
741 case OP_SKIP:
742 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
743 ims, eptrb, flags, RM53);
744 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
745 RRETURN(rrc);
746 md->start_match_ptr = eptr; /* Pass back current position */
747 MRRETURN(MATCH_SKIP);
748
749 case OP_SKIP_ARG:
750 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
751 ims, eptrb, flags, RM57);
752 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
753 RRETURN(rrc);
754
755 /* Pass back the current skip name by overloading md->start_match_ptr and
756 returning the special MATCH_SKIP_ARG return code. This will either be
757 caught by a matching MARK, or get to the top, where it is treated the same
758 as PRUNE. */
759
760 md->start_match_ptr = ecode + 2;
761 RRETURN(MATCH_SKIP_ARG);
762
763 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
764 the alt that is at the start of the current branch. This makes it possible
765 to skip back past alternatives that precede the THEN within the current
766 branch. */
767
768 case OP_THEN:
769 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
770 ims, eptrb, flags, RM54);
771 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
772 md->start_match_ptr = ecode - GET(ecode, 1);
773 MRRETURN(MATCH_THEN);
774
775 case OP_THEN_ARG:
776 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
777 offset_top, md, ims, eptrb, flags, RM58);
778 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
779 md->start_match_ptr = ecode - GET(ecode, 1);
780 md->mark = ecode + LINK_SIZE + 2;
781 RRETURN(MATCH_THEN);
782
783 /* Handle a capturing bracket. If there is space in the offset vector, save
784 the current subject position in the working slot at the top of the vector.
785 We mustn't change the current values of the data slot, because they may be
786 set from a previous iteration of this group, and be referred to by a
787 reference inside the group.
788
789 If the bracket fails to match, we need to restore this value and also the
790 values of the final offsets, in case they were set by a previous iteration
791 of the same bracket.
792
793 If there isn't enough space in the offset vector, treat this as if it were
794 a non-capturing bracket. Don't worry about setting the flag for the error
795 case here; that is handled in the code for KET. */
796
797 case OP_CBRA:
798 case OP_SCBRA:
799 number = GET2(ecode, 1+LINK_SIZE);
800 offset = number << 1;
801
802 #ifdef PCRE_DEBUG
803 printf("start bracket %d\n", number);
804 printf("subject=");
805 pchars(eptr, 16, TRUE, md);
806 printf("\n");
807 #endif
808
809 if (offset < md->offset_max)
810 {
811 save_offset1 = md->offset_vector[offset];
812 save_offset2 = md->offset_vector[offset+1];
813 save_offset3 = md->offset_vector[md->offset_end - number];
814 save_capture_last = md->capture_last;
815
816 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
817 md->offset_vector[md->offset_end - number] =
818 (int)(eptr - md->start_subject);
819
820 flags = (op == OP_SCBRA)? match_cbegroup : 0;
821 do
822 {
823 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
824 ims, eptrb, flags, RM1);
825 if (rrc != MATCH_NOMATCH &&
826 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
827 RRETURN(rrc);
828 md->capture_last = save_capture_last;
829 ecode += GET(ecode, 1);
830 }
831 while (*ecode == OP_ALT);
832
833 DPRINTF(("bracket %d failed\n", number));
834
835 md->offset_vector[offset] = save_offset1;
836 md->offset_vector[offset+1] = save_offset2;
837 md->offset_vector[md->offset_end - number] = save_offset3;
838
839 if (rrc != MATCH_THEN) md->mark = markptr;
840 RRETURN(MATCH_NOMATCH);
841 }
842
843 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
844 as a non-capturing bracket. */
845
846 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
847 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
848
849 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
850
851 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
852 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853
854 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
855 final alternative within the brackets, we would return the result of a
856 recursive call to match() whatever happened. We can reduce stack usage by
857 turning this into a tail recursion, except in the case when match_cbegroup
858 is set.*/
859
860 case OP_BRA:
861 case OP_SBRA:
862 DPRINTF(("start non-capturing bracket\n"));
863 flags = (op >= OP_SBRA)? match_cbegroup : 0;
864 for (;;)
865 {
866 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
867 {
868 if (flags == 0) /* Not a possibly empty group */
869 {
870 ecode += _pcre_OP_lengths[*ecode];
871 DPRINTF(("bracket 0 tail recursion\n"));
872 goto TAIL_RECURSE;
873 }
874
875 /* Possibly empty group; can't use tail recursion. */
876
877 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
878 eptrb, flags, RM48);
879 if (rrc == MATCH_NOMATCH) md->mark = markptr;
880 RRETURN(rrc);
881 }
882
883 /* For non-final alternatives, continue the loop for a NOMATCH result;
884 otherwise return. */
885
886 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
887 eptrb, flags, RM2);
888 if (rrc != MATCH_NOMATCH &&
889 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
890 RRETURN(rrc);
891 ecode += GET(ecode, 1);
892 }
893 /* Control never reaches here. */
894
895 /* Conditional group: compilation checked that there are no more than
896 two branches. If the condition is false, skipping the first branch takes us
897 past the end if there is only one branch, but that's OK because that is
898 exactly what going to the ket would do. As there is only one branch to be
899 obeyed, we can use tail recursion to avoid using another stack frame. */
900
901 case OP_COND:
902 case OP_SCOND:
903 codelink= GET(ecode, 1);
904
905 /* Because of the way auto-callout works during compile, a callout item is
906 inserted between OP_COND and an assertion condition. */
907
908 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
909 {
910 if (pcre_callout != NULL)
911 {
912 pcre_callout_block cb;
913 cb.version = 1; /* Version 1 of the callout block */
914 cb.callout_number = ecode[LINK_SIZE+2];
915 cb.offset_vector = md->offset_vector;
916 cb.subject = (PCRE_SPTR)md->start_subject;
917 cb.subject_length = (int)(md->end_subject - md->start_subject);
918 cb.start_match = (int)(mstart - md->start_subject);
919 cb.current_position = (int)(eptr - md->start_subject);
920 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
921 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
922 cb.capture_top = offset_top/2;
923 cb.capture_last = md->capture_last;
924 cb.callout_data = md->callout_data;
925 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
926 if (rrc < 0) RRETURN(rrc);
927 }
928 ecode += _pcre_OP_lengths[OP_CALLOUT];
929 }
930
931 condcode = ecode[LINK_SIZE+1];
932
933 /* Now see what the actual condition is */
934
935 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
936 {
937 if (md->recursive == NULL) /* Not recursing => FALSE */
938 {
939 condition = FALSE;
940 ecode += GET(ecode, 1);
941 }
942 else
943 {
944 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
945 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
946
947 /* If the test is for recursion into a specific subpattern, and it is
948 false, but the test was set up by name, scan the table to see if the
949 name refers to any other numbers, and test them. The condition is true
950 if any one is set. */
951
952 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
953 {
954 uschar *slotA = md->name_table;
955 for (i = 0; i < md->name_count; i++)
956 {
957 if (GET2(slotA, 0) == recno) break;
958 slotA += md->name_entry_size;
959 }
960
961 /* Found a name for the number - there can be only one; duplicate
962 names for different numbers are allowed, but not vice versa. First
963 scan down for duplicates. */
964
965 if (i < md->name_count)
966 {
967 uschar *slotB = slotA;
968 while (slotB > md->name_table)
969 {
970 slotB -= md->name_entry_size;
971 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
972 {
973 condition = GET2(slotB, 0) == md->recursive->group_num;
974 if (condition) break;
975 }
976 else break;
977 }
978
979 /* Scan up for duplicates */
980
981 if (!condition)
982 {
983 slotB = slotA;
984 for (i++; i < md->name_count; i++)
985 {
986 slotB += md->name_entry_size;
987 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
988 {
989 condition = GET2(slotB, 0) == md->recursive->group_num;
990 if (condition) break;
991 }
992 else break;
993 }
994 }
995 }
996 }
997
998 /* Choose branch according to the condition */
999
1000 ecode += condition? 3 : GET(ecode, 1);
1001 }
1002 }
1003
1004 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1005 {
1006 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1007 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1008
1009 /* If the numbered capture is unset, but the reference was by name,
1010 scan the table to see if the name refers to any other numbers, and test
1011 them. The condition is true if any one is set. This is tediously similar
1012 to the code above, but not close enough to try to amalgamate. */
1013
1014 if (!condition && condcode == OP_NCREF)
1015 {
1016 int refno = offset >> 1;
1017 uschar *slotA = md->name_table;
1018
1019 for (i = 0; i < md->name_count; i++)
1020 {
1021 if (GET2(slotA, 0) == refno) break;
1022 slotA += md->name_entry_size;
1023 }
1024
1025 /* Found a name for the number - there can be only one; duplicate names
1026 for different numbers are allowed, but not vice versa. First scan down
1027 for duplicates. */
1028
1029 if (i < md->name_count)
1030 {
1031 uschar *slotB = slotA;
1032 while (slotB > md->name_table)
1033 {
1034 slotB -= md->name_entry_size;
1035 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1036 {
1037 offset = GET2(slotB, 0) << 1;
1038 condition = offset < offset_top &&
1039 md->offset_vector[offset] >= 0;
1040 if (condition) break;
1041 }
1042 else break;
1043 }
1044
1045 /* Scan up for duplicates */
1046
1047 if (!condition)
1048 {
1049 slotB = slotA;
1050 for (i++; i < md->name_count; i++)
1051 {
1052 slotB += md->name_entry_size;
1053 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054 {
1055 offset = GET2(slotB, 0) << 1;
1056 condition = offset < offset_top &&
1057 md->offset_vector[offset] >= 0;
1058 if (condition) break;
1059 }
1060 else break;
1061 }
1062 }
1063 }
1064 }
1065
1066 /* Choose branch according to the condition */
1067
1068 ecode += condition? 3 : GET(ecode, 1);
1069 }
1070
1071 else if (condcode == OP_DEF) /* DEFINE - always false */
1072 {
1073 condition = FALSE;
1074 ecode += GET(ecode, 1);
1075 }
1076
1077 /* The condition is an assertion. Call match() to evaluate it - setting
1078 the final argument match_condassert causes it to stop at the end of an
1079 assertion. */
1080
1081 else
1082 {
1083 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1084 match_condassert, RM3);
1085 if (rrc == MATCH_MATCH)
1086 {
1087 condition = TRUE;
1088 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1089 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1090 }
1091 else if (rrc != MATCH_NOMATCH &&
1092 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1093 {
1094 RRETURN(rrc); /* Need braces because of following else */
1095 }
1096 else
1097 {
1098 condition = FALSE;
1099 ecode += codelink;
1100 }
1101 }
1102
1103 /* We are now at the branch that is to be obeyed. As there is only one,
1104 we can use tail recursion to avoid using another stack frame, except when
1105 match_cbegroup is required for an unlimited repeat of a possibly empty
1106 group. If the second alternative doesn't exist, we can just plough on. */
1107
1108 if (condition || *ecode == OP_ALT)
1109 {
1110 ecode += 1 + LINK_SIZE;
1111 if (op == OP_SCOND) /* Possibly empty group */
1112 {
1113 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1114 RRETURN(rrc);
1115 }
1116 else /* Group must match something */
1117 {
1118 flags = 0;
1119 goto TAIL_RECURSE;
1120 }
1121 }
1122 else /* Condition false & no alternative */
1123 {
1124 ecode += 1 + LINK_SIZE;
1125 }
1126 break;
1127
1128
1129 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1130 to close any currently open capturing brackets. */
1131
1132 case OP_CLOSE:
1133 number = GET2(ecode, 1);
1134 offset = number << 1;
1135
1136 #ifdef PCRE_DEBUG
1137 printf("end bracket %d at *ACCEPT", number);
1138 printf("\n");
1139 #endif
1140
1141 md->capture_last = number;
1142 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1143 {
1144 md->offset_vector[offset] =
1145 md->offset_vector[md->offset_end - number];
1146 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1147 if (offset_top <= offset) offset_top = offset + 2;
1148 }
1149 ecode += 3;
1150 break;
1151
1152
1153 /* End of the pattern, either real or forced. If we are in a top-level
1154 recursion, we should restore the offsets appropriately and continue from
1155 after the call. */
1156
1157 case OP_ACCEPT:
1158 case OP_END:
1159 if (md->recursive != NULL && md->recursive->group_num == 0)
1160 {
1161 recursion_info *rec = md->recursive;
1162 DPRINTF(("End of pattern in a (?0) recursion\n"));
1163 md->recursive = rec->prevrec;
1164 memmove(md->offset_vector, rec->offset_save,
1165 rec->saved_max * sizeof(int));
1166 offset_top = rec->save_offset_top;
1167 ims = original_ims;
1168 ecode = rec->after_call;
1169 break;
1170 }
1171
1172 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1173 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1174 the subject. In both cases, backtracking will then try other alternatives,
1175 if any. */
1176
1177 if (eptr == mstart &&
1178 (md->notempty ||
1179 (md->notempty_atstart &&
1180 mstart == md->start_subject + md->start_offset)))
1181 MRRETURN(MATCH_NOMATCH);
1182
1183 /* Otherwise, we have a match. */
1184
1185 md->end_match_ptr = eptr; /* Record where we ended */
1186 md->end_offset_top = offset_top; /* and how many extracts were taken */
1187 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1188
1189 /* For some reason, the macros don't work properly if an expression is
1190 given as the argument to MRRETURN when the heap is in use. */
1191
1192 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1193 MRRETURN(rrc);
1194
1195 /* Change option settings */
1196
1197 case OP_OPT:
1198 ims = ecode[1];
1199 ecode += 2;
1200 DPRINTF(("ims set to %02lx\n", ims));
1201 break;
1202
1203 /* Assertion brackets. Check the alternative branches in turn - the
1204 matching won't pass the KET for an assertion. If any one branch matches,
1205 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1206 start of each branch to move the current point backwards, so the code at
1207 this level is identical to the lookahead case. */
1208
1209 case OP_ASSERT:
1210 case OP_ASSERTBACK:
1211 do
1212 {
1213 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214 RM4);
1215 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1216 {
1217 mstart = md->start_match_ptr; /* In case \K reset it */
1218 break;
1219 }
1220 if (rrc != MATCH_NOMATCH &&
1221 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1222 RRETURN(rrc);
1223 ecode += GET(ecode, 1);
1224 }
1225 while (*ecode == OP_ALT);
1226 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1227
1228 /* If checking an assertion for a condition, return MATCH_MATCH. */
1229
1230 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1231
1232 /* Continue from after the assertion, updating the offsets high water
1233 mark, since extracts may have been taken during the assertion. */
1234
1235 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1236 ecode += 1 + LINK_SIZE;
1237 offset_top = md->end_offset_top;
1238 continue;
1239
1240 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1241 PRUNE, or COMMIT means we must assume failure without checking subsequent
1242 branches. */
1243
1244 case OP_ASSERT_NOT:
1245 case OP_ASSERTBACK_NOT:
1246 do
1247 {
1248 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1249 RM5);
1250 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1251 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1252 {
1253 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254 break;
1255 }
1256 if (rrc != MATCH_NOMATCH &&
1257 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1258 RRETURN(rrc);
1259 ecode += GET(ecode,1);
1260 }
1261 while (*ecode == OP_ALT);
1262
1263 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1264
1265 ecode += 1 + LINK_SIZE;
1266 continue;
1267
1268 /* Move the subject pointer back. This occurs only at the start of
1269 each branch of a lookbehind assertion. If we are too close to the start to
1270 move back, this match function fails. When working with UTF-8 we move
1271 back a number of characters, not bytes. */
1272
1273 case OP_REVERSE:
1274 #ifdef SUPPORT_UTF8
1275 if (utf8)
1276 {
1277 i = GET(ecode, 1);
1278 while (i-- > 0)
1279 {
1280 eptr--;
1281 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1282 BACKCHAR(eptr);
1283 }
1284 }
1285 else
1286 #endif
1287
1288 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1289
1290 {
1291 eptr -= GET(ecode, 1);
1292 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1293 }
1294
1295 /* Save the earliest consulted character, then skip to next op code */
1296
1297 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1298 ecode += 1 + LINK_SIZE;
1299 break;
1300
1301 /* The callout item calls an external function, if one is provided, passing
1302 details of the match so far. This is mainly for debugging, though the
1303 function is able to force a failure. */
1304
1305 case OP_CALLOUT:
1306 if (pcre_callout != NULL)
1307 {
1308 pcre_callout_block cb;
1309 cb.version = 1; /* Version 1 of the callout block */
1310 cb.callout_number = ecode[1];
1311 cb.offset_vector = md->offset_vector;
1312 cb.subject = (PCRE_SPTR)md->start_subject;
1313 cb.subject_length = (int)(md->end_subject - md->start_subject);
1314 cb.start_match = (int)(mstart - md->start_subject);
1315 cb.current_position = (int)(eptr - md->start_subject);
1316 cb.pattern_position = GET(ecode, 2);
1317 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1318 cb.capture_top = offset_top/2;
1319 cb.capture_last = md->capture_last;
1320 cb.callout_data = md->callout_data;
1321 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1322 if (rrc < 0) RRETURN(rrc);
1323 }
1324 ecode += 2 + 2*LINK_SIZE;
1325 break;
1326
1327 /* Recursion either matches the current regex, or some subexpression. The
1328 offset data is the offset to the starting bracket from the start of the
1329 whole pattern. (This is so that it works from duplicated subpatterns.)
1330
1331 If there are any capturing brackets started but not finished, we have to
1332 save their starting points and reinstate them after the recursion. However,
1333 we don't know how many such there are (offset_top records the completed
1334 total) so we just have to save all the potential data. There may be up to
1335 65535 such values, which is too large to put on the stack, but using malloc
1336 for small numbers seems expensive. As a compromise, the stack is used when
1337 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1338 is used. If the malloc fails, the saved offsets cannot be stored, so the
1339 code below does not attempt the recursion: it abandons the match at this
1340 point by returning PCRE_ERROR_NOMEMORY.
1341
1342 There are also other values that have to be saved. We use a chained
1343 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1344 for the original version of this logic. */
1345
1346 case OP_RECURSE:
1347 {
1348 callpat = md->start_code + GET(ecode, 1);
1349 new_recursive.group_num = (callpat == md->start_code)? 0 :
1350 GET2(callpat, 1 + LINK_SIZE);
1351
1352 /* Add to "recursing stack" */
1353
1354 new_recursive.prevrec = md->recursive;
1355 md->recursive = &new_recursive;
1356
1357 /* Find where to continue from afterwards */
1358
1359 ecode += 1 + LINK_SIZE;
1360 new_recursive.after_call = ecode;
1361
1362 /* Now save the offset data. */
1363
1364 new_recursive.saved_max = md->offset_end;
1365 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1366 new_recursive.offset_save = stacksave;
1367 else
1368 {
1369 new_recursive.offset_save =
1370 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1371 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1372 }
1373
1374 memcpy(new_recursive.offset_save, md->offset_vector,
1375 new_recursive.saved_max * sizeof(int));
1376 new_recursive.save_offset_top = offset_top;
1377
1378 /* OK, now we can do the recursion. For each top-level alternative we
1379 restore the offset and recursion data. */
1380
1381 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1382 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1383 do
1384 {
1385 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1386 md, ims, eptrb, flags, RM6);
1387 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1388 {
1389 DPRINTF(("Recursion matched\n"));
1390 md->recursive = new_recursive.prevrec;
1391 if (new_recursive.offset_save != stacksave)
1392 (pcre_free)(new_recursive.offset_save);
1393 MRRETURN(MATCH_MATCH);
1394 }
1395 else if (rrc != MATCH_NOMATCH &&
1396 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1397 {
1398 DPRINTF(("Recursion gave error %d\n", rrc));
1399 if (new_recursive.offset_save != stacksave)
1400 (pcre_free)(new_recursive.offset_save);
1401 RRETURN(rrc);
1402 }
1403
1404 md->recursive = &new_recursive;
1405 memcpy(md->offset_vector, new_recursive.offset_save,
1406 new_recursive.saved_max * sizeof(int));
1407 callpat += GET(callpat, 1);
1408 }
1409 while (*callpat == OP_ALT);
1410
1411 DPRINTF(("Recursion didn't match\n"));
1412 md->recursive = new_recursive.prevrec;
1413 if (new_recursive.offset_save != stacksave)
1414 (pcre_free)(new_recursive.offset_save);
1415 MRRETURN(MATCH_NOMATCH);
1416 }
1417 /* Control never reaches here */
1418
1419 /* "Once" brackets are like assertion brackets except that after a match,
1420 the point in the subject string is not moved back. Thus there can never be
1421 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1422 Check the alternative branches in turn - the matching won't pass the KET
1423 for this kind of subpattern. If any one branch matches, we carry on as at
1424 the end of a normal bracket, leaving the subject pointer, but resetting
1425 the start-of-match value in case it was changed by \K. */
1426
1427 case OP_ONCE:
1428 prev = ecode;
1429 saved_eptr = eptr;
1430
1431 do
1432 {
1433 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1434 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1435 {
1436 mstart = md->start_match_ptr;
1437 break;
1438 }
1439 if (rrc != MATCH_NOMATCH &&
1440 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1441 RRETURN(rrc);
1442 ecode += GET(ecode,1);
1443 }
1444 while (*ecode == OP_ALT);
1445
1446 /* If we hit the end of the group (which could be repeated), fail */
1447
1448 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1449
1450 /* Continue as from after the assertion, updating the offsets high water
1451 mark, since extracts may have been taken. */
1452
1453 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1454
1455 offset_top = md->end_offset_top;
1456 eptr = md->end_match_ptr;
1457
1458 /* For a non-repeating ket, just continue at this level. This also
1459 happens for a repeating ket if no characters were matched in the group.
1460 This is the forcible breaking of infinite loops as implemented in Perl
1461 5.005. If there is an options reset, it will get obeyed in the normal
1462 course of events. */
1463
1464 if (*ecode == OP_KET || eptr == saved_eptr)
1465 {
1466 ecode += 1+LINK_SIZE;
1467 break;
1468 }
1469
1470 /* The repeating kets try the rest of the pattern or restart from the
1471 preceding bracket, in the appropriate order. The second "call" of match()
1472 uses tail recursion, to avoid using another stack frame. We need to reset
1473 any options that changed within the bracket before re-running it, so
1474 check the next opcode. */
1475
1476 if (ecode[1+LINK_SIZE] == OP_OPT)
1477 {
1478 ims = (ims & ~PCRE_IMS) | ecode[4];
1479 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1480 }
1481
1482 if (*ecode == OP_KETRMIN)
1483 {
1484 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1485 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1486 ecode = prev;
1487 flags = 0;
1488 goto TAIL_RECURSE;
1489 }
1490 else /* OP_KETRMAX */
1491 {
1492 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1493 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1494 ecode += 1 + LINK_SIZE;
1495 flags = 0;
1496 goto TAIL_RECURSE;
1497 }
1498 /* Control never gets here */
1499
1500 /* An alternation is the end of a branch; scan along to find the end of the
1501 bracketed group and go to there. */
1502
1503 case OP_ALT:
1504 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1505 break;
1506
1507 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1508 indicating that it may occur zero times. It may repeat infinitely, or not
1509 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1510 with fixed upper repeat limits are compiled as a number of copies, with the
1511 optional ones preceded by BRAZERO or BRAMINZERO. */
1512
1513 case OP_BRAZERO:
1514 {
1515 next = ecode+1;
1516 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1517 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518 do next += GET(next,1); while (*next == OP_ALT);
1519 ecode = next + 1 + LINK_SIZE;
1520 }
1521 break;
1522
1523 case OP_BRAMINZERO:
1524 {
1525 next = ecode+1;
1526 do next += GET(next, 1); while (*next == OP_ALT);
1527 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1528 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1529 ecode++;
1530 }
1531 break;
1532
1533 case OP_SKIPZERO:
1534 {
1535 next = ecode+1;
1536 do next += GET(next,1); while (*next == OP_ALT);
1537 ecode = next + 1 + LINK_SIZE;
1538 }
1539 break;
1540
1541 /* End of a group, repeated or non-repeating. */
1542
1543 case OP_KET:
1544 case OP_KETRMIN:
1545 case OP_KETRMAX:
1546 prev = ecode - GET(ecode, 1);
1547
1548 /* If this was a group that remembered the subject start, in order to break
1549 infinite repeats of empty string matches, retrieve the subject start from
1550 the chain. Otherwise, set it NULL. */
1551
1552 if (*prev >= OP_SBRA)
1553 {
1554 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1555 eptrb = eptrb->epb_prev; /* Backup to previous group */
1556 }
1557 else saved_eptr = NULL;
1558
1559 /* If we are at the end of an assertion group or an atomic group, stop
1560 matching and return MATCH_MATCH, but record the current high water mark for
1561 use by positive assertions. We also need to record the match start in case
1562 it was changed by \K. */
1563
1564 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1565 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1566 *prev == OP_ONCE)
1567 {
1568 md->end_match_ptr = eptr; /* For ONCE */
1569 md->end_offset_top = offset_top;
1570 md->start_match_ptr = mstart;
1571 MRRETURN(MATCH_MATCH);
1572 }
1573
1574 /* For capturing groups we have to check the group number back at the start
1575 and if necessary complete handling an extraction by setting the offsets and
1576 bumping the high water mark. Note that whole-pattern recursion is coded as
1577 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1578 when the OP_END is reached. Other recursion is handled here. */
1579
1580 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1581 {
1582 number = GET2(prev, 1+LINK_SIZE);
1583 offset = number << 1;
1584
1585 #ifdef PCRE_DEBUG
1586 printf("end bracket %d", number);
1587 printf("\n");
1588 #endif
1589
1590 md->capture_last = number;
1591 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1592 {
1593 md->offset_vector[offset] =
1594 md->offset_vector[md->offset_end - number];
1595 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1596 if (offset_top <= offset) offset_top = offset + 2;
1597 }
1598
1599 /* Handle a recursively called group. Restore the offsets
1600 appropriately and continue from after the call. */
1601
1602 if (md->recursive != NULL && md->recursive->group_num == number)
1603 {
1604 recursion_info *rec = md->recursive;
1605 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1606 md->recursive = rec->prevrec;
1607 memcpy(md->offset_vector, rec->offset_save,
1608 rec->saved_max * sizeof(int));
1609 offset_top = rec->save_offset_top;
1610 ecode = rec->after_call;
1611 ims = original_ims;
1612 break;
1613 }
1614 }
1615
1616 /* For both capturing and non-capturing groups, reset the value of the ims
1617 flags, in case they got changed during the group. */
1618
1619 ims = original_ims;
1620 DPRINTF(("ims reset to %02lx\n", ims));
1621
1622 /* For a non-repeating ket, just continue at this level. This also
1623 happens for a repeating ket if no characters were matched in the group.
1624 This is the forcible breaking of infinite loops as implemented in Perl
1625 5.005. If there is an options reset, it will get obeyed in the normal
1626 course of events. */
1627
1628 if (*ecode == OP_KET || eptr == saved_eptr)
1629 {
1630 ecode += 1 + LINK_SIZE;
1631 break;
1632 }
1633
1634 /* The repeating kets try the rest of the pattern or restart from the
1635 preceding bracket, in the appropriate order. In the second case, we can use
1636 tail recursion to avoid using another stack frame, unless we have an
1637 unlimited repeat of a group that can match an empty string. */
1638
1639 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1640
1641 if (*ecode == OP_KETRMIN)
1642 {
1643 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1644 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1645 if (flags != 0) /* Could match an empty string */
1646 {
1647 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1648 RRETURN(rrc);
1649 }
1650 ecode = prev;
1651 goto TAIL_RECURSE;
1652 }
1653 else /* OP_KETRMAX */
1654 {
1655 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1656 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657 ecode += 1 + LINK_SIZE;
1658 flags = 0;
1659 goto TAIL_RECURSE;
1660 }
1661 /* Control never gets here */
1662
1663 /* Start of subject unless notbol, or after internal newline if multiline */
1664
1665 case OP_CIRC:
1666 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1667 if ((ims & PCRE_MULTILINE) != 0)
1668 {
1669 if (eptr != md->start_subject &&
1670 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1671 MRRETURN(MATCH_NOMATCH);
1672 ecode++;
1673 break;
1674 }
1675 /* ... else fall through */
1676
1677 /* Start of subject assertion */
1678
1679 case OP_SOD:
1680 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1681 ecode++;
1682 break;
1683
1684 /* Start of match assertion */
1685
1686 case OP_SOM:
1687 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1688 ecode++;
1689 break;
1690
1691 /* Reset the start of match point */
1692
1693 case OP_SET_SOM:
1694 mstart = eptr;
1695 ecode++;
1696 break;
1697
1698 /* Assert before internal newline if multiline, or before a terminating
1699 newline unless endonly is set, else end of subject unless noteol is set. */
1700
1701 case OP_DOLL:
1702 if ((ims & PCRE_MULTILINE) != 0)
1703 {
1704 if (eptr < md->end_subject)
1705 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1706 else
1707 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1708 ecode++;
1709 break;
1710 }
1711 else
1712 {
1713 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1714 if (!md->endonly)
1715 {
1716 if (eptr != md->end_subject &&
1717 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718 MRRETURN(MATCH_NOMATCH);
1719 ecode++;
1720 break;
1721 }
1722 }
1723 /* ... else fall through for endonly */
1724
1725 /* End of subject assertion (\z) */
1726
1727 case OP_EOD:
1728 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1729 ecode++;
1730 break;
1731
1732 /* End of subject or ending \n assertion (\Z) */
1733
1734 case OP_EODN:
1735 if (eptr != md->end_subject &&
1736 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737 MRRETURN(MATCH_NOMATCH);
1738 ecode++;
1739 break;
1740
1741 /* Word boundary assertions */
1742
1743 case OP_NOT_WORD_BOUNDARY:
1744 case OP_WORD_BOUNDARY:
1745 {
1746
1747 /* Find out if the previous and current characters are "word" characters.
1748 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1749 be "non-word" characters. Remember the earliest consulted character for
1750 partial matching. */
1751
1752 #ifdef SUPPORT_UTF8
1753 if (utf8)
1754 {
1755 /* Get status of previous character */
1756
1757 if (eptr == md->start_subject) prev_is_word = FALSE; else
1758 {
1759 USPTR lastptr = eptr - 1;
1760 while((*lastptr & 0xc0) == 0x80) lastptr--;
1761 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1762 GETCHAR(c, lastptr);
1763 #ifdef SUPPORT_UCP
1764 if (md->use_ucp)
1765 {
1766 if (c == '_') prev_is_word = TRUE; else
1767 {
1768 int cat = UCD_CATEGORY(c);
1769 prev_is_word = (cat == ucp_L || cat == ucp_N);
1770 }
1771 }
1772 else
1773 #endif
1774 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1775 }
1776
1777 /* Get status of next character */
1778
1779 if (eptr >= md->end_subject)
1780 {
1781 SCHECK_PARTIAL();
1782 cur_is_word = FALSE;
1783 }
1784 else
1785 {
1786 GETCHAR(c, eptr);
1787 #ifdef SUPPORT_UCP
1788 if (md->use_ucp)
1789 {
1790 if (c == '_') cur_is_word = TRUE; else
1791 {
1792 int cat = UCD_CATEGORY(c);
1793 cur_is_word = (cat == ucp_L || cat == ucp_N);
1794 }
1795 }
1796 else
1797 #endif
1798 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1799 }
1800 }
1801 else
1802 #endif
1803
1804 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1805 consistency with the behaviour of \w we do use it in this case. */
1806
1807 {
1808 /* Get status of previous character */
1809
1810 if (eptr == md->start_subject) prev_is_word = FALSE; else
1811 {
1812 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1813 #ifdef SUPPORT_UCP
1814 if (md->use_ucp)
1815 {
1816 c = eptr[-1];
1817 if (c == '_') prev_is_word = TRUE; else
1818 {
1819 int cat = UCD_CATEGORY(c);
1820 prev_is_word = (cat == ucp_L || cat == ucp_N);
1821 }
1822 }
1823 else
1824 #endif
1825 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1826 }
1827
1828 /* Get status of next character */
1829
1830 if (eptr >= md->end_subject)
1831 {
1832 SCHECK_PARTIAL();
1833 cur_is_word = FALSE;
1834 }
1835 else
1836 #ifdef SUPPORT_UCP
1837 if (md->use_ucp)
1838 {
1839 c = *eptr;
1840 if (c == '_') cur_is_word = TRUE; else
1841 {
1842 int cat = UCD_CATEGORY(c);
1843 cur_is_word = (cat == ucp_L || cat == ucp_N);
1844 }
1845 }
1846 else
1847 #endif
1848 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1849 }
1850
1851 /* Now see if the situation is what we want */
1852
1853 if ((*ecode++ == OP_WORD_BOUNDARY)?
1854 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1855 MRRETURN(MATCH_NOMATCH);
1856 }
1857 break;
1858
1859 /* Match a single character type; inline for speed */
1860
1861 case OP_ANY:
1862 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1863 /* Fall through */
1864
1865 case OP_ALLANY:
1866 if (eptr++ >= md->end_subject)
1867 {
1868 SCHECK_PARTIAL();
1869 MRRETURN(MATCH_NOMATCH);
1870 }
1871 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1872 ecode++;
1873 break;
1874
1875 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1876 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1877
1878 case OP_ANYBYTE:
1879 if (eptr++ >= md->end_subject)
1880 {
1881 SCHECK_PARTIAL();
1882 MRRETURN(MATCH_NOMATCH);
1883 }
1884 ecode++;
1885 break;
1886
1887 case OP_NOT_DIGIT:
1888 if (eptr >= md->end_subject)
1889 {
1890 SCHECK_PARTIAL();
1891 MRRETURN(MATCH_NOMATCH);
1892 }
1893 GETCHARINCTEST(c, eptr);
1894 if (
1895 #ifdef SUPPORT_UTF8
1896 c < 256 &&
1897 #endif
1898 (md->ctypes[c] & ctype_digit) != 0
1899 )
1900 MRRETURN(MATCH_NOMATCH);
1901 ecode++;
1902 break;
1903
1904 case OP_DIGIT:
1905 if (eptr >= md->end_subject)
1906 {
1907 SCHECK_PARTIAL();
1908 MRRETURN(MATCH_NOMATCH);
1909 }
1910 GETCHARINCTEST(c, eptr);
1911 if (
1912 #ifdef SUPPORT_UTF8
1913 c >= 256 ||
1914 #endif
1915 (md->ctypes[c] & ctype_digit) == 0
1916 )
1917 MRRETURN(MATCH_NOMATCH);
1918 ecode++;
1919 break;
1920
1921 case OP_NOT_WHITESPACE:
1922 if (eptr >= md->end_subject)
1923 {
1924 SCHECK_PARTIAL();
1925 MRRETURN(MATCH_NOMATCH);
1926 }
1927 GETCHARINCTEST(c, eptr);
1928 if (
1929 #ifdef SUPPORT_UTF8
1930 c < 256 &&
1931 #endif
1932 (md->ctypes[c] & ctype_space) != 0
1933 )
1934 MRRETURN(MATCH_NOMATCH);
1935 ecode++;
1936 break;
1937
1938 case OP_WHITESPACE:
1939 if (eptr >= md->end_subject)
1940 {
1941 SCHECK_PARTIAL();
1942 MRRETURN(MATCH_NOMATCH);
1943 }
1944 GETCHARINCTEST(c, eptr);
1945 if (
1946 #ifdef SUPPORT_UTF8
1947 c >= 256 ||
1948 #endif
1949 (md->ctypes[c] & ctype_space) == 0
1950 )
1951 MRRETURN(MATCH_NOMATCH);
1952 ecode++;
1953 break;
1954
1955 case OP_NOT_WORDCHAR:
1956 if (eptr >= md->end_subject)
1957 {
1958 SCHECK_PARTIAL();
1959 MRRETURN(MATCH_NOMATCH);
1960 }
1961 GETCHARINCTEST(c, eptr);
1962 if (
1963 #ifdef SUPPORT_UTF8
1964 c < 256 &&
1965 #endif
1966 (md->ctypes[c] & ctype_word) != 0
1967 )
1968 MRRETURN(MATCH_NOMATCH);
1969 ecode++;
1970 break;
1971
1972 case OP_WORDCHAR:
1973 if (eptr >= md->end_subject)
1974 {
1975 SCHECK_PARTIAL();
1976 MRRETURN(MATCH_NOMATCH);
1977 }
1978 GETCHARINCTEST(c, eptr);
1979 if (
1980 #ifdef SUPPORT_UTF8
1981 c >= 256 ||
1982 #endif
1983 (md->ctypes[c] & ctype_word) == 0
1984 )
1985 MRRETURN(MATCH_NOMATCH);
1986 ecode++;
1987 break;
1988
1989 case OP_ANYNL:
1990 if (eptr >= md->end_subject)
1991 {
1992 SCHECK_PARTIAL();
1993 MRRETURN(MATCH_NOMATCH);
1994 }
1995 GETCHARINCTEST(c, eptr);
1996 switch(c)
1997 {
1998 default: MRRETURN(MATCH_NOMATCH);
1999 case 0x000d:
2000 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2001 break;
2002
2003 case 0x000a:
2004 break;
2005
2006 case 0x000b:
2007 case 0x000c:
2008 case 0x0085:
2009 case 0x2028:
2010 case 0x2029:
2011 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2012 break;
2013 }
2014 ecode++;
2015 break;
2016
2017 case OP_NOT_HSPACE:
2018 if (eptr >= md->end_subject)
2019 {
2020 SCHECK_PARTIAL();
2021 MRRETURN(MATCH_NOMATCH);
2022 }
2023 GETCHARINCTEST(c, eptr);
2024 switch(c)
2025 {
2026 default: break;
2027 case 0x09: /* HT */
2028 case 0x20: /* SPACE */
2029 case 0xa0: /* NBSP */
2030 case 0x1680: /* OGHAM SPACE MARK */
2031 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2032 case 0x2000: /* EN QUAD */
2033 case 0x2001: /* EM QUAD */
2034 case 0x2002: /* EN SPACE */
2035 case 0x2003: /* EM SPACE */
2036 case 0x2004: /* THREE-PER-EM SPACE */
2037 case 0x2005: /* FOUR-PER-EM SPACE */
2038 case 0x2006: /* SIX-PER-EM SPACE */
2039 case 0x2007: /* FIGURE SPACE */
2040 case 0x2008: /* PUNCTUATION SPACE */
2041 case 0x2009: /* THIN SPACE */
2042 case 0x200A: /* HAIR SPACE */
2043 case 0x202f: /* NARROW NO-BREAK SPACE */
2044 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2045 case 0x3000: /* IDEOGRAPHIC SPACE */
2046 MRRETURN(MATCH_NOMATCH);
2047 }
2048 ecode++;
2049 break;
2050
2051 case OP_HSPACE:
2052 if (eptr >= md->end_subject)
2053 {
2054 SCHECK_PARTIAL();
2055 MRRETURN(MATCH_NOMATCH);
2056 }
2057 GETCHARINCTEST(c, eptr);
2058 switch(c)
2059 {
2060 default: MRRETURN(MATCH_NOMATCH);
2061 case 0x09: /* HT */
2062 case 0x20: /* SPACE */
2063 case 0xa0: /* NBSP */
2064 case 0x1680: /* OGHAM SPACE MARK */
2065 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2066 case 0x2000: /* EN QUAD */
2067 case 0x2001: /* EM QUAD */
2068 case 0x2002: /* EN SPACE */
2069 case 0x2003: /* EM SPACE */
2070 case 0x2004: /* THREE-PER-EM SPACE */
2071 case 0x2005: /* FOUR-PER-EM SPACE */
2072 case 0x2006: /* SIX-PER-EM SPACE */
2073 case 0x2007: /* FIGURE SPACE */
2074 case 0x2008: /* PUNCTUATION SPACE */
2075 case 0x2009: /* THIN SPACE */
2076 case 0x200A: /* HAIR SPACE */
2077 case 0x202f: /* NARROW NO-BREAK SPACE */
2078 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2079 case 0x3000: /* IDEOGRAPHIC SPACE */
2080 break;
2081 }
2082 ecode++;
2083 break;
2084
2085 case OP_NOT_VSPACE:
2086 if (eptr >= md->end_subject)
2087 {
2088 SCHECK_PARTIAL();
2089 MRRETURN(MATCH_NOMATCH);
2090 }
2091 GETCHARINCTEST(c, eptr);
2092 switch(c)
2093 {
2094 default: break;
2095 case 0x0a: /* LF */
2096 case 0x0b: /* VT */
2097 case 0x0c: /* FF */
2098 case 0x0d: /* CR */
2099 case 0x85: /* NEL */
2100 case 0x2028: /* LINE SEPARATOR */
2101 case 0x2029: /* PARAGRAPH SEPARATOR */
2102 MRRETURN(MATCH_NOMATCH);
2103 }
2104 ecode++;
2105 break;
2106
2107 case OP_VSPACE:
2108 if (eptr >= md->end_subject)
2109 {
2110 SCHECK_PARTIAL();
2111 MRRETURN(MATCH_NOMATCH);
2112 }
2113 GETCHARINCTEST(c, eptr);
2114 switch(c)
2115 {
2116 default: MRRETURN(MATCH_NOMATCH);
2117 case 0x0a: /* LF */
2118 case 0x0b: /* VT */
2119 case 0x0c: /* FF */
2120 case 0x0d: /* CR */
2121 case 0x85: /* NEL */
2122 case 0x2028: /* LINE SEPARATOR */
2123 case 0x2029: /* PARAGRAPH SEPARATOR */
2124 break;
2125 }
2126 ecode++;
2127 break;
2128
2129 #ifdef SUPPORT_UCP
2130 /* Check the next character by Unicode property. We will get here only
2131 if the support is in the binary; otherwise a compile-time error occurs. */
2132
2133 case OP_PROP:
2134 case OP_NOTPROP:
2135 if (eptr >= md->end_subject)
2136 {
2137 SCHECK_PARTIAL();
2138 MRRETURN(MATCH_NOMATCH);
2139 }
2140 GETCHARINCTEST(c, eptr);
2141 {
2142 const ucd_record *prop = GET_UCD(c);
2143
2144 switch(ecode[1])
2145 {
2146 case PT_ANY:
2147 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2148 break;
2149
2150 case PT_LAMP:
2151 if ((prop->chartype == ucp_Lu ||
2152 prop->chartype == ucp_Ll ||
2153 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2154 MRRETURN(MATCH_NOMATCH);
2155 break;
2156
2157 case PT_GC:
2158 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2159 MRRETURN(MATCH_NOMATCH);
2160 break;
2161
2162 case PT_PC:
2163 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2164 MRRETURN(MATCH_NOMATCH);
2165 break;
2166
2167 case PT_SC:
2168 if ((ecode[2] != prop->script) == (op == OP_PROP))
2169 MRRETURN(MATCH_NOMATCH);
2170 break;
2171
2172 /* These are specials */
2173
2174 case PT_ALNUM:
2175 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2176 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2177 MRRETURN(MATCH_NOMATCH);
2178 break;
2179
2180 case PT_SPACE: /* Perl space */
2181 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2182 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2183 == (op == OP_NOTPROP))
2184 MRRETURN(MATCH_NOMATCH);
2185 break;
2186
2187 case PT_PXSPACE: /* POSIX space */
2188 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2189 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2190 c == CHAR_FF || c == CHAR_CR)
2191 == (op == OP_NOTPROP))
2192 MRRETURN(MATCH_NOMATCH);
2193 break;
2194
2195 case PT_WORD:
2196 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2197 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2198 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2199 MRRETURN(MATCH_NOMATCH);
2200 break;
2201
2202 /* This should never occur */
2203
2204 default:
2205 RRETURN(PCRE_ERROR_INTERNAL);
2206 }
2207
2208 ecode += 3;
2209 }
2210 break;
2211
2212 /* Match an extended Unicode sequence. We will get here only if the support
2213 is in the binary; otherwise a compile-time error occurs. */
2214
2215 case OP_EXTUNI:
2216 if (eptr >= md->end_subject)
2217 {
2218 SCHECK_PARTIAL();
2219 MRRETURN(MATCH_NOMATCH);
2220 }
2221 GETCHARINCTEST(c, eptr);
2222 {
2223 int category = UCD_CATEGORY(c);
2224 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2225 while (eptr < md->end_subject)
2226 {
2227 int len = 1;
2228 if (!utf8) c = *eptr; else
2229 {
2230 GETCHARLEN(c, eptr, len);
2231 }
2232 category = UCD_CATEGORY(c);
2233 if (category != ucp_M) break;
2234 eptr += len;
2235 }
2236 }
2237 ecode++;
2238 break;
2239 #endif
2240
2241
2242 /* Match a back reference, possibly repeatedly. Look past the end of the
2243 item to see if there is repeat information following. The code is similar
2244 to that for character classes, but repeated for efficiency. Then obey
2245 similar code to character type repeats - written out again for speed.
2246 However, if the referenced string is the empty string, always treat
2247 it as matched, any number of times (otherwise there could be infinite
2248 loops). */
2249
2250 case OP_REF:
2251 {
2252 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2253 ecode += 3;
2254
2255 /* If the reference is unset, there are two possibilities:
2256
2257 (a) In the default, Perl-compatible state, set the length to be longer
2258 than the amount of subject left; this ensures that every attempt at a
2259 match fails. We can't just fail here, because of the possibility of
2260 quantifiers with zero minima.
2261
2262 (b) If the JavaScript compatibility flag is set, set the length to zero
2263 so that the back reference matches an empty string.
2264
2265 Otherwise, set the length to the length of what was matched by the
2266 referenced subpattern. */
2267
2268 if (offset >= offset_top || md->offset_vector[offset] < 0)
2269 length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2270 else
2271 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2272
2273 /* Set up for repetition, or handle the non-repeated case */
2274
2275 switch (*ecode)
2276 {
2277 case OP_CRSTAR:
2278 case OP_CRMINSTAR:
2279 case OP_CRPLUS:
2280 case OP_CRMINPLUS:
2281 case OP_CRQUERY:
2282 case OP_CRMINQUERY:
2283 c = *ecode++ - OP_CRSTAR;
2284 minimize = (c & 1) != 0;
2285 min = rep_min[c]; /* Pick up values from tables; */
2286 max = rep_max[c]; /* zero for max => infinity */
2287 if (max == 0) max = INT_MAX;
2288 break;
2289
2290 case OP_CRRANGE:
2291 case OP_CRMINRANGE:
2292 minimize = (*ecode == OP_CRMINRANGE);
2293 min = GET2(ecode, 1);
2294 max = GET2(ecode, 3);
2295 if (max == 0) max = INT_MAX;
2296 ecode += 5;
2297 break;
2298
2299 default: /* No repeat follows */
2300 if (!match_ref(offset, eptr, length, md, ims))
2301 {
2302 CHECK_PARTIAL();
2303 MRRETURN(MATCH_NOMATCH);
2304 }
2305 eptr += length;
2306 continue; /* With the main loop */
2307 }
2308
2309 /* If the length of the reference is zero, just continue with the
2310 main loop. */
2311
2312 if (length == 0) continue;
2313
2314 /* First, ensure the minimum number of matches are present. We get back
2315 the length of the reference string explicitly rather than passing the
2316 address of eptr, so that eptr can be a register variable. */
2317
2318 for (i = 1; i <= min; i++)
2319 {
2320 if (!match_ref(offset, eptr, length, md, ims))
2321 {
2322 CHECK_PARTIAL();
2323 MRRETURN(MATCH_NOMATCH);
2324 }
2325 eptr += length;
2326 }
2327
2328 /* If min = max, continue at the same level without recursion.
2329 They are not both allowed to be zero. */
2330
2331 if (min == max) continue;
2332
2333 /* If minimizing, keep trying and advancing the pointer */
2334
2335 if (minimize)
2336 {
2337 for (fi = min;; fi++)
2338 {
2339 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2340 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2341 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2342 if (!match_ref(offset, eptr, length, md, ims))
2343 {
2344 CHECK_PARTIAL();
2345 MRRETURN(MATCH_NOMATCH);
2346 }
2347 eptr += length;
2348 }
2349 /* Control never gets here */
2350 }
2351
2352 /* If maximizing, find the longest string and work backwards */
2353
2354 else
2355 {
2356 pp = eptr;
2357 for (i = min; i < max; i++)
2358 {
2359 if (!match_ref(offset, eptr, length, md, ims))
2360 {
2361 CHECK_PARTIAL();
2362 break;
2363 }
2364 eptr += length;
2365 }
2366 while (eptr >= pp)
2367 {
2368 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2369 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2370 eptr -= length;
2371 }
2372 MRRETURN(MATCH_NOMATCH);
2373 }
2374 }
2375 /* Control never gets here */
2376
2377 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2378 used when all the characters in the class have values in the range 0-255,
2379 and either the matching is caseful, or the characters are in the range
2380 0-127 when UTF-8 processing is enabled. The only difference between
2381 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2382 encountered.
2383
2384 First, look past the end of the item to see if there is repeat information
2385 following. Then obey similar code to character type repeats - written out
2386 again for speed. */
2387
2388 case OP_NCLASS:
2389 case OP_CLASS:
2390 {
2391 data = ecode + 1; /* Save for matching */
2392 ecode += 33; /* Advance past the item */
2393
2394 switch (*ecode)
2395 {
2396 case OP_CRSTAR:
2397 case OP_CRMINSTAR:
2398 case OP_CRPLUS:
2399 case OP_CRMINPLUS:
2400 case OP_CRQUERY:
2401 case OP_CRMINQUERY:
2402 c = *ecode++ - OP_CRSTAR;
2403 minimize = (c & 1) != 0;
2404 min = rep_min[c]; /* Pick up values from tables; */
2405 max = rep_max[c]; /* zero for max => infinity */
2406 if (max == 0) max = INT_MAX;
2407 break;
2408
2409 case OP_CRRANGE:
2410 case OP_CRMINRANGE:
2411 minimize = (*ecode == OP_CRMINRANGE);
2412 min = GET2(ecode, 1);
2413 max = GET2(ecode, 3);
2414 if (max == 0) max = INT_MAX;
2415 ecode += 5;
2416 break;
2417
2418 default: /* No repeat follows */
2419 min = max = 1;
2420 break;
2421 }
2422
2423 /* First, ensure the minimum number of matches are present. */
2424
2425 #ifdef SUPPORT_UTF8
2426 /* UTF-8 mode */
2427 if (utf8)
2428 {
2429 for (i = 1; i <= min; i++)
2430 {
2431 if (eptr >= md->end_subject)
2432 {
2433 SCHECK_PARTIAL();
2434 MRRETURN(MATCH_NOMATCH);
2435 }
2436 GETCHARINC(c, eptr);
2437 if (c > 255)
2438 {
2439 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2440 }
2441 else
2442 {
2443 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2444 }
2445 }
2446 }
2447 else
2448 #endif
2449 /* Not UTF-8 mode */
2450 {
2451 for (i = 1; i <= min; i++)
2452 {
2453 if (eptr >= md->end_subject)
2454 {
2455 SCHECK_PARTIAL();
2456 MRRETURN(MATCH_NOMATCH);
2457 }
2458 c = *eptr++;
2459 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2460 }
2461 }
2462
2463 /* If max == min we can continue with the main loop without the
2464 need to recurse. */
2465
2466 if (min == max) continue;
2467
2468 /* If minimizing, keep testing the rest of the expression and advancing
2469 the pointer while it matches the class. */
2470
2471 if (minimize)
2472 {
2473 #ifdef SUPPORT_UTF8
2474 /* UTF-8 mode */
2475 if (utf8)
2476 {
2477 for (fi = min;; fi++)
2478 {
2479 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2480 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2481 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2482 if (eptr >= md->end_subject)
2483 {
2484 SCHECK_PARTIAL();
2485 MRRETURN(MATCH_NOMATCH);
2486 }
2487 GETCHARINC(c, eptr);
2488 if (c > 255)
2489 {
2490 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2491 }
2492 else
2493 {
2494 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2495 }
2496 }
2497 }
2498 else
2499 #endif
2500 /* Not UTF-8 mode */
2501 {
2502 for (fi = min;; fi++)
2503 {
2504 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2505 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2506 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2507 if (eptr >= md->end_subject)
2508 {
2509 SCHECK_PARTIAL();
2510 MRRETURN(MATCH_NOMATCH);
2511 }
2512 c = *eptr++;
2513 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2514 }
2515 }
2516 /* Control never gets here */
2517 }
2518
2519 /* If maximizing, find the longest possible run, then work backwards. */
2520
2521 else
2522 {
2523 pp = eptr;
2524
2525 #ifdef SUPPORT_UTF8
2526 /* UTF-8 mode */
2527 if (utf8)
2528 {
2529 for (i = min; i < max; i++)
2530 {
2531 int len = 1;
2532 if (eptr >= md->end_subject)
2533 {
2534 SCHECK_PARTIAL();
2535 break;
2536 }
2537 GETCHARLEN(c, eptr, len);
2538 if (c > 255)
2539 {
2540 if (op == OP_CLASS) break;
2541 }
2542 else
2543 {
2544 if ((data[c/8] & (1 << (c&7))) == 0) break;
2545 }
2546 eptr += len;
2547 }
2548 for (;;)
2549 {
2550 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2551 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2552 if (eptr-- == pp) break; /* Stop if tried at original pos */
2553 BACKCHAR(eptr);
2554 }
2555 }
2556 else
2557 #endif
2558 /* Not UTF-8 mode */
2559 {
2560 for (i = min; i < max; i++)
2561 {
2562 if (eptr >= md->end_subject)
2563 {
2564 SCHECK_PARTIAL();
2565 break;
2566 }
2567 c = *eptr;
2568 if ((data[c/8] & (1 << (c&7))) == 0) break;
2569 eptr++;
2570 }
2571 while (eptr >= pp)
2572 {
2573 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2574 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2575 eptr--;
2576 }
2577 }
2578
2579 MRRETURN(MATCH_NOMATCH);
2580 }
2581 }
2582 /* Control never gets here */
2583
2584
2585 /* Match an extended character class. This opcode is encountered only
2586 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2587 mode, because Unicode properties are supported in non-UTF-8 mode. */
2588
2589 #ifdef SUPPORT_UTF8
2590 case OP_XCLASS:
2591 {
2592 data = ecode + 1 + LINK_SIZE; /* Save for matching */
2593 ecode += GET(ecode, 1); /* Advance past the item */
2594
2595 switch (*ecode)
2596 {
2597 case OP_CRSTAR:
2598 case OP_CRMINSTAR:
2599 case OP_CRPLUS:
2600 case OP_CRMINPLUS:
2601 case OP_CRQUERY:
2602 case OP_CRMINQUERY:
2603 c = *ecode++ - OP_CRSTAR;
2604 minimize = (c & 1) != 0;
2605 min = rep_min[c]; /* Pick up values from tables; */
2606 max = rep_max[c]; /* zero for max => infinity */
2607 if (max == 0) max = INT_MAX;
2608 break;
2609
2610 case OP_CRRANGE:
2611 case OP_CRMINRANGE:
2612 minimize = (*ecode == OP_CRMINRANGE);
2613 min = GET2(ecode, 1);
2614 max = GET2(ecode, 3);
2615 if (max == 0) max = INT_MAX;
2616 ecode += 5;
2617 break;
2618
2619 default: /* No repeat follows */
2620 min = max = 1;
2621 break;
2622 }
2623
2624 /* First, ensure the minimum number of matches are present. */
2625
2626 for (i = 1; i <= min; i++)
2627 {
2628 if (eptr >= md->end_subject)
2629 {
2630 SCHECK_PARTIAL();
2631 MRRETURN(MATCH_NOMATCH);
2632 }
2633 GETCHARINCTEST(c, eptr);
2634 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2635 }
2636
2637 /* If max == min we can continue with the main loop without the
2638 need to recurse. */
2639
2640 if (min == max) continue;
2641
2642 /* If minimizing, keep testing the rest of the expression and advancing
2643 the pointer while it matches the class. */
2644
2645 if (minimize)
2646 {
2647 for (fi = min;; fi++)
2648 {
2649 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2650 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2651 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2652 if (eptr >= md->end_subject)
2653 {
2654 SCHECK_PARTIAL();
2655 MRRETURN(MATCH_NOMATCH);
2656 }
2657 GETCHARINCTEST(c, eptr);
2658 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2659 }
2660 /* Control never gets here */
2661 }
2662
2663 /* If maximizing, find the longest possible run, then work backwards. */
2664
2665 else
2666 {
2667 pp = eptr;
2668 for (i = min; i < max; i++)
2669 {
2670 int len = 1;
2671 if (eptr >= md->end_subject)
2672 {
2673 SCHECK_PARTIAL();
2674 break;
2675 }
2676 GETCHARLENTEST(c, eptr, len);
2677 if (!_pcre_xclass(c, data)) break;
2678 eptr += len;
2679 }
2680 for(;;)
2681 {
2682 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2683 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684 if (eptr-- == pp) break; /* Stop if tried at original pos */
2685 if (utf8) BACKCHAR(eptr);
2686 }
2687 MRRETURN(MATCH_NOMATCH);
2688 }
2689
2690 /* Control never gets here */
2691 }
2692 #endif /* End of XCLASS */
2693
2694 /* Match a single character, casefully */
2695
2696 case OP_CHAR:
2697 #ifdef SUPPORT_UTF8
2698 if (utf8)
2699 {
2700 length = 1;
2701 ecode++;
2702 GETCHARLEN(fc, ecode, length);
2703 if (length > md->end_subject - eptr)
2704 {
2705 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2706 MRRETURN(MATCH_NOMATCH);
2707 }
2708 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2709 }
2710 else
2711 #endif
2712
2713 /* Non-UTF-8 mode */
2714 {
2715 if (md->end_subject - eptr < 1)
2716 {
2717 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2718 MRRETURN(MATCH_NOMATCH);
2719 }
2720 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2721 ecode += 2;
2722 }
2723 break;
2724
2725 /* Match a single character, caselessly */
2726
2727 case OP_CHARNC:
2728 #ifdef SUPPORT_UTF8
2729 if (utf8)
2730 {
2731 length = 1;
2732 ecode++;
2733 GETCHARLEN(fc, ecode, length);
2734
2735 if (length > md->end_subject - eptr)
2736 {
2737 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2738 MRRETURN(MATCH_NOMATCH);
2739 }
2740
2741 /* If the pattern character's value is < 128, we have only one byte, and
2742 can use the fast lookup table. */
2743
2744 if (fc < 128)
2745 {
2746 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2747 }
2748
2749 /* Otherwise we must pick up the subject character */
2750
2751 else
2752 {
2753 unsigned int dc;
2754 GETCHARINC(dc, eptr);
2755 ecode += length;
2756
2757 /* If we have Unicode property support, we can use it to test the other
2758 case of the character, if there is one. */
2759
2760 if (fc != dc)
2761 {
2762 #ifdef SUPPORT_UCP
2763 if (dc != UCD_OTHERCASE(fc))
2764 #endif
2765 MRRETURN(MATCH_NOMATCH);
2766 }
2767 }
2768 }
2769 else
2770 #endif /* SUPPORT_UTF8 */
2771
2772 /* Non-UTF-8 mode */
2773 {
2774 if (md->end_subject - eptr < 1)
2775 {
2776 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2777 MRRETURN(MATCH_NOMATCH);
2778 }
2779 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2780 ecode += 2;
2781 }
2782 break;
2783
2784 /* Match a single character repeatedly. */
2785
2786 case OP_EXACT:
2787 min = max = GET2(ecode, 1);
2788 ecode += 3;
2789 goto REPEATCHAR;
2790
2791 case OP_POSUPTO:
2792 possessive = TRUE;
2793 /* Fall through */
2794
2795 case OP_UPTO:
2796 case OP_MINUPTO:
2797 min = 0;
2798 max = GET2(ecode, 1);
2799 minimize = *ecode == OP_MINUPTO;
2800 ecode += 3;
2801 goto REPEATCHAR;
2802
2803 case OP_POSSTAR:
2804 possessive = TRUE;
2805 min = 0;
2806 max = INT_MAX;
2807 ecode++;
2808 goto REPEATCHAR;
2809
2810 case OP_POSPLUS:
2811 possessive = TRUE;
2812 min = 1;
2813 max = INT_MAX;
2814 ecode++;
2815 goto REPEATCHAR;
2816
2817 case OP_POSQUERY:
2818 possessive = TRUE;
2819 min = 0;
2820 max = 1;
2821 ecode++;
2822 goto REPEATCHAR;
2823
2824 case OP_STAR:
2825 case OP_MINSTAR:
2826 case OP_PLUS:
2827 case OP_MINPLUS:
2828 case OP_QUERY:
2829 case OP_MINQUERY:
2830 c = *ecode++ - OP_STAR;
2831 minimize = (c & 1) != 0;
2832
2833 min = rep_min[c]; /* Pick up values from tables; */
2834 max = rep_max[c]; /* zero for max => infinity */
2835 if (max == 0) max = INT_MAX;
2836
2837 /* Common code for all repeated single-character matches. */
2838
2839 REPEATCHAR:
2840 #ifdef SUPPORT_UTF8
2841 if (utf8)
2842 {
2843 length = 1;
2844 charptr = ecode;
2845 GETCHARLEN(fc, ecode, length);
2846 ecode += length;
2847
2848 /* Handle multibyte character matching specially here. There is
2849 support for caseless matching if UCP support is present. */
2850
2851 if (length > 1)
2852 {
2853 #ifdef SUPPORT_UCP
2854 unsigned int othercase;
2855 if ((ims & PCRE_CASELESS) != 0 &&
2856 (othercase = UCD_OTHERCASE(fc)) != fc)
2857 oclength = _pcre_ord2utf8(othercase, occhars);
2858 else oclength = 0;
2859 #endif /* SUPPORT_UCP */
2860
2861 for (i = 1; i <= min; i++)
2862 {
2863 if (eptr <= md->end_subject - length &&
2864 memcmp(eptr, charptr, length) == 0) eptr += length;
2865 #ifdef SUPPORT_UCP
2866 else if (oclength > 0 &&
2867 eptr <= md->end_subject - oclength &&
2868 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2869 #endif /* SUPPORT_UCP */
2870 else
2871 {
2872 CHECK_PARTIAL();
2873 MRRETURN(MATCH_NOMATCH);
2874 }
2875 }
2876
2877 if (min == max) continue;
2878
2879 if (minimize)
2880 {
2881 for (fi = min;; fi++)
2882 {
2883 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2884 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886 if (eptr <= md->end_subject - length &&
2887 memcmp(eptr, charptr, length) == 0) eptr += length;
2888 #ifdef SUPPORT_UCP
2889 else if (oclength > 0 &&
2890 eptr <= md->end_subject - oclength &&
2891 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2892 #endif /* SUPPORT_UCP */
2893 else
2894 {
2895 CHECK_PARTIAL();
2896 MRRETURN(MATCH_NOMATCH);
2897 }
2898 }
2899 /* Control never gets here */
2900 }
2901
2902 else /* Maximize */
2903 {
2904 pp = eptr;
2905 for (i = min; i < max; i++)
2906 {
2907 if (eptr <= md->end_subject - length &&
2908 memcmp(eptr, charptr, length) == 0) eptr += length;
2909 #ifdef SUPPORT_UCP
2910 else if (oclength > 0 &&
2911 eptr <= md->end_subject - oclength &&
2912 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913 #endif /* SUPPORT_UCP */
2914 else
2915 {
2916 CHECK_PARTIAL();
2917 break;
2918 }
2919 }
2920
2921 if (possessive) continue;
2922
2923 for(;;)
2924 {
2925 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2926 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928 #ifdef SUPPORT_UCP
2929 eptr--;
2930 BACKCHAR(eptr);
2931 #else /* without SUPPORT_UCP */
2932 eptr -= length;
2933 #endif /* SUPPORT_UCP */
2934 }
2935 }
2936 /* Control never gets here */
2937 }
2938
2939 /* If the length of a UTF-8 character is 1, we fall through here, and
2940 obey the code as for non-UTF-8 characters below, though in this case the
2941 value of fc will always be < 128. */
2942 }
2943 else
2944 #endif /* SUPPORT_UTF8 */
2945
2946 /* When not in UTF-8 mode, load a single-byte character. */
2947
2948 fc = *ecode++;
2949
2950 /* The value of fc at this point is always less than 256, though we may or
2951 may not be in UTF-8 mode. The code is duplicated for the caseless and
2952 caseful cases, for speed, since matching characters is likely to be quite
2953 common. First, ensure the minimum number of matches are present. If min =
2954 max, continue at the same level without recursing. Otherwise, if
2955 minimizing, keep trying the rest of the expression and advancing one
2956 matching character if failing, up to the maximum. Alternatively, if
2957 maximizing, find the maximum number of characters and work backwards. */
2958
2959 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2960 max, eptr));
2961
2962 if ((ims & PCRE_CASELESS) != 0)
2963 {
2964 fc = md->lcc[fc];
2965 for (i = 1; i <= min; i++)
2966 {
2967 if (eptr >= md->end_subject)
2968 {
2969 SCHECK_PARTIAL();
2970 MRRETURN(MATCH_NOMATCH);
2971 }
2972 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2973 }
2974 if (min == max) continue;
2975 if (minimize)
2976 {
2977 for (fi = min;; fi++)
2978 {
2979 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2980 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982 if (eptr >= md->end_subject)
2983 {
2984 SCHECK_PARTIAL();
2985 MRRETURN(MATCH_NOMATCH);
2986 }
2987 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988 }
2989 /* Control never gets here */
2990 }
2991 else /* Maximize */
2992 {
2993 pp = eptr;
2994 for (i = min; i < max; i++)
2995 {
2996 if (eptr >= md->end_subject)
2997 {
2998 SCHECK_PARTIAL();
2999 break;
3000 }
3001 if (fc != md->lcc[*eptr]) break;
3002 eptr++;
3003 }
3004
3005 if (possessive) continue;
3006
3007 while (eptr >= pp)
3008 {
3009 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3010 eptr--;
3011 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012 }
3013 MRRETURN(MATCH_NOMATCH);
3014 }
3015 /* Control never gets here */
3016 }
3017
3018 /* Caseful comparisons (includes all multi-byte characters) */
3019
3020 else
3021 {
3022 for (i = 1; i <= min; i++)
3023 {
3024 if (eptr >= md->end_subject)
3025 {
3026 SCHECK_PARTIAL();
3027 MRRETURN(MATCH_NOMATCH);
3028 }
3029 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3030 }
3031
3032 if (min == max) continue;
3033
3034 if (minimize)
3035 {
3036 for (fi = min;; fi++)
3037 {
3038 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3039 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041 if (eptr >= md->end_subject)
3042 {
3043 SCHECK_PARTIAL();
3044 MRRETURN(MATCH_NOMATCH);
3045 }
3046 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3047 }
3048 /* Control never gets here */
3049 }
3050 else /* Maximize */
3051 {
3052 pp = eptr;
3053 for (i = min; i < max; i++)
3054 {
3055 if (eptr >= md->end_subject)
3056 {
3057 SCHECK_PARTIAL();
3058 break;
3059 }
3060 if (fc != *eptr) break;
3061 eptr++;
3062 }
3063 if (possessive) continue;
3064
3065 while (eptr >= pp)
3066 {
3067 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3068 eptr--;
3069 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070 }
3071 MRRETURN(MATCH_NOMATCH);
3072 }
3073 }
3074 /* Control never gets here */
3075
3076 /* Match a negated single one-byte character. The character we are
3077 checking can be multibyte. */
3078
3079 case OP_NOT:
3080 if (eptr >= md->end_subject)
3081 {
3082 SCHECK_PARTIAL();
3083 MRRETURN(MATCH_NOMATCH);
3084 }
3085 ecode++;
3086 GETCHARINCTEST(c, eptr);
3087 if ((ims & PCRE_CASELESS) != 0)
3088 {
3089 #ifdef SUPPORT_UTF8
3090 if (c < 256)
3091 #endif
3092 c = md->lcc[c];
3093 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3094 }
3095 else
3096 {
3097 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3098 }
3099 break;
3100
3101 /* Match a negated single one-byte character repeatedly. This is almost a
3102 repeat of the code for a repeated single character, but I haven't found a
3103 nice way of commoning these up that doesn't require a test of the
3104 positive/negative option for each character match. Maybe that wouldn't add
3105 very much to the time taken, but character matching *is* what this is all
3106 about... */
3107
3108 case OP_NOTEXACT:
3109 min = max = GET2(ecode, 1);
3110 ecode += 3;
3111 goto REPEATNOTCHAR;
3112
3113 case OP_NOTUPTO:
3114 case OP_NOTMINUPTO:
3115 min = 0;
3116 max = GET2(ecode, 1);
3117 minimize = *ecode == OP_NOTMINUPTO;
3118 ecode += 3;
3119 goto REPEATNOTCHAR;
3120
3121 case OP_NOTPOSSTAR:
3122 possessive = TRUE;
3123 min = 0;
3124 max = INT_MAX;
3125 ecode++;
3126 goto REPEATNOTCHAR;
3127
3128 case OP_NOTPOSPLUS:
3129 possessive = TRUE;
3130 min = 1;
3131 max = INT_MAX;
3132 ecode++;
3133 goto REPEATNOTCHAR;
3134
3135 case OP_NOTPOSQUERY:
3136 possessive = TRUE;
3137 min = 0;
3138 max = 1;
3139 ecode++;
3140 goto REPEATNOTCHAR;
3141
3142 case OP_NOTPOSUPTO:
3143 possessive = TRUE;
3144 min = 0;
3145 max = GET2(ecode, 1);
3146 ecode += 3;
3147 goto REPEATNOTCHAR;
3148
3149 case OP_NOTSTAR:
3150 case OP_NOTMINSTAR:
3151 case OP_NOTPLUS:
3152 case OP_NOTMINPLUS:
3153 case OP_NOTQUERY:
3154 case OP_NOTMINQUERY:
3155 c = *ecode++ - OP_NOTSTAR;
3156 minimize = (c & 1) != 0;
3157 min = rep_min[c]; /* Pick up values from tables; */
3158 max = rep_max[c]; /* zero for max => infinity */
3159 if (max == 0) max = INT_MAX;
3160
3161 /* Common code for all repeated single-byte matches. */
3162
3163 REPEATNOTCHAR:
3164 fc = *ecode++;
3165
3166 /* The code is duplicated for the caseless and caseful cases, for speed,
3167 since matching characters is likely to be quite common. First, ensure the
3168 minimum number of matches are present. If min = max, continue at the same
3169 level without recursing. Otherwise, if minimizing, keep trying the rest of
3170 the expression and advancing one matching character if failing, up to the
3171 maximum. Alternatively, if maximizing, find the maximum number of
3172 characters and work backwards. */
3173
3174 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3175 max, eptr));
3176
3177 if ((ims & PCRE_CASELESS) != 0)
3178 {
3179 fc = md->lcc[fc];
3180
3181 #ifdef SUPPORT_UTF8
3182 /* UTF-8 mode */
3183 if (utf8)
3184 {
3185 register unsigned int d;
3186 for (i = 1; i <= min; i++)
3187 {
3188 if (eptr >= md->end_subject)
3189 {
3190 SCHECK_PARTIAL();
3191 MRRETURN(MATCH_NOMATCH);
3192 }
3193 GETCHARINC(d, eptr);
3194 if (d < 256) d = md->lcc[d];
3195 if (fc == d) MRRETURN(MATCH_NOMATCH);
3196 }
3197 }
3198 else
3199 #endif
3200
3201 /* Not UTF-8 mode */
3202 {
3203 for (i = 1; i <= min; i++)
3204 {
3205 if (eptr >= md->end_subject)
3206 {
3207 SCHECK_PARTIAL();
3208 MRRETURN(MATCH_NOMATCH);
3209 }
3210 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3211 }
3212 }
3213
3214 if (min == max) continue;
3215
3216 if (minimize)
3217 {
3218 #ifdef SUPPORT_UTF8
3219 /* UTF-8 mode */
3220 if (utf8)
3221 {
3222 register unsigned int d;
3223 for (fi = min;; fi++)
3224 {
3225 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3226 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3227 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3228 if (eptr >= md->end_subject)
3229 {
3230 SCHECK_PARTIAL();
3231 MRRETURN(MATCH_NOMATCH);
3232 }
3233 GETCHARINC(d, eptr);
3234 if (d < 256) d = md->lcc[d];
3235 if (fc == d) MRRETURN(MATCH_NOMATCH);
3236 }
3237 }
3238 else
3239 #endif
3240 /* Not UTF-8 mode */
3241 {
3242 for (fi = min;; fi++)
3243 {
3244 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3245 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3246 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3247 if (eptr >= md->end_subject)
3248 {
3249 SCHECK_PARTIAL();
3250 MRRETURN(MATCH_NOMATCH);
3251 }
3252 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3253 }
3254 }
3255 /* Control never gets here */
3256 }
3257
3258 /* Maximize case */
3259
3260 else
3261 {
3262 pp = eptr;
3263
3264 #ifdef SUPPORT_UTF8
3265 /* UTF-8 mode */
3266 if (utf8)
3267 {
3268 register unsigned int d;
3269 for (i = min; i < max; i++)
3270 {
3271 int len = 1;
3272 if (eptr >= md->end_subject)
3273 {
3274 SCHECK_PARTIAL();
3275 break;
3276 }
3277 GETCHARLEN(d, eptr, len);
3278 if (d < 256) d = md->lcc[d];
3279 if (fc == d) break;
3280 eptr += len;
3281 }
3282 if (possessive) continue;
3283 for(;;)
3284 {
3285 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3286 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3287 if (eptr-- == pp) break; /* Stop if tried at original pos */
3288 BACKCHAR(eptr);
3289 }
3290 }
3291 else
3292 #endif
3293 /* Not UTF-8 mode */
3294 {
3295 for (i = min; i < max; i++)
3296 {
3297 if (eptr >= md->end_subject)
3298 {
3299 SCHECK_PARTIAL();
3300 break;
3301 }
3302 if (fc == md->lcc[*eptr]) break;
3303 eptr++;
3304 }
3305 if (possessive) continue;
3306 while (eptr >= pp)
3307 {
3308 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3309 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3310 eptr--;
3311 }
3312 }
3313
3314 MRRETURN(MATCH_NOMATCH);
3315 }
3316 /* Control never gets here */
3317 }
3318
3319 /* Caseful comparisons */
3320
3321 else
3322 {
3323 #ifdef SUPPORT_UTF8
3324 /* UTF-8 mode */
3325 if (utf8)
3326 {
3327 register unsigned int d;
3328 for (i = 1; i <= min; i++)
3329 {
3330 if (eptr >= md->end_subject)
3331 {
3332 SCHECK_PARTIAL();
3333 MRRETURN(MATCH_NOMATCH);
3334 }
3335 GETCHARINC(d, eptr);
3336 if (fc == d) MRRETURN(MATCH_NOMATCH);
3337 }
3338 }
3339 else
3340 #endif
3341 /* Not UTF-8 mode */
3342 {
3343 for (i = 1; i <= min; i++)
3344 {
3345 if (eptr >= md->end_subject)
3346 {
3347 SCHECK_PARTIAL();
3348 MRRETURN(MATCH_NOMATCH);
3349 }
3350 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3351 }
3352 }
3353
3354 if (min == max) continue;
3355
3356 if (minimize)
3357 {
3358 #ifdef SUPPORT_UTF8
3359 /* UTF-8 mode */
3360 if (utf8)
3361 {
3362 register unsigned int d;
3363 for (fi = min;; fi++)
3364 {
3365 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3366 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3367 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3368 if (eptr >= md->end_subject)
3369 {
3370 SCHECK_PARTIAL();
3371 MRRETURN(MATCH_NOMATCH);
3372 }
3373 GETCHARINC(d, eptr);
3374 if (fc == d) MRRETURN(MATCH_NOMATCH);
3375 }
3376 }
3377 else
3378 #endif
3379 /* Not UTF-8 mode */
3380 {
3381 for (fi = min;; fi++)
3382 {
3383 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3384 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3385 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3386 if (eptr >= md->end_subject)
3387 {
3388 SCHECK_PARTIAL();
3389 MRRETURN(MATCH_NOMATCH);
3390 }
3391 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3392 }
3393 }
3394 /* Control never gets here */
3395 }
3396
3397 /* Maximize case */
3398
3399 else
3400 {
3401 pp = eptr;
3402
3403 #ifdef SUPPORT_UTF8
3404 /* UTF-8 mode */
3405 if (utf8)
3406 {
3407 register unsigned int d;
3408 for (i = min; i < max; i++)
3409 {
3410 int len = 1;
3411 if (eptr >= md->end_subject)
3412 {
3413 SCHECK_PARTIAL();
3414 break;
3415 }
3416 GETCHARLEN(d, eptr, len);
3417 if (fc == d) break;
3418 eptr += len;
3419 }
3420 if (possessive) continue;
3421 for(;;)
3422 {
3423 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3424 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3425 if (eptr-- == pp) break; /* Stop if tried at original pos */
3426 BACKCHAR(eptr);
3427 }
3428 }
3429 else
3430 #endif
3431 /* Not UTF-8 mode */
3432 {
3433 for (i = min; i < max; i++)
3434 {
3435 if (eptr >= md->end_subject)
3436 {
3437 SCHECK_PARTIAL();
3438 break;
3439 }
3440 if (fc == *eptr) break;
3441 eptr++;
3442 }
3443 if (possessive) continue;
3444 while (eptr >= pp)
3445 {
3446 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3447 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3448 eptr--;
3449 }
3450 }
3451
3452 MRRETURN(MATCH_NOMATCH);
3453 }
3454 }
3455 /* Control never gets here */
3456
3457 /* Match a single character type repeatedly; several different opcodes
3458 share code. This is very similar to the code for single characters, but we
3459 repeat it in the interests of efficiency. */
3460
3461 case OP_TYPEEXACT:
3462 min = max = GET2(ecode, 1);
3463 minimize = TRUE;
3464 ecode += 3;
3465 goto REPEATTYPE;
3466
3467 case OP_TYPEUPTO:
3468 case OP_TYPEMINUPTO:
3469 min = 0;
3470 max = GET2(ecode, 1);
3471 minimize = *ecode == OP_TYPEMINUPTO;
3472 ecode += 3;
3473 goto REPEATTYPE;
3474
3475 case OP_TYPEPOSSTAR:
3476 possessive = TRUE;
3477 min = 0;
3478 max = INT_MAX;
3479 ecode++;
3480 goto REPEATTYPE;
3481
3482 case OP_TYPEPOSPLUS:
3483 possessive = TRUE;
3484 min = 1;
3485 max = INT_MAX;
3486 ecode++;
3487 goto REPEATTYPE;
3488
3489 case OP_TYPEPOSQUERY:
3490 possessive = TRUE;
3491 min = 0;
3492 max = 1;
3493 ecode++;
3494 goto REPEATTYPE;
3495
3496 case OP_TYPEPOSUPTO:
3497 possessive = TRUE;
3498 min = 0;
3499 max = GET2(ecode, 1);
3500 ecode += 3;
3501 goto REPEATTYPE;
3502
3503 case OP_TYPESTAR:
3504 case OP_TYPEMINSTAR:
3505 case OP_TYPEPLUS:
3506 case OP_TYPEMINPLUS:
3507 case OP_TYPEQUERY:
3508 case OP_TYPEMINQUERY:
3509 c = *ecode++ - OP_TYPESTAR;
3510 minimize = (c & 1) != 0;
3511 min = rep_min[c]; /* Pick up values from tables; */
3512 max = rep_max[c]; /* zero for max => infinity */
3513 if (max == 0) max = INT_MAX;
3514
3515 /* Common code for all repeated single character type matches. Note that in
3516 UTF-8 mode only OP_DIGIT, OP_WHITESPACE and OP_WORDCHAR are restricted to one-byte
3517 characters; '.', OP_ANYNL, the negated types and the h/v space types are not. */
3518
3519 REPEATTYPE:
3520 ctype = *ecode++; /* Code for the character type */
3521
3522 #ifdef SUPPORT_UCP
3523 if (ctype == OP_PROP || ctype == OP_NOTPROP)
3524 {
3525 prop_fail_result = ctype == OP_NOTPROP;
3526 prop_type = *ecode++;
3527 prop_value = *ecode++;
3528 }
3529 else prop_type = -1;
3530 #endif
3531
3532 /* First, ensure the minimum number of matches are present. Use inline
3533 code for maximizing the speed, and do the type test once at the start
3534 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3535 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3536 and single-bytes. */
3537
3538 if (min > 0)
3539 {
3540 #ifdef SUPPORT_UCP
3541 if (prop_type >= 0)
3542 {
3543 switch(prop_type)
3544 {
3545 case PT_ANY:
3546 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3547 for (i = 1; i <= min; i++)
3548 {
3549 if (eptr >= md->end_subject)
3550 {
3551 SCHECK_PARTIAL();
3552 MRRETURN(MATCH_NOMATCH);
3553 }
3554 GETCHARINCTEST(c, eptr);
3555 }
3556 break;
3557
3558 case PT_LAMP:
3559 for (i = 1; i <= min; i++)
3560 {
3561 if (eptr >= md->end_subject)
3562 {
3563 SCHECK_PARTIAL();
3564 MRRETURN(MATCH_NOMATCH);
3565 }
3566 GETCHARINCTEST(c, eptr);
3567 prop_chartype = UCD_CHARTYPE(c);
3568 if ((prop_chartype == ucp_Lu ||
3569 prop_chartype == ucp_Ll ||
3570 prop_chartype == ucp_Lt) == prop_fail_result)
3571 MRRETURN(MATCH_NOMATCH);
3572 }
3573 break;
3574
3575 case PT_GC:
3576 for (i = 1; i <= min; i++)
3577 {
3578 if (eptr >= md->end_subject)
3579 {
3580 SCHECK_PARTIAL();
3581 MRRETURN(MATCH_NOMATCH);
3582 }
3583 GETCHARINCTEST(c, eptr);
3584 prop_category = UCD_CATEGORY(c);
3585 if ((prop_category == prop_value) == prop_fail_result)
3586 MRRETURN(MATCH_NOMATCH);
3587 }
3588 break;
3589
3590 case PT_PC:
3591 for (i = 1; i <= min; i++)
3592 {
3593 if (eptr >= md->end_subject)
3594 {
3595 SCHECK_PARTIAL();
3596 MRRETURN(MATCH_NOMATCH);
3597 }
3598 GETCHARINCTEST(c, eptr);
3599 prop_chartype = UCD_CHARTYPE(c);
3600 if ((prop_chartype == prop_value) == prop_fail_result)
3601 MRRETURN(MATCH_NOMATCH);
3602 }
3603 break;
3604
3605 case PT_SC:
3606 for (i = 1; i <= min; i++)
3607 {
3608 if (eptr >= md->end_subject)
3609 {
3610 SCHECK_PARTIAL();
3611 MRRETURN(MATCH_NOMATCH);
3612 }
3613 GETCHARINCTEST(c, eptr);
3614 prop_script = UCD_SCRIPT(c);
3615 if ((prop_script == prop_value) == prop_fail_result)
3616 MRRETURN(MATCH_NOMATCH);
3617 }
3618 break;
3619
3620 case PT_ALNUM:
3621 for (i = 1; i <= min; i++)
3622 {
3623 if (eptr >= md->end_subject)
3624 {
3625 SCHECK_PARTIAL();
3626 MRRETURN(MATCH_NOMATCH);
3627 }
3628 GETCHARINCTEST(c, eptr);
3629 prop_category = UCD_CATEGORY(c);
3630 if ((prop_category == ucp_L || prop_category == ucp_N)
3631 == prop_fail_result)
3632 MRRETURN(MATCH_NOMATCH);
3633 }
3634 break;
3635
3636 case PT_SPACE: /* Perl space */
3637 for (i = 1; i <= min; i++)
3638 {
3639 if (eptr >= md->end_subject)
3640 {
3641 SCHECK_PARTIAL();
3642 MRRETURN(MATCH_NOMATCH);
3643 }
3644 GETCHARINCTEST(c, eptr);
3645 prop_category = UCD_CATEGORY(c);
3646 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3647 c == CHAR_FF || c == CHAR_CR)
3648 == prop_fail_result)
3649 MRRETURN(MATCH_NOMATCH);
3650 }
3651 break;
3652
3653 case PT_PXSPACE: /* POSIX space */
3654 for (i = 1; i <= min; i++)
3655 {
3656 if (eptr >= md->end_subject)
3657 {
3658 SCHECK_PARTIAL();
3659 MRRETURN(MATCH_NOMATCH);
3660 }
3661 GETCHARINCTEST(c, eptr);
3662 prop_category = UCD_CATEGORY(c);
3663 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3664 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3665 == prop_fail_result)
3666 MRRETURN(MATCH_NOMATCH);
3667 }
3668 break;
3669
3670 case PT_WORD:
3671 for (i = 1; i <= min; i++)
3672 {
3673 if (eptr >= md->end_subject)
3674 {
3675 SCHECK_PARTIAL();
3676 MRRETURN(MATCH_NOMATCH);
3677 }
3678 GETCHARINCTEST(c, eptr);
3679 prop_category = UCD_CATEGORY(c);
3680 if ((prop_category == ucp_L || prop_category == ucp_N ||
3681 c == CHAR_UNDERSCORE)
3682 == prop_fail_result)
3683 MRRETURN(MATCH_NOMATCH);
3684 }
3685 break;
3686
3687 /* This should not occur */
3688
3689 default:
3690 RRETURN(PCRE_ERROR_INTERNAL);
3691 }
3692 }
3693
3694 /* Match extended Unicode sequences. We will get here only if the
3695 support is in the binary; otherwise a compile-time error occurs. */
3696
3697 else if (ctype == OP_EXTUNI)
3698 {
3699 for (i = 1; i <= min; i++)
3700 {
3701 if (eptr >= md->end_subject)
3702 {
3703 SCHECK_PARTIAL();
3704 MRRETURN(MATCH_NOMATCH);
3705 }
3706 GETCHARINCTEST(c, eptr);
3707 prop_category = UCD_CATEGORY(c);
3708 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3709 while (eptr < md->end_subject)
3710 {
3711 int len = 1;
3712 if (!utf8) c = *eptr;
3713 else { GETCHARLEN(c, eptr, len); }
3714 prop_category = UCD_CATEGORY(c);
3715 if (prop_category != ucp_M) break;
3716 eptr += len;
3717 }
3718 }
3719 }
3720
3721 else
3722 #endif /* SUPPORT_UCP */
3723
3724 /* Handle all other cases when the coding is UTF-8 */
3725
3726 #ifdef SUPPORT_UTF8
3727 if (utf8) switch(ctype)
3728 {
3729 case OP_ANY:
3730 for (i = 1; i <= min; i++)
3731 {
3732 if (eptr >= md->end_subject)
3733 {
3734 SCHECK_PARTIAL();
3735 MRRETURN(MATCH_NOMATCH);
3736 }
3737 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3738 eptr++;
3739 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3740 }
3741 break;
3742
3743 case OP_ALLANY:
3744 for (i = 1; i <= min; i++)
3745 {
3746 if (eptr >= md->end_subject)
3747 {
3748 SCHECK_PARTIAL();
3749 MRRETURN(MATCH_NOMATCH);
3750 }
3751 eptr++;
3752 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3753 }
3754 break;
3755
3756 case OP_ANYBYTE:
3757 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3758 eptr += min;
3759 break;
3760
3761 case OP_ANYNL:
3762 for (i = 1; i <= min; i++)
3763 {
3764 if (eptr >= md->end_subject)
3765 {
3766 SCHECK_PARTIAL();
3767 MRRETURN(MATCH_NOMATCH);
3768 }
3769 GETCHARINC(c, eptr);
3770 switch(c)
3771 {
3772 default: MRRETURN(MATCH_NOMATCH);
3773 case 0x000d:
3774 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3775 break;
3776
3777 case 0x000a:
3778 break;
3779
3780 case 0x000b:
3781 case 0x000c:
3782 case 0x0085:
3783 case 0x2028:
3784 case 0x2029:
3785 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3786 break;
3787 }
3788 }
3789 break;
3790
3791 case OP_NOT_HSPACE:
3792 for (i = 1; i <= min; i++)
3793 {
3794 if (eptr >= md->end_subject)
3795 {
3796 SCHECK_PARTIAL();
3797 MRRETURN(MATCH_NOMATCH);
3798 }
3799 GETCHARINC(c, eptr);
3800 switch(c)
3801 {
3802 default: break;
3803 case 0x09: /* HT */
3804 case 0x20: /* SPACE */
3805 case 0xa0: /* NBSP */
3806 case 0x1680: /* OGHAM SPACE MARK */
3807 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3808 case 0x2000: /* EN QUAD */
3809 case 0x2001: /* EM QUAD */
3810 case 0x2002: /* EN SPACE */
3811 case 0x2003: /* EM SPACE */
3812 case 0x2004: /* THREE-PER-EM SPACE */
3813 case 0x2005: /* FOUR-PER-EM SPACE */
3814 case 0x2006: /* SIX-PER-EM SPACE */
3815 case 0x2007: /* FIGURE SPACE */
3816 case 0x2008: /* PUNCTUATION SPACE */
3817 case 0x2009: /* THIN SPACE */
3818 case 0x200A: /* HAIR SPACE */
3819 case 0x202f: /* NARROW NO-BREAK SPACE */
3820 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3821 case 0x3000: /* IDEOGRAPHIC SPACE */
3822 MRRETURN(MATCH_NOMATCH);
3823 }
3824 }
3825 break;
3826
3827 case OP_HSPACE:
3828 for (i = 1; i <= min; i++)
3829 {
3830 if (eptr >= md->end_subject)
3831 {
3832 SCHECK_PARTIAL();
3833 MRRETURN(MATCH_NOMATCH);
3834 }
3835 GETCHARINC(c, eptr);
3836 switch(c)
3837 {
3838 default: MRRETURN(MATCH_NOMATCH);
3839 case 0x09: /* HT */
3840 case 0x20: /* SPACE */
3841 case 0xa0: /* NBSP */
3842 case 0x1680: /* OGHAM SPACE MARK */
3843 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3844 case 0x2000: /* EN QUAD */
3845 case 0x2001: /* EM QUAD */
3846 case 0x2002: /* EN SPACE */
3847 case 0x2003: /* EM SPACE */
3848 case 0x2004: /* THREE-PER-EM SPACE */
3849 case 0x2005: /* FOUR-PER-EM SPACE */
3850 case 0x2006: /* SIX-PER-EM SPACE */
3851 case 0x2007: /* FIGURE SPACE */
3852 case 0x2008: /* PUNCTUATION SPACE */
3853 case 0x2009: /* THIN SPACE */
3854 case 0x200A: /* HAIR SPACE */
3855 case 0x202f: /* NARROW NO-BREAK SPACE */
3856 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3857 case 0x3000: /* IDEOGRAPHIC SPACE */
3858 break;
3859 }
3860 }
3861 break;
3862
3863 case OP_NOT_VSPACE:
3864 for (i = 1; i <= min; i++)
3865 {
3866 if (eptr >= md->end_subject)
3867 {
3868 SCHECK_PARTIAL();
3869 MRRETURN(MATCH_NOMATCH);
3870 }
3871 GETCHARINC(c, eptr);
3872 switch(c)
3873 {
3874 default: break;
3875 case 0x0a: /* LF */
3876 case 0x0b: /* VT */
3877 case 0x0c: /* FF */
3878 case 0x0d: /* CR */
3879 case 0x85: /* NEL */
3880 case 0x2028: /* LINE SEPARATOR */
3881 case 0x2029: /* PARAGRAPH SEPARATOR */
3882 MRRETURN(MATCH_NOMATCH);
3883 }
3884 }
3885 break;
3886
3887 case OP_VSPACE:
3888 for (i = 1; i <= min; i++)
3889 {
3890 if (eptr >= md->end_subject)
3891 {
3892 SCHECK_PARTIAL();
3893 MRRETURN(MATCH_NOMATCH);
3894 }
3895 GETCHARINC(c, eptr);
3896 switch(c)
3897 {
3898 default: MRRETURN(MATCH_NOMATCH);
3899 case 0x0a: /* LF */
3900 case 0x0b: /* VT */
3901 case 0x0c: /* FF */
3902 case 0x0d: /* CR */
3903 case 0x85: /* NEL */
3904 case 0x2028: /* LINE SEPARATOR */
3905 case 0x2029: /* PARAGRAPH SEPARATOR */
3906 break;
3907 }
3908 }
3909 break;
3910
3911 case OP_NOT_DIGIT:
3912 for (i = 1; i <= min; i++)
3913 {
3914 if (eptr >= md->end_subject)
3915 {
3916 SCHECK_PARTIAL();
3917 MRRETURN(MATCH_NOMATCH);
3918 }
3919 GETCHARINC(c, eptr);
3920 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3921 MRRETURN(MATCH_NOMATCH);
3922 }
3923 break;
3924
3925 case OP_DIGIT:
3926 for (i = 1; i <= min; i++)
3927 {
3928 if (eptr >= md->end_subject)
3929 {
3930 SCHECK_PARTIAL();
3931 MRRETURN(MATCH_NOMATCH);
3932 }
3933 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3934 MRRETURN(MATCH_NOMATCH);
3935 /* No need to skip more bytes - we know it's a 1-byte character */
3936 }
3937 break;
3938
3939 case OP_NOT_WHITESPACE:
3940 for (i = 1; i <= min; i++)
3941 {
3942 if (eptr >= md->end_subject)
3943 {
3944 SCHECK_PARTIAL();
3945 MRRETURN(MATCH_NOMATCH);
3946 }
3947 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3948 MRRETURN(MATCH_NOMATCH);
3949 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3950 }
3951 break;
3952
3953 case OP_WHITESPACE:
3954 for (i = 1; i <= min; i++)
3955 {
3956 if (eptr >= md->end_subject)
3957 {
3958 SCHECK_PARTIAL();
3959 MRRETURN(MATCH_NOMATCH);
3960 }
3961 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3962 MRRETURN(MATCH_NOMATCH);
3963 /* No need to skip more bytes - we know it's a 1-byte character */
3964 }
3965 break;
3966
3967 case OP_NOT_WORDCHAR:
3968 for (i = 1; i <= min; i++)
3969 {
3970 if (eptr >= md->end_subject)
3971 {
3972 SCHECK_PARTIAL();
3973 MRRETURN(MATCH_NOMATCH);
3974 }
3975 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3976 MRRETURN(MATCH_NOMATCH);
3977 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3978 }
3979 break;
3980
3981 case OP_WORDCHAR:
3982 for (i = 1; i <= min; i++)
3983 {
3984 if (eptr >= md->end_subject)
3985 {
3986 SCHECK_PARTIAL();
3987 MRRETURN(MATCH_NOMATCH);
3988 }
3989 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3990 MRRETURN(MATCH_NOMATCH);
3991 /* No need to skip more bytes - we know it's a 1-byte character */
3992 }
3993 break;
3994
3995 default:
3996 RRETURN(PCRE_ERROR_INTERNAL);
3997 } /* End switch(ctype) */
3998
3999 else
4000 #endif /* SUPPORT_UTF8 */
4001
4002 /* Code for the non-UTF-8 case for minimum matching of operators other
4003 than OP_PROP and OP_NOTPROP. */
4004
4005 switch(ctype)
4006 {
4007 case OP_ANY:
4008 for (i = 1; i <= min; i++)
4009 {
4010 if (eptr >= md->end_subject)
4011 {
4012 SCHECK_PARTIAL();
4013 MRRETURN(MATCH_NOMATCH);
4014 }
4015 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4016 eptr++;
4017 }
4018 break;
4019
4020 case OP_ALLANY:
4021 if (eptr > md->end_subject - min)
4022 {
4023 SCHECK_PARTIAL();
4024 MRRETURN(MATCH_NOMATCH);
4025 }
4026 eptr += min;
4027 break;
4028
4029 case OP_ANYBYTE:
4030 if (eptr > md->end_subject - min)
4031 {
4032 SCHECK_PARTIAL();
4033 MRRETURN(MATCH_NOMATCH);
4034 }
4035 eptr += min;
4036 break;
4037
4038 case OP_ANYNL:
4039 for (i = 1; i <= min; i++)
4040 {
4041 if (eptr >= md->end_subject)
4042 {
4043 SCHECK_PARTIAL();
4044 MRRETURN(MATCH_NOMATCH);
4045 }
4046 switch(*eptr++)
4047 {
4048 default: MRRETURN(MATCH_NOMATCH);
4049 case 0x000d:
4050 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4051 break;
4052 case 0x000a:
4053 break;
4054
4055 case 0x000b:
4056 case 0x000c:
4057 case 0x0085:
4058 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4059 break;
4060 }
4061 }
4062 break;
4063
4064 case OP_NOT_HSPACE:
4065 for (i = 1; i <= min; i++)
4066 {
4067 if (eptr >= md->end_subject)
4068 {
4069 SCHECK_PARTIAL();
4070 MRRETURN(MATCH_NOMATCH);
4071 }
4072 switch(*eptr++)
4073 {
4074 default: break;
4075 case 0x09: /* HT */
4076 case 0x20: /* SPACE */
4077 case 0xa0: /* NBSP */
4078 MRRETURN(MATCH_NOMATCH);
4079 }
4080 }
4081 break;
4082
4083 case OP_HSPACE:
4084 for (i = 1; i <= min; i++)
4085 {
4086 if (eptr >= md->end_subject)
4087 {
4088 SCHECK_PARTIAL();
4089 MRRETURN(MATCH_NOMATCH);
4090 }
4091 switch(*eptr++)
4092 {
4093 default: MRRETURN(MATCH_NOMATCH);
4094 case 0x09: /* HT */
4095 case 0x20: /* SPACE */
4096 case 0xa0: /* NBSP */
4097 break;
4098 }
4099 }
4100 break;
4101
4102 case OP_NOT_VSPACE:
4103 for (i = 1; i <= min; i++)
4104 {
4105 if (eptr >= md->end_subject)
4106 {
4107 SCHECK_PARTIAL();
4108 MRRETURN(MATCH_NOMATCH);
4109 }
4110 switch(*eptr++)
4111 {
4112 default: break;
4113 case 0x0a: /* LF */
4114 case 0x0b: /* VT */
4115 case 0x0c: /* FF */
4116 case 0x0d: /* CR */
4117 case 0x85: /* NEL */
4118 MRRETURN(MATCH_NOMATCH);
4119 }
4120 }
4121 break;
4122
4123 case OP_VSPACE:
4124 for (i = 1; i <= min; i++)
4125 {
4126 if (eptr >= md->end_subject)
4127 {
4128 SCHECK_PARTIAL();
4129 MRRETURN(MATCH_NOMATCH);
4130 }
4131 switch(*eptr++)
4132 {
4133 default: MRRETURN(MATCH_NOMATCH);
4134 case 0x0a: /* LF */
4135 case 0x0b: /* VT */
4136 case 0x0c: /* FF */
4137 case 0x0d: /* CR */
4138 case 0x85: /* NEL */
4139 break;
4140 }
4141 }
4142 break;
4143
4144 case OP_NOT_DIGIT:
4145 for (i = 1; i <= min; i++)
4146 {
4147 if (eptr >= md->end_subject)
4148 {
4149 SCHECK_PARTIAL();
4150 MRRETURN(MATCH_NOMATCH);
4151 }
4152 if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4153 }
4154 break;
4155
4156 case OP_DIGIT:
4157 for (i = 1; i <= min; i++)
4158 {
4159 if (eptr >= md->end_subject)
4160 {
4161 SCHECK_PARTIAL();
4162 MRRETURN(MATCH_NOMATCH);
4163 }
4164 if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4165 }
4166 break;
4167
4168 case OP_NOT_WHITESPACE:
4169 for (i = 1; i <= min; i++)
4170 {
4171 if (eptr >= md->end_subject)
4172 {
4173 SCHECK_PARTIAL();
4174 MRRETURN(MATCH_NOMATCH);
4175 }
4176 if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4177 }
4178 break;
4179
4180 case OP_WHITESPACE:
4181 for (i = 1; i <= min; i++)
4182 {
4183 if (eptr >= md->end_subject)
4184 {
4185 SCHECK_PARTIAL();
4186 MRRETURN(MATCH_NOMATCH);
4187 }
4188 if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4189 }
4190 break;
4191
4192 case OP_NOT_WORDCHAR:
4193 for (i = 1; i <= min; i++)
4194 {
4195 if (eptr >= md->end_subject)
4196 {
4197 SCHECK_PARTIAL();
4198 MRRETURN(MATCH_NOMATCH);
4199 }
4200 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4201 MRRETURN(MATCH_NOMATCH);
4202 }
4203 break;
4204
4205 case OP_WORDCHAR:
4206 for (i = 1; i <= min; i++)
4207 {
4208 if (eptr >= md->end_subject)
4209 {
4210 SCHECK_PARTIAL();
4211 MRRETURN(MATCH_NOMATCH);
4212 }
4213 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4214 MRRETURN(MATCH_NOMATCH);
4215 }
4216 break;
4217
4218 default:
4219 RRETURN(PCRE_ERROR_INTERNAL);
4220 }
4221 }
4222
4223 /* If min = max, continue at the same level without recursing */
4224
4225 if (min == max) continue;
4226
4227 /* If minimizing, we have to test the rest of the pattern before each
4228 subsequent match. Again, separate the UTF-8 case for speed, and also
4229 separate the UCP cases. */
4230
4231 if (minimize)
4232 {
4233 #ifdef SUPPORT_UCP
4234 if (prop_type >= 0)
4235 {
4236 switch(prop_type)
4237 {
4238 case PT_ANY:
4239 for (fi = min;; fi++)
4240 {
4241 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4242 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4243 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4244 if (eptr >= md->end_subject)
4245 {
4246 SCHECK_PARTIAL();
4247 MRRETURN(MATCH_NOMATCH);
4248 }
4249 GETCHARINCTEST(c, eptr);
4250 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4251 }
4252 /* Control never gets here */
4253
4254 case PT_LAMP:
4255 for (fi = min;; fi++)
4256 {
4257 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4258 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4259 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4260 if (eptr >= md->end_subject)
4261 {
4262 SCHECK_PARTIAL();
4263 MRRETURN(MATCH_NOMATCH);
4264 }
4265 GETCHARINCTEST(c, eptr);
4266 prop_chartype = UCD_CHARTYPE(c);
4267 if ((prop_chartype == ucp_Lu ||
4268 prop_chartype == ucp_Ll ||
4269 prop_chartype == ucp_Lt) == prop_fail_result)
4270 MRRETURN(MATCH_NOMATCH);
4271 }
4272 /* Control never gets here */
4273
4274 case PT_GC:
4275 for (fi = min;; fi++)
4276 {
4277 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4278 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4279 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4280 if (eptr >= md->end_subject)
4281 {
4282 SCHECK_PARTIAL();
4283 MRRETURN(MATCH_NOMATCH);
4284 }
4285 GETCHARINCTEST(c, eptr);
4286 prop_category = UCD_CATEGORY(c);
4287 if ((prop_category == prop_value) == prop_fail_result)
4288 MRRETURN(MATCH_NOMATCH);
4289 }
4290 /* Control never gets here */
4291
4292 case PT_PC:
4293 for (fi = min;; fi++)
4294 {
4295 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4296 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4297 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4298 if (eptr >= md->end_subject)
4299 {
4300 SCHECK_PARTIAL();
4301 MRRETURN(MATCH_NOMATCH);
4302 }
4303 GETCHARINCTEST(c, eptr);
4304 prop_chartype = UCD_CHARTYPE(c);
4305 if ((prop_chartype == prop_value) == prop_fail_result)
4306 MRRETURN(MATCH_NOMATCH);
4307 }
4308 /* Control never gets here */
4309
4310 case PT_SC:
4311 for (fi = min;; fi++)
4312 {
4313 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4314 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4315 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4316 if (eptr >= md->end_subject)
4317 {
4318 SCHECK_PARTIAL();
4319 MRRETURN(MATCH_NOMATCH);
4320 }
4321 GETCHARINCTEST(c, eptr);
4322 prop_script = UCD_SCRIPT(c);
4323 if ((prop_script == prop_value) == prop_fail_result)
4324 MRRETURN(MATCH_NOMATCH);
4325 }
4326 /* Control never gets here */
4327
4328 case PT_ALNUM:
4329 for (fi = min;; fi++)
4330 {
4331 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4332 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4333 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4334 if (eptr >= md->end_subject)
4335 {
4336 SCHECK_PARTIAL();
4337 MRRETURN(MATCH_NOMATCH);
4338 }
4339 GETCHARINCTEST(c, eptr);
4340 prop_category = UCD_CATEGORY(c);
4341 if ((prop_category == ucp_L || prop_category == ucp_N)
4342 == prop_fail_result)
4343 MRRETURN(MATCH_NOMATCH);
4344 }
4345 /* Control never gets here */
4346
4347 case PT_SPACE: /* Perl space */
4348 for (fi = min;; fi++)
4349 {
4350 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4351 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4352 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4353 if (eptr >= md->end_subject)
4354 {
4355 SCHECK_PARTIAL();
4356 MRRETURN(MATCH_NOMATCH);
4357 }
4358 GETCHARINCTEST(c, eptr);
4359 prop_category = UCD_CATEGORY(c);
4360 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4361 c == CHAR_FF || c == CHAR_CR)
4362 == prop_fail_result)
4363 MRRETURN(MATCH_NOMATCH);
4364 }
4365 /* Control never gets here */
4366
4367 case PT_PXSPACE: /* POSIX space */
4368 for (fi = min;; fi++)
4369 {
4370 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4371 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4372 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4373 if (eptr >= md->end_subject)
4374 {
4375 SCHECK_PARTIAL();
4376 MRRETURN(MATCH_NOMATCH);
4377 }
4378 GETCHARINCTEST(c, eptr);
4379 prop_category = UCD_CATEGORY(c);
4380 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4381 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4382 == prop_fail_result)
4383 MRRETURN(MATCH_NOMATCH);
4384 }
4385 /* Control never gets here */
4386
4387 case PT_WORD:
4388 for (fi = min;; fi++)
4389 {
4390 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4391 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4392 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4393 if (eptr >= md->end_subject)
4394 {
4395 SCHECK_PARTIAL();
4396 MRRETURN(MATCH_NOMATCH);
4397 }
4398 GETCHARINCTEST(c, eptr);
4399 prop_category = UCD_CATEGORY(c);
4400 if ((prop_category == ucp_L ||
4401 prop_category == ucp_N ||
4402 c == CHAR_UNDERSCORE)
4403 == prop_fail_result)
4404 MRRETURN(MATCH_NOMATCH);
4405 }
4406 /* Control never gets here */
4407
4408 /* This should never occur */
4409
4410 default:
4411 RRETURN(PCRE_ERROR_INTERNAL);
4412 }
4413 }
4414
4415 /* Match extended Unicode sequences. We will get here only if the
4416 support is in the binary; otherwise a compile-time error occurs. */
4417
4418 else if (ctype == OP_EXTUNI)
4419 {
4420 for (fi = min;; fi++)
4421 {
4422 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4423 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4424 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4425 if (eptr >= md->end_subject)
4426 {
4427 SCHECK_PARTIAL();
4428 MRRETURN(MATCH_NOMATCH);
4429 }
4430 GETCHARINCTEST(c, eptr);
4431 prop_category = UCD_CATEGORY(c);
4432 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4433 while (eptr < md->end_subject)
4434 {
4435 int len = 1;
4436 if (!utf8) c = *eptr;
4437 else { GETCHARLEN(c, eptr, len); }
4438 prop_category = UCD_CATEGORY(c);
4439 if (prop_category != ucp_M) break;
4440 eptr += len;
4441 }
4442 }
4443 }
4444
4445 else
4446 #endif /* SUPPORT_UCP */
4447
4448 #ifdef SUPPORT_UTF8
4449 /* UTF-8 mode */
4450 if (utf8)
4451 {
4452 for (fi = min;; fi++)
4453 {
4454 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4455 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4456 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4457 if (eptr >= md->end_subject)
4458 {
4459 SCHECK_PARTIAL();
4460 MRRETURN(MATCH_NOMATCH);
4461 }
4462 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4463 MRRETURN(MATCH_NOMATCH);
4464 GETCHARINC(c, eptr);
4465 switch(ctype)
4466 {
4467 case OP_ANY: /* This is the non-NL case */
4468 case OP_ALLANY:
4469 case OP_ANYBYTE:
4470 break;
4471
4472 case OP_ANYNL:
4473 switch(c)
4474 {
4475 default: MRRETURN(MATCH_NOMATCH);
4476 case 0x000d:
4477 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4478 break;
4479 case 0x000a:
4480 break;
4481
4482 case 0x000b:
4483 case 0x000c:
4484 case 0x0085:
4485 case 0x2028:
4486 case 0x2029:
4487 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4488 break;
4489 }
4490 break;
4491
4492 case OP_NOT_HSPACE:
4493 switch(c)
4494 {
4495 default: break;
4496 case 0x09: /* HT */
4497 case 0x20: /* SPACE */
4498 case 0xa0: /* NBSP */
4499 case 0x1680: /* OGHAM SPACE MARK */
4500 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4501 case 0x2000: /* EN QUAD */
4502 case 0x2001: /* EM QUAD */
4503 case 0x2002: /* EN SPACE */
4504 case 0x2003: /* EM SPACE */
4505 case 0x2004: /* THREE-PER-EM SPACE */
4506 case 0x2005: /* FOUR-PER-EM SPACE */
4507 case 0x2006: /* SIX-PER-EM SPACE */
4508 case 0x2007: /* FIGURE SPACE */
4509 case 0x2008: /* PUNCTUATION SPACE */
4510 case 0x2009: /* THIN SPACE */
4511 case 0x200A: /* HAIR SPACE */
4512 case 0x202f: /* NARROW NO-BREAK SPACE */
4513 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4514 case 0x3000: /* IDEOGRAPHIC SPACE */
4515 MRRETURN(MATCH_NOMATCH);
4516 }
4517 break;
4518
4519 case OP_HSPACE:
4520 switch(c)
4521 {
4522 default: MRRETURN(MATCH_NOMATCH);
4523 case 0x09: /* HT */
4524 case 0x20: /* SPACE */
4525 case 0xa0: /* NBSP */
4526 case 0x1680: /* OGHAM SPACE MARK */
4527 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4528 case 0x2000: /* EN QUAD */
4529 case 0x2001: /* EM QUAD */
4530 case 0x2002: /* EN SPACE */
4531 case 0x2003: /* EM SPACE */
4532 case 0x2004: /* THREE-PER-EM SPACE */
4533 case 0x2005: /* FOUR-PER-EM SPACE */
4534 case 0x2006: /* SIX-PER-EM SPACE */
4535 case 0x2007: /* FIGURE SPACE */
4536 case 0x2008: /* PUNCTUATION SPACE */
4537 case 0x2009: /* THIN SPACE */
4538 case 0x200A: /* HAIR SPACE */
4539 case 0x202f: /* NARROW NO-BREAK SPACE */
4540 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4541 case 0x3000: /* IDEOGRAPHIC SPACE */
4542 break;
4543 }
4544 break;
4545
4546 case OP_NOT_VSPACE:
4547 switch(c)
4548 {
4549 default: break;
4550 case 0x0a: /* LF */
4551 case 0x0b: /* VT */
4552 case 0x0c: /* FF */
4553 case 0x0d: /* CR */
4554 case 0x85: /* NEL */
4555 case 0x2028: /* LINE SEPARATOR */
4556 case 0x2029: /* PARAGRAPH SEPARATOR */
4557 MRRETURN(MATCH_NOMATCH);
4558 }
4559 break;
4560
4561 case OP_VSPACE:
4562 switch(c)
4563 {
4564 default: MRRETURN(MATCH_NOMATCH);
4565 case 0x0a: /* LF */
4566 case 0x0b: /* VT */
4567 case 0x0c: /* FF */
4568 case 0x0d: /* CR */
4569 case 0x85: /* NEL */
4570 case 0x2028: /* LINE SEPARATOR */
4571 case 0x2029: /* PARAGRAPH SEPARATOR */
4572 break;
4573 }
4574 break;
4575
4576 case OP_NOT_DIGIT:
4577 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4578 MRRETURN(MATCH_NOMATCH);
4579 break;
4580
4581 case OP_DIGIT:
4582 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4583 MRRETURN(MATCH_NOMATCH);
4584 break;
4585
4586 case OP_NOT_WHITESPACE:
4587 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4588 MRRETURN(MATCH_NOMATCH);
4589 break;
4590
4591 case OP_WHITESPACE:
4592 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4593 MRRETURN(MATCH_NOMATCH);
4594 break;
4595
4596 case OP_NOT_WORDCHAR:
4597 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4598 MRRETURN(MATCH_NOMATCH);
4599 break;
4600
4601 case OP_WORDCHAR:
4602 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4603 MRRETURN(MATCH_NOMATCH);
4604 break;
4605
4606 default:
4607 RRETURN(PCRE_ERROR_INTERNAL);
4608 }
4609 }
4610 }
4611 else
4612 #endif
4613 /* Not UTF-8 mode */
4614 {
4615 for (fi = min;; fi++)
4616 {
4617 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4618 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4619 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4620 if (eptr >= md->end_subject)
4621 {
4622 SCHECK_PARTIAL();
4623 MRRETURN(MATCH_NOMATCH);
4624 }
4625 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4626 MRRETURN(MATCH_NOMATCH);
4627 c = *eptr++;
4628 switch(ctype)
4629 {
4630 case OP_ANY: /* This is the non-NL case */
4631 case OP_ALLANY:
4632 case OP_ANYBYTE:
4633 break;
4634
4635 case OP_ANYNL:
4636 switch(c)
4637 {
4638 default: MRRETURN(MATCH_NOMATCH);
4639 case 0x000d:
4640 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4641 break;
4642
4643 case 0x000a:
4644 break;
4645
4646 case 0x000b:
4647 case 0x000c:
4648 case 0x0085:
4649 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4650 break;
4651 }
4652 break;
4653
4654 case OP_NOT_HSPACE:
4655 switch(c)
4656 {
4657 default: break;
4658 case 0x09: /* HT */
4659 case 0x20: /* SPACE */
4660 case 0xa0: /* NBSP */
4661 MRRETURN(MATCH_NOMATCH);
4662 }
4663 break;
4664
4665 case OP_HSPACE:
4666 switch(c)
4667 {
4668 default: MRRETURN(MATCH_NOMATCH);
4669 case 0x09: /* HT */
4670 case 0x20: /* SPACE */
4671 case 0xa0: /* NBSP */
4672 break;
4673 }
4674 break;
4675
4676 case OP_NOT_VSPACE:
4677 switch(c)
4678 {
4679 default: break;
4680 case 0x0a: /* LF */
4681 case 0x0b: /* VT */
4682 case 0x0c: /* FF */
4683 case 0x0d: /* CR */
4684 case 0x85: /* NEL */
4685 MRRETURN(MATCH_NOMATCH);
4686 }
4687 break;
4688
4689 case OP_VSPACE:
4690 switch(c)
4691 {
4692 default: MRRETURN(MATCH_NOMATCH);
4693 case 0x0a: /* LF */
4694 case 0x0b: /* VT */
4695 case 0x0c: /* FF */
4696 case 0x0d: /* CR */
4697 case 0x85: /* NEL */
4698 break;
4699 }
4700 break;
4701
4702 case OP_NOT_DIGIT:
4703 if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4704 break;
4705
4706 case OP_DIGIT:
4707 if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4708 break;
4709
4710 case OP_NOT_WHITESPACE:
4711 if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4712 break;
4713
4714 case OP_WHITESPACE:
4715 if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4716 break;
4717
4718 case OP_NOT_WORDCHAR:
4719 if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4720 break;
4721
4722 case OP_WORDCHAR:
4723 if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4724 break;
4725
4726 default:
4727 RRETURN(PCRE_ERROR_INTERNAL);
4728 }
4729 }
4730 }
4731 /* Control never gets here */
4732 }
4733
4734 /* If maximizing, it is worth using inline code for speed, doing the type
4735 test once at the start (i.e. keep it out of the loop). Again, keep the
4736 UTF-8 and UCP stuff separate. */
4737
4738 else
4739 {
4740 pp = eptr; /* Remember where we started */
4741
4742 #ifdef SUPPORT_UCP
4743 if (prop_type >= 0)
4744 {
4745 switch(prop_type)
4746 {
4747 case PT_ANY:
4748 for (i = min; i < max; i++)
4749 {
4750 int len = 1;
4751 if (eptr >= md->end_subject)
4752 {
4753 SCHECK_PARTIAL();
4754 break;
4755 }
4756 GETCHARLENTEST(c, eptr, len);
4757 if (prop_fail_result) break;
4758 eptr+= len;
4759 }
4760 break;
4761
4762 case PT_LAMP:
4763 for (i = min; i < max; i++)
4764 {
4765 int len = 1;
4766 if (eptr >= md->end_subject)
4767 {
4768 SCHECK_PARTIAL();
4769 break;
4770 }
4771 GETCHARLENTEST(c, eptr, len);
4772 prop_chartype = UCD_CHARTYPE(c);
4773 if ((prop_chartype == ucp_Lu ||
4774 prop_chartype == ucp_Ll ||
4775 prop_chartype == ucp_Lt) == prop_fail_result)
4776 break;
4777 eptr+= len;
4778 }
4779 break;
4780
4781 case PT_GC:
4782 for (i = min; i < max; i++)
4783 {
4784 int len = 1;
4785 if (eptr >= md->end_subject)
4786 {
4787 SCHECK_PARTIAL();
4788 break;
4789 }
4790 GETCHARLENTEST(c, eptr, len);
4791 prop_category = UCD_CATEGORY(c);
4792 if ((prop_category == prop_value) == prop_fail_result)
4793 break;
4794 eptr+= len;
4795 }
4796 break;
4797
4798 case PT_PC:
4799 for (i = min; i < max; i++)
4800 {
4801 int len = 1;
4802 if (eptr >= md->end_subject)
4803 {
4804 SCHECK_PARTIAL();
4805 break;
4806 }
4807 GETCHARLENTEST(c, eptr, len);
4808 prop_chartype = UCD_CHARTYPE(c);
4809 if ((prop_chartype == prop_value) == prop_fail_result)
4810 break;
4811 eptr+= len;
4812 }
4813 break;
4814
4815 case PT_SC:
4816 for (i = min; i < max; i++)
4817 {
4818 int len = 1;
4819 if (eptr >= md->end_subject)
4820 {
4821 SCHECK_PARTIAL();
4822 break;
4823 }
4824 GETCHARLENTEST(c, eptr, len);
4825 prop_script = UCD_SCRIPT(c);
4826 if ((prop_script == prop_value) == prop_fail_result)
4827 break;
4828 eptr+= len;
4829 }
4830 break;
4831
4832 case PT_ALNUM:
4833 for (i = min; i < max; i++)
4834 {
4835 int len = 1;
4836 if (eptr >= md->end_subject)
4837 {
4838 SCHECK_PARTIAL();
4839 break;
4840 }
4841 GETCHARLENTEST(c, eptr, len);
4842 prop_category = UCD_CATEGORY(c);
4843 if ((prop_category == ucp_L || prop_category == ucp_N)
4844 == prop_fail_result)
4845 break;
4846 eptr+= len;
4847 }
4848 break;
4849
4850 case PT_SPACE: /* Perl space */
4851 for (i = min; i < max; i++)
4852 {
4853 int len = 1;
4854 if (eptr >= md->end_subject)
4855 {
4856 SCHECK_PARTIAL();
4857 break;
4858 }
4859 GETCHARLENTEST(c, eptr, len);
4860 prop_category = UCD_CATEGORY(c);
4861 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4862 c == CHAR_FF || c == CHAR_CR)
4863 == prop_fail_result)
4864 break;
4865 eptr+= len;
4866 }
4867 break;
4868
4869 case PT_PXSPACE: /* POSIX space */
4870 for (i = min; i < max; i++)
4871 {
4872 int len = 1;
4873 if (eptr >= md->end_subject)
4874 {
4875 SCHECK_PARTIAL();
4876 break;
4877 }
4878 GETCHARLENTEST(c, eptr, len);
4879 prop_category = UCD_CATEGORY(c);
4880 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4881 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4882 == prop_fail_result)
4883 break;
4884 eptr+= len;
4885 }
4886 break;
4887
4888 case PT_WORD:
4889 for (i = min; i < max; i++)
4890 {
4891 int len = 1;
4892 if (eptr >= md->end_subject)
4893 {
4894 SCHECK_PARTIAL();
4895 break;
4896 }
4897 GETCHARLENTEST(c, eptr, len);
4898 prop_category = UCD_CATEGORY(c);
4899 if ((prop_category == ucp_L || prop_category == ucp_N ||
4900 c == CHAR_UNDERSCORE) == prop_fail_result)
4901 break;
4902 eptr+= len;
4903 }
4904 break;
4905
4906 default:
4907 RRETURN(PCRE_ERROR_INTERNAL);
4908 }
4909
4910 /* eptr is now past the end of the maximum run */
4911
4912 if (possessive) continue;
4913 for(;;)
4914 {
4915 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4916 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4917 if (eptr-- == pp) break; /* Stop if tried at original pos */
4918 if (utf8) BACKCHAR(eptr);
4919 }
4920 }
4921
4922 /* Match extended Unicode sequences. We will get here only if the
4923 support is in the binary; otherwise a compile-time error occurs. */
4924
4925 else if (ctype == OP_EXTUNI)
4926 {
4927 for (i = min; i < max; i++)
4928 {
4929 if (eptr >= md->end_subject)
4930 {
4931 SCHECK_PARTIAL();
4932 break;
4933 }
4934 GETCHARINCTEST(c, eptr);
4935 prop_category = UCD_CATEGORY(c);
4936 if (prop_category == ucp_M) break;
4937 while (eptr < md->end_subject)
4938 {
4939 int len = 1;
4940 if (!utf8) c = *eptr; else
4941 {
4942 GETCHARLEN(c, eptr, len);
4943 }
4944 prop_category = UCD_CATEGORY(c);
4945 if (prop_category != ucp_M) break;
4946 eptr += len;
4947 }
4948 }
4949
4950 /* eptr is now past the end of the maximum run */
4951
4952 if (possessive) continue;
4953
4954 for(;;)
4955 {
4956 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4957 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4958 if (eptr-- == pp) break; /* Stop if tried at original pos */
4959 for (;;) /* Move back over one extended */
4960 {
4961 int len = 1;
4962 if (!utf8) c = *eptr; else
4963 {
4964 BACKCHAR(eptr);
4965 GETCHARLEN(c, eptr, len);
4966 }
4967 prop_category = UCD_CATEGORY(c);
4968 if (prop_category != ucp_M) break;
4969 eptr--;
4970 }
4971 }
4972 }
4973
4974 else
4975 #endif /* SUPPORT_UCP */
4976
4977 #ifdef SUPPORT_UTF8
4978 /* UTF-8 mode */
4979
4980 if (utf8)
4981 {
4982 switch(ctype)
4983 {
4984 case OP_ANY:
4985 if (max < INT_MAX)
4986 {
4987 for (i = min; i < max; i++)
4988 {
4989 if (eptr >= md->end_subject)
4990 {
4991 SCHECK_PARTIAL();
4992 break;
4993 }
4994 if (IS_NEWLINE(eptr)) break;
4995 eptr++;
4996 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4997 }
4998 }
4999
5000 /* Handle unlimited UTF-8 repeat */
5001
5002 else
5003 {
5004 for (i = min; i < max; i++)
5005 {
5006 if (eptr >= md->end_subject)
5007 {
5008 SCHECK_PARTIAL();
5009 break;
5010 }
5011 if (IS_NEWLINE(eptr)) break;
5012 eptr++;
5013 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5014 }
5015 }
5016 break;
5017
5018 case OP_ALLANY:
5019 if (max < INT_MAX)
5020 {
5021 for (i = min; i < max; i++)
5022 {
5023 if (eptr >= md->end_subject)
5024 {
5025 SCHECK_PARTIAL();
5026 break;
5027 }
5028 eptr++;
5029 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5030 }
5031 }
5032 else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5033 break;
5034
5035 /* The byte case is the same as non-UTF8 */
5036
5037 case OP_ANYBYTE:
5038 c = max - min;
5039 if (c > (unsigned int)(md->end_subject - eptr))
5040 {
5041 eptr = md->end_subject;
5042 SCHECK_PARTIAL();
5043 }
5044 else eptr += c;
5045 break;
5046
5047 case OP_ANYNL:
5048 for (i = min; i < max; i++)
5049 {
5050 int len = 1;
5051 if (eptr >= md->end_subject)
5052 {
5053 SCHECK_PARTIAL();
5054 break;
5055 }
5056 GETCHARLEN(c, eptr, len);
5057 if (c == 0x000d)
5058 {
5059 if (++eptr >= md->end_subject) break;
5060 if (*eptr == 0x000a) eptr++;
5061 }
5062 else
5063 {
5064 if (c != 0x000a &&
5065 (md->bsr_anycrlf ||
5066 (c != 0x000b && c != 0x000c &&
5067 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5068 break;
5069 eptr += len;
5070 }
5071 }
5072 break;
5073
5074 case OP_NOT_HSPACE:
5075 case OP_HSPACE:
5076 for (i = min; i < max; i++)
5077 {
5078 BOOL gotspace;
5079 int len = 1;
5080 if (eptr >= md->end_subject)
5081 {
5082 SCHECK_PARTIAL();
5083 break;
5084 }
5085 GETCHARLEN(c, eptr, len);
5086 switch(c)
5087 {
5088 default: gotspace = FALSE; break;
5089 case 0x09: /* HT */
5090 case 0x20: /* SPACE */
5091 case 0xa0: /* NBSP */
5092 case 0x1680: /* OGHAM SPACE MARK */
5093 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5094 case 0x2000: /* EN QUAD */
5095 case 0x2001: /* EM QUAD */
5096 case 0x2002: /* EN SPACE */
5097 case 0x2003: /* EM SPACE */
5098 case 0x2004: /* THREE-PER-EM SPACE */
5099 case 0x2005: /* FOUR-PER-EM SPACE */
5100 case 0x2006: /* SIX-PER-EM SPACE */
5101 case 0x2007: /* FIGURE SPACE */
5102 case 0x2008: /* PUNCTUATION SPACE */
5103 case 0x2009: /* THIN SPACE */
5104 case 0x200A: /* HAIR SPACE */
5105 case 0x202f: /* NARROW NO-BREAK SPACE */
5106 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5107 case 0x3000: /* IDEOGRAPHIC SPACE */
5108 gotspace = TRUE;
5109 break;
5110 }
5111 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5112 eptr += len;
5113 }
5114 break;
5115
5116 case OP_NOT_VSPACE:
5117 case OP_VSPACE:
5118 for (i = min; i < max; i++)
5119 {
5120 BOOL gotspace;
5121 int len = 1;
5122 if (eptr >= md->end_subject)
5123 {
5124 SCHECK_PARTIAL();
5125 break;
5126 }
5127 GETCHARLEN(c, eptr, len);
5128 switch(c)
5129 {
5130 default: gotspace = FALSE; break;
5131 case 0x0a: /* LF */
5132 case 0x0b: /* VT */
5133 case 0x0c: /* FF */
5134 case 0x0d: /* CR */
5135 case 0x85: /* NEL */
5136 case 0x2028: /* LINE SEPARATOR */
5137 case 0x2029: /* PARAGRAPH SEPARATOR */
5138 gotspace = TRUE;
5139 break;
5140 }
5141 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5142 eptr += len;
5143 }
5144 break;
5145
5146 case OP_NOT_DIGIT:
5147 for (i = min; i < max; i++)
5148 {
5149 int len = 1;
5150 if (eptr >= md->end_subject)
5151 {
5152 SCHECK_PARTIAL();
5153 break;
5154 }
5155 GETCHARLEN(c, eptr, len);
5156 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5157 eptr+= len;
5158 }
5159 break;
5160
5161 case OP_DIGIT:
5162 for (i = min; i < max; i++)
5163 {
5164 int len = 1;
5165 if (eptr >= md->end_subject)
5166 {
5167 SCHECK_PARTIAL();
5168 break;
5169 }
5170 GETCHARLEN(c, eptr, len);
5171 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5172 eptr+= len;
5173 }
5174 break;
5175
5176 case OP_NOT_WHITESPACE:
5177 for (i = min; i < max; i++)
5178 {
5179 int len = 1;
5180 if (eptr >= md->end_subject)
5181 {
5182 SCHECK_PARTIAL();
5183 break;
5184 }
5185 GETCHARLEN(c, eptr, len);
5186 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5187 eptr+= len;
5188 }
5189 break;
5190
5191 case OP_WHITESPACE:
5192 for (i = min; i < max; i++)
5193 {
5194 int len = 1;
5195 if (eptr >= md->end_subject)
5196 {
5197 SCHECK_PARTIAL();
5198 break;
5199 }
5200 GETCHARLEN(c, eptr, len);
5201 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5202 eptr+= len;
5203 }
5204 break;
5205
5206 case OP_NOT_WORDCHAR:
5207 for (i = min; i < max; i++)
5208 {
5209 int len = 1;
5210 if (eptr >= md->end_subject)
5211 {
5212 SCHECK_PARTIAL();
5213 break;
5214 }
5215 GETCHARLEN(c, eptr, len);
5216 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5217 eptr+= len;
5218 }
5219 break;
5220
5221 case OP_WORDCHAR:
5222 for (i = min; i < max; i++)
5223 {
5224 int len = 1;
5225 if (eptr >= md->end_subject)
5226 {
5227 SCHECK_PARTIAL();
5228 break;
5229 }
5230 GETCHARLEN(c, eptr, len);
5231 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5232 eptr+= len;
5233 }
5234 break;
5235
5236 default:
5237 RRETURN(PCRE_ERROR_INTERNAL);
5238 }
5239
5240 /* eptr is now past the end of the maximum run */
5241
5242 if (possessive) continue;
5243 for(;;)
5244 {
5245 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
5246 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5247 if (eptr-- == pp) break; /* Stop if tried at original pos */
5248 BACKCHAR(eptr);
5249 }
5250 }
5251 else
5252 #endif /* SUPPORT_UTF8 */
5253
5254 /* Not UTF-8 mode */
5255 {
5256 switch(ctype)
5257 {
5258 case OP_ANY:
5259 for (i = min; i < max; i++)
5260 {
5261 if (eptr >= md->end_subject)
5262 {
5263 SCHECK_PARTIAL();
5264 break;
5265 }
5266 if (IS_NEWLINE(eptr)) break;
5267 eptr++;
5268 }
5269 break;
5270
5271 case OP_ALLANY:
5272 case OP_ANYBYTE:
5273 c = max - min;
5274 if (c > (unsigned int)(md->end_subject - eptr))
5275 {
5276 eptr = md->end_subject;
5277 SCHECK_PARTIAL();
5278 }
5279 else eptr += c;
5280 break;
5281
5282 case OP_ANYNL:
5283 for (i = min; i < max; i++)
5284 {
5285 if (eptr >= md->end_subject)
5286 {
5287 SCHECK_PARTIAL();
5288 break;
5289 }
5290 c = *eptr;
5291 if (c == 0x000d)
5292 {
5293 if (++eptr >= md->end_subject) break;
5294 if (*eptr == 0x000a) eptr++;
5295 }
5296 else
5297 {
5298 if (c != 0x000a &&
5299 (md->bsr_anycrlf ||
5300 (c != 0x000b && c != 0x000c && c != 0x0085)))
5301 break;
5302 eptr++;
5303 }
5304 }
5305 break;
5306
5307 case OP_NOT_HSPACE:
5308 for (i = min; i < max; i++)
5309 {
5310 if (eptr >= md->end_subject)
5311 {
5312 SCHECK_PARTIAL();
5313 break;
5314 }
5315 c = *eptr;
5316 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
5317 eptr++;
5318 }
5319 break;
5320
5321 case OP_HSPACE:
5322 for (i = min; i < max; i++)
5323 {
5324 if (eptr >= md->end_subject)
5325 {
5326 SCHECK_PARTIAL();
5327 break;
5328 }
5329 c = *eptr;
5330 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
5331 eptr++;
5332 }
5333 break;
5334
5335 case OP_NOT_VSPACE:
5336 for (i = min; i < max; i++)
5337 {
5338 if (eptr >= md->end_subject)
5339 {
5340 SCHECK_PARTIAL();
5341 break;
5342 }
5343 c = *eptr;
5344 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
5345 break;
5346 eptr++;
5347 }
5348 break;
5349
5350 case OP_VSPACE:
5351 for (i = min; i < max; i++)
5352 {
5353 if (eptr >= md->end_subject)
5354 {
5355 SCHECK_PARTIAL();
5356 break;
5357 }
5358 c = *eptr;
5359 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
5360 break;
5361 eptr++;
5362 }
5363 break;
5364
5365 case OP_NOT_DIGIT:
5366 for (i = min; i < max; i++)
5367 {
5368 if (eptr >= md->end_subject)
5369 {
5370 SCHECK_PARTIAL();
5371 break;
5372 }
5373 if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
5374 eptr++;
5375 }
5376 break;
5377
5378 case OP_DIGIT:
5379 for (i = min; i < max; i++)
5380 {
5381 if (eptr >= md->end_subject)
5382 {
5383 SCHECK_PARTIAL();
5384 break;
5385 }
5386 if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
5387 eptr++;
5388 }
5389 break;
5390
5391 case OP_NOT_WHITESPACE:
5392 for (i = min; i < max; i++)
5393 {
5394 if (eptr >= md->end_subject)
5395 {
5396 SCHECK_PARTIAL();
5397 break;
5398 }
5399 if ((md->ctypes[*eptr] & ctype_space) != 0) break;
5400 eptr++;
5401 }
5402 break;
5403
5404 case OP_WHITESPACE:
5405 for (i = min; i < max; i++)
5406 {
5407 if (eptr >= md->end_subject)
5408 {
5409 SCHECK_PARTIAL();
5410 break;
5411 }
5412 if ((md->ctypes[*eptr] & ctype_space) == 0) break;
5413 eptr++;
5414 }
5415 break;
5416
5417 case OP_NOT_WORDCHAR:
5418 for (i = min; i < max; i++)
5419 {
5420 if (eptr >= md->end_subject)
5421 {
5422 SCHECK_PARTIAL();
5423 break;
5424 }
5425 if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5426 eptr++;
5427 }
5428 break;
5429
5430 case OP_WORDCHAR:
5431 for (i = min; i < max; i++)
5432 {
5433 if (eptr >= md->end_subject)
5434 {
5435 SCHECK_PARTIAL();
5436 break;
5437 }
5438 if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5439 eptr++;
5440 }
5441 break;
5442
5443 default:
5444 RRETURN(PCRE_ERROR_INTERNAL);
5445 }
5446
5447 /* eptr is now past the end of the maximum run */
5448
5449 if (possessive) continue;
5450 while (eptr >= pp)
5451 {
5452 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5453 eptr--;
5454 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5455 }
5456 }
5457
5458 /* Get here if we can't make it match with any permitted repetitions */
5459
5460 MRRETURN(MATCH_NOMATCH);
5461 }
5462 /* Control never gets here */
5463
5464 /* There's been some horrible disaster. Arrival here can only mean there is
5465 something seriously wrong in the code above or the OP_xxx definitions. */
5466
5467 default:
5468 DPRINTF(("Unknown opcode %d\n", *ecode));
5469 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5470 }
5471
5472 /* Do not stick any code in here without much thought; it is assumed
5473 that "continue" in the code above comes out to here to repeat the main
5474 loop. */
5475
5476 } /* End of main loop */
5477 /* Control never reaches here */
5478
5479
5480 /* When compiling to use the heap rather than the stack for recursive calls to
5481 match(), the RRETURN() macro jumps here. The number that is saved in
5482 frame->Xwhere indicates which label we actually want to return to. */
5483
5484 #ifdef NO_RECURSE
5485 #define LBL(val) case val: goto L_RM##val;
5486 HEAP_RETURN:
5487 switch (frame->Xwhere)
5488 {
5489 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5490 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5491 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5492 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5493 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5494 #ifdef SUPPORT_UTF8
5495 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5496 LBL(32) LBL(34) LBL(42) LBL(46)
5497 #ifdef SUPPORT_UCP
5498 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5499 LBL(59) LBL(60) LBL(61) LBL(62)
5500 #endif /* SUPPORT_UCP */
5501 #endif /* SUPPORT_UTF8 */
5502 default:
5503 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5504 return PCRE_ERROR_INTERNAL;
5505 }
5506 #undef LBL
5507 #endif /* NO_RECURSE */
5508 }
5509
5510
5511 /***************************************************************************
5512 ****************************************************************************
5513 RECURSION IN THE match() FUNCTION
5514
5515 Undefine all the macros that were defined above to handle this. */
5516
5517 #ifdef NO_RECURSE /* the names below are macros only in the heap-recursion build */
5518 #undef eptr /* presumably match()'s own arguments: eptr, ecode, offset_top, ims and eptrb are what RMATCH() is passed - TODO confirm against the #define list */
5519 #undef ecode
5520 #undef mstart
5521 #undef offset_top
5522 #undef ims /* must be gone before pcre_exec(), which declares its own local ims */
5523 #undef eptrb
5524 #undef flags /* pcre_exec() also declares a local called flags */
5525 /* The remaining groups look like match()'s working variables (pp, ctype, max and min are used that way in its repeat code). */
5526 #undef callpat
5527 #undef charptr
5528 #undef data
5529 #undef next
5530 #undef pp
5531 #undef prev
5532 #undef saved_eptr
5533
5534 #undef new_recursive
5535
5536 #undef cur_is_word
5537 #undef condition
5538 #undef prev_is_word
5539
5540 #undef original_ims
5541
5542 #undef ctype
5543 #undef length /* pcre_exec() has a parameter called length */
5544 #undef max
5545 #undef min
5546 #undef number
5547 #undef offset
5548 #undef op
5549 #undef save_capture_last
5550 #undef save_offset1
5551 #undef save_offset2
5552 #undef save_offset3
5553 #undef stacksave
5554
5555 #undef newptrb
5556
5557 #endif /* NO_RECURSE */
5558
5559 /* Unlike the names above, these two are defined as macros in both cases (with or without NO_RECURSE), so they are undefined unconditionally. */
5560
5561 #undef fc
5562 #undef fi
5563
5564 /***************************************************************************
5565 ***************************************************************************/
5566
5567
5568
5569 /*************************************************
5570 * Execute a Regular Expression *
5571 *************************************************/
5572
5573 /* This function applies a compiled re to a subject string and picks out
5574 portions of the string if it matches. Two elements in the vector are set for
5575 each substring: the offsets to the start and end of the substring.
5576
5577 Arguments:
5578 argument_re points to the compiled expression
5579 extra_data points to extra data or is NULL
5580 subject points to the subject string
5581 length length of subject string (may contain binary zeros)
5582 start_offset where to start in the subject string
5583 options option bits
5584 offsets points to a vector of ints to be filled in with offsets
5585 offsetcount the number of elements in the vector
5586
5587 Returns: > 0 => success; value is the number of elements filled in
5588 = 0 => success, but offsets is not big enough
5589 -1 => failed to match
5590 < -1 => some kind of unexpected problem
5591 */
5592
5593 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5594 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5595 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5596 int offsetcount)
5597 {
5598 int rc, resetcount, ocount;
5599 int first_byte = -1;
5600 int req_byte = -1;
5601 int req_byte2 = -1;
5602 int newline;
5603 unsigned long int ims;
5604 BOOL using_temporary_offsets = FALSE;
5605 BOOL anchored;
5606 BOOL startline;
5607 BOOL firstline;
5608 BOOL first_byte_caseless = FALSE;
5609 BOOL req_byte_caseless = FALSE;
5610 BOOL utf8;
5611 match_data match_block;
5612 match_data *md = &match_block;
5613 const uschar *tables;
5614 const uschar *start_bits = NULL;
5615 USPTR start_match = (USPTR)subject + start_offset;
5616 USPTR end_subject;
5617 USPTR start_partial = NULL;
5618 USPTR req_byte_ptr = start_match - 1;
5619
5620 pcre_study_data internal_study;
5621 const pcre_study_data *study;
5622
5623 real_pcre internal_re;
5624 const real_pcre *external_re = (const real_pcre *)argument_re;
5625 const real_pcre *re = external_re;
5626
5627 /* Plausibility checks */
5628
5629 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5630 if (re == NULL || subject == NULL ||
5631 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5632 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5633
5634 /* This information is for finding all the numbers associated with a given
5635 name, for condition testing. */
5636
5637 md->name_table = (uschar *)re + re->name_table_offset;
5638 md->name_count = re->name_count;
5639 md->name_entry_size = re->name_entry_size;
5640
5641 /* Fish out the optional data from the extra_data structure, first setting
5642 the default values. */
5643
5644 study = NULL;
5645 md->match_limit = MATCH_LIMIT;
5646 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5647 md->callout_data = NULL;
5648
5649 /* The table pointer is always in native byte order. */
5650
5651 tables = external_re->tables;
5652
5653 if (extra_data != NULL)
5654 {
5655 register unsigned int flags = extra_data->flags;
5656 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5657 study = (const pcre_study_data *)extra_data->study_data;
5658 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5659 md->match_limit = extra_data->match_limit;
5660 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5661 md->match_limit_recursion = extra_data->match_limit_recursion;
5662 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5663 md->callout_data = extra_data->callout_data;
5664 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5665 }
5666
5667 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5668 is a feature that makes it possible to save compiled regex and re-use them
5669 in other programs later. */
5670
5671 if (tables == NULL) tables = _pcre_default_tables;
5672
5673 /* Check that the first field in the block is the magic number. If it is not,
5674 test for a regex that was compiled on a host of opposite endianness. If this is
5675 the case, flipped values are put in internal_re and internal_study if there was
5676 study data too. */
5677
5678 if (re->magic_number != MAGIC_NUMBER)
5679 {
5680 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5681 if (re == NULL) return PCRE_ERROR_BADMAGIC;
5682 if (study != NULL) study = &internal_study;
5683 }
5684
5685 /* Set up other data */
5686
5687 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5688 startline = (re->flags & PCRE_STARTLINE) != 0;
5689 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5690
5691 /* The code starts after the real_pcre block and the capture name table. */
5692
5693 md->start_code = (const uschar *)external_re + re->name_table_offset +
5694 re->name_count * re->name_entry_size;
5695
5696 md->start_subject = (USPTR)subject;
5697 md->start_offset = start_offset;
5698 md->end_subject = md->start_subject + length;
5699 end_subject = md->end_subject;
5700
5701 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5702 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5703 md->use_ucp = (re->options & PCRE_UCP) != 0;
5704 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5705
5706 md->notbol = (options & PCRE_NOTBOL) != 0;
5707 md->noteol = (options & PCRE_NOTEOL) != 0;
5708 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5709 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5710 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5711 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5712 md->hitend = FALSE;
5713 md->mark = NULL; /* In case never set */
5714
5715 md->recursive = NULL; /* No recursion at top level */
5716
5717 md->lcc = tables + lcc_offset;
5718 md->ctypes = tables + ctypes_offset;
5719
5720 /* Handle different \R options. */
5721
5722 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5723 {
5724 case 0:
5725 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5726 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5727 else
5728 #ifdef BSR_ANYCRLF
5729 md->bsr_anycrlf = TRUE;
5730 #else
5731 md->bsr_anycrlf = FALSE;
5732 #endif
5733 break;
5734
5735 case PCRE_BSR_ANYCRLF:
5736 md->bsr_anycrlf = TRUE;
5737 break;
5738
5739 case PCRE_BSR_UNICODE:
5740 md->bsr_anycrlf = FALSE;
5741 break;
5742
5743 default: return PCRE_ERROR_BADNEWLINE;
5744 }
5745
5746 /* Handle different types of newline. The three bits give eight cases. If
5747 nothing is set at run time, whatever was used at compile time applies. */
5748
5749 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5750 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5751 {
5752 case 0: newline = NEWLINE; break; /* Compile-time default */
5753 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5754 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5755 case PCRE_NEWLINE_CR+
5756 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5757 case PCRE_NEWLINE_ANY: newline = -1; break;
5758 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5759 default: return PCRE_ERROR_BADNEWLINE;
5760 }
5761
5762 if (newline == -2)
5763 {
5764 md->nltype = NLTYPE_ANYCRLF;
5765 }
5766 else if (newline < 0)
5767 {
5768 md->nltype = NLTYPE_ANY;
5769 }
5770 else
5771 {
5772 md->nltype = NLTYPE_FIXED;
5773 if (newline > 255)
5774 {
5775 md->nllen = 2;
5776 md->nl[0] = (newline >> 8) & 255;
5777 md->nl[1] = newline & 255;
5778 }
5779 else
5780 {
5781 md->nllen = 1;
5782 md->nl[0] = newline;
5783 }
5784 }
5785
5786 /* Partial matching was originally supported only for a restricted set of
5787 regexes; from release 8.00 there are no restrictions, but the bits are still
5788 defined (though never set). So there's no harm in leaving this code. */
5789
5790 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5791 return PCRE_ERROR_BADPARTIAL;
5792
5793 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
5794 back the character offset. */
5795
5796 #ifdef SUPPORT_UTF8
5797 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5798 {
5799 if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5800 return PCRE_ERROR_BADUTF8;
5801 if (start_offset > 0 && start_offset < length)
5802 {
5803 int tb = ((USPTR)subject)[start_offset];
5804 if (tb > 127)
5805 {
5806 tb &= 0xc0;
5807 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
5808 }
5809 }
5810 }
5811 #endif
5812
5813 /* The ims options can vary during the matching as a result of the presence
5814 of (?ims) items in the pattern. They are kept in a local variable so that
5815 restoring at the exit of a group is easy. */
5816
5817 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5818
5819 /* If the expression has got more back references than the offsets supplied can
5820 hold, we get a temporary chunk of working store to use during the matching.
5821 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5822 of 3. */
5823
5824 ocount = offsetcount - (offsetcount % 3);
5825
5826 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5827 {
5828 ocount = re->top_backref * 3 + 3;
5829 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5830 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5831 using_temporary_offsets = TRUE;
5832 DPRINTF(("Got memory to hold back references\n"));
5833 }
5834 else md->offset_vector = offsets;
5835
5836 md->offset_end = ocount;
5837 md->offset_max = (2*ocount)/3;
5838 md->offset_overflow = FALSE;
5839 md->capture_last = -1;
5840
5841 /* Compute the minimum number of offsets that we need to reset each time. Doing
5842 this makes a huge difference to execution time when there aren't many brackets
5843 in the pattern. */
5844
5845 resetcount = 2 + re->top_bracket * 2;
5846 if (resetcount > offsetcount) resetcount = ocount;
5847
5848 /* Reset the working variable associated with each extraction. These should
5849 never be used unless previously set, but they get saved and restored, and so we
5850 initialize them to avoid reading uninitialized locations. */
5851
5852 if (md->offset_vector != NULL)
5853 {
5854 register int *iptr = md->offset_vector + ocount;
5855 register int *iend = iptr - resetcount/2 + 1;
5856 while (--iptr >= iend) *iptr = -1;
5857 }
5858
5859 /* Set up the first character to match, if available. The first_byte value is
5860 never set for an anchored regular expression, but the anchoring may be forced
5861 at run time, so we have to test for anchoring. The first char may be unset for
5862 an unanchored pattern, of course. If there's no first char and the pattern was
5863 studied, there may be a bitmap of possible first characters. */
5864
5865 if (!anchored)
5866 {
5867 if ((re->flags & PCRE_FIRSTSET) != 0)
5868 {
5869 first_byte = re->first_byte & 255;
5870 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5871 first_byte = md->lcc[first_byte];
5872 }
5873 else
5874 if (!startline && study != NULL &&
5875 (study->flags & PCRE_STUDY_MAPPED) != 0)
5876 start_bits = study->start_bits;
5877 }
5878
5879 /* For anchored or unanchored matches, there may be a "last known required
5880 character" set. */
5881
5882 if ((re->flags & PCRE_REQCHSET) != 0)
5883 {
5884 req_byte = re->req_byte & 255;
5885 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5886 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
5887 }
5888
5889
5890 /* ==========================================================================*/
5891
5892 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
5893 the loop runs just once. */
5894
5895 for(;;)
5896 {
5897 USPTR save_end_subject = end_subject;
5898 USPTR new_start_match;
5899
5900 /* Reset the maximum number of extractions we might see. */
5901
5902 if (md->offset_vector != NULL)
5903 {
5904 register int *iptr = md->offset_vector;
5905 register int *iend = iptr + resetcount;
5906 while (iptr < iend) *iptr++ = -1;
5907 }
5908
5909 /* If firstline is TRUE, the start of the match is constrained to the first
5910 line of a multiline string. That is, the match must be before or at the first
5911 newline. Implement this by temporarily adjusting end_subject so that we stop
5912 scanning at a newline. If the match fails at the newline, later code breaks
5913 this loop. */
5914
5915 if (firstline)
5916 {
5917 USPTR t = start_match;
5918 #ifdef SUPPORT_UTF8
5919 if (utf8)
5920 {
5921 while (t < md->end_subject && !IS_NEWLINE(t))
5922 {
5923 t++;
5924 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5925 }
5926 }
5927 else
5928 #endif
5929 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5930 end_subject = t;
5931 }
5932
5933 /* There are some optimizations that avoid running the match if a known
5934 starting point is not found, or if a known later character is not present.
5935 However, there is an option that disables these, for testing and for ensuring
5936 that all callouts do actually occur. */
5937
5938 if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5939 {
5940 /* Advance to a unique first byte if there is one. */
5941
5942 if (first_byte >= 0)
5943 {
5944 if (first_byte_caseless)
5945 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5946 start_match++;
5947 else
5948 while (start_match < end_subject && *start_match != first_byte)
5949 start_match++;
5950 }
5951
5952 /* Or to just after a linebreak for a multiline match */
5953
5954 else if (startline)
5955 {
5956 if (start_match > md->start_subject + start_offset)
5957 {
5958 #ifdef SUPPORT_UTF8
5959 if (utf8)
5960 {
5961 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5962 {
5963 start_match++;
5964 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5965 start_match++;
5966 }
5967 }
5968 else
5969 #endif
5970 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5971 start_match++;
5972
5973 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5974 and we are now at a LF, advance the match position by one more character.
5975 */
5976
5977 if (start_match[-1] == CHAR_CR &&
5978 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5979 start_match < end_subject &&
5980 *start_match == CHAR_NL)
5981 start_match++;
5982 }
5983 }
5984
5985 /* Or to a non-unique first byte after study */
5986
5987 else if (start_bits != NULL)
5988 {
5989 while (start_match < end_subject)
5990 {
5991 register unsigned int c = *start_match;
5992 if ((start_bits[c/8] & (1 << (c&7))) == 0)
5993 {
5994 start_match++;
5995 #ifdef SUPPORT_UTF8
5996 if (utf8)
5997 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5998 start_match++;
5999 #endif
6000 }
6001 else break;
6002 }
6003 }
6004 } /* Starting optimizations */
6005
6006 /* Restore fudged end_subject */
6007
6008 end_subject = save_end_subject;
6009
6010 /* The following two optimizations are disabled for partial matching or if
6011 disabling is explicitly requested. */
6012
6013 if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6014 {
6015 /* If the pattern was studied, a minimum subject length may be set. This is
6016 a lower bound; there need not be any string of that length that actually
6017 matches the pattern. Although the value is, strictly, in characters, we
6018 treat it as bytes to avoid spending too much time in this optimization. */
6019
6020 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6021 (pcre_uint32)(end_subject - start_match) < study->minlength)
6022 {
6023 rc = MATCH_NOMATCH;
6024 break;
6025 }
6026
6027 /* If req_byte is set, we know that that character must appear in the
6028 subject for the match to succeed. If the first character is set, req_byte
6029 must be later in the subject; otherwise the test starts at the match point.
6030 This optimization can save a huge amount of backtracking in patterns with
6031 nested unlimited repeats that aren't going to match. Writing separate code
6032 for cased/caseless versions makes it go faster, as does using an
6033 autoincrement and backing off on a match.
6034
6035 HOWEVER: when the subject string is very, very long, searching to its end
6036 can take a long time, and give bad performance on quite ordinary patterns.
6037 This showed up when somebody was matching something like /^\d+C/ on a
6038 32-megabyte string... so we don't do this when the string is sufficiently
6039 long. */
6040
6041 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
6042 {
6043 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
6044
6045 /* We don't need to repeat the search if we haven't yet reached the
6046 place we found it at last time. */
6047
6048 if (p > req_byte_ptr)
6049 {
6050 if (req_byte_caseless)
6051 {
6052 while (p < end_subject)
6053 {
6054 register int pp = *p++;
6055 if (pp == req_byte || pp == req_byte2) { p--; break; }
6056 }
6057 }
6058 else
6059 {
6060 while (p < end_subject)
6061 {
6062 if (*p++ == req_byte) { p--; break; }
6063 }
6064 }
6065
6066 /* If we can't find the required character, break the matching loop,
6067 forcing a match failure. */
6068
6069 if (p >= end_subject)
6070 {
6071 rc = MATCH_NOMATCH;
6072 break;
6073 }
6074
6075 /* If we have found the required character, save the point where we
6076 found it, so that we don't search again next time round the loop if
6077 the start hasn't passed this character yet. */
6078
6079 req_byte_ptr = p;
6080 }
6081 }
6082 }
6083
6084 #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6085 printf(">>>> Match against: ");
6086 pchars(start_match, end_subject - start_match, TRUE, md);
6087 printf("\n");
6088 #endif
6089
6090 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6091 first starting point for which a partial match was found. */
6092
6093 md->start_match_ptr = start_match;
6094 md->start_used_ptr = start_match;
6095 md->match_call_count = 0;
6096 rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
6097 0, 0);
6098 if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6099
6100 switch(rc)
6101 {
6102 /* SKIP passes back the next starting point explicitly, but if it is the
6103 same as the match we have just done, treat it as NOMATCH. */
6104
6105 case MATCH_SKIP:
6106 if (md->start_match_ptr != start_match)
6107 {
6108 new_start_match = md->start_match_ptr;
6109 break;
6110 }
6111 /* Fall through */
6112
6113 /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6114 the SKIP's arg was not found. We also treat this as NOMATCH. */
6115
6116 case MATCH_SKIP_ARG:
6117 /* Fall through */
6118
6119 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6120 exactly like PRUNE. */
6121
6122 case MATCH_NOMATCH:
6123 case MATCH_PRUNE:
6124 case MATCH_THEN:
6125 new_start_match = start_match + 1;
6126 #ifdef SUPPORT_UTF8
6127 if (utf8)
6128 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6129 new_start_match++;
6130 #endif
6131 break;
6132
6133 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6134
6135 case MATCH_COMMIT:
6136 rc = MATCH_NOMATCH;
6137 goto ENDLOOP;
6138
6139 /* Any other return is either a match, or some kind of error. */
6140
6141 default:
6142 goto ENDLOOP;
6143 }
6144
6145 /* Control reaches here for the various types of "no match at this point"
6146 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6147
6148 rc = MATCH_NOMATCH;
6149
6150 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6151 newline in the subject (though it may continue over the newline). Therefore,
6152 if we have just failed to match, starting at a newline, do not continue. */
6153
6154 if (firstline && IS_NEWLINE(start_match)) break;
6155
6156 /* Advance to new matching position */
6157
6158 start_match = new_start_match;
6159
6160 /* Break the loop if the pattern is anchored or if we have passed the end of
6161 the subject. */
6162
6163 if (anchored || start_match > end_subject) break;
6164
6165 /* If we have just passed a CR and we are now at a LF, and the pattern does
6166 not contain any explicit matches for \r or \n, and the newline option is CRLF
6167 or ANY or ANYCRLF, advance the match position by one more character. */
6168
6169 if (start_match[-1] == CHAR_CR &&
6170 start_match < end_subject &&
6171 *start_match == CHAR_NL &&
6172 (re->flags & PCRE_HASCRORLF) == 0 &&
6173 (md->nltype == NLTYPE_ANY ||
6174 md->nltype == NLTYPE_ANYCRLF ||
6175 md->nllen == 2))
6176 start_match++;
6177
6178 md->mark = NULL; /* Reset for start of next match attempt */
6179 } /* End of for(;;) "bumpalong" loop */
6180
6181 /* ==========================================================================*/
6182
6183 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6184 conditions is true:
6185
6186 (1) The pattern is anchored or the match was failed by (*COMMIT);
6187
6188 (2) We are past the end of the subject;
6189
6190 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6191 this option requests that a match occur at or before the first newline in
6192 the subject.
6193
6194 When we have a match and the offset vector is big enough to deal with any
6195 backreferences, captured substring offsets will already be set up. In the case
6196 where we had to get some local store to hold offsets for backreference
6197 processing, copy those that we can. In this case there need not be overflow if
6198 certain parts of the pattern were not used, even though there are more
6199 capturing parentheses than vector slots. */
6200
6201 ENDLOOP:
6202
6203 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6204 {
6205 if (using_temporary_offsets)
6206 {
6207 if (offsetcount >= 4)
6208 {
6209 memcpy(offsets + 2, md->offset_vector + 2,
6210 (offsetcount - 2) * sizeof(int));
6211 DPRINTF(("Copied offsets from temporary memory\n"));
6212 }
6213 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6214 DPRINTF(("Freeing temporary memory\n"));
6215 (pcre_free)(md->offset_vector);
6216 }
6217
6218 /* Set the return code to the number of captured strings, or 0 if there are
6219 too many to fit into the vector. */
6220
6221 rc = md->offset_overflow? 0 : md->end_offset_top/2;
6222
6223 /* If there is space, set up the whole thing as substring 0. The value of
6224 md->start_match_ptr might be modified if \K was encountered on the
6225 successful matching path. */
6226
6227 if (offsetcount < 2) rc = 0; else
6228 {
6229 offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6230 offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6231 }
6232
6233 DPRINTF((">>>> returning %d\n", rc));
6234 goto RETURN_MARK;
6235 }
6236
6237 /* Control gets here if there has been an error, or if the overall match
6238 attempt has failed at all permitted starting positions. */
6239
6240 if (using_temporary_offsets)
6241 {
6242 DPRINTF(("Freeing temporary memory\n"));
6243 (pcre_free)(md->offset_vector);
6244 }
6245
6246 /* For anything other than nomatch or partial match, just return the code. */
6247
6248 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6249 {
6250 DPRINTF((">>>> error: returning %d\n", rc));
6251 return rc;
6252 }
6253
6254 /* Handle partial matches - disable any mark data */
6255
6256 if (start_partial != NULL)
6257 {
6258 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6259 md->mark = NULL;
6260 if (offsetcount > 1)
6261 {
6262 offsets[0] = (int)(start_partial - (USPTR)subject);
6263 offsets[1] = (int)(end_subject - (USPTR)subject);
6264 }
6265 rc = PCRE_ERROR_PARTIAL;
6266 }
6267
6268 /* This is the classic nomatch case */
6269
6270 else
6271 {
6272 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6273 rc = PCRE_ERROR_NOMATCH;
6274 }
6275
6276 /* Return the MARK data if it has been requested. */
6277
6278 RETURN_MARK:
6279
6280 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6281 *(extra_data->mark) = (unsigned char *)(md->mark);
6282 return rc;
6283 }
6284
6285 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5